In [40]:
import pandas as pd
import numpy as np

In [41]:
def sigmoid(z):
    """Compute the logistic function 1 / (1 + exp(-z)) element-wise.

    Uses a numerically stable formulation: only exp(-|z|) is ever evaluated,
    so large-magnitude inputs cannot overflow (the naive exp(-z) overflows for
    very negative z and emits a RuntimeWarning).

    Args:
        z: Scalar or array-like of real values.

    Returns:
        A NumPy float scalar for scalar input, otherwise an ndarray with the
        same shape as ``z``; every value lies in [0, 1].
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) always lies in (0, 1], so it can never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically identical.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]

In [42]:
def predict(features, weights):
    """Return sigmoid(features . weights), the model's predicted probabilities."""
    linear_scores = np.dot(features, weights)
    return sigmoid(linear_scores)

In [43]:
def loss_function(features, labels, weights):
    """Mean binary cross-entropy of a logistic model.

    The loss is evaluated directly on the linear scores z = features . weights
    using the identity

        -y*log(s(z)) - (1 - y)*log(1 - s(z)) = log(1 + e^z) - y*z

    where s is the sigmoid. This avoids log(0) (inf / nan results) when the
    sigmoid saturates to exactly 0 or 1 for large |z|.

    Args:
        features: Array of shape (m, n), one row per sample.
        labels: Array-like of m binary targets (0 or 1).
        weights: Array of n model weights.

    Returns:
        The average cross-entropy over the m samples.
    """
    labels = np.asarray(labels, dtype=float)
    m = len(labels)
    scores = np.dot(features, weights)
    # logaddexp(0, z) computes log(1 + e^z) without overflowing for large z.
    per_sample = np.logaddexp(0.0, scores) - labels * scores
    return np.sum(per_sample) / m

In [44]:
def gradient_descent(features, labels, weights, learning_rate, epochs):
    """Fit logistic-regression weights with batch gradient descent.

    Each epoch computes predictions for all samples, forms the gradient
    features.T . (predictions - labels) / m, and takes one step of size
    ``learning_rate`` against it. The loss is printed every 100 epochs.

    Args:
        features: Array of shape (m, n), one row per sample.
        labels: Array of m binary targets.
        weights: Initial weights, array-like of length n. Not modified.
        learning_rate: Step size applied to the gradient.
        epochs: Number of update steps to run.

    Returns:
        A new float ndarray holding the weights after the final epoch.
    """
    m = len(labels)
    # Work on a float copy: the in-place update below would otherwise overwrite
    # the caller's initial weights (and fail outright on an integer array).
    weights = np.array(weights, dtype=float)

    for epoch in range(epochs):
        predictions = predict(features, weights)
        error = predictions - labels
        gradient = np.dot(features.T, error) / m
        weights -= learning_rate * gradient

        # Report the loss every 100 epochs.
        if epoch % 100 == 0:
            cost = loss_function(features, labels, weights)
            print(f'Epoch {epoch}, Loss: {cost}')

    return weights

In [45]:
# Load the dataset from 'framingham.csv' (path is relative to the working directory).
df = pd.read_csv('framingham.csv')

In [46]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


In [47]:
# Keep only complete rows; `df` is rebound, so the raw frame is no longer
# available under this name after this cell.
df = df.dropna()

In [48]:
# Preview the first rows again after dropping incomplete rows.
df.head()


Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


In [49]:
# Columns kept for modelling: 14 predictors followed by the target
# 'TenYearCHD' (the 'education' column is not selected).
selected_features = [
    'male',
    'age',
    'currentSmoker',
    'cigsPerDay',
    'BPMeds',
    'prevalentStroke',
    'prevalentHyp',
    'diabetes',
    'totChol',
    'sysBP',
    'diaBP',
    'BMI',
    'heartRate',
    'glucose',
    'TenYearCHD',
]
df = df[selected_features]

In [50]:
# Target vector, then the design matrix made of every remaining column.
labels = df['TenYearCHD'].values
features = df.drop(columns='TenYearCHD').values


In [51]:
# Prepend a column of ones so the first weight acts as the bias/intercept.
# Caution: this overwrites `features`, so running the cell a second time
# prepends another column of ones; re-run the cell that builds `features` first.
features = np.column_stack((np.ones(len(labels)), features))

In [52]:
# Draw the initial weights (one per column of `features`, uniform on [0, 1))
# from a seeded generator so a fresh Restart & Run All reproduces the same run.
rng = np.random.default_rng(42)
weights = rng.random(features.shape[1])

In [53]:
# Gradient-descent hyperparameters: step size and number of update steps.
# NOTE(review): the loss recorded in the training output oscillates instead of
# decreasing; presumably the unscaled feature columns make this step size too
# large -- consider standardising the features or lowering the rate (TODO confirm).
learning_rate = 0.01
epochs = 1000

In [54]:
# Train the model; gradient_descent prints the loss every 100 epochs.
trained_weights = gradient_descent(features, labels, weights, learning_rate, epochs)

Epoch 0, Loss: 4.657408251695932
Epoch 100, Loss: 16.75302304018309
Epoch 200, Loss: 12.65219995546599
Epoch 300, Loss: 8.495806048531025
Epoch 400, Loss: 5.464928988134665
Epoch 500, Loss: 12.789338803430867
Epoch 600, Loss: 22.838448360184763
Epoch 700, Loss: 23.214422038503145
Epoch 800, Loss: 21.240076124486826
Epoch 900, Loss: 22.470161356982825


In [55]:
# Show the learned weights: the bias first, then one weight per feature column.
# NOTE(review): the saved output lists 6 weights, but 14 feature columns plus
# the bias give 15 -- the output looks stale from an earlier feature selection.
print('Trained Weights:', trained_weights)

Trained Weights: [ 0.1570064   0.14699895  0.32494649  0.25846175 -0.04309074 -0.78669564]


In [56]:
def predict_new_data(new_data, weights):
    """Classify new samples with a trained logistic-regression model.

    A column of ones is prepended to ``new_data`` (matching the bias weight in
    position 0), probabilities are computed with ``predict``, and each sample
    is labelled 1 when its probability is at least 0.5, otherwise 0.

    Args:
        new_data: 2-D array-like, one row per sample, WITHOUT the bias column.
        weights: Trained weights, bias first.

    Returns:
        Integer ndarray of 0/1 predictions, one per sample.

    Raises:
        ValueError: If the number of features per sample does not match the
            number of non-bias weights.
    """
    new_data_with_bias = np.column_stack((np.ones(len(new_data)), new_data))

    # Fail early with a readable message instead of np.dot's generic
    # "shapes not aligned" error when the sample has the wrong width.
    weights_arr = np.asarray(weights)
    if weights_arr.ndim >= 1 and new_data_with_bias.shape[1] != weights_arr.shape[0]:
        raise ValueError(
            f"new_data provides {new_data_with_bias.shape[1] - 1} feature(s) per sample, "
            f"but the model expects {weights_arr.shape[0] - 1} (excluding the bias term)"
        )

    probabilities = predict(new_data_with_bias, weights)

    # Threshold at 0.5 to turn probabilities into class labels.
    predictions = (probabilities >= 0.5).astype(int)

    return predictions

In [57]:
# Classify one hand-written sample and print the verdict (messages are in Vietnamese).
# NOTE(review): this row has 5 values, but the model above is trained on the 14
# feature columns in `selected_features` (15 weights with the bias), so the dot
# product cannot align on a fresh run. Which columns these five values stand
# for is not stated -- TODO confirm and supply all 14 in training order.
new_data_point = np.array([[1, 60, 10, 120, 80]])
predicted_result = predict_new_data(new_data_point, trained_weights)

if predicted_result[0] == 1:
    # "Likely to have heart disease."
    print("Có khả năng mắc bệnh tim.")
else:
    # "Not likely to have heart disease."
    print("Không có khả năng mắc bệnh tim.")

Không có khả năng mắc bệnh tim.
