In [17]:
import numpy as np
import pandas as pd

In [18]:
data = pd.read_csv(r'C:\Users\devang\Desktop\WOC 2024\heart.csv')

In [19]:
# Separate features (X) and target variable (y)
X = data.drop(columns=['HeartDiseaseorAttack'])  # Assuming 'HeartDiseaseorAttack' is the target variable
y = data['HeartDiseaseorAttack']

In [20]:
# handle missing valuesby using mean
X.fillna(X.mean(), inplace=True)

In [21]:
def sigmoid(z):
    # Sigmoid function
    return 1 / (1 + np.exp(-z))

In [22]:
def compute_cost(X, y, theta):
    # Compute binary cross-entropy cost
    m = len(y)
    h = sigmoid(np.dot(X, theta))
    cost = (-1 / m) * np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))
    return cost

In [23]:
def gradient_descent(X, y, theta, learning_rate, num_iterations):
    # Gradient descent to update parameters
    m = len(y)
    costs = []
    for i in range(num_iterations):
        h = sigmoid(np.dot(X, theta))
        gradient = np.dot(X.T, (h - y)) / m
        theta -= learning_rate * gradient
        cost = compute_cost(X, y, theta)
        costs.append(cost)
    return theta, costs

In [24]:
def predict(X, theta, threshold=0.5):
    # Predict class labels
    predictions = sigmoid(np.dot(X, theta))
    return (predictions >= threshold).astype(int)

In [25]:
# Model Training
# Assume 'X' contains features and 'y' contains labels
X = data.iloc[:, 1:].values  # assuming the first column contains labels
y = data.iloc[:, 0].values


In [26]:
# Add intercept term to X
X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

theta = np.zeros(X.shape[1])  # Initialize parameters

# Set hyperparameters
learning_rate = 0.01
num_iterations = 1000

In [27]:
# Train the model
theta, costs = gradient_descent(X, y, theta, learning_rate, num_iterations)

In [28]:
# Model Evaluation (Optional)
predictions = predict(X, theta)

In [29]:
# Output
print("Optimized Parameters (Theta):")
print(theta)
print("\nCost values during training:")
for i in range(len(costs)):
    print("Iteration {}: {}".format(i+1, costs[i]))

print("\nPredictions:")
print(predictions)

Optimized Parameters (Theta):
[-0.07849212  0.14976823  0.13783055 -0.05653788 -0.05526819  0.05239529
  0.0764754   0.18401414 -0.07931386 -0.05309289 -0.06014622 -0.01525809
 -0.05723568 -0.00579969  0.14084824 -0.00492945  0.02122207  0.07940915
  0.11523079  0.12514751 -0.26693088 -0.16662776]

Cost values during training:
Iteration 1: 0.41413119109057983
Iteration 2: 0.3588963217195747
Iteration 3: 0.33203034201233483
Iteration 4: 0.3242658344439938
Iteration 5: 0.32179815318549393
Iteration 6: 0.32005186864712604
Iteration 7: 0.31856436999154275
Iteration 8: 0.3172848774606675
Iteration 9: 0.31618094061781044
Iteration 10: 0.3152239910240255
Iteration 11: 0.3143891138373
Iteration 12: 0.31365497919631713
Iteration 13: 0.31300360721558257
Iteration 14: 0.3124200296900877
Iteration 15: 0.31189190882703166
Iteration 16: 0.3114091544706902
Iteration 17: 0.310963564972873
Iteration 18: 0.31054850485531565
Iteration 19: 0.3101586243132225
Iteration 20: 0.30978962056420273
Iteration 21: