In [10]:
import numpy as np
import pandas as pd
import random
from sklearn.metrics import f1_score

data=pd.read_csv('synthetic_diabetes.csv')

X=data.iloc[:,:-1].values
y=data.iloc[:,-1].values

def scale_features(X):
    X_scaled = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
    return X_scaled

# Scale features
X_scaled = scale_features(X)
X = np.column_stack((np.ones(len(X_scaled)), X_scaled))

def train_test_split(X, y, test_ratio=0.2):
    num_samples = len(X)
    num_test_samples = int(test_ratio * num_samples)
    test_indices = random.sample(range(num_samples), num_test_samples)
    train_indices = [i for i in range(num_samples) if i not in test_indices]
    return X[train_indices], X[test_indices], y[train_indices], y[test_indices]

X_train, X_test, y_train, y_test= train_test_split(X,y, test_ratio=0.2)

def predict(X, weights):
    y_pred=X.dot(weights)
    return y_pred

def gradient(X,y,weights):
    y_pred=predict(X,weights)
    grad=2*X.T.dot(y_pred-y)
    return grad

def stochastic_gradient_descent(X,y,learning_rate=0.01, num_epochs=1000):
    num_samples, num_features=X.shape
    weights=np.random.rand(num_features)

    for epoch in range(num_epochs):
        np.random.seed(42)  # for reproducibility
        shuffled_indices = np.random.permutation(len(X))
        X_shuffled = X[shuffled_indices]
        y_shuffled = y[shuffled_indices]
        for i in range(0,num_samples):
            grad=gradient(X_shuffled,y_shuffled, weights)
            weights-= learning_rate*grad/num_samples
    return weights

learning_rate=0.001
num_epochs=1000

weights=stochastic_gradient_descent(X_train,y_train,learning_rate, num_epochs)
print(weights)

y_pred=predict(X_test,weights)
y_pred = np.round(y_pred).astype(int)

def calculate_metrics(y_true, y_pred):
    # Calculate true positives, false positives, false negatives
    tp = sum(1 for i in range(len(y_true)) if y_true[i] == 1 and y_pred[i] == 1)
    fp = sum(1 for i in range(len(y_true)) if y_true[i] == 0 and y_pred[i] == 1)
    fn = sum(1 for i in range(len(y_true)) if y_true[i] == 1 and y_pred[i] == 0)

    # Calculate precision, recall, and F1 score
    precision = tp / (tp + fp) if (tp + fp) != 0 else 0
    recall = tp / (tp + fn) if (tp + fn) != 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

    return f1

accuracy=calculate_metrics(y_test,y_pred)
print("Accuracy(F-1 Score)=", accuracy)

[ 0.39448635  0.08070697  0.15736182  0.01085519  0.00892143 -0.03630084
  0.08615843  0.05026227  0.01024114]
Accuracy(F-1 Score)= 0.5352112676056338
