In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the full dataset from the CSV file
data = pd.read_csv('diabetes2.csv')

# Hold out a seeded 20% random sample of the rows as the test set;
# every row that was not sampled forms the remaining 80% training set
test = data.sample(frac=0.20, random_state=12)
train = data.drop(index=test.index)

# Feature frames: every column except the "Outcome" label
train_x = train.drop(columns="Outcome")
test_x = test.drop(columns="Outcome")

# Label vectors as plain NumPy arrays
train_y = train['Outcome'].values
test_y = test['Outcome'].values

In [None]:
def mean_normalize(X, mean, std):
    """Standardize ``X`` by subtracting ``mean`` and dividing by ``std``.

    The arithmetic is delegated to the operands' own ``-`` and ``/``
    operators, so scalars, NumPy arrays and pandas objects all follow
    their usual broadcasting / alignment rules.

    Parameters
    ----------
    X : values to normalize.
    mean : value(s) subtracted from ``X``.
    std : value(s) the centered data is divided by.

    Returns
    -------
    The result of ``(X - mean) / std``.
    """
    centered = X - mean
    return centered / std

In [None]:
# Standardize both splits with the per-column mean and standard deviation
# of the training features only, so the test split is scaled with the
# same statistics and contributes nothing to them
train_mean, train_std = train_x.mean(), train_x.std()
train_x, test_x = (
    mean_normalize(split, train_mean, train_std)
    for split in (train_x, test_x)
)

In [None]:
def sigmoid(theta, X):
    """Logistic sigmoid of the linear scores ``np.matmul(X, theta.transpose())``.

    The result is computed in an overflow-free form: ``np.exp`` is only
    ever evaluated on non-positive numbers, so very large positive or
    negative scores saturate cleanly to 1.0 / 0.0 instead of triggering
    an overflow ``RuntimeWarning``.

    Parameters
    ----------
    theta : numpy.ndarray
        Weight array; it is transposed before the product (a no-op for
        the 1-D weight vector created in ``logistic_regression``).
    X : array-like
        Left operand of the matrix product (one row per sample when 2-D).

    Returns
    -------
    numpy.ndarray or NumPy scalar
        Element-wise ``1 / (1 + exp(-z))`` with ``z`` the matrix product,
        every value lying in ``[0, 1]``.
    """
    z = np.matmul(X, theta.transpose())
    # exp(-|z|) lies in (0, 1], so it can underflow to 0 but never overflow.
    e = np.exp(-np.abs(z))
    # z >= 0:  1 / (1 + exp(-z))        (numerator 1, e == exp(-z))
    # z <  0:  exp(z) / (1 + exp(z))    (numerator e, e == exp(z))
    return np.where(z >= 0, 1.0, e) / (1.0 + e)

In [None]:
def calculate_cost(theta, X, y, z):
    """Regularized cross-entropy cost of logistic regression.

    cost = (1/m) * sum(-y*log(h) - (1-y)*log(1-h))
           + (z / (2m)) * sum(theta[1:]**2)

    where ``h = sigmoid(theta, X)`` and ``m = X.shape[0]``. The first
    coefficient ``theta[0]`` is excluded from the penalty.

    Parameters
    ----------
    theta : numpy.ndarray
        Weight vector (1-D, as created in ``logistic_regression``).
    X : numpy.ndarray
        Sample matrix; ``X.shape[0]`` is taken as the number of samples.
    y : numpy.ndarray
        Target values aligned with the rows of ``X``
        (presumably 0/1 labels -- confirm against the data).
    z : float
        Regularization strength.

    Returns
    -------
    float
        The scalar cost.
    """
    m = X.shape[0]
    hyp = sigmoid(theta, X)
    # Keep probabilities strictly inside (0, 1): a saturated sigmoid would
    # otherwise give log(0) = -inf, and 0 * -inf turns the whole cost into NaN.
    eps = np.finfo(float).eps
    hyp = np.clip(hyp, eps, 1 - eps)
    data_term = -(y * np.log(hyp) + (1 - y) * np.log(1 - hyp)).sum() / m
    # Penalize every coefficient except the first one (theta[0]).
    penalty = (z / (2 * m)) * np.square(theta[1:]).sum()
    return data_term + penalty

In [None]:
def calculate_grad(theta, X, y, z):
    """Summed (not averaged) gradient of the regularized logistic cost.

    Returns ``m`` times the gradient of ``calculate_cost`` with respect to
    ``theta``, where ``m = X.shape[0]``:

        X^T (sigmoid(theta, X) - y) + z * theta   (theta[0] not penalized)

    ``logistic_regression`` multiplies this result by ``1/m``, so both the
    data term and the penalty term must be returned *un-averaged*. The
    penalty used to be pre-divided by ``m`` here, which made the effective
    regularization in the update ``z/m**2`` instead of the ``z/m`` implied
    by ``calculate_cost``.

    Parameters
    ----------
    theta : numpy.ndarray
        Weight vector (1-D, as created in ``logistic_regression``).
    X : numpy.ndarray
        Sample matrix, one row per sample.
    y : numpy.ndarray
        Target values aligned with the rows of ``X``.
    z : float
        Regularization strength.

    Returns
    -------
    numpy.ndarray
        Array with the same length as ``theta``.
    """
    hyp = sigmoid(theta, X)
    gradient = np.matmul(X.transpose(), hyp - y)
    # z * theta allocates a new array, so theta itself is never modified.
    penalty = z * theta
    # theta[0] is excluded from regularization, matching calculate_cost.
    penalty[0] = 0
    return gradient + penalty

In [None]:
def logistic_regression(X, y, alpha, iteration, test_X, test_y,
                        z=1, seed=None, return_costs=False):
    """Fit logistic-regression weights with batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Training matrix; ``X.shape[1]`` weights are learned.
    y : numpy.ndarray
        Training targets aligned with the rows of ``X``.
    alpha : float
        Learning rate.
    iteration : int
        Number of gradient-descent steps.
    test_X, test_y : numpy.ndarray
        Held-out data; only used to record the test cost history when
        ``return_costs`` is true.
    z : float, default 1
        Regularization strength passed to ``calculate_cost`` and
        ``calculate_grad`` (previously hard-coded to 1).
    seed : int or None, default None
        When given, the initial weights are drawn from
        ``np.random.default_rng(seed)`` so the fit is reproducible.
        When ``None`` the global ``np.random.rand`` is used, as before.
    return_costs : bool, default False
        When true, also return the per-iteration train and test costs.

    Returns
    -------
    numpy.ndarray
        The fitted weights ``theta`` (default), or the tuple
        ``(theta, costs_train, costs_test)`` when ``return_costs`` is true.
    """
    # Initialize the weight coefficients uniformly in [0, 1)
    n_weights = X.shape[1]
    if seed is None:
        theta = np.random.rand(n_weights)
    else:
        theta = np.random.default_rng(seed).random(n_weights)

    m = X.shape[0]
    costs_train = []
    costs_test = []
    for _ in range(iteration):
        # The histories used to be computed on every step and then thrown
        # away; they are now only computed on request, and both costs are
        # recorded for the same theta (before the step is taken).
        if return_costs:
            costs_train.append(calculate_cost(theta, X, y, z))
            costs_test.append(calculate_cost(theta, test_X, test_y, z))
        theta -= alpha * (1 / m) * calculate_grad(theta, X, y, z)

    if return_costs:
        return theta, costs_train, costs_test
    return theta

In [None]:
def predict(theta, X, threshold):
    """Convert sigmoid outputs into hard 0/1 predictions.

    Parameters
    ----------
    theta : weights forwarded to ``sigmoid``.
    X : inputs forwarded to ``sigmoid``.
    threshold : cut-off; an output greater than or equal to it maps to 1,
        anything below maps to 0.

    Returns
    -------
    The thresholded sigmoid outputs cast to ``int``.
    """
    probabilities = sigmoid(theta, X)
    is_positive = probabilities >= threshold
    return is_positive.astype(int)

In [None]:
# Prepend a constant column of ones so that theta[0] multiplies a constant
# feature. The membership checks keep this cell re-runnable:
# DataFrame.insert raises if the 'One' column already exists.
if 'One' not in train_x.columns:
    train_x.insert(0, 'One', 1)
if 'One' not in test_x.columns:
    test_x.insert(0, 'One', 1)

# Seed NumPy's global generator so the random initial weights drawn inside
# logistic_regression (and therefore the reported accuracy) are reproducible.
np.random.seed(12)

# Fit with learning rate 0.05 for 2000 gradient-descent iterations
theta = logistic_regression(train_x.values, train_y, 0.05, 2000, test_x.values, test_y)

In [None]:
# Classify the held-out samples using a 0.5 cut-off
pred_y = predict(theta, test_x.values, 0.5)
# Element-wise flags: True where the prediction matches the label
result = (pred_y == test_y)
# Percentage of matching predictions
accuracy = result.sum() * 100 / len(result)
print("Accuracy: {}%".format(accuracy))

Accuracy: 81.81818181818181%
