In [262]:
import pandas as pd
import numpy as np

In [263]:
# Read the diabetes CSV into a DataFrame; the bare name on the last line
# renders it as this cell's output.
data = pd.read_csv(filepath_or_buffer='Healthcare-Diabetes.csv')
data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
2763,2,75,64,24,55,29.7,0.370,33,0
2764,8,179,72,42,130,32.7,0.719,36,1
2765,6,85,78,0,0,31.2,0.382,42,0
2766,0,129,110,46,130,67.1,0.319,26,1


In [264]:
# Labels come from the last column; features are the eight columns before it.
y = np.array(data.iloc[:, -1])
X = np.array(data.iloc[:, -9:-1])

# Ordered (unshuffled) 80/20 split: the leading 80% of rows form the training
# set and the remaining rows form the test set.
y_train, y_test = np.split(y, [int(len(y) * 0.8)])
X_train, X_test = np.split(X, [int(len(X) * 0.8)])

X_test


array([[  4.   ,  96.   ,  56.   , ...,  20.8  ,   0.34 ,  26.   ],
       [  5.   , 108.   ,  72.   , ...,  36.1  ,   0.263,  33.   ],
       [  0.   ,  78.   ,  88.   , ...,  36.9  ,   0.434,  21.   ],
       ...,
       [  6.   ,  85.   ,  78.   , ...,  31.2  ,   0.382,  42.   ],
       [  0.   , 129.   , 110.   , ...,  67.1  ,   0.319,  26.   ],
       [  2.   ,  81.   ,  72.   , ...,  30.1  ,   0.547,  25.   ]])

In [265]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean/std from the training rows only, then apply that
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [266]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Attributes:
        lr: Step size applied to each gradient update.
        w: 1-D weight vector, one entry per feature (``None`` until ``fit``).
        b: Scalar bias term.
    """

    def __init__(self, lr=0.01):
        """Store the learning rate and start with an unfitted model."""
        self.lr = lr
        self.w = None
        self.b = 0

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)) element-wise.

        Only ``exp(-|z|)`` is ever evaluated, so large-magnitude inputs
        saturate to 0 or 1 without triggering an overflow warning.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))  # always in (0, 1], cannot overflow
        # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- same function.
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def Z(self, X):
        """Return the linear scores ``X @ w + b`` for each row of ``X``."""
        return np.dot(X, self.w) + self.b

    def loss_function(self, X_train, y_train):
        """Return the mean binary cross-entropy of the current parameters.

        Uses the algebraically equivalent form ``log(1 + e^z) - y * z`` so
        the result stays finite even when the sigmoid saturates (the naive
        form would evaluate ``log(0)``).
        """
        z = self.Z(X_train)
        loss = np.mean(np.logaddexp(0.0, z) - y_train * z)
        return loss

    def gradient(self, X_train, y_train):
        """Return ``(dw, db)``, the loss gradients w.r.t. weights and bias."""
        m = X_train.shape[0]
        predictions = self.sigmoid(self.Z(X_train))
        error = predictions - y_train
        dw = (1 / m) * np.dot(X_train.T, error)
        db = (1 / m) * np.sum(error)
        return dw, db

    def fit(self, X_train, y_train, epochs):
        """Run ``epochs`` gradient-descent steps and print the final loss.

        Both the weights and the bias are reset first, so calling ``fit``
        again on the same instance trains from scratch instead of continuing
        from the previous bias.
        """
        self.w = np.zeros(X_train.shape[1])
        self.b = 0.0
        for _ in range(epochs):
            dw, db = self.gradient(X_train, y_train)
            self.w -= self.lr * dw
            self.b -= self.lr * db
        print(f"Loss = {self.loss_function(X_train, y_train)}")

    def predict(self, X_test, threshold=0.5):
        """Return 0/1 labels: 1 where the predicted probability >= threshold."""
        probabilities = self.sigmoid(self.Z(X_test))
        return (probabilities >= threshold).astype(int)


In [267]:
"""
Pregnancies : Number of times pregnant.
Glucose : Plasma glucose concentration over 2 hours in an oral glucose tolerance test.
BloodPressure : Diastolic blood pressure (mm Hg).
SkinThickness : Triceps skinfold thickness (mm).
Insulin : 2-Hour serum insulin (mu U/ml).
BMI : Body mass index (weight in kg / height in m^2).
DiabetesPedigreeFunction : Diabetes pedigree function, a genetic score of diabetes.
Age : Age in years.

"""


'"\nPregnancies : Number of times pregnant.\nGlucose : Plasma glucose concentration over 2 hours in an oral glucose tolerance test.\nBloodPressure : Diastolic blood pressure (mm Hg).\nSkinThickness : Triceps skinfold thickness (mm).\nInsulin : 2-Hour serum insulin (mu U/ml).\nBMI : Body mass index (weight in kg / height in m^2).\nDiabetesPedigreeFunction : Diabetes pedigree function, a genetic score of diabetes.\nAge : Age in years.\n\n'

In [268]:
# Train on the standardized training split using 100000 gradient-descent steps.
model = LogisticRegression()
model.fit(X_train, y_train, 100000)
# Preview predictions for the first 100 test rows by slicing at the call site.
# X_test itself is left untouched so it keeps the same length as y_test.
print(model.predict(X_test[:100]))


Loss = 0.47026097865090793
[0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 1 0 0 1 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0
 1 1 0 1 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0
 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0]
