In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the dataset from a CSV in the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer="diabetes_prediction_dataset.csv")
df.head(n=5)

Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
0,Female,80.0,0,1,never,25.19,6.6,140,0
1,Female,54.0,0,0,No Info,27.32,6.6,80,0
2,Male,28.0,0,0,never,27.32,5.7,158,0
3,Female,36.0,0,0,current,23.45,5.0,155,0
4,Male,76.0,1,1,current,20.14,4.8,155,0


In [3]:
from sklearn.model_selection import train_test_split

In [4]:
df['smoking_history'].unique()

array(['never', 'No Info', 'current', 'former', 'ever', 'not current'],
      dtype=object)

In [5]:
# Integer codes used to encode the two categorical columns.
mapping_gender = dict(Female=0, Male=1, Other=2)
# Codes follow the order in which the labels are listed here.
mapping_smoking = {
    label: code
    for code, label in enumerate(
        ['never', 'No Info', 'current', 'former', 'ever', 'not current']
    )
}

In [6]:
# Encode the categorical columns as integers using the mapping dictionaries.
# A column that is already numeric is skipped, so re-running this cell does not
# map the integer codes a second time (which would turn every value into NaN).
for column, codes in (('gender', mapping_gender), ('smoking_history', mapping_smoking)):
    if pd.api.types.is_numeric_dtype(df[column]):
        continue
    encoded = df[column].map(codes)
    if encoded.isna().any():
        # Fail loudly: a label missing from the mapping would otherwise become
        # NaN and silently propagate into the feature matrix.
        unmapped = df.loc[encoded.isna(), column].unique().tolist()
        raise ValueError("Unmapped values in %r: %s" % (column, unmapped))
    df[column] = encoded.astype('int64')

In [7]:
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
0,0,80.0,0,1,0,25.19,6.6,140,0
1,0,54.0,0,0,1,27.32,6.6,80,0
2,1,28.0,0,0,0,27.32,5.7,158,0
3,0,36.0,0,0,2,23.45,5.0,155,0
4,1,76.0,1,1,2,20.14,4.8,155,0


In [8]:
# Features: every column except the target. Labels: the target column.
X = df.drop('diabetes', axis=1).values
y = df.loc[:, 'diabetes'].values

In [9]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
X_train

array([[  1.  ,  73.  ,   0.  , ...,  24.77,   3.5 ,  80.  ],
       [  1.  ,  80.  ,   0.  , ...,  24.6 ,   5.7 , 145.  ],
       [  0.  ,  38.  ,   0.  , ...,  24.33,   4.  , 158.  ],
       ...,
       [  1.  ,  42.  ,   0.  , ...,  26.14,   5.8 ,  85.  ],
       [  0.  ,  37.  ,   0.  , ...,  24.96,   6.2 , 158.  ],
       [  0.  ,  23.  ,   0.  , ...,  27.99,   5.  , 159.  ]])

In [10]:
# Keep the label vectors flat, shape (n_samples,), matching the 1-D `y_test`
# shown in the recorded output. Reshaping to a (1, m) row is hazardous here:
# `len()` of such an array is 1, and its transpose is an (m, 1) column that
# broadcasts against (1, m) predictions into an (m, m) array.
# reshape(-1) is a no-op on already-flat arrays, so this cell is safe to re-run.
y_train = y_train.reshape(-1)
y_test = y_test.reshape(-1)

In [11]:
y_test

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [12]:
def weightInitialization(n_features):
    """Create the starting parameters for the logistic-regression model.

    Args:
        n_features: Number of input features (width of the weight row).

    Returns:
        tuple: ``(w, b)`` where ``w`` is a ``(1, n_features)`` array of zeros
        and ``b`` is the integer ``0``.
    """
    bias = 0
    weights = np.zeros(shape=(1, n_features))
    return weights, bias

In [13]:
def sigmoid_activation(result):
    """Apply the logistic sigmoid ``1 / (1 + exp(-result))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-result)))`` using
    ``np.logaddexp``. The direct formula overflows inside ``exp`` for large
    negative inputs (emitting a RuntimeWarning); this form does not.

    Args:
        result: Scalar or NumPy array of pre-activation values.

    Returns:
        The sigmoid of ``result``, with the same shape as the input.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)),
    # evaluated without ever forming exp(-z) on its own.
    log_denominator = np.logaddexp(0.0, -result)
    final_result = np.exp(-log_denominator)
    return final_result

In [14]:
def model_optimize(w,b,X,Y):
    """Compute the logistic-regression cost and its gradients for one pass.

    Args:
        w: Weight row of shape ``(1, n_features)``.
        b: Scalar bias.
        X: Feature matrix of shape ``(m, n_features)``.
        Y: Binary labels, one per row of ``X``. Accepted as ``(m,)``,
            ``(1, m)`` or ``(m, 1)``; they are normalised to ``(1, m)`` here.

    Returns:
        tuple: ``(gradients, cost)`` where ``gradients`` is
        ``{"dw": (n_features, 1) array, "db": scalar}`` and ``cost`` is the
        mean binary cross-entropy.

    Raises:
        ValueError: If the number of labels differs from the number of rows
            in ``X``.
    """
    m = X.shape[0]
    # Normalise the labels to a (1, m) row so they line up element-for-element
    # with the (1, m) predictions. Relying on the caller's layout is unsafe:
    # a column of labels against a row of predictions broadcasts to (m, m).
    labels = np.asarray(Y).reshape(1, -1)
    if labels.shape[1] != m:
        raise ValueError(
            "Y has %d labels but X has %d rows" % (labels.shape[1], m)
        )

    final_result = sigmoid_activation(np.dot(w,X.T) + b)

    # Clip only inside the logarithms so a saturated prediction (exactly 0 or
    # 1) yields a large finite cost instead of inf/NaN. The gradients below
    # use the unclipped predictions.
    eps = np.finfo(float).eps
    clipped = np.clip(final_result, eps, 1 - eps)
    cost = (-1/m)*np.sum((labels*np.log(clipped)) + ((1 - labels)*np.log(1 - clipped)))

    error = final_result - labels
    dw = (1/m) * np.dot(X.T, error.T)
    db = (1/m) * np.sum(error)
    gradients = {"dw" : dw, "db":db}
    return gradients, cost

In [15]:
def model_prediction(w,b,X,Y,learning_rate,no_iterations):
    """Fit the logistic-regression parameters with batch gradient descent.

    Despite its name, this function trains the model: each iteration calls
    ``model_optimize`` and moves ``w`` and ``b`` against the gradients.

    Args:
        w: Initial weight row, shape ``(1, n_features)``.
        b: Initial scalar bias.
        X: Training feature matrix, passed through to ``model_optimize``.
        Y: Training labels, passed through to ``model_optimize``.
        learning_rate: Step size applied to each gradient.
        no_iterations: Number of gradient-descent steps to run.

    Returns:
        tuple: ``(coeff, gradient, costs)`` where ``coeff`` is
        ``{"w": ..., "b": ...}`` with the final parameters, ``gradient`` is
        ``{"dw": ..., "db": ...}`` from the last iteration (both ``None`` when
        ``no_iterations`` is 0), and ``costs`` is the list of costs recorded
        every 100 iterations.
    """
    costs = []
    # Bound up front so that no_iterations == 0 returns cleanly instead of
    # failing on unbound names when the result dict is built.
    dw, db = None, None
    for i in range(no_iterations):
        gradients,cost = model_optimize(w,b,X,Y)
        dw = gradients["dw"]
        db = gradients["db"]
        # dw comes back as a column while w is a row, hence the transpose.
        w = w - (learning_rate *dw.T)
        b = b - (learning_rate *db)
        # Record and report progress every 100th iteration (including i == 0).
        if i % 100 ==0:
            costs.append(cost)
            print("Cost after %i iteration is %f" %(i, cost))
    coeff={"w":w,"b":b}
    gradient={"dw":dw,"db":db}
    return coeff,gradient,costs

In [47]:
def predict(final_pred, m):
    """Convert predicted probabilities into hard 0/1 labels.

    Args:
        final_pred: 2-D array whose first row holds the probabilities.
        m: Length of the output row. Positions beyond the number of
            probabilities stay 0.

    Returns:
        ``(1, m)`` float array holding 1.0 wherever the probability is
        strictly greater than 0.5 and 0.0 elsewhere.

    Raises:
        IndexError: If a probability above 0.5 sits at a position >= ``m``,
            or if ``final_pred`` is not 2-D.
    """
    y_pred = np.zeros((1, m))
    # Vectorised threshold: find the positive positions in one comparison
    # instead of testing each element in a Python loop.
    positive_positions = np.flatnonzero(final_pred[0, :] > 0.5)
    y_pred[0, positive_positions] = 1
    return y_pred

In [57]:
# Gradient-descent settings, plus the feature count taken from the
# column dimension of the training matrix.
learning_rate = 1e-3
no_iterations = 1000
n_features = X_train.shape[1]

In [20]:
n_features

8

In [18]:
# Start from all-zero weights and a zero bias.
w, b = weightInitialization(n_features=n_features)

In [21]:
X_train.shape

(80000, 8)

In [22]:
X_train

array([[  1.  ,  73.  ,   0.  , ...,  24.77,   3.5 ,  80.  ],
       [  1.  ,  80.  ,   0.  , ...,  24.6 ,   5.7 , 145.  ],
       [  0.  ,  38.  ,   0.  , ...,  24.33,   4.  , 158.  ],
       ...,
       [  1.  ,  42.  ,   0.  , ...,  26.14,   5.8 ,  85.  ],
       [  0.  ,  37.  ,   0.  , ...,  24.96,   6.2 , 158.  ],
       [  0.  ,  23.  ,   0.  , ...,  27.99,   5.  , 159.  ]])

In [23]:
w

array([[0., 0., 0., 0., 0., 0., 0., 0.]])

In [24]:
b

0

In [58]:
# Train on the training split. Note that `cost` receives the list of costs
# recorded during training, not a single value.
coeff, gradient, cost = model_prediction(
    w,
    b,
    X_train,
    y_train,
    learning_rate,
    no_iterations,
)

Cost after 0 iteration is 0.693147
Cost after 100 iteration is 0.356164
Cost after 200 iteration is 0.345307
Cost after 300 iteration is 0.341268
Cost after 400 iteration is 0.339596
Cost after 500 iteration is 0.338837
Cost after 600 iteration is 0.338453
Cost after 700 iteration is 0.338229
Cost after 800 iteration is 0.338073
Cost after 900 iteration is 0.337946


In [59]:
# Predicted probabilities for the test split using the trained parameters.
final_result = sigmoid_activation(
    np.dot(coeff['w'], X_test.T) + coeff['b']
)

In [60]:
final_result

array([[0.15608828, 0.15194016, 0.44746876, ..., 0.13404462, 0.18615257,
        0.2384033 ]])

In [61]:
# Threshold the test-set probabilities into 0/1 labels, one per test row.
predictions = predict(final_pred=final_result, m=X_test.shape[0])

In [62]:
predictions

array([[0., 0., 0., ..., 0., 0., 0.]])

In [63]:
def accuracy(y_true, y_pred):
    """Return the percentage of predictions that equal the true labels.

    Both inputs are flattened before comparison, so ``(m,)``, ``(1, m)`` and
    ``(m, 1)`` layouts give the same answer. Without flattening, ``len()`` of
    a ``(1, m)`` array is 1 and mixed layouts broadcast to ``(m, m)``.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, with the same number of
            elements as ``y_true``.

    Returns:
        Accuracy in percent (0-100); ``nan`` when there are no labels.

    Raises:
        ValueError: If the two inputs hold a different number of elements.
    """
    true_flat = np.asarray(y_true).ravel()
    pred_flat = np.asarray(y_pred).ravel()
    if true_flat.size != pred_flat.size:
        raise ValueError(
            "y_true has %d elements but y_pred has %d"
            % (true_flat.size, pred_flat.size)
        )
    total_predictions = true_flat.size
    if total_predictions == 0:
        # Accuracy is undefined with no samples; avoid dividing by zero.
        return float("nan")
    correct_predictions = np.count_nonzero(true_flat == pred_flat)
    return correct_predictions / total_predictions * 100

In [64]:
# Score the thresholded predictions (first row of `predictions`) against the test labels.
acc = accuracy(y_true=y_test, y_pred=predictions[0])

In [65]:
acc

91.19500000000001