In [122]:
import numpy as np

In [123]:
import pandas as pd
df= pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [124]:
X=df.drop('Outcome',axis=1)
y=df['Outcome']

In [125]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Activation Functions

In [126]:
def relu_activation(weighted_sum):
  return np.maximum(0, weighted_sum)

def relu_derivative(weighted_sum):
  return (weighted_sum >0).astype(float)

def sigmoid_activation(weighted_sum):
  return 1/ (1+ np.exp(-weighted_sum))

#Initilaization of parameters

In [128]:
def initialize_parameters():
  np.random.seed(1)

#weight and bias from input to first hidden layer
  weights_input_to_hidden_layer_1 = np.random.randn(8,3) * np.sqrt(2 / 8)
  bias_hidden_layer_1= np.zeros((1,3))

  #weight and bias from hidden layeer 1 to hidden layer 2
  weights_hidden_layer_1_to_hidden_layer_2 = np.random.randn(3,3) *np.sqrt(2 / 3)
  bias_hidden_layer_2 = np.zeros((1,3))

  #weight and bias from hiddden layer 2 to output layer
  weights_hidden_layer_2_to_output_layer = np.random.randn(3,1) * 0.01
  bias_output_layer = np.zeros((1,1))

  return (
      weights_input_to_hidden_layer_1,
      weights_hidden_layer_1_to_hidden_layer_2,
      weights_hidden_layer_2_to_output_layer,
      bias_hidden_layer_1,
      bias_hidden_layer_2,
      bias_output_layer
  )

#Forward propagation

In [129]:
def forward_propagation(
    input_features,
    weights_input_to_hidden_layer_1,
    bias_hidden_layer_1,
    weights_hidden_layer_1_to_hidden_layer_2,
    bias_hidden_layer_2,
    weights_hidden_layer_2_to_output_layer,
    bias_output_layer
    ):



    # first hidden layer calculations
    weighted_sum_hidden_layer_1= ( np.dot(input_features, weights_input_to_hidden_layer_1) + bias_hidden_layer_1)
    activation_hidden_layer_1= relu_activation(weighted_sum_hidden_layer_1)

    #Second hidden layer calculations
    weighted_sum_hidden_layer_2= (np.dot(activation_hidden_layer_1, weights_hidden_layer_1_to_hidden_layer_2) + bias_hidden_layer_2)
    activation_hidden_layer_2= relu_activation(weighted_sum_hidden_layer_2)

    #output layer calculations
    weighted_sum_output_layer= (np.dot(activation_hidden_layer_2, weights_hidden_layer_2_to_output_layer) + bias_output_layer)

    predicted_out_probabilities = sigmoid_activation(weighted_sum_output_layer)

    return(
        weighted_sum_hidden_layer_1,
        weighted_sum_hidden_layer_2,
        weighted_sum_output_layer,
        activation_hidden_layer_1,
        activation_hidden_layer_2,
        predicted_out_probabilities

    )


#Loss Function(bINARY CROSS ENTROPY)

In [130]:
def compute_binary_cross_entrpy(true_values, predicted_out_probabilities):

    return -1 *np.mean(true_values * np.log(predicted_out_probabilities + 1e-8) + (1-true_values) * np.log(1 - predicted_out_probabilities + 1e-8))

#Backward propagation

In [131]:
def backward_propagation(input_features, true_values, weighted_sum_hidden_layer_1, activation_hidden_layer_1, weighted_sum_hidden_layer_2, activation_hidden_layer_2, predicted_out_probabilities, weights_hidden_layer_1_to_hidden_layer_2, weights_hidden_layer_2_to_output_layer):
  number_of_samples = input_features.shape[0]

  #Error at output layer
  output_layer_error = predicted_out_probabilities - true_values

  #Gradients for output layer
  gradient_weights_hidden_layer_2_to_output = (np.dot(activation_hidden_layer_2.T, output_layer_error)/ number_of_samples)

  gradient_bias_output_layer = np.mean(output_layer_error, axis=0, keepdims=True)

  #Error at second hidden layer
  hidden_layer_2_error = np.dot(output_layer_error, weights_hidden_layer_2_to_output_layer.T)
  hidden_layer_2_delta = (hidden_layer_2_error * relu_derivative(weighted_sum_hidden_layer_2))

  #gradients for second hidden layer
  gradient_weights_hidden_layer_1_to_hidden_layer_2 = (np.dot(activation_hidden_layer_1.T, hidden_layer_2_delta)/ number_of_samples)

  gradient_bias_hidden_layer_2 = np.mean(hidden_layer_2_delta, axis=0, keepdims=True)

  #Error at first hidden layer

  hidden_layer_1_error = np.dot(hidden_layer_2_delta, weights_hidden_layer_1_to_hidden_layer_2.T)
  hidden_layer_1_delta = ( hidden_layer_1_error * relu_derivative(weighted_sum_hidden_layer_1))

  #Gradients for first hidden layer
  gradient_weights_input_to_hidden_layer_1 = (np.dot(input_features.T, hidden_layer_1_delta)/ number_of_samples)
  gradient_bias_hidden_layer_1= np.mean(hidden_layer_1_delta, axis=0, keepdims=True)

  return (
      gradient_weights_input_to_hidden_layer_1,
      gradient_bias_hidden_layer_1,
      gradient_weights_hidden_layer_1_to_hidden_layer_2,
      gradient_bias_hidden_layer_2,
      gradient_weights_hidden_layer_2_to_output,
      gradient_bias_output_layer
  )

#Update parameters

In [132]:
def update_parameters(
    weights_input_to_hidden_layer_1,
    bias_hidden_layer_1,
    weights_hidden_layer_1_to_hidden_layer_2,
    bias_hidden_layer_2,
    weights_hidden_layer_2_to_output_layer,
    bias_output_layer,
    gradients,
    learning_rate
):

    (
    grad_w_input_to_h1,
    grad_b_h1,
    grad_w_h1_to_h2,
    grad_b_h2,
    grad_w_h2_to_output,
    grad_b_output,
)= gradients

    #Update all weight and bias using gradient descent
    weights_input_to_hidden_layer_1 -= learning_rate * grad_w_input_to_h1
    bias_hidden_layer_1 -= learning_rate * grad_b_h1

    weights_hidden_layer_1_to_hidden_layer_2 -= learning_rate * grad_w_h1_to_h2
    bias_hidden_layer_2 -= learning_rate * grad_b_h2

    weights_hidden_layer_2_to_output_layer -= learning_rate * grad_w_h2_to_output
    bias_output_layer -= learning_rate * grad_b_output

    return (
        weights_input_to_hidden_layer_1,
        bias_hidden_layer_1,
        weights_hidden_layer_1_to_hidden_layer_2,
        bias_hidden_layer_2,
        weights_hidden_layer_2_to_output_layer,
        bias_output_layer
    )

#Training layer

In [139]:
def train_neural_network(
    training_features,
    training_labels,
    epochs=500,
    learning_rate=0.1
):
    network_parameters = initialize_parameters()

    for epoch in range(epochs):
        forward_values = forward_propagation(
            training_features,
            *network_parameters
        )

        loss = compute_binary_cross_entrpy(
            training_labels,
            forward_values[-1]
        )

        gradients = backward_propagation(
            training_features,
            training_labels,
            *forward_values[:-1],
            forward_values[-1],
            network_parameters[2],
            network_parameters[4]
        )

        network_parameters = update_initialize_parameters(
            *network_parameters,
            gradients,
            learning_rate
        )

        if epoch % 100 == 0:
            print(f"Epoch {epoch} | Loss: {loss:.4f}")

    return network_parameters


#predictions

In [140]:
def predict(input_features, trained_parameters):
  prediction_probabilities = forward_propagation(input_features, *trained_parameters)[-1]
  return (prediction_probabilities> 0.5).astype(int)

In [141]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [142]:
trained_parameters = train_model(X_train_scaled, y_train.values.reshape(-1, 1), epochs=1000, learning_rate=0.1)

preds = predict(X_test_scaled, trained_parameters)
print("Accuracy:", np.mean(preds == y_test.values.reshape(-1, 1)))

Epoch: 0, Loss: 0.6929744263567997
Epoch: 10, Loss: 0.6740516129234058
Epoch: 20, Loss: 0.6625096766025292
Epoch: 30, Loss: 0.6551761378448931
Epoch: 40, Loss: 0.6502358501214965
Epoch: 50, Loss: 0.6466361631030334
Epoch: 60, Loss: 0.6437398648765749
Epoch: 70, Loss: 0.6411397465898699
Epoch: 80, Loss: 0.6385577365746482
Epoch: 90, Loss: 0.6357383414330553
Epoch: 100, Loss: 0.6325304704032045
Epoch: 110, Loss: 0.6287544490330425
Epoch: 120, Loss: 0.624343371576797
Epoch: 130, Loss: 0.6188401476753768
Epoch: 140, Loss: 0.6122850918644732
Epoch: 150, Loss: 0.6048410059233874
Epoch: 160, Loss: 0.5966386155949674
Epoch: 170, Loss: 0.5884198895656387
Epoch: 180, Loss: 0.5805564951329167
Epoch: 190, Loss: 0.5730248914176521
Epoch: 200, Loss: 0.5660629510971826
Epoch: 210, Loss: 0.5593727777111518
Epoch: 220, Loss: 0.5529676491509526
Epoch: 230, Loss: 0.5470242782888877
Epoch: 240, Loss: 0.5415297141149801
Epoch: 250, Loss: 0.5367508403533358
Epoch: 260, Loss: 0.5325768782359986
Epoch: 270, L