# BATCH GRADIENT DESCENT IMPLEMENTATION

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## LIBRARY IMPORT

In [3]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras

## DATA IMPORT

In [4]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/03_GRADIENT_DESCENT_NN/05_insurance_data.csv')

In [5]:
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [6]:
df.shape

(28, 3)

## TRAIN TEST SPLIT

In [7]:
x_train,x_test,y_train,y_test = train_test_split(df[['age','affordibility']],df.bought_insurance,test_size = 0.2, random_state = 99)

In [8]:
len(x_train)

22

## SCALING THE AGE

In [9]:
x_train_scale = x_train.copy()
x_train_scale['age'] = x_train_scale['age']/100

x_test_scale = x_test.copy()
x_test_scale['age'] = x_test_scale['age']/100

In [10]:
x_train_scale

Unnamed: 0,age,affordibility
10,0.18,1
15,0.55,1
2,0.47,1
7,0.6,0
20,0.21,1
14,0.49,1
6,0.55,0
11,0.28,1
13,0.29,0
0,0.22,1


## BUILD SIMPLE NN

In [11]:
model = keras.Sequential([
    keras.layers.Dense(1,input_shape = (2,),activation = 'sigmoid',
              kernel_initializer = 'ones',
              bias_initializer = 'zeros')
])
model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)
model.fit(x_train_scale,y_train,epochs = 5000)

Epoch 1/5000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.8636 - loss: 0.5021
Epoch 2502/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.8636 - loss: 0.5020
Epoch 2503/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.8636 - loss: 0.5020
Epoch 2504/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.8636 - loss: 0.5019
Epoch 2505/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.8636 - loss: 0.5019
Epoch 2506/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.8636 - loss: 0.5018
Epoch 2507/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.8636 - loss: 0.5017
Epoch 2508/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accurac

<keras.src.callbacks.history.History at 0x7a7ea3d34350>

In [12]:
model.evaluate(x_test_scale,y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step - accuracy: 0.8333 - loss: 0.6591


[0.6591015458106995, 0.8333333134651184]

In [13]:
x_test_scale

Unnamed: 0,age,affordibility
17,0.58,1
22,0.4,1
16,0.25,0
12,0.27,0
19,0.18,1
23,0.45,1


In [14]:
model.predict(x_test_scale)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step


array([[0.76848066],
       [0.5774307 ],
       [0.11323879],
       [0.12352505],
       [0.3159371 ],
       [0.6361673 ]], dtype=float32)

In [15]:
coef,intercept  = model.get_weights()
coef,intercept

(array([[4.9306455],
        [1.6307154]], dtype=float32),
 array([-3.2907383], dtype=float32))

## RAW NN FROM SCRATCH

In [16]:
def sigmoid(x):
  import math
  return 1/ (1+ math.exp(-x))

In [17]:
sigmoid(18)

0.9999999847700205

In [19]:
def prediction_function(age,affordibility):
  # lets make the formula of weighted sum
  weighted_sum = coef[0]*age + coef[1]*affordibility + intercept
  return sigmoid(weighted_sum)

In [20]:
prediction_function(0.58,1) #from x_test

  return 1/ (1+ math.exp(-x))


0.7684805943345715

In [21]:
prediction_function(0.18,1)

  return 1/ (1+ math.exp(-x))


0.3159371033392661

## GRADIENT DESCENT FROM SCRATCH

In [27]:
def log_loss(y_true,y_pred):
  epsilon = 1e-15
  y_pred_new = [max(i,epsilon) for i in y_pred]
  y_pred_new = [min(i,1-epsilon) for i in y_pred_new]
  y_pred_new = np.array(y_pred_new)
  return -np.mean(y_true*np.log(y_pred_new)+(1-y_true)*np.log(1-y_pred_new))

In [23]:
def sigmoid_numpy(x):
  return 1/ (1+ np.exp(-x))

sigmoid_numpy(np.array([12,0,1]))

array([0.99999386, 0.5       , 0.73105858])

In [24]:
def gradient_descent(age,affordibility,y_true,epochs,loss_threshold):
  w1 = w2 = 1
  bias = 0
  rate = 0.5
  n = len(age)
  for i in range(epochs):
    weighted_sum = w1*age + w2*affordibility +bias
    y_pred = sigmoid_numpy(weighted_sum)

    loss = log_loss(y_true,y_pred)

    w1d = (1/n)* np.dot(np.transpose(age),(y_pred-y_true))
    w2d = (1/n)* np.dot(np.transpose(affordibility),(y_pred-y_true))

    bias_d = np.mean(y_pred - y_true)

    w1 = w1 - rate * w1d
    w2 = w2 - rate * w2d
    bias = bias - rate * bias_d

    print(f'Epochs : {i}, w1 : {w1},w2 : {w2},Bias : {bias},loss : {loss}')

    if loss <= loss_threshold:
      break
  return w1,w2,bias

# 0.3853 is the loss from my tensorflow model

In [28]:
gradient_descent(x_train_scale['age'],x_train_scale['affordibility'],y_train,1000,0.3853)

Epochs : 0, w1 : 0.9679458843047224,w2 : 0.9300842621913651,Bias : -0.1415772425202991,loss : 0.7530551024331085
Epochs : 1, w1 : 0.9434771836486412,w2 : 0.8729647934908438,Bias : -0.2644214322447066,loss : 0.7048601568127304
Epochs : 2, w1 : 0.9260434089487847,w2 : 0.8279387868534902,Bias : -0.36998245044901135,loss : 0.6699044083963612
Epochs : 3, w1 : 0.9148687401251204,w2 : 0.7937984363725068,Bias : -0.46022291496871826,loss : 0.645152341540659
Epochs : 4, w1 : 0.9090826878838325,w2 : 0.7690847682019516,Bias : -0.5373026169108802,loss : 0.6278448792840212
Epochs : 5, w1 : 0.9078225830253752,w2 : 0.7522972461375633,Bias : -0.6033370677725053,loss : 0.6157429005746855
Epochs : 6, w1 : 0.9102967130908239,w2 : 0.7420292816818951,Bias : -0.6602526823186899,loss : 0.6071694111834
Epochs : 7, w1 : 0.9158136625546598,w2 : 0.7370365971443691,Bias : -0.7097226164653945,loss : 0.6009371765277062
Epochs : 8, w1 : 0.9237889532974427,w2 : 0.7362589525537891,Bias : -0.7531556340416182,loss : 0.59

(np.float64(4.832322774622927),
 np.float64(1.8140488973450755),
 np.float64(-3.442403858447309))

In [30]:
coef,intercept

(array([[4.9306455],
        [1.6307154]], dtype=float32),
 array([-3.2907383], dtype=float32))

TENSORFLOW WEIGHTS AND BIAS:  
W1 = 4.9306455  
W2 = 1.6307154  
BIAS = -3.2907883  

RAW MODEL OF GRADIENT DESCENT:  
W1 = 4.832322774622927  
W2 = 1.8140488973450755   
BIAS = -3.442403858447309  
