# Gradient Descent

### Predicting whether a person will buy life insurance based on age and affordability, using logistic regression

In [65]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [66]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
df = pd.read_csv('insurance_data.csv')
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


#### Split train and test set

In [68]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    df[['age', 'affordibility']],
    df['bought_insurance'],
    test_size=0.2,
    random_state=42,
)

In [69]:
df.shape

(28, 3)

In [70]:
len(X_train)

22

In [71]:
# Divide age by 100 so it is on a scale comparable to the 0/1 affordibility
# flag. `assign` returns a new frame, so X_train / X_test are left untouched.
X_train_scaled = X_train.assign(age=X_train['age'] / 100)
X_test_scaled = X_test.assign(age=X_test['age'] / 100)

In [72]:
X_train_scaled

Unnamed: 0,age,affordibility
17,0.58,1
22,0.4,1
11,0.28,1
13,0.29,0
15,0.55,1
1,0.25,0
4,0.46,1
5,0.56,1
2,0.47,1
16,0.25,0


In [73]:
# A single sigmoid unit over two inputs is exactly logistic regression on
# (age, affordibility). Weights start at 1 and the bias at 0, the same starting
# point the hand-written gradient_descent in this notebook uses.
# The input shape is declared with an explicit keras.Input layer; passing
# `input_shape=` to Dense is the deprecated way to do this in Keras 3.
model = keras.Sequential([
    keras.Input(shape = (2,)),
    keras.layers.Dense(1, activation = 'sigmoid', kernel_initializer = 'ones', bias_initializer = 'zeros')
])

model.compile(optimizer = 'adam',
             loss = 'binary_crossentropy',
             metrics = ['accuracy'])

model.fit(X_train_scaled, y_train, epochs = 1000)

Epoch 1/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 640ms/step - accuracy: 0.5000 - loss: 0.7428
Epoch 2/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.5000 - loss: 0.7424
Epoch 3/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.5000 - loss: 0.7420
Epoch 4/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.5000 - loss: 0.7416
Epoch 5/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.5000 - loss: 0.7411
Epoch 6/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.5000 - loss: 0.7407
Epoch 7/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.5000 - loss: 0.7403
Epoch 8/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.5000 - loss: 0.7399
Epoch 9/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x26973f47ad0>

#### Evaluate the model on test set

In [74]:
model.evaluate(X_test_scaled, y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step - accuracy: 0.8333 - loss: 0.5276


[0.5275540351867676, 0.8333333134651184]

In [75]:
X_test_scaled

Unnamed: 0,age,affordibility
9,0.61,1
25,0.54,1
8,0.62,1
21,0.26,0
0,0.22,1
12,0.27,0


In [76]:
model.predict(X_test_scaled)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step


array([[0.65436006],
       [0.6362884 ],
       [0.6569071 ],
       [0.41346112],
       [0.5494102 ],
       [0.41619965]], dtype=float32)

In [77]:
y_test

9     1
25    1
8     1
21    0
0     0
12    0
Name: bought_insurance, dtype: int64

#### Now get the value of weights and bias from the model

In [78]:
coef, intercept = model.get_weights()
coef, intercept

(array([[1.1281339],
        [0.5930885]], dtype=float32),
 array([-0.6429901], dtype=float32))

#### This means w1=1.1281339, w2=0.5930885, bias=-0.6429901

In [79]:
import math
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a scalar.

    The exponential is always taken of a non-positive number, so inputs with
    a large negative value return a result near 0 instead of raising
    ``OverflowError`` from ``math.exp``.
    """
    if x >= 0:
        # exp(-x) <= 1 here, so the textbook form cannot overflow.
        return 1 / (1 + math.exp(-x))
    # For x < 0, exp(-x) can overflow; e**x / (1 + e**x) is algebraically
    # the same value and only ever exponentiates a negative number.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)
sigmoid(18)

0.9999999847700205

### Prediction function that uses w1, w2 and bias

In [80]:
def prediction_function(age, affordibility):
    """Apply the learned weights to one (age, affordibility) pair.

    Reads the notebook-level ``coef`` and ``intercept`` and passes the
    resulting weighted sum through ``sigmoid``. ``age`` is presumably expected
    on the same /100 scale used for training -- confirm against the caller.
    """
    age_term = coef[0] * age
    affordibility_term = coef[1] * affordibility
    logit = age_term + affordibility_term + intercept
    return sigmoid(logit)

In [81]:
prediction_function(0.61, 1)

0.6543600539108452

### A couple of helper routines: sigmoid and log_loss

In [90]:
def sigmoid_numpy(X):
    """Element-wise logistic sigmoid, ``1 / (1 + e**-X)``, computed with NumPy."""
    decay = np.exp(-X)
    return 1 / (decay + 1)

sigmoid_numpy(np.array([12,0,1]))

array([0.99999386, 0.5       , 0.73105858])

In [92]:
def log_loss(y_true, y_predicted):
    """Return the mean binary cross-entropy between labels and predictions.

    Parameters
    ----------
    y_true : array-like supporting NumPy arithmetic (ndarray, pandas Series)
        Ground-truth labels; the formula treats them as 0/1 values.
    y_predicted : array-like of numbers
        Predicted probabilities. Values are clipped to
        ``[1e-15, 1 - 1e-15]`` before taking logs.

    Returns
    -------
    The mean of ``-(y*log(p) + (1-y)*log(1-p))`` over all elements.
    """
    epsilon = 1e-15
    # Clip away exact 0 and 1 so neither log() below ever sees 0.
    # np.asarray strips any index from the predictions, so they are combined
    # with y_true by position; np.clip does the clamp in one vectorized call
    # and works for any array shape.
    y_predicted_new = np.clip(np.asarray(y_predicted, dtype=float), epsilon, 1 - epsilon)
    return -np.mean(y_true*np.log(y_predicted_new)+(1-y_true)*np.log(1-y_predicted_new))

#### Final gradient descent function 

In [94]:
def gradient_descent(age, affordability, y_true, epochs, loss_thresold, rate=0.5, verbose=True):
    """Fit a two-feature logistic regression with batch gradient descent.

    Starts from ``w1 = w2 = 1`` and ``bias = 0`` and repeatedly steps the
    parameters against the gradient of the log loss.

    Parameters
    ----------
    age, affordability : 1-D array-likes supporting NumPy arithmetic
        The two feature columns (same length).
    y_true : array-like
        Target labels, combined element-wise with the predictions.
    epochs : int
        Maximum number of update steps.
    loss_thresold : float
        Stop once the epoch's loss is at or below this value.
    rate : float, optional
        Learning rate (step size). Defaults to 0.5.
    verbose : bool, optional
        When true (the default), print the parameters and loss every epoch.

    Returns
    -------
    tuple
        ``(w1, w2, bias)`` after the last update performed.

    Notes
    -----
    The loss for an epoch is computed before that epoch's update, so the
    reported loss (and the one compared with ``loss_thresold``) belongs to the
    parameters as they were at the start of the epoch; the returned parameters
    are one step further on.
    """
    w1 = w2 = 1
    bias = 0
    n = len(age)
    for i in range(epochs):
        weighted_sum = w1 * age + w2 * affordability + bias
        y_predicted = sigmoid_numpy(weighted_sum)
        loss = log_loss(y_true, y_predicted)

        # Prediction error, shared by all three partial derivatives.
        error = y_predicted - y_true
        w1d = (1/n)*np.dot(age, error)
        w2d = (1/n)*np.dot(affordability, error)
        bias_d = np.mean(error)

        w1 = w1 - rate * w1d
        w2 = w2 - rate * w2d
        bias = bias - rate * bias_d

        if verbose:
            print (f'Epoch:{i}, w1:{w1}, w2:{w2}, bias:{bias}, loss:{loss}')

        if loss<=loss_thresold:
            break

    return w1, w2, bias

In [96]:
gradient_descent(X_train_scaled['age'],X_train_scaled['affordibility'],y_train,1000, 0.4631)

Epoch:0, w1:0.9736899318847281, w2:0.931388810977659, bias:-0.11748951666770448, loss:0.7428288579142563
Epoch:1, w1:0.9536535852311093, w2:0.8740290167758512, bias:-0.21881533456146035, loss:0.7072146449948487
Epoch:2, w1:0.9393731039296968, w2:0.8271852202997496, bias:-0.3053620401943441, loss:0.6814881914786812
Epoch:3, w1:0.930193258899806, w2:0.7897792032048467, bias:-0.37884372361582785, loss:0.6633428084673968
Epoch:4, w1:0.9254091137248937, w2:0.7605726653866934, bias:-0.441082368200183, loss:0.650742850709519
Epoch:5, w1:0.9243325693598606, w2:0.738313053647322, bias:-0.4938425798625155, loss:0.6420508089402462
Epoch:6, w1:0.9263332963572349, w2:0.7218280753843739, bias:-0.5387319906498417, loss:0.6360356979531206
Epoch:7, w1:0.9308580975636879, w2:0.7100747303660235, bias:-0.5771558825717441, loss:0.631816485354411
Epoch:8, w1:0.9374354910317361, w2:0.7021560855322683, bias:-0.6103083840841516, loss:0.6287844495353144
Epoch:9, w1:0.9456716791005844, w2:0.6973185496313956, bia

(6.729458709063027, 1.3482027165467207, -3.5831821575352403)

In [98]:
coef, intercept

(array([[1.1281339],
        [0.5930885]], dtype=float32),
 array([-0.6429901], dtype=float32))