<a href="https://colab.research.google.com/github/Pushpalatha-H/DL/blob/main/6_gradient_descent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Gradient Descent
Gradient descent is used to find the relation b/w two parameters
**Predict if a person buys insurance or not based on age and affordibility**

In [38]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [39]:
df = pd.read_csv("https://raw.githubusercontent.com/codebasics/py/master/DeepLearningML/6_gradient_descent/insurance_data.csv")
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [40]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(df[['age', 'affordibility']], df.bought_insurance)

In [41]:
len(x_train)

21

In [42]:
len(x_test)

7

In [43]:
# scale down
x_train_scaled = x_train.copy()
x_train_scaled = x_train_scaled / 100

x_test_scaled = x_test.copy()
x_test_scaled = x_test_scaled / 100

In [44]:
# keras model                                  # output shape is 1 coz it is binary classfn
model = keras.Sequential([
    keras.layers.Dense(1, input_shape=(2,), activation = 'sigmoid', kernel_initializer='ones', bias_initializer='zeros')
])
model.compile(optimizer = 'adam',
              loss='binary_crossentropy',
              metrics = 'accuracy')

In [45]:
model.fit(x_train_scaled, y_train)



<keras.src.callbacks.History at 0x78cd8818c1c0>

In [46]:
model.predict(x_test_scaled)



array([[0.6152583 ],
       [0.6340312 ],
       [0.643271  ],
       [0.5545865 ],
       [0.6270355 ],
       [0.57182235],
       [0.6524055 ]], dtype=float32)

# to get w1, w2, bias from model using model.get_weights()

In [47]:
# to get w1, w2, bias from model using model.get_weights()
coef, intercept = model.get_weights()
coef, intercept

(array([[1.0009999],
        [1.0009878]], dtype=float32),
 array([-0.00099995], dtype=float32))

# if we write our own prediction function instead of getting it from model

In [48]:
import math
def sigmoid_fun(x):
  return 1 / (1 + math.exp(-x))
sigmoid_fun(10)

0.9999546021312976

In [49]:
# if we write our own prediction function instead of getting it from model
def prediction_function(age, affordibility):
  weighted_sum = coef[0]*age + coef[1]*affordibility + intercept
  return sigmoid_fun(weighted_sum)
prediction_function(.12, 0)


  return 1 / (1 + math.exp(-x))


0.5297448458283367

#Gradient Descent

In [50]:
# start doing gradient descent

In [51]:
def sigmoid(x):
  return 1 / (1 + np.exp(-x))

sigmoid(np.array([13,0,2]))

array([0.99999774, 0.5       , 0.88079708])

In [52]:
def log_loss(y_true, y_predicted):
  epsilon = 1e-15
  y_predicted_new = [max(i, epsilon) for i in y_predicted]
  y_predicted_new = [min(i, epsilon) for i in y_predicted_new]
  y_predicted_new = np.array(y_predicted_new)
  return -np.mean(y_true*np.log(y_predicted_new) + (1-y_true)*np.log(1-y_predicted_new))

In [57]:
def gradient_descent(age, affordibility, y_true, epochs, loss_threshold):
  w1 = w2 = 1
  bias = 0
  rate = 0.5
  n = len(age)
  for i in range(epochs):
    weighted_sum = w1*age + w2*affordibility + bias
    y_predicted = sigmoid(weighted_sum)
    loss = log_loss(y_true, y_predicted)

    w1d = (1/n)*np.dot(np.transpose(age),(y_true - y_predicted))
    w2d = (1/n)*np.dot(np.transpose(affordibility),(y_true - y_predicted))
    bias_d = (1/n)*np.mean(y_true - y_predicted)

    w1 = w1 - rate *  w1d
    w2 = w2 - rate * w2d
    bias = bias - rate * bias_d

    print(f'Epoch:{i}, w1:{w1}, w2:{w2}, bias:{bias}, loss:{loss}')

    if loss<=loss_threshold:
      break

  return w1, w2, bias

In [58]:
gradient_descent(x_train_scaled['age'],x_train_scaled['affordibility'],y_train, 10, 0.4631)

Epoch:0, w1:0.9862047016352513, w2:0.9998727966873775, bias:0.0016857577583059712, loss:18.091740016381788
Epoch:1, w1:0.972216660921838, w2:0.9997424086126546, bias:0.003351651545759452, loss:18.091740016381788
Epoch:2, w1:0.9580309695545474, w2:0.9996087526375788, bias:0.004997131745451301, loss:18.091740016381788
Epoch:3, w1:0.9436425837816043, w2:0.9994717433349787, bias:0.006621633689133301, loss:18.091740016381788
Epoch:4, w1:0.9290463204223388, w2:0.9993312929216054, bias:0.008224577217888518, loss:18.091740016381788
Epoch:5, w1:0.9142368527680268, w2:0.9991873111889963, bias:0.009805366229850909, loss:18.091740016381788
Epoch:6, w1:0.899208706363468, w2:0.9990397054323217, bias:0.011363388214684777, loss:18.091740016381788
Epoch:7, w1:0.8839562546669649, w2:0.9988883803771718, bias:0.012898013774542288, loss:18.091740016381788
Epoch:8, w1:0.8684737145864826, w2:0.9987332381042452, bias:0.014408596131227046, loss:18.091740016381788
Epoch:9, w1:0.8527551418899143, w2:0.9985741779

(0.8527551418899143, 0.9985741779719021, 0.015894470619304067)

In [59]:
coef, intercept

(array([[1.0009999],
        [1.0009878]], dtype=float32),
 array([-0.00099995], dtype=float32))