# __Implement gradient descent for nural network (or logistic regression)__

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/codebasics/deep-learning-keras-tf-tutorial/master/6_gradient_descent/insurance_data.csv')
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


Split data

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df[['age', 'affordibility']], df['bought_insurance'], test_size=0.2, random_state=25)

Scale the data

In [4]:
X_train_scaled = X_train.copy()
X_train_scaled['age'] = X_train_scaled['age']/100

X_test_scaled = X_test.copy()
X_test_scaled['age'] = X_test_scaled['age']/100

In [5]:
X_train_scaled.head()

Unnamed: 0,age,affordibility
0,0.22,1
13,0.29,0
6,0.55,0
17,0.58,1
24,0.5,1


Model building

In [6]:
model = keras.Sequential([
    keras.layers.Dense(1, input_shape=(2,), activation='sigmoid', kernel_initializer='ones', bias_initializer='zeros')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train_scaled, y_train, epochs=5000)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 2502/5000
Epoch 2503/5000
Epoch 2504/5000
Epoch 2505/5000
Epoch 2506/5000
Epoch 2507/5000
Epoch 2508/5000
Epoch 2509/5000
Epoch 2510/5000
Epoch 2511/5000
Epoch 2512/5000
Epoch 2513/5000
Epoch 2514/5000
Epoch 2515/5000
Epoch 2516/5000
Epoch 2517/5000
Epoch 2518/5000
Epoch 2519/5000
Epoch 2520/5000
Epoch 2521/5000
Epoch 2522/5000
Epoch 2523/5000
Epoch 2524/5000
Epoch 2525/5000
Epoch 2526/5000
Epoch 2527/5000
Epoch 2528/5000
Epoch 2529/5000
Epoch 2530/5000
Epoch 2531/5000
Epoch 2532/5000
Epoch 2533/5000
Epoch 2534/5000
Epoch 2535/5000
Epoch 2536/5000
Epoch 2537/5000
Epoch 2538/5000
Epoch 2539/5000
Epoch 2540/5000
Epoch 2541/5000
Epoch 2542/5000
Epoch 2543/5000
Epoch 2544/5000
Epoch 2545/5000
Epoch 2546/5000
Epoch 2547/5000
Epoch 2548/5000
Epoch 2549/5000
Epoch 2550/5000
Epoch 2551/5000
Epoch 2552/5000
Epoch 2553/5000
Epoch 2554/5000
Epoch 2555/5000
Epoch 2556/5000
Epoch 2557/5000
Epoch 2558/5000
Epoch 2559/5000
Epoch 2

<keras.callbacks.History at 0x7fdc4989ba90>

Evaluate model on test set

In [7]:
model.evaluate(X_test_scaled, y_test)



[0.35497748851776123, 1.0]

In [8]:
model.predict(X_test_scaled)



array([[0.70548487],
       [0.35569552],
       [0.16827849],
       [0.47801173],
       [0.7260697 ],
       [0.82949835]], dtype=float32)

In [9]:
y_test

2     1
10    0
21    0
11    0
14    1
9     1
Name: bought_insurance, dtype: int64

Extract the model weights and bias

In [10]:
coef, intercept = model.get_weights()

In [12]:
coef, intercept

(array([[5.060867 ],
        [1.4086502]], dtype=float32), array([-2.9137027], dtype=float32))

* w1 = 5.060867 
* w2 = 1.4086502
* bias = -2.9137027

In [13]:
def sigmoid(x):
  import math
  return 1/(1+math.exp(-x))
sigmoid(18)

0.9999999847700205

In [14]:
X_test

Unnamed: 0,age,affordibility
2,47,1
10,18,1
21,26,0
11,28,1
14,49,1
9,61,1


Create a function that use w1,w2,bias to predict

In [15]:
def prediction_function(age, affordibility):
  weighted_sum = coef[0]*age + coef[1]*affordibility + intercept
  return sigmoid(weighted_sum)

prediction_function(.47, 1)

0.7054848693136117

In [16]:
prediction_function(.18, 1)

0.35569549781937626

Now lets implement this whole process in plain python, implement gradient descent in plain python

In [24]:
def sigmoid_numpy(x):
  return 1/(1+np.exp(-x))

sigmoid_numpy(np.array([12,0,1]))

array([0.99999386, 0.5       , 0.73105858])

In [25]:
def log_loss(y_true, y_predicted):
  epsilon = 1e-15
  y_predicted_new = [max(i,epsilon) for i in y_predicted]
  y_predicted_new = [min(i,1-epsilon) for i in y_predicted_new]
  y_predicted_new = np.array(y_predicted_new)
  return -np.mean(y_true*np.log(y_predicted_new)+(1-y_true)*np.log(1-y_predicted_new))

Implement final gradient descent fuction !!

In [26]:
def gradient_descent(age, affordability, y_true, epochs, loss_thresold):
  w1 = w2 = 1
  bias = 0
  rate = 0.5
  n = len(age)
  for i in range(epochs):
    weighted_sum = w1*age + w2*affordability +bias
    y_predicted = sigmoid_numpy(weighted_sum)
    loss = log_loss(y_true, y_predicted)

    w1d = (1/n)*np.dot(np.transpose(age), (y_predicted-y_true))
    w2d = (1/2)*np.dot(np.transpose(affordability), (y_predicted-y_true))
    bias_d = np.mean(y_predicted-y_true)
    
    w1 = w1 - rate * w1d
    w2 = w2 - rate * w2d
    bias = bias - rate * bias_d

    print(f'Epoch:{i}, w1:{w1}, w2:{w2}, bias:{bias}, loss:{loss}')

    if loss <= loss_thresold:
      break
  return w1, w2, bias

In [27]:
gradient_descent(X_train_scaled['age'], X_train_scaled['affordibility'], y_train, 1000, 0.4631)

Epoch:0, w1:0.974907633470177, w2:0.43182937933981824, bias:-0.11341867736368583, loss:0.7113403233723417
Epoch:1, w1:0.969070594468252, w2:0.3200774531166949, bias:-0.1799334225653932, loss:0.6534390078056765
Epoch:2, w1:0.9697849143231443, w2:0.35056025580635153, bias:-0.23044812195130218, loss:0.6447773990531526
Epoch:3, w1:0.971928580756371, w2:0.3970296719456438, bias:-0.27724984980607453, loss:0.6396461069675138
Epoch:4, w1:0.9748032265335191, w2:0.44303674596280973, bias:-0.32201822578186945, loss:0.6349590091927433
Epoch:5, w1:0.9783130792184367, w2:0.48705667514633644, bias:-0.3649922789161152, loss:0.6306360198859413
Epoch:6, w1:0.9824222237082535, w2:0.529030628471594, bias:-0.40626765254340075, loss:0.6266446059268017
Epoch:7, w1:0.9871011530862719, w2:0.569037409121225, bias:-0.4459242812960229, loss:0.6229551809138231
Epoch:8, w1:0.9923217761486197, w2:0.6071666988905381, bias:-0.4840383012881856, loss:0.6195405758967644
Epoch:9, w1:0.9980570084009576, w2:0.64350666996493

(5.0401920812891365, 1.4683995019869318, -2.962776532468285)

In [28]:
coef, intercept

(array([[5.060867 ],
        [1.4086502]], dtype=float32), array([-2.9137027], dtype=float32))

So we can see the our plain python gradient function returned similar w1,w2 and bias values compare to tf weights and bias