In [3]:
# Implementation of gradient descent for a ANN

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras 

In [4]:
df = pd.read_csv('insurance_file.csv')
df.head()

Unnamed: 0,age,education,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [5]:
X = df[['age', 'education']]
y = df['bought_insurance']

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=25)

In [7]:
X_train['age'] = X_train['age']/100
X_test['age'] = X_test['age']/100

In [8]:
X_train.head()

Unnamed: 0,age,education
0,0.22,1
13,0.29,0
6,0.55,0
17,0.58,1
24,0.5,1


In [9]:
X_train.iloc[0].shape

(2,)

In [34]:
model = keras.Sequential([
                          keras.layers.Dense(1, 
                                             input_shape=(2,),
                                             activation='sigmoid',
                                             kernel_initializer = 'ones',
                                             bias_initializer = 'zeros')
])

In [35]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics = ['accuracy'])

model.fit(X_train, y_train, epochs = 3000)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 502/3000
Epoch 503/3000
Epoch 504/3000
Epoch 505/3000
Epoch 506/3000
Epoch 507/3000
Epoch 508/3000
Epoch 509/3000
Epoch 510/3000
Epoch 511/3000
Epoch 512/3000
Epoch 513/3000
Epoch 514/3000
Epoch 515/3000
Epoch 516/3000
Epoch 517/3000
Epoch 518/3000
Epoch 519/3000
Epoch 520/3000
Epoch 521/3000
Epoch 522/3000
Epoch 523/3000
Epoch 524/3000
Epoch 525/3000
Epoch 526/3000
Epoch 527/3000
Epoch 528/3000
Epoch 529/3000
Epoch 530/3000
Epoch 531/3000
Epoch 532/3000
Epoch 533/3000
Epoch 534/3000
Epoch 535/3000
Epoch 536/3000
Epoch 537/3000
Epoch 538/3000
Epoch 539/3000
Epoch 540/3000
Epoch 541/3000
Epoch 542/3000
Epoch 543/3000
Epoch 544/3000
Epoch 545/3000
Epoch 546/3000
Epoch 547/3000
Epoch 548/3000
Epoch 549/3000
Epoch 550/3000
Epoch 551/3000
Epoch 552/3000
Epoch 553/3000
Epoch 554/3000
Epoch 555/3000
Epoch 556/3000
Epoch 557/3000
Epoch 558/3000
Epoch 559/3000
Epoch 560/3000
Epoch 561/3000
Epoch 562/3000
Epoch 563/3000
Epoch

<keras.callbacks.History at 0x7f4f26a22650>

In [36]:
model.predict(X_test)

array([[0.6770958 ],
       [0.45932925],
       [0.2555207 ],
       [0.53705907],
       [0.6905662 ],
       [0.7643413 ]], dtype=float32)

In [37]:
y_test

2     1
10    0
21    0
11    0
14    1
9     1
Name: bought_insurance, dtype: int64

In [38]:
model.get_weights()

[array([[3.1155188],
        [1.1555799]], dtype=float32), array([-1.8794166], dtype=float32)]

In [10]:
import math 

In [40]:
def sigmoid(x):
  return 1/(1 + math.exp(-x))

In [84]:
sigmoid(25)

0.999999999986112

In [78]:
sigmoid(np.array([-5, 5, 8]))

TypeError: ignored

In [11]:
def sigmoid_np(x):
  return 1/(1 + np.exp(-x))

In [12]:
sigmoid_np(np.array([-5, 5, 8]))

array([0.00669285, 0.99330715, 0.99966465])

In [13]:
def loss_function(y_true, y_predicted):
  #epsilon = 0.00000000000001
  #y_predicted_new = [max(y, epsilon) for y in y_predicted]
  #y_predicted_new = [min(y, 1-epsilon) for y in y_predicted]
  y_predicted_new = np.array(y_predicted)
  return  -np.mean(y_true*np.log(y_predicted_new) + (1-y_true)*np.log(1-y_predicted_new))

In [14]:
def gradient_descent(age, education, y_true, epochs, loss_threshold):
  w1=1
  w2=1
  b=0
  learning_rate = 0.5
  n = len(age)

  for i in range(epochs):
    weighted_sum = w1 * age + w2 * education + b
    y_predicted = sigmoid_np(weighted_sum)

    loss = loss_function(y_true, y_predicted)

    w1_d = (1/n) * np.dot(age, y_predicted - y_true)
    w2_d = (1/n) * np.dot(education, y_predicted - y_true)
    b_d = np.mean(y_predicted - y_true)

    w1 = w1 - learning_rate * w1_d
    w2 = w2 - learning_rate * w2_d
    b = b - learning_rate * b_d

    print(f'Epoch:{i+1}, w1:{w1}, w1:{w2}, bias:{b}, loss:{loss}')

    if loss < loss_threshold:
      break

  return w1, w2, b

In [15]:
gradient_descent(X_train['age'], X_train['education'], y_train, 1000, 0.5142)

Epoch:1, w1:0.974907633470177, w1:0.948348125394529, bias:-0.11341867736368583, loss:0.7113403233723417
Epoch:2, w1:0.9556229728273669, w1:0.9058873696677865, bias:-0.2122349122718517, loss:0.6812647787377568
Epoch:3, w1:0.9416488476693794, w1:0.8719790823960313, bias:-0.29775789977965383, loss:0.6591474252715025
Epoch:4, w1:0.9323916996249162, w1:0.8457541517722915, bias:-0.37150947240035115, loss:0.6431523291301916
Epoch:5, w1:0.9272267472726993, w1:0.8262362885332687, bias:-0.4350664302689159, loss:0.6316873063379158
Epoch:6, w1:0.9255469396815343, w1:0.8124402814952774, bias:-0.4899449005893882, loss:0.6234717079975919
Epoch:7, w1:0.9267936114129968, w1:0.8034375029757677, bias:-0.5375299543522855, loss:0.6175321183044205
Epoch:8, w1:0.93047170420295, w1:0.7983920007454487, bias:-0.5790424270894964, loss:0.6131591858705934
Epoch:9, w1:0.9361540784567943, w1:0.7965748796787705, bias:-0.6155315088627656, loss:0.6098518179750948
Epoch:10, w1:0.9434791243557358, w1:0.7973647616854131, 

Epoch:86, w1:1.9958897656796382, w1:1.2551984708420285, bias:-1.5464795429674643, loss:0.5466386828110088
Epoch:87, w1:2.0096769928183367, w1:1.2581405625993878, bias:-1.554409536192387, loss:0.5461130291389041
Epoch:88, w1:2.023445915002755, w1:1.2610290989843815, bias:-1.5622915600184968, loss:0.545590563023641
Epoch:89, w1:2.037196382383489, w1:1.2638651346193672, bias:-1.5701263314476384, loss:0.5450712351014357
Epoch:90, w1:2.0509282490633733, w1:1.2666497026227028, bias:-1.577914553678509, loss:0.5445549975424178
Epoch:91, w1:2.0646413730163813, w1:1.2693838150830055, bias:-1.585656916406187, loss:0.544041803992791
Epoch:92, w1:2.0783356160084976, w1:1.2720684635220503, bias:-1.5933540961140944, loss:0.5435316095193146
Epoch:93, w1:2.092010843520487, w1:1.2747046193465692, bias:-1.6010067563586592, loss:0.5430243705560022
Epoch:94, w1:2.1056669246724873, w1:1.2772932342892063, bias:-1.6086155480469115, loss:0.5425200448529465
Epoch:95, w1:2.1193037321503643, w1:1.2798352408388918

(2.9727204463094963, 1.3786714546948806, -2.0437563101772027)

In [16]:
model.get_weights()

NameError: name 'model' is not defined

In [64]:
# Implementation of an ANN
# we'll write a neural network class

In [65]:
# my_model = myNN()
# my_model.fit(X_train, y_train, epochs, loss_threshold)
# my_model.predict(X_test)

In [17]:
class myNN:
  def __init__(self):   # this initializes the object
    self.w1 = 1
    self.w2 = 1
    self.b = 0 

  def fit(self, X, y, epochs, loss_threshold):
    self.w1, self.w2, self.b = self.gradient_descent(X['age'], X['education'], y, epochs, loss_threshold)
  
  def predict(self, X):
    weighted_sum = self.w1 * X['age'] + self.w2 * X['education'] + self.b
    y_predicted = sigmoid_np(weighted_sum)
    return y_predicted

  def gradient_descent(self, age, education, y_true, epochs, loss_threshold):
    w1=1
    w2=1
    b=0
    learning_rate = 0.5
    n = len(age)

    for i in range(epochs):
      weighted_sum = w1 * age + w2 * education + b
      y_predicted = sigmoid_np(weighted_sum)

      loss = loss_function(y_true, y_predicted)

      w1_d = (1/n) * np.dot(age, y_predicted - y_true)
      w2_d = (1/n) * np.dot(education, y_predicted - y_true)
      b_d = np.mean(y_predicted - y_true)

      w1 = w1 - learning_rate * w1_d
      w2 = w2 - learning_rate * w2_d
      b = b - learning_rate * b_d

      print(f'Epoch:{i+1}, w1:{w1}, w1:{w2}, bias:{b}, loss:{loss}')

      if loss < loss_threshold:
        break

    return w1, w2, b     

In [18]:
my_model = myNN()
my_model.fit(X_train, y_train, 1000, 0.5142)

Epoch:1, w1:0.974907633470177, w1:0.948348125394529, bias:-0.11341867736368583, loss:0.7113403233723417
Epoch:2, w1:0.9556229728273669, w1:0.9058873696677865, bias:-0.2122349122718517, loss:0.6812647787377568
Epoch:3, w1:0.9416488476693794, w1:0.8719790823960313, bias:-0.29775789977965383, loss:0.6591474252715025
Epoch:4, w1:0.9323916996249162, w1:0.8457541517722915, bias:-0.37150947240035115, loss:0.6431523291301916
Epoch:5, w1:0.9272267472726993, w1:0.8262362885332687, bias:-0.4350664302689159, loss:0.6316873063379158
Epoch:6, w1:0.9255469396815343, w1:0.8124402814952774, bias:-0.4899449005893882, loss:0.6234717079975919
Epoch:7, w1:0.9267936114129968, w1:0.8034375029757677, bias:-0.5375299543522855, loss:0.6175321183044205
Epoch:8, w1:0.93047170420295, w1:0.7983920007454487, bias:-0.5790424270894964, loss:0.6131591858705934
Epoch:9, w1:0.9361540784567943, w1:0.7965748796787705, bias:-0.6155315088627656, loss:0.6098518179750948
Epoch:10, w1:0.9434791243557358, w1:0.7973647616854131, 

Epoch:126, w1:2.5319347759000843, w1:1.3398364819448387, bias:-1.832257626074885, loss:0.5277206049775485
Epoch:127, w1:2.5449056381258233, w1:1.341283318725866, bias:-1.8387149090063277, loss:0.5272950372305654
Epoch:128, w1:2.557854627386187, w1:1.3427058189774277, bias:-1.8451445333214114, loss:0.5268714686283814
Epoch:129, w1:2.570781703472466, w1:1.3441044564685243, bias:-1.851546839835858, loss:0.5264498822599073
Epoch:130, w1:2.583686827957526, w1:1.345479695820328, bias:-1.857922163358077, loss:0.5260302615714415
Epoch:131, w1:2.596569964161304, w1:1.346831992691322, bias:-1.8642708328071762, loss:0.5256125903547197
Epoch:132, w1:2.6094310771169726, w1:1.3481617939583521, bias:-1.8705931713284367, loss:0.5251968527354035
Epoch:133, w1:2.6222701335377554, w1:1.3494695378936876, bias:-1.8768894964063034, loss:0.524783033162003
Epoch:134, w1:2.635087101784378, w1:1.3507556543381913, bias:-1.8831601199749577, loss:0.5243711163952075
Epoch:135, w1:2.647881951833145, w1:1.35202056487

In [89]:
my_model.predict(X_test)

2     0.675265
10    0.467547
21    0.219112
11    0.541722
14    0.688164
9     0.759196
dtype: float64

In [90]:
model.predict(X_test)

array([[0.6770958 ],
       [0.45932925],
       [0.2555207 ],
       [0.53705907],
       [0.6905662 ],
       [0.7643413 ]], dtype=float32)