In [1]:
import numpy as np
import pandas as pd
import math

In [2]:
# Load the insurance dataset from a CSV in the working directory and preview its first rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which suggests the CSV was
# written with its index; consider read_csv(..., index_col=0) — confirm against the file.
df = pd.read_csv("insurance.csv")
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df[['age', 'affordibility']],
    df['bought_insurance'],
    test_size=0.2,
    random_state=25,
)

In [4]:
# Bring age onto roughly a 0-1 scale by dividing by 100; `assign` returns a new
# frame, so the unscaled X_train / X_test are left untouched.
X_train_scaled = X_train.assign(age=X_train['age'] / 100)
X_test_scaled = X_test.assign(age=X_test['age'] / 100)

In [5]:
# Inspect the held-out test labels.
y_test

2     1
10    0
21    0
11    0
14    1
9     1
Name: bought_insurance, dtype: int64

In [6]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a scalar.

    Args:
        x: A real number.

    Returns:
        A float in ``[0, 1]``.

    Very negative inputs (roughly ``x < -709``) make ``math.exp(-x)``
    overflow. The sigmoid's limit there is 0, so 0.0 is returned instead of
    letting ``OverflowError`` propagate. All other inputs are computed
    exactly as before.
    """
    try:
        return 1 / (1 + math.exp(-x))
    except OverflowError:
        # exp(-x) exceeded the float range, so 1 / (1 + exp(-x)) is effectively 0.
        return 0.0

# Sanity check: a large positive input should give a value very close to 1.
sigmoid(18)

0.9999999847700205

In [7]:
def prediction_function(age, affordibility, w1, w2, b):
    """Score a single sample with the two-feature logistic model.

    Forms the linear combination ``w1*age + w2*affordibility + b`` and
    passes it through ``sigmoid``.

    Args:
        age: Age feature value.
        affordibility: Affordability feature value.
        w1: Weight applied to ``age``.
        w2: Weight applied to ``affordibility``.
        b: Bias term.

    Returns:
        The sigmoid of the weighted sum.
    """
    feature_terms = w1 * age + w2 * affordibility
    return sigmoid(feature_terms + b)

In [8]:
def sigmoid_numpy(X):
    """Element-wise logistic sigmoid for NumPy-compatible input.

    Args:
        X: A scalar or array-like that supports unary minus and ``np.exp``.

    Returns:
        ``1 / (1 + exp(-X))`` evaluated element-wise.
    """
    denominator = 1 + np.exp(-X)
    return 1 / denominator

# Quick check on array input: expect ~1 for 12, exactly 0.5 for 0, and ~0.73 for 1.
sigmoid_numpy(np.array([12,0,1]))

array([0.99999386, 0.5       , 0.73105858])

In [9]:
def log_loss(y_true, y_predicted):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Args:
        y_true: Array-like of 0/1 labels (list, NumPy array or pandas Series).
        y_predicted: Array-like of predicted probabilities, matched to
            ``y_true`` by position.

    Returns:
        The average of ``-(y*log(p) + (1-y)*log(1-p))`` over all samples.
    """
    epsilon = 1e-15
    # Work on positional float arrays so lists and Series with any index behave alike.
    labels = np.asarray(y_true, dtype=float)
    # Keep probabilities strictly inside (0, 1) so neither log() call sees 0.
    probs = np.clip(np.asarray(y_predicted, dtype=float), epsilon, 1 - epsilon)

    return -np.mean(labels * np.log(probs) + (1 - labels) * np.log(1 - probs))

In [17]:
def gradient_descent(age, affordability, y_true, epochs, loss_thresold, rate=0.5):
    """Fit a two-feature logistic regression with batch gradient descent.

    Starts from ``w1 = w2 = 1`` and ``bias = 0`` and repeatedly steps against
    the gradient of the mean log loss.

    Args:
        age: Values of the first feature, one per sample.
        affordability: Values of the second feature, one per sample.
        y_true: 0/1 labels aligned with the features.
        epochs: Maximum number of update steps.
        loss_thresold: Stop early once the log loss is at or below this
            value (parameter name kept as spelled for existing callers).
        rate: Learning rate. Defaults to 0.5, the value that used to be
            hard-coded.

    Returns:
        Tuple ``(w1, w2, bias)`` with the fitted parameters.
    """
    w1 = w2 = 1
    bias = 0
    n = len(age)

    for _ in range(epochs):
        weighted_sum = w1 * age + w2 * affordability + bias
        y_predicted = sigmoid_numpy(weighted_sum)
        loss = log_loss(y_true, y_predicted)

        # For sigmoid + log loss the per-sample gradient w.r.t. the weighted sum
        # is (prediction - label); every parameter gradient reuses it.
        error = y_predicted - y_true

        w1d = (1/n) * np.dot(np.transpose(age), error)
        w2d = (1/n) * np.dot(np.transpose(affordability), error)
        bias_d = np.mean(error)

        w1 = w1 - rate * w1d
        w2 = w2 - rate * w2d
        bias = bias - rate * bias_d

        # `loss` was measured before this epoch's update, so on early stop the
        # returned parameters include one further step.
        if loss <= loss_thresold:
            break

    return w1, w2, bias

In [18]:
# Fit on the scaled training data: at most 1000 epochs, stopping early once the loss is <= 0.4631.
w1 , w2 , bias = gradient_descent(X_train_scaled['age'] , X_train_scaled['affordibility'] , y_train , 1000 , 0.4631)

In [19]:
# Show the learned weights and bias.
w1 , w2 , bias

(5.051047623653049, 1.4569794548473887, -2.9596534546250037)

In [13]:
# Model output for a single sample: age 0.52 (on the same /100 scale used for training) and affordibility 0.
prediction_function(0.52 , 0 , w1 , w2 , bias)

0.4174844237229004