In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [2]:
def load_data():
    """Load the Social Network Ads CSV and return a model-ready DataFrame.

    Reads ``Social_Network_Ads.csv`` from the current working directory,
    one-hot encodes ``Gender``, drops ``User ID``, z-score standardises the
    remaining feature columns and appends the ``Purchased`` label.

    Returns:
        pandas.DataFrame: one column per ``Gender`` category (named after the
        category value), followed by the standardised feature columns and
        finally ``Purchased``.
    """
    raw = pd.read_csv('Social_Network_Ads.csv')

    # One-hot encode Gender. Column names are taken from the categories the
    # encoder actually learned, so a label can never end up on the wrong column.
    encoder = OneHotEncoder(handle_unknown='ignore')
    encoded = encoder.fit_transform(raw[['Gender']]).toarray()
    encoder_df = pd.DataFrame(encoded, columns=encoder.categories_[0], index=raw.index)

    purchased = raw['Purchased']
    # Single non-mutating drop: `raw` stays intact and re-running is idempotent.
    features = raw.drop(columns=['User ID', 'Gender', 'Purchased'])
    # NOTE(review): mean/std are computed over the full dataset, i.e. before the
    # train/test split performed by the caller; kept to preserve behaviour.
    features = (features - features.mean()) / features.std()

    final_df = pd.merge(encoder_df, features, left_index=True, right_index=True)
    final_df = pd.merge(final_df, purchased, left_index=True, right_index=True)

    return final_df


# Feature columns fed to the model, in the order the weight vector uses them.
FEATURE_COLUMNS = ['EstimatedSalary', 'Female', 'Male', 'Age']

# Hold out 5% of the rows for testing; the fixed seed keeps the split reproducible.
training_data, testing_data = train_test_split(load_data(), test_size=0.05, random_state=25)
x_train = training_data[FEATURE_COLUMNS].to_numpy()
y_train = training_data['Purchased'].to_numpy()
x_test = testing_data[FEATURE_COLUMNS].to_numpy()
# Single-bracket selection yields a 1-D array, matching y_train. A (n, 1)
# label array would silently broadcast to (n, n) when compared to predictions.
y_test = testing_data['Purchased'].to_numpy()

In [3]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    Uses a numerically stable formulation: only ``exp(-|z|)`` is evaluated,
    which lies in (0, 1], so large-magnitude inputs cannot overflow.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy float scalar for scalar input, otherwise an array with the
        same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically equal.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar, leaves arrays as-is.
    return out[()]

In [4]:
def gradient(x, y, w, b):
    """Compute the parameter gradients used by ``gradient_descent``.

    Args:
        x: 2-D array of samples, one row per sample.
        y: Labels, one per row of ``x``.
        w: 1-D weight vector with one entry per column of ``x``.
        b: Scalar bias.

    Returns:
        dict: ``{'db': scalar, 'dw': 1-D array shaped like w}`` holding
        ``2/m * sum(error)`` and ``2/m * sum(error_i * x_i)``, where
        ``error = sigmoid(x @ w + b) - y`` and ``m`` is the number of rows.

    Raises:
        ValueError: if ``x`` has no rows (the mean would be undefined).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1)
    m = x.shape[0]
    if m == 0:
        raise ValueError("gradient() requires at least one sample")

    # One matrix-vector product replaces the per-sample Python loop.
    error = sigmoid(x @ w + b) - y
    # NOTE(review): the factor 2 makes this twice the mean log-loss gradient
    # (x.T @ error / m); it only rescales the step size and is kept so that
    # existing alpha / num_iter settings reproduce the same fit.
    return {'db': 2 * error.sum() / m, 'dw': 2 * (x.T @ error) / m}

In [5]:
def gradient_descent(x, y, w, b, alpha, num_iter):
    """Run ``num_iter`` fixed-step gradient-descent updates.

    Args:
        x: Sample matrix passed through to ``gradient``.
        y: Labels passed through to ``gradient``.
        w: Initial weights.
        b: Initial bias.
        alpha: Step size applied to every update.
        num_iter: Number of update steps to perform.

    Returns:
        dict: ``{'b': final bias, 'w': final weights}``.
    """
    weights, bias = w, b
    for _ in range(num_iter):
        step = gradient(x, y, weights, bias)
        # Both updates use gradients evaluated at the same (weights, bias).
        weights = weights - alpha * step['dw']
        bias = bias - alpha * step['db']
    return {'b': bias, 'w': weights}

In [6]:
# Start from an all-zero model (one weight per feature column) and fit it
# with plain gradient descent.
w, b = np.zeros(x_train.shape[1]), 0
alpha, num_iter = 0.1, 1000
parameters = gradient_descent(x_train, y_train, w, b, alpha, num_iter)
print("Weights: ", parameters['w'], '\n\nBias', parameters['b'])

Weights:  [ 1.27614151 -0.62459574 -0.14640449  2.42967335] 

Bias -0.771000234908711


In [7]:
from sklearn.linear_model import LogisticRegression 

# Reference fit with scikit-learn, used to sanity-check the hand-written model.
# fit() returns the estimator itself, so construction and fitting can be chained.
model = LogisticRegression(solver='liblinear').fit(x_train, y_train)
y_pred = model.predict(x_test)
print('Weights: ', model.coef_)
print('Bias: ', model.intercept_)

Weights:  [[ 1.17368562 -0.56570103 -0.15347086  2.23227701]]
Bias:  [-0.71917189]


In [8]:
def predict(x, threshold=0.5, params=None):
    """Classify each row of ``x`` as 0 or 1 with the logistic model.

    Args:
        x: 2-D array of samples, one row per sample.
        threshold: Probability at or above which a sample is labelled 1.
        params: Optional dict with keys ``'w'`` (weights) and ``'b'`` (bias).
            When omitted, the module-level ``parameters`` dict is used, which
            keeps existing ``predict(x, threshold)`` calls working unchanged.

    Returns:
        1-D float array with one 0.0 / 1.0 entry per row of ``x``.
    """
    if params is None:
        params = parameters
    # Score every row in one matrix-vector product instead of a Python loop.
    probabilities = sigmoid(x @ params['w'] + params['b'])
    return (probabilities >= threshold).astype(float)

In [9]:
model_pred = model.predict(x_test)
my_pred = predict(x_test, 0.5)
# Print, per test row: true label, hand-written prediction, scikit-learn prediction.
for actual, mine, reference in zip(y_test, my_pred, model_pred):
    print('Y value: ', actual)
    print("My implementation: ", mine)
    print("skLearn: ", reference)
    print()

Y value:  [0]
My implementation:  0.0
skLearn:  0

Y value:  [0]
My implementation:  0.0
skLearn:  0

Y value:  [0]
My implementation:  0.0
skLearn:  0

Y value:  [1]
My implementation:  0.0
skLearn:  0

Y value:  [1]
My implementation:  1.0
skLearn:  1

Y value:  [0]
My implementation:  0.0
skLearn:  0

Y value:  [0]
My implementation:  0.0
skLearn:  0

Y value:  [1]
My implementation:  1.0
skLearn:  1

Y value:  [1]
My implementation:  1.0
skLearn:  1

Y value:  [1]
My implementation:  0.0
skLearn:  0

Y value:  [0]
My implementation:  0.0
skLearn:  0

Y value:  [0]
My implementation:  0.0
skLearn:  0

Y value:  [0]
My implementation:  0.0
skLearn:  0

Y value:  [0]
My implementation:  0.0
skLearn:  0

Y value:  [0]
My implementation:  0.0
skLearn:  0

Y value:  [0]
My implementation:  1.0
skLearn:  1

Y value:  [0]
My implementation:  0.0
skLearn:  0

Y value:  [1]
My implementation:  1.0
skLearn:  1

Y value:  [0]
My implementation:  0.0
skLearn:  0

Y value:  [0]
My implementation