In [60]:
"""
Manual realisation of a logistic regression method
tuned by gradient descent with and without regularisation.
"""


import pandas as pd
import numpy as np
import math

from sklearn.metrics import roc_auc_score

In [100]:
data = pd.read_csv('D:/Work/Data_files/working_dir/data-logistic.csv',
                   header=None, names=['y', 'x_1', 'x_2'])
data.head()

Unnamed: 0,y,x_1,x_2
0,-1,-0.663827,-0.138526
1,1,1.994596,2.468025
2,-1,-1.247395,0.749425
3,1,2.309374,1.899836
4,1,0.849143,2.40775


In [101]:
# derivatives by weights coefficients
def derivative_w_1(y, x_1, x_2, w_1, w_2, C):
    return -(np.mean(y*x_1 * (1 - 1 / (1 + np.exp(
            -y * (x_1*w_1 + x_2*w_2))))) - C*w_1)
def derivative_w_2(y, x_1, x_2, w_1, w_2, C):
    return -(np.mean(y*x_2 * (1 - 1 / (1 + np.exp(
            -y * (x_1*w_1 + x_2*w_2))))) - C*w_2)

# classification confidence
def confidence(x_1,x_2,w_1,w_2):
    return 1/(1 + np.exp(-w_1*x_1 - w_2*x_2))

# initial weights coefficients
w_1 = 1
w_2 = 1
# descending step
k = 0.05
# reguralization constant 
C = 0

converge_treshhold = 1e-5

max_iterations = 10000

In [102]:
current_weights = np.array((w_1, w_2))
x_1 = data.x_1
x_2 = data.x_2
y = data.y
for i in range(max_iterations):
    w_1 = w_1 - k * derivative_w_1(y, x_1, x_2, w_1, w_2, C)
    w_2 = w_2 - k * derivative_w_2(y, x_1, x_2, w_1, w_2, C)    
    previous_weights = current_weights
    current_weights = np.array((w_1, w_2))
    dist = np.linalg.norm(current_weights - previous_weights)
    if(dist < converge_treshhold):
        print('Convergence!', i)
        break
print(w_1, w_2)

Convergence! 405
0.28750237860466965 0.09226567726626701


In [103]:
data['y_pred'] = confidence(x_1,x_2,w_1,w_2)
data.head()

Unnamed: 0,y,x_1,x_2,y_pred
0,-1,-0.663827,-0.138526,0.449267
1,1,1.994596,2.468025,0.690224
2,-1,-1.247395,0.749425,0.42813
3,1,2.309374,1.899836,0.698305
4,1,0.849143,2.40775,0.614504


In [59]:
score = roc_auc_score(data.y, data.y_pred)
score

0.9362857142857142

In [65]:
score = roc_auc_score(data.y, data.y_pred)
score

0.9268571428571428

In [104]:
score = roc_auc_score(data.y, data.y_pred)
score

0.9268571428571428