### Binary classification with the logistic function, using gradient descent to obtain the weights

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
    
# The CSV is read with explicit column names: the label first, then two features
col_names = ['Target_variable', 'x_1', 'x_2']
df = pd.read_csv('data-logistic.csv', names=col_names)
# Labels
y = df['Target_variable']
# Feature matrix: every column except the label
X = df.drop(columns='Target_variable')

In [2]:
def grad_descent(X, y, init_vec, c_regul, grad_step=0.1, tol=1e-5, max_iter=10000):
    """
    Gradient descent for two-feature logistic regression.

    On every iteration both weights are updated simultaneously as

        w_j <- w_j - k*C*w_j + k/l * sum_i(y_i * x_ij * (1 - 1/(1 + exp(-y_i*(w_1*x_i1 + w_2*x_i2)))))

    where k is ``grad_step``, C is ``c_regul`` and l is the number of samples.
    Iteration stops when the Euclidean distance between two consecutive
    weight vectors is at most ``tol`` or after ``max_iter`` iterations.

    Parameters
    ----------
    X : table indexable by the column names 'x_1' and 'x_2'
        (e.g. a pandas DataFrame) holding the two features.
    y : labels aligned with the rows of X.
        NOTE(review): the margin form y * (w . x) presumably expects labels
        encoded as -1 / +1 -- confirm against the data.
    init_vec : sequence of two numbers, the starting values (w_1, w_2).
    c_regul : regularization coefficient C (0 disables regularization).
    grad_step : step size k of the descent.
    tol : threshold on the length of the weight step used as stop criterion.
    max_iter : maximum number of iterations.

    Returns
    -------
    (w_1, w_2) : the weights after the last performed iteration.
    """
    w_1, w_2 = init_vec[0], init_vec[1]
    # The feature columns never change, so look them up once
    x_1 = X['x_1']
    x_2 = X['x_2']
    # Number of samples
    n_samples = len(X)
    for _ in range(int(max_iter)):
        # 1 - sigmoid(y * score) depends only on the current weights,
        # so it is shared by both coordinate updates
        resid = 1 - 1/(1 + np.exp(-y*(w_1*x_1 + w_2*x_2)))
        w_1_new = w_1 - grad_step*c_regul*w_1 + grad_step/n_samples*np.sum(y*x_1*resid)
        w_2_new = w_2 - grad_step*c_regul*w_2 + grad_step/n_samples*np.sum(y*x_2*resid)
        # Converged only when the whole weight vector has stopped moving;
        # testing each coordinate separately would stop as soon as a single
        # weight stalls while the other one is still changing
        step_len = np.linalg.norm([w_1_new - w_1, w_2_new - w_2])
        w_1, w_2 = w_1_new, w_2_new
        if step_len <= tol:
            break

    return w_1, w_2

In [3]:
# Regularization coefficient C (C = 0 means no regularization)
c_regul = 10
# Starting point of the descent
init_vec = [0, 0]
# Fit the weights twice: first with regularization, then without it
w_1_reg, w_2_reg = grad_descent(X, y, init_vec, c_regul)
w_1_noreg, w_2_noreg = grad_descent(X, y, init_vec, 0)
# Pass the linear scores through the sigmoid to get conditional probabilities
probs_reg = 1/(1 + np.exp(-(w_1_reg*X['x_1'] + w_2_reg*X['x_2'])))
probs_noreg = 1/(1 + np.exp(-(w_1_noreg*X['x_1'] + w_2_noreg*X['x_2'])))

In [4]:
# AUC-ROC of the prediction scores, with and without regularization
roc_reg = roc_auc_score(y_true=y, y_score=probs_reg)
roc_noreg = roc_auc_score(y_true=y, y_score=probs_noreg)

In [6]:
# # Save AUC-ROC scores to file
# txt_file = open('answer_logistic.txt', 'w')
# txt_file.write('%g %g' % (np.round(roc_noreg, 3), (np.round(roc_reg, 3))))
# txt_file.close()