In [85]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [142]:
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2)
X.shape, y.shape

((1000, 20), (1000,))

In [143]:
np.unique(y)

array([0, 1])

In [144]:
# X = np.hstack([np.ones((1000, 1)), X]) # to add intercept

In [145]:
np.sum(X[:, 0] - X[:, 1])

-52.50911097329066

In [146]:
X.mean(axis=0), X.std(axis=0)

(array([ 0.02240069,  0.0749098 ,  0.02771704,  0.0455583 ,  0.04356899,
        -0.04769052, -0.01488104,  0.00286772, -0.06190779, -0.00249021,
        -0.01456295,  0.04602224, -0.04419029,  0.00108083,  0.00953509,
        -0.01032471, -0.00394473, -0.00468178, -0.00514309, -0.00716861]),
 array([0.98457209, 0.98942275, 1.31175364, 0.9708992 , 0.98411748,
        1.00812872, 0.99365397, 1.03950956, 0.98418997, 0.48008089,
        0.98937215, 1.01898863, 0.98279448, 1.00515069, 1.00061534,
        0.53677791, 0.98917561, 0.99082923, 1.00803632, 1.30506221]))

In [359]:
class Logreg:
    def __init__(self, learning_rate=0.05, iterations=1500, C=1.0):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.C = C
        self.weights = None
        self.intercept = None
        
        # history
        self.iters_list = []
        self.loss_list = []
    
    def fit(self, X, y):
        # number of observations
        n = len(y)
        # number of features
        k = X.shape[1]
        
        # 1. Initialize weights
        self.weights = np.zeros(k)
        self.intercept = 0
        
        for iteration in range(self.iterations):
            # 2 Predict
            z = np.dot(X, self.weights) + self.intercept
            y_hat = 1 / (1 + np.exp(-z))
            # 3 Calculate logloss
            #logloss = np.sum(-y * np.log(y_hat) - (1 - y) * np.log(1 - y_hat))
            logloss = np.sum(np.log(1 + np.exp(- y * y_hat)))
        
            # 4 Derivative by weights
            derivative_weights = (1 / n) * np.dot(X.T, (y_hat - y)) # (a - y) * x
            derivative_intercept = (1 / n) * np.sum(y_hat - y)

            # 5 Update weights
            self.weights -= self.learning_rate * (derivative_weights + self.C * self.weights)
            self.intercept -= self.learning_rate * (derivative_intercept + self.C * self.intercept)
            
            #print('Iteration:', iteration, 'Total logLoss =', logloss)
            
            # history update
            self.iters_list.append(iteration)
            self.loss_list.append(logloss)
            # 6 Repeat
    

    def predict(self, X):
        z = np.dot(X, self.weights) + self.intercept
        pred = 1 / (1 + np.exp(-z))
        return np.array([1 if i > 0.5 else 0 for i in pred])
    
    def predict_proba(self, X):
        z = np.dot(X, self.weights) + self.intercept
        return 1 / (1 + np.exp(-z))


In [360]:
#weights = np.array([2, 3, 1])
#X = np.matrix([[1, 2, 3], [2, 2, 2], [3, 2, 3], [1, 1, 1]])

In [361]:
#np.exp(np.sum(np.dot(X, weights)))

In [362]:
logreg = Logreg(C=1.0)

In [363]:
logreg.fit(X, y)

In [364]:
mine_pred = logreg.predict(X)

In [365]:
np.unique(mine_pred)

array([0, 1])

In [366]:
logreg.loss_list[:4]

[583.612082370026, 582.3290667296554, 581.1549558701205, 580.0810523535189]

In [367]:
logreg.loss_list[-1]

567.9209253850593

In [368]:
logreg.loss_list[-4:]

[567.9209253850593, 567.9209253850593, 567.9209253850593, 567.9209253850593]

## Sklearn

In [335]:
from sklearn.linear_model import LogisticRegression

In [336]:
lr = LogisticRegression(C=1.0, solver='liblinear')

In [337]:
lr.fit(X, y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

In [338]:
lr.intercept_

array([-0.0029074])

In [339]:
lr.coef_

array([[-0.00459575,  0.05368594,  2.39309371,  0.03526541,  0.03582772,
         0.00865987,  0.00280306,  0.05917152,  0.02741336, -0.71499805,
        -0.1303241 ,  0.04506172, -0.06591757,  0.04710635, -0.18238228,
        -1.08510829, -0.03694172,  0.04219021,  0.14303208,  0.81396213]])

In [340]:
pred = lr.predict(X)

In [341]:
y.shape, pred.shape

((1000,), (1000,))

In [342]:
np.unique(y), np.unique(pred)

(array([0, 1]), array([0, 1]))

## Comparison: Mine VS Sklearn

In [343]:
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix

In [344]:
accuracy_score(y, pred), roc_auc_score(y, pred)

(0.89, 0.8900000000000001)

In [345]:
accuracy_score(y, mine_pred), roc_auc_score(y, mine_pred)

(0.865, 0.8650000000000001)

In [346]:
logreg.weights

array([-0.00851758,  0.00754219,  0.31217994, -0.00863851,  0.01033981,
        0.00548936,  0.01148325,  0.00618452, -0.01834792, -0.06318905,
       -0.00081821, -0.00557393, -0.01443565,  0.02149461, -0.00706316,
       -0.12990032, -0.01447115,  0.00861049,  0.00521226,  0.01996927])