In [5]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression as SKLogisticRegression
from sklearn.metrics import accuracy_score
from scipy.special import expit

In [2]:
# load the data (comma-separated numeric table, one sample per row)
data = np.loadtxt('../Data/Cov_Type/covtype.data', delimiter=',')

# split the data into features and labels (the label is the last column)
X = data[:, :-1]
y = data[:, -1]

# min-max normalize every feature column into [0, 1]
# NOTE: the min/max statistics are taken over the full dataset, i.e. before
# the train/test split performed in a later cell.
feature_min = X.min(axis=0)
feature_range = X.max(axis=0) - feature_min
# a constant column has zero range; dividing by it would yield 0/0 = NaN, so
# divide by 1 instead, which maps the whole column to 0
feature_range[feature_range == 0] = 1
X = (X - feature_min) / feature_range

In [3]:
# hold out 20% of the samples for testing; the fixed seed makes the split (and
# every score computed from it) reproducible across kernel restarts
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, test_size=0.2, random_state=42
)

In [8]:
%%time

# Baseline: scikit-learn logistic regression with the liblinear solver;
# every other hyper-parameter keeps its default value.
lr_sk = SKLogisticRegression(solver='liblinear')
lr_sk.fit(X_train, y_train)

# Score the fitted baseline on the held-out split.
yhat = lr_sk.predict(X_test)
sk_accuracy = accuracy_score(y_test, yhat)
print('Accuracy of: ', sk_accuracy)

Accuracy of:  0.7156613856785109
CPU times: user 55.4 s, sys: 573 ms, total: 56 s
Wall time: 55.7 s


In [None]:
class LogisticRegressionSolver:
    """One-vs-all logistic regression trained with full-batch gradient ascent.

    For every unique label a separate binary classifier ("this class" vs.
    "everything else") is fitted; prediction picks the class whose binary
    classifier reports the highest probability.

    Parameters
    ----------
    eta : float
        Learning rate applied to every gradient step.
    iterations : int, default=20
        Number of full-batch gradient steps taken per binary classifier.

    Attributes set by ``fit``
    -------------------------
    unique_ : ndarray
        Sorted unique class labels found in ``y``.
    class_w_ : list of ndarray
        One ``(num_features + 1, 1)`` weight column per class, bias first.
    w_ : ndarray, shape (num_classes, num_features + 1)
        All class weights stacked row-wise (row ``i`` belongs to ``unique_[i]``).
    """

    # private:
    def __init__(self, eta, iterations=20):
        self.eta = eta
        self.iters = iterations
        # internally we will store the weights as self.w_ to keep with sklearn conventions

    def __str__(self):
        if hasattr(self, 'w_'):
            # w_ only exists once the object has been trained
            return 'Binary Logistic Regression Object with coefficients:\n' + str(self.w_)
        return 'Untrained Binary Logistic Regression Object'

    # convenience, private and static:
    @staticmethod
    def _sigmoid(theta):
        """Numerically stable logistic function, 1 / (1 + exp(-theta))."""
        return expit(theta)

    @staticmethod
    def _add_bias(X):
        """Return ``X`` with a leading column of ones (the bias term)."""
        return np.hstack((np.ones((X.shape[0], 1)), X))

    def _get_gradient(self, X, y, index=-1):
        """Mean log-likelihood gradient for one binary classifier.

        Parameters
        ----------
        X : ndarray, shape (n_samples, num_features + 1)
            Design matrix that already contains the bias column.
        y : array-like, shape (n_samples,)
            Binary (0/1 or boolean) targets for the classifier.
        index : int, default=-1
            Position of the classifier in ``class_w_``. The default selects
            the most recently initialised one, which is the classifier
            ``fit`` is currently training.

        Returns
        -------
        ndarray with the same shape as ``class_w_[index]``.
        """
        # cast so boolean targets subtract cleanly from float probabilities
        ydiff = np.asarray(y, dtype=float) - self._predict_proba(X, index, add_bias=False).ravel()
        # make ydiff a column vector, scale every sample by its error, then average
        gradient = np.mean(X * ydiff[:, np.newaxis], axis=0)
        return gradient.reshape(self.class_w_[index].shape)

    def _predict_proba(self, X, index, add_bias=True):
        """Probability that each row of ``X`` belongs to class ``unique_[index]``.

        Returns an ``(n_samples, 1)`` column. Set ``add_bias=False`` when
        ``X`` already carries the bias column.
        """
        Xb = self._add_bias(X) if add_bias else X
        return self._sigmoid(Xb @ self.class_w_[index])

    # public:

    # one vs all:
    def fit(self, X, y):
        """Fit one binary classifier per unique value in ``y``.

        Parameters
        ----------
        X : array-like, shape (n_samples, num_features)
        y : array-like, shape (n_samples,)
            Class labels; any set of distinct values is accepted.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        num_features = X.shape[1]

        self.unique_ = np.unique(y)  # get each unique class value
        self.class_w_ = []  # one weight vector per binary classifier

        # the bias-augmented matrix is identical for every class: build it once
        Xb = self._add_bias(X)

        # for each unique class value:
        for i, yval in enumerate(self.unique_):
            y_binary = (y == yval).astype(float)  # one-vs-all target
            # +1 row for the bias weight
            self.class_w_.append(np.zeros((num_features + 1, 1)))

            # for as many as the max iterations
            for _ in range(self.iters):
                gradient = self._get_gradient(Xb, y_binary, i)
                self.class_w_[i] += gradient * self.eta  # multiply by learning rate

        # (num_features + 1, num_classes) -> (num_classes, num_features + 1)
        self.w_ = np.hstack(self.class_w_).T

    def predict_proba(self, X):
        """Per-class probabilities, shape ``(n_samples, num_classes)``.

        Column ``i`` is the output of the binary classifier for
        ``unique_[i]``; the rows are independent one-vs-all scores and are
        not normalised to sum to one.
        """
        Xb = self._add_bias(np.asarray(X, dtype=float))
        return self._sigmoid(Xb @ self.w_.T)

    def predict(self, X):
        """Return, for every row of ``X``, the label with the highest probability."""
        return self.unique_[np.argmax(self.predict_proba(X), axis=1)]  # take argmax along row