In [1]:
import numpy as np

class LogisticRegression:
    """Logistic regression trained with full-batch gradient descent.

    Works as a plain binary classifier and, when ``multi`` is true, as a
    multi-class classifier using one-vs-rest (OvR): one binary classifier is
    trained per unique label.

    Parameters
    ----------
    lr : float, default 0.1
        Gradient-descent step size.
    n_iters : int, default 1000
        Number of gradient-descent steps per binary classifier.
    multi : bool, default False
        Train and predict in one-vs-rest multi-class mode.

    Attributes
    ----------
    weights, bias
        Parameters of the binary model. After a multi-class fit they hold the
        parameters of the last one-vs-rest classifier that was trained.
    w, b : list
        Per-class weights and biases (multi-class mode), ordered like
        ``classes_``.
    classes_ : ndarray or None
        Sorted unique labels seen by ``fit`` in multi-class mode.
    """

    # np.exp overflows float64 a little above 709; inputs to the sigmoid are
    # clipped to this magnitude, which leaves results for |x| <= 700 untouched.
    _EXP_CLIP = 700.0

    def __init__(self, lr=0.1, n_iters=1000, multi=False):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
        self.multi = multi
        # Weight and bias of each binary classifier in multi-class (OvR) mode.
        self.w = []
        self.b = []
        # Labels matching the entries of ``w``/``b``; filled in by ``fit``.
        self.classes_ = None

    def fit(self, X, y):
        """Train the model on ``X`` (n_samples, n_features) and labels ``y``.

        In binary mode ``y`` must contain 0/1 targets. In multi-class mode
        ``y`` may contain any labels; one classifier is trained per unique
        label. Calling ``fit`` again replaces the previously learned
        parameters. Returns ``None``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        if self.multi:
            # Reset so that refitting replaces, rather than appends to, the
            # classifiers learned by an earlier call.
            self.classes_ = np.unique(y)
            self.w = []
            self.b = []
            for label in self.classes_:
                # One-vs-rest target: 1 for `label`, 0 for everything else.
                # It does not change between gradient steps, so build it once.
                target = (y == label).astype(float)
                self.weights, self.bias = self._gradient_descent(X, target)
                # Store the binary parameters used later by predict().
                self.w.append(self.weights)
                self.b.append(self.bias)
        else:
            self.weights, self.bias = self._gradient_descent(X, y.astype(float))

    def _gradient_descent(self, X, target):
        """Fit one binary classifier and return its ``(weights, bias)``."""
        n_samples, n_features = X.shape
        weights = np.zeros(n_features)
        bias = 0.0
        scale = 1 / n_samples

        for _ in range(self.n_iters):
            residual = self._sigmoid(np.dot(X, weights) + bias) - target
            # Gradients of the mean log-loss. The bias gradient is the mean
            # residual, i.e. it uses the same 1/n scaling as the weights.
            dw = scale * np.dot(X.T, residual)
            db = scale * np.sum(residual)
            weights -= self.lr * dw
            bias -= self.lr * db

        return weights, bias

    def predict(self, X):
        """Predict for the rows of ``X``.

        Binary mode returns a list of 0/1 labels (probability > 0.5 -> 1).
        Multi-class mode returns an array of shape (n_samples, n_classes)
        holding the softmax of the per-class sigmoid scores, so each row sums
        to 1; pass it to ``accuracy`` or take the row-wise argmax.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        X = np.asarray(X, dtype=float)

        if self.multi:
            if not self.w:
                raise RuntimeError("fit() must be called before predict()")
            # One row of sigmoid scores per class, transposed so that every
            # row of `scores` collects the scores of a single sample.
            scores = np.array([
                self._sigmoid(np.dot(X, class_w) + class_b)
                for class_w, class_b in zip(self.w, self.b)
            ]).T
            # Softmax rescales the scores of each sample to sum to 1.
            return self.softmax(scores)

        if self.weights is None:
            raise RuntimeError("fit() must be called before predict()")
        probabilities = self._sigmoid(np.dot(X, self.weights) + self.bias)
        return [1 if p > 0.5 else 0 for p in probabilities]

    def _sigmoid(self, x):
        """Logistic function 1 / (1 + exp(-x)), safe against exp overflow."""
        x = np.clip(x, -self._EXP_CLIP, self._EXP_CLIP)
        return 1 / (1 + np.exp(-x))

    def softmax(self, x):
        """Row-wise softmax of a 2-D array-like ``x``."""
        x = np.asarray(x, dtype=float)
        # Subtracting the row maximum leaves the result mathematically
        # unchanged but keeps np.exp from overflowing on large inputs.
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def accuracy(self, y_true, y_pred):
        """Fraction of predictions that equal the true labels.

        In multi-class mode ``y_pred`` is the (n_samples, n_classes) array
        returned by ``predict``; the best-scoring column of each row is
        translated back to its label through ``classes_`` when available.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        if self.multi:
            y_pred = np.argmax(y_pred, axis=1)
            classes = getattr(self, "classes_", None)
            if classes is not None:
                # Column index -> original label (identity for labels 0..K-1).
                y_pred = np.asarray(classes)[y_pred]
        # Arrays (not lists) are compared, so `==` is always element-wise.
        return np.sum(y_true == y_pred) / len(y_true)

## Binary Classification

In [2]:
# Test the model on the breast cancer dataset provided by sklearn
from sklearn.model_selection import train_test_split
from sklearn import datasets
import pandas as pd
# Load the breast-cancer data and hold out 20% of the rows for testing.
# The fixed random_state keeps the split identical between runs.
bc = datasets.load_breast_cancer()
X = bc.data
y = bc.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

In [7]:
# Binary model (multi defaults to False). The learning rate is set far below
# the class default of 0.1 -- presumably because the features are used
# unscaled; TODO confirm.
model1 = LogisticRegression(lr = 0.0001, n_iters=1000)

In [8]:
# Fit the binary classifier on the training split. fit() has no return
# value, so this cell produces no output.
model1.fit(X_train,y_train)

In [9]:
# Predict on the held-out test split. In binary mode predict() returns a
# list of 0/1 labels obtained by thresholding the sigmoid output at 0.5.
predictions = model1.predict(X_test)

In [11]:
# Accuracy = number of predictions equal to the true label, divided by the
# number of test labels. The bare expression is displayed as the cell output.
model1.accuracy(y_test, predictions)

0.9298245614035088

## Multiclass Classification

In [12]:
#Testing model on the iris dataset with three different classes
from sklearn.model_selection import train_test_split
from sklearn import datasets
import pandas as pd
# Load the iris data and hold out 40% of the rows for testing.
# Note: this rebinds bc, X, y and the train/test names from the binary
# section above, so the cells below operate on iris from here on.
bc = datasets.load_iris()
X = bc.data
y = bc.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=1234,
)

In [13]:
# Multi-class run with the default lr and n_iters. multi=True makes fit()
# train one binary classifier per unique label (one-vs-rest).
# Note: this rebinds model1, replacing the binary model from the cells above.
model1 = LogisticRegression(multi=True)
model1.fit(X_train,y_train)
# In multi-class mode predict() returns the softmax of the per-class sigmoid
# scores rather than class labels.
predictions = model1.predict(X_test)
# accuracy() takes the row-wise argmax of those scores as the predicted class
# before comparing with y_test; the bare expression is the cell output.
model1.accuracy(y_test, predictions)

0.9833333333333333