In [1]:
import numpy as np
from logistic_reg import MultiLogisticRegression

In [139]:
class MultiLogisticRegression():
    def __init__(self,X,y,alpha = 0.5,learning_rate = 1,no_iter = 1000,use_bias = True,centralize = True):
        
        if centralize ==True:
            self.X = self._centralize(X)
        else:
            self.X = X
        self.y = y
        self._check = False
        self.use_bias = use_bias
        self.N = self.X.shape[0]
        self.no_iter = no_iter
        
        if(len(self.y.shape) == 1):
            self.max_index = max(self.y)
            self.one_hots = np.zeros((self.N,self.max_index+1))
            for i,_ in enumerate(y):
                self.one_hots[i,_] = 1
#             print(self.one_hots)
        
        
        assert alpha>0, "Choose the regularizer constant(alpha) > 0"
        self.alpha = alpha
        assert learning_rate>0, "Choose the regularizer constant(alpha) > 0"
        self.l_r = learning_rate
       
        
        if self.use_bias == True:
            temp = np.ones((self.X.shape[0],self.X.shape[1] + 1))
            temp[:,:-1] = self.X
            self.X = temp
#            print(temp)

    def _softmax(self,x):
        exponential = np.exp(x)
        sumer = exponential.sum(axis = 1,keepdims = True)
        return exponential/sumer
        #return (exponential.T/sumer).T
    
    def _centralize(self,x):
        x = x - np.mean(x, axis=1,keepdims =True)
        x = x/np.std(x, axis=1,keepdims = True)
        return x
              
    def fit(self):  
        
        self.w = np.zeros((self.X.shape[1],self.max_index+1))
        
        
        #Use Gradient Descent
        for i in range(self.no_iter):
            self._update_weights()
#             print(i, ':',self.w)
#             if i==100:
#                 break
        self._check = True
    
    
    def weights(self):
        assert (self._check == True),"Model not fitted"
        
        return self.w
    
    def _update_weights(self):
                
        #Calculate Gradient
        output = self.X @ self.w 
        #print(output)
        
        self.dw = self.X.T @ (self._softmax(output)-self.one_hots)/ self.X.shape[0]
#         print(self.dw)
        self.w = self.w - self.l_r*self.dw
        
        return self.w
              
    def predict(self,data,probs = False,plot =False):
        
        if self.use_bias == True:
            temp = np.ones((data.shape[0],data.shape[1] + 1))
            temp[:,:-1] = data
            data = temp 
        #print(self.w)
        self.p = self._softmax(data @ self.w)
        return self.p

In [140]:
dummy = np.array([[1,2,3,7],[4,2,4,6],[7,8,9,4],[1,4,3,2]])
y = np.array([0,1,1,0])

#(np.exp(dummy).T/ np.exp(dummy).sum(axis = 1)).T
(np.exp(dummy)/ np.exp(dummy).sum(axis = 1,keepdims = True))

array([[0.00241233, 0.00655741, 0.01782488, 0.97320538],
       [0.10499359, 0.01420934, 0.10499359, 0.77580349],
       [0.08962882, 0.24363641, 0.66227241, 0.00446236],
       [0.0320586 , 0.64391426, 0.23688282, 0.08714432]])

In [141]:
lr = MultiLogisticRegression(dummy,y)


In [142]:
lr.fit()

In [143]:
lr.predict(dummy)

array([[9.99999970e-01, 3.04546749e-08],
       [5.16443130e-04, 9.99483557e-01],
       [6.09380284e-06, 9.99993906e-01],
       [9.99996611e-01, 3.38882434e-06]])

In [144]:
from sklearn.datasets import load_breast_cancer

In [145]:
data = load_breast_cancer()

In [146]:
data['data'],data['target']

(array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
 

In [147]:
X_train,y_train = data['data'][:500],data['target'][:500]
X_test,y_test = data['data'][500:],data['target'][500:]

In [149]:
lr = MultiLogisticRegression(X_train,y_train,no_iter = 1000, learning_rate = 0.05)
lr.fit()

In [150]:
prediction = lr.predict(X_test)
prediction


  exponential = np.exp(x)
  return exponential/sumer


array([[2.11525337e-053, 1.00000000e+000],
       [8.68872630e-014, 1.00000000e+000],
       [1.05678530e-112, 1.00000000e+000],
       [            nan, 0.00000000e+000],
       [4.68891893e-087, 1.00000000e+000],
       [2.15471318e-088, 1.00000000e+000],
       [1.35882610e-127, 1.00000000e+000],
       [6.02470920e-124, 1.00000000e+000],
       [7.83544338e-139, 1.00000000e+000],
       [7.16434401e-064, 1.00000000e+000],
       [5.10744688e-132, 1.00000000e+000],
       [1.08215090e-145, 1.00000000e+000],
       [1.00000000e+000, 2.10798504e-095],
       [1.29110317e-002, 9.87088968e-001],
       [1.00000000e+000, 6.04356353e-055],
       [1.01536408e-079, 1.00000000e+000],
       [1.00000000e+000, 1.65970137e-196],
       [1.00000000e+000, 7.78233907e-147],
       [2.26277032e-003, 9.97737230e-001],
       [1.36078360e-055, 1.00000000e+000],
       [5.38869436e-066, 1.00000000e+000],
       [1.00000000e+000, 0.00000000e+000],
       [1.76326120e-125, 1.00000000e+000],
       [1.0

In [151]:
y_bool = y_test>0.5

In [153]:
p = prediction[:,1]>0.5

In [155]:
sum(p == y_bool)/len(p)

0.927536231884058