In [1]:
import pandas as pd
import numpy as np
from decimal import *

In [54]:
class LogisticRegression:
    def __init__(self,threshold = 0.5, max_iter = 1000 , lr = 0.01, fit_intercept = True):
        self.threshold = threshold
        self.max_iter = max_iter
        self.lr = lr
        self.fit_intercept = fit_intercept
        
    def _add_intercept(self, X):
        intercept = np.ones((X.shape[0],1))
        return np.concatenate((intercept,X), axis = 1)
    
    def _sigmoid(self, x):
        
        return np.array([(1.0 / (1.0 + float(Decimal(-i).exp()))) for i in x])
    
    def _loss(self,y, p):
        return - np.mean(y*np.log(p) + (1-y)*np.log(1-p))
    
    def fit(self, X,y):
        if self.fit_intercept:
            X = self._add_intercept(X)
        
        self.W = np.zeros(X.shape[1])
        
        i = 0
        while self.max_iter > 0:
            predictions = self._sigmoid(np.dot(X,self.W))
            loss = self._loss(y, predictions)
            gradient = np.dot(X.T,(predictions - y))
            self.W = self.W - self.lr*gradient/(1000+i*0.1)
            
            if self.max_iter%1000 == 0:
                print(gradient)
                print(f'loss at iteration {i} is {round(loss,4)}')
            
            i+=1
            self.max_iter-=1
    
    def predict_proba(X):
        if fit_intercept:
            X = self._add_intercept(X)
        
        return self._sigmoid(np.dot(X,self.W))
        
    
    def predict(self, W):
        if self.fit_intercept:
            X = self._add_intercept(X)
            predictions = self._sigmoid(np.dot(X,self.W))
            
        return (predictions >= threshold).astype(int)
            

In [58]:
data = pd.read_csv('data/marks.csv', names = ['m1','m2','result'])
X = data.iloc[:,0:2]
y = data['result']

In [59]:
logr = LogisticRegression(fit_intercept=True,lr=1e-2,max_iter=30000,threshold=0.5)

In [60]:
logr.fit(X,y)

[  -10.         -1200.92165893 -1126.28422055]
loss at iteration 0 is 0.6931
[ 6.92577438 -0.04808768 -0.05384302]
loss at iteration 1000 is 0.6252
[ 6.8739874  -0.04777621 -0.0534654 ]
loss at iteration 2000 is 0.6211
[ 6.82667953 -0.04749136 -0.05311962]
loss at iteration 3000 is 0.6173
[ 6.78316638 -0.04722909 -0.0528009 ]
loss at iteration 4000 is 0.6139
[ 6.7429076  -0.04698621 -0.05250544]
loss at iteration 5000 is 0.6107
[ 6.7054693  -0.04676013 -0.05223019]
loss at iteration 6000 is 0.6078
[ 6.67049796 -0.04654878 -0.05197265]
loss at iteration 7000 is 0.6051
[ 6.6377018  -0.04635042 -0.05173075]
loss at iteration 8000 is 0.6026
[ 6.6068373  -0.04616359 -0.05150278]
loss at iteration 9000 is 0.6002
[ 6.57769914 -0.04598709 -0.05128728]
loss at iteration 10000 is 0.598
[ 6.55011266 -0.04581988 -0.051083  ]
loss at iteration 11000 is 0.5959
[ 6.52392811 -0.04566106 -0.05088887]
loss at iteration 12000 is 0.5939
[ 6.49901611 -0.04550986 -0.05070397]
loss at iteration 13000 is 0.59

KeyboardInterrupt: 

In [None]:
https://towardsdatascience.com/building-a-logistic-regression-in-python-301d27367c24

In [1]:
import pandas as pd
import numpy as np
from decimal import *

In [14]:
class LogisticRegression:
    def __init__(self,threshold = 0.5, max_iter = 1000 , lr = 0.01, fit_intercept = True):
        self.threshold = threshold
        self.max_iter = max_iter
        self.lr = lr
        self.fit_intercept = fit_intercept
        
    def _add_intercept(self, X):
        intercept = np.ones((X.shape[0],1))
        return np.concatenate((intercept,X), axis = 1)
    
    def _sigmoid(self, x):
        
        return np.array([1/1+float(Decimal(-i).exp()) for i in x])
    
    def _loss(self,y, p):
        return - np.mean(y*np.log(p) + (1-y)*np.log(1-p))
    
    def fit(self, X,y):
        if self.fit_intercept:
            X = self._add_intercept(X)
        
        self.W = np.zeros(X.shape[1])
        
        i = 0
        while self.max_iter > 0:
            predictions = self._sigmoid(np.dot(X,self.W))
            loss = self._loss(y, predictions)
            gradient = np.dot(X.T,(predictions - y))
            self.W = self.W - self.lr*gradient/(1000+i*0.1)
            
            if self.max_iter%1000 == 0:
                print(gradient)
                print(f'loss at iteration {i} is {round(loss,4)}')
            
            i+=1
            self.max_iter-=1
    
    def predict_proba(X):
        if fit_intercept:
            X = self._add_intercept(X)
        
        return self._sigmoid(np.dot(X,self.W))
        
    
    def predict(self, W):
        if self.fit_intercept:
            X = self._add_intercept(X)
            predictions = self._sigmoid(np.dot(X,self.W))
            
        return (predictions >= threshold).astype(int)
            

In [15]:
data = pd.read_csv('data/marks.csv', names = ['m1','m2','result'])
X = data.iloc[:,0:2]
y = data['result']

In [16]:
logr = LogisticRegression(fit_intercept=True,lr=1e-2,max_iter=30000,threshold=0.5)

In [17]:
logr.fit(X,y)



[  67.42704763 3780.20020672 3910.82428863]
loss at iteration 0 is nan
[inf inf inf]
loss at iteration 1000 is nan
[inf inf inf]
loss at iteration 2000 is nan
[inf inf inf]
loss at iteration 3000 is nan
[inf inf inf]
loss at iteration 4000 is nan
[inf inf inf]
loss at iteration 5000 is nan
[inf inf inf]
loss at iteration 6000 is nan
[inf inf inf]
loss at iteration 7000 is nan
[inf inf inf]
loss at iteration 8000 is nan


KeyboardInterrupt: 

In [None]:
https://towardsdatascience.com/building-a-logistic-regression-in-python-301d27367c24