In [74]:
import numpy as np 
from ipykernel import kernelapp as app


In [3]:
%run data_preprocessing/ionosphere_data.ipynb

In [53]:
class LogisticRegression:
    def __init__(self):        
        # initialize hyperparameters 
        self.lr = 0.01 
        self.eps = 1e-2
        
    def add_intercept(self, X):
        N,D = X.shape
        intercept = np.ones((N, 1), dtype = X.dtype)
        return np.concatenate((intercept, X), axis=1)
        
    def sigmoid(self, Z):
        return 1 / (1 + np.exp(-Z))
    
    def gradientDescent(self, X, y):
        N,D = X.shape
        w = np.zeros(D)
        g = np.inf        
        # possible modification: change stopping criteria 
        while np.linalg.norm(g) > self.eps:
            yh = self.sigmoid(np.dot(X, w))
            g = np.dot(X.T, (yh - y)) / N
            w = w - self.lr*g
        return w 
    
    def fit(self, X, y):
        X = self.add_intercept(X)
        N,D = X.shape
        self.w = np.zeros(D)
        
        g = np.inf 
        
        while np.linalg.norm(g) > self.eps:
            yh = self.sigmoid(np.dot(X, self.w))
            g = np.dot(X.T, (yh - y)) / N
            self.w = self.w - self.lr*g
        
        print (self.w)
        

    
    def predict(self, X):     
        X = self.add_intercept(X)
        print(X)
        yh = self.sigmoid(np.dot(X, self.w))
        return yh 
        
        
#     def evaluate_acc(y, yh):
        

    

In [54]:
X = ionosphere_export[:, :-1]
y = ionosphere_export[:, -1]


lr = LogisticRegression()
lr.fit(X, y)
lr.predict(np.matrix([[1,1,-0.45161,1,1,0.71216,-1,0,0,0,0,0,0,-1,0.14516,0.54094,-0.39330,-1,-0.54467,-0.69975,1,0,0,1,0.90695,0.51613,1,1,-0.20099,0.25682,1,-0.32382,1]]))

[-3.90876305  1.97134128  1.69623103  0.6887892   2.05534016  1.3002874
  0.56242602  1.68344798  1.23372793  0.13934878 -1.13491116 -0.44780797
 -0.47324959  1.00497047  0.80591848 -0.21114112  0.29745399  0.69524138
 -0.62839348 -0.01751338  0.22698444 -2.40109944  1.12854116  0.68270134
  0.58273099  1.2635086  -2.46871696 -0.33139439  0.62553685  0.84215945
  0.82819734 -0.42234506 -0.29033422 -1.34559573]
[[ 1.       1.       1.      -0.45161  1.       1.       0.71216 -1.
   0.       0.       0.       0.       0.       0.      -1.       0.14516
   0.54094 -0.3933  -1.      -0.54467 -0.69975  1.       0.       0.
   1.       0.90695  0.51613  1.       1.      -0.20099  0.25682  1.
  -0.32382  1.     ]]


matrix([[0.09686991]])

array([[    39,  77516,     13, ...,      1,      0,      0],
       [    50,  83311,     13, ...,      1,      0,      0],
       [    38, 215646,      9, ...,      1,      0,      0],
       ...,
       [    38, 374983,     13, ...,      1,      0,      0],
       [    44,  83891,     13, ...,      1,      0,      0],
       [    35, 182148,     13, ...,      1,      0,      0]], dtype=int64)

In [37]:
class LogisticRegression2:
    def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True, verbose=False):
        self.lr = lr
        self.num_iter = num_iter
        self.fit_intercept = fit_intercept
        self.verbose = verbose
    
    def __add_intercept(self, X):
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)
    
    def __sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    def __loss(self, h, y):
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()
    
    def fit(self, X, y):
        if self.fit_intercept:
            X = self.__add_intercept(X)
        
        # weights initialization
        self.theta = np.zeros(X.shape[1])
        
        for i in range(self.num_iter):
            z = np.dot(X, self.theta)
            h = self.__sigmoid(z)
            gradient = np.dot(X.T, (h - y)) / y.size
            self.theta -= self.lr * gradient
            
            if(self.verbose == True and i % 10000 == 0):
                z = np.dot(X, self.theta)
                h = self.__sigmoid(z)
                print(f'loss: {self.__loss(h, y)} \t')
    
    def predict_prob(self, X):
        if self.fit_intercept:
            X = self.__add_intercept(X)
    
        return self.__sigmoid(np.dot(X, self.theta))
    
    def predict(self, X, threshold):
        return self.predict_prob(X) >= threshold

In [38]:
X = ionosphere_export[:, :-1]
y = ionosphere_export[:, -1]


lr2 = LogisticRegression2()
lr2.fit(X, y)
lr2.predict_prob(np.matrix([[1,1,-0.45161,1,1,0.71216,-1,0,0,0,0,0,0,-1,0.14516,0.54094,-0.39330,-1,-0.54467,-0.69975,1,0,0,1,0.90695,0.51613,1,1,-0.20099,0.25682,1,-0.32382,1]]))

matrix([[0.08134653]])