In [107]:
import numpy as np

In [108]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

sigmoid(np.array([-1,0,1]))

array([0.26894142, 0.5       , 0.73105858])

In [198]:
#X: Mx(N-1), _X: MxN 
#y: M, _y: 1xM
class LogisticRegression:
    def __init__(self, alpha=0.1, n_steps=10):
        self.w = 0
        self.alpha = alpha
        self.n_steps = n_steps
        
    def fit(self, X, y):
        M = X.shape[0]
        N = X.shape[1] + 1
        _X = np.hstack([np.ones((M,1)), X])
        _y = y.reshape(M, 1)

        self.w = np.random.randn(N, 1)
        for i in range(self.n_steps):
            grad = (1/M) * np.matmul(_X.T, sigmoid(np.matmul(_X, self.w) -_y))
            self.w -= self.alpha * grad
            
            if i%10 == 0:
                print(np.linalg.norm(grad))
                #print(np.linalg.norm(self.w))
                #print(np.linalg.norm(self.w), np.max(self.w))
                #print(np.min(np.matmul(_X, self.w)), np.max(np.matmul(_X, self.w)))
    
    def predict(self, X):
        M = X.shape[0]
        N = X.shape[1] + 1
        _X = np.hstack([np.ones((M,1)), X])
        
        prob = sigmoid(np.matmul(_X, self.w))
        return (0.5 <= prob)
    
    def predict_prob(self, X):
        M = X.shape[0]
        N = X.shape[1] + 1
        _X = np.hstack([np.ones((M,1)), X])
        
        prob = sigmoid(np.matmul(_X, self.w))
        return prob

In [199]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler

bc = load_breast_cancer()
X = bc.data
y = bc.target

scaler = StandardScaler()
X = scaler.fit_transform(X)

print(np.linalg.norm(X[0]))

10.710459824440056


In [215]:
from metrics import ACC

model = LogisticRegression(alpha=0.1, n_steps=100)
model.fit(X, y)

model.predict_prob(X)
#print("\n", ACC(model, X, y))

1.0581233355115554
7.794352780869595
1.1042958103451386
7.840238926348953
1.1685592058030088
8.03896731192637
1.2513912947174826
8.39816904063954
1.3611708876687827
8.922032805404124
1.511008604130692
9.614612071397358
1.8047121037308793
10.489066025764991
2.419803215932822
11.605801277031155
2.8307824166220965
13.091737844349206
3.0112843324567478
14.974745579165058


array([[7.64658623e-30],
       [6.55608414e-10],
       [5.19943715e-15],
       [3.45053492e-20],
       [1.94146064e-06],
       [2.13769290e-05],
       [1.03901208e-09],
       [3.16877729e-01],
       [2.94862207e-07],
       [6.14904332e-22],
       [9.35984215e-01],
       [2.19724335e-09],
       [8.43250444e-07],
       [9.99999998e-01],
       [1.90276082e-10],
       [3.56238150e-15],
       [9.03527227e-01],
       [8.36474789e-13],
       [1.46457795e-18],
       [1.00000000e+00],
       [9.99999918e-01],
       [1.00000000e+00],
       [4.74751801e-09],
       [1.87013775e-18],
       [2.61903554e-19],
       [5.69150180e-13],
       [1.10834378e-11],
       [5.50078154e-02],
       [7.70529662e-16],
       [2.47277622e-02],
       [1.58370109e-18],
       [1.10664036e-12],
       [8.49455345e-11],
       [5.68231658e-24],
       [1.32322460e-14],
       [1.96818263e-12],
       [2.95435074e-03],
       [1.00000000e+00],
       [1.00000000e+00],
       [3.80868766e-01],


In [152]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(solver='lbfgs')
lr.fit(X, y)
ACC(lr, X, y)

0.9876977152899824