In [2]:
import numpy as np

In [154]:
class LogisticRegression():
    
    def __init__(self, epochs=100, learningRate=0.01):
        self._epochs = epochs
        self._lr = learningRate
    
    def _foreword(self, X, Y):
        z = np.dot(X, self._w.T)
        a = 1 / (1 + np.exp(-z))
        J = - np.mean(Y * np.log(a) + (1 - Y) * np.log(1 - a))
        
        return J, a
    
    def _backward(self, X, Y, a):
        gradW = np.dot(X.T, (a - Y).T) / len(X)
        gradB = np.mean(a - Y)
        
        return gradW, gradB
    
    def fit(self, X, Y):
        self._w = np.zeros((len(X[0]),))
        self._b = 0
        self._costs = []
        
        for i in range(self._epochs):
            J, a = self._foreword(X, Y)
            gradW, gradB = self._backward(X, Y, a)
            
            self._w = self._w - self._lr * gradW
            self._b = self._b - self._lr * gradB
            
            if i % 100 == 0 or i == self._epochs - 1:
                self._costs.append(J)
                print('step:{0} -- costs: {1}'.format(i, J))
        
    def predict(self, X):
        z = np.dot(X, self._w.T)
        a = 1 / (1 + np.exp(-z))
        
        return a > 0.5
    
    def accuracy(self, Y, predict):
        return np.mean(Y == predict)

In [155]:
def loadDataset(path):
    with open(path) as f:
        texts = f.readlines()
        
    X = []
    Y = []
    
    for line in texts:
        parts = line.strip().split()
        parts = [float(part) for part in parts]
        
        X.append(parts[:-1])
        Y.append(parts[-1])
        
    X = np.array(X, dtype=float)
    Y = np.array(Y, dtype=int)
    
    return X, Y

In [156]:
kTrainPath = 'horseColicTraining.txt'
kTestPath = 'horseColicTest.txt'

In [157]:
XTrain, YTrain = loadDataset(kTrainPath)

XTrain.shape, YTrain.shape

((299, 21), (299,))

In [158]:
XTest, YTest = loadDataset(kTestPath)

XTest.shape, YTest.shape

((67, 21), (67,))

In [159]:
class StandardScaler():
    
    def fit(self, X):
        self._std = np.std(X, axis=0)
        self._mean = np.mean(X, axis=0)
        
    def transform(self, X):
        return (X - self._mean) / self._std

In [160]:
scaler = StandardScaler()

scaler.fit(XTrain)

XTrainStd = scaler.transform(XTrain)
XTestStd = scaler.transform(XTest)

In [161]:
lrModel = LogisticRegression(1000, 0.1)

In [162]:
lrModel.fit(XTrainStd, YTrain)

step:0 -- costs: 0.6931471805599453
step:100 -- costs: 0.5511923786099129
step:200 -- costs: 0.5442163958549229
step:300 -- costs: 0.5426760155997999
step:400 -- costs: 0.5422517630351605
step:500 -- costs: 0.5421234801963281
step:600 -- costs: 0.542082522816962
step:700 -- costs: 0.5420689653921427
step:800 -- costs: 0.5420643634041663
step:900 -- costs: 0.5420627732447695
step:999 -- costs: 0.5420622199793595


In [163]:
testPrediction = lrModel.predict(XTestStd)

In [164]:
lrModel.accuracy(YTest, testPrediction)

0.7313432835820896