In [1]:
import numpy as np
import pandas as pd

class LogisticRegression:
    """Logistic regression trained with batch gradient descent.

    The model computes ``sigmoid(x . theta)``. No intercept is added by this
    class; callers that want one must include a constant column in ``x``.

    Attributes:
        theta: Parameter vector. ``None`` until ``fit`` runs, unless an
            initial value was supplied through ``theta_0``.
        step_size: Learning rate applied to every gradient step.
        max_iter: Upper bound on the number of gradient steps per ``fit`` call.
        eps: Convergence threshold on the L1 norm of one parameter update.
        verbose: Whether ``fit`` prints a status line when it finishes.
    """

    def __init__(self, step_size=0.01, max_iter=10000, eps=1e-5, theta_0=None, verbose=True):
        """Store the hyper-parameters; no training happens here.

        Args:
            step_size: Gradient descent learning rate.
            max_iter: Maximum number of gradient steps.
            eps: Stop once the L1 norm of a parameter update drops below this.
            theta_0: Optional initial parameter vector. When ``None``, ``fit``
                starts from zeros.
            verbose: Print progress information from ``fit``.
        """
        self.theta = theta_0
        self.step_size = step_size
        self.max_iter = max_iter
        self.eps = eps
        self.verbose = verbose

    @staticmethod
    def _sigmoid(z):
        """Logistic function of ``z``, with ``z`` clipped to [-500, 500].

        The clip keeps ``np.exp`` from overflowing on extreme scores.
        """
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def fit(self, x, y):
        """Run gradient descent on the average logistic loss.

        Training starts from the current ``self.theta`` (zeros if it is still
        ``None``), so calling ``fit`` again continues from the previous result.

        Args:
            x: Training inputs of shape (n_examples, dim).
            y: Training labels, presumably 0/1 (not checked). Any shape that
                flattens to n_examples entries is accepted, so both ``(n,)``
                and ``(n, 1)`` work.

        Raises:
            ValueError: If ``x`` has no rows, or ``y`` does not contain exactly
                one label per row of ``x``.
        """
        n_examples, dim = x.shape
        if n_examples == 0:
            # Averaging the gradient over zero rows would fill theta with NaN.
            raise ValueError('cannot fit on an empty training set')

        # Flatten the labels: a column vector would otherwise broadcast
        # against the (n,) prediction vector into an (n, n) matrix.
        y = np.asarray(y).reshape(-1)
        if y.shape[0] != n_examples:
            raise ValueError(
                f'x has {n_examples} rows but y has {y.shape[0]} labels'
            )

        if self.theta is None:
            self.theta = np.zeros(dim)

        for i in range(self.max_iter):
            h_theta = self._sigmoid(x.dot(self.theta))
            gradient = x.T.dot(h_theta - y) / n_examples
            theta_new = self.theta - self.step_size * gradient

            # Converged when a whole step barely moves the parameters (L1 norm).
            if np.linalg.norm(theta_new - self.theta, 1) < self.eps:
                self.theta = theta_new
                if self.verbose:
                    print(f'Converged in {i + 1} iterations')
                return

            self.theta = theta_new

        # Only reached when max_iter ran out before the convergence test passed.
        if self.verbose:
            print('Final theta:', self.theta)

    def predict(self, x):
        """Return predicted probabilities given new inputs x."""
        return self._sigmoid(x.dot(self.theta))


def main(train_csv, save_path, label_col='Label', threshold=0.5):
    """Fit logistic regression on a CSV and save the predicted probabilities.

    Every column except ``label_col`` is used as a feature, and a constant
    column of ones is prepended as the intercept. The accuracy that gets
    printed is computed on the same rows the model was trained on.

    Args:
        train_csv: Path of the CSV file to read.
        save_path: Path the predicted probabilities are written to with
            ``np.savetxt``.
        label_col: Name of the column holding the labels.
        threshold: A probability strictly above this counts as a positive
            prediction.
    """
    dataset = pd.read_csv(train_csv)
    # Force a float matrix so a non-numeric feature column fails here with a
    # clear conversion error instead of deep inside the training loop.
    features = dataset.drop(columns=[label_col]).to_numpy(dtype=float)
    labels = dataset[label_col].to_numpy()

    # Add intercept
    design = np.c_[np.ones(features.shape[0]), features]

    # Train model
    clf = LogisticRegression()
    clf.fit(design, labels)

    # Predictions on the training rows (there is no held-out split here)
    p_eval = clf.predict(design)
    yhat = p_eval > threshold
    print('LR Accuracy: %.2f' % np.mean(yhat == labels))
    np.savetxt(save_path, p_eval)

if __name__ == '__main__':
    # Entry point: train on the extracted features one directory up and write
    # the predicted probabilities next to the current working directory.
    main(
        save_path='logreg_predictions.txt',
        train_csv='../extracted_features.csv',
    )


Final theta: [ 2.12232128e+00  1.94647798e+00  1.51035172e+00  1.26521290e+00
  2.47729968e-01  4.89865890e-01  4.62779309e-01 -3.53734666e+01
 -3.11471327e+00  6.31368492e-02  3.90318065e-01  1.03365510e-01
  1.96699284e-01  1.57516536e-01  2.52706958e-01  3.20159621e-01
  3.10080087e-01  2.22655795e-01  1.94715083e-01  1.12569795e-01
  2.51852614e-01  6.84318236e+01  1.33062108e+00  1.11205371e+00
  1.99826630e-01]
LR Accuracy: 0.86
