In [1]:
import timeit
import numpy as np
from scipy.optimize import minimize
from scipy.special import expit

In [2]:
class logistic_regression(object):
    """Binary logistic regression fitted by minimizing the mean log-loss.

    A column of ones is prepended to the feature matrix, so ``theta[0]`` is
    the bias term and the remaining entries are the per-feature weights.
    """

    def __init__(self, features, obs, d):
        """Store the training data and initialize the parameters to zero.

        Parameters
        ----------
        features : 2-D array with one row per sample (without a bias column).
        obs : observed targets, one per row of ``features``
            (presumably a 1-D array with values in [0, 1] -- confirm with the data).
        d : number of feature columns, excluding the bias column.
        """
        # Prepend a column of ones so the bias is learned as theta[0].
        self.features = np.hstack((np.ones((features.shape[0], 1)), features))
        self.obs = obs
        self.d = d + 1  # +1 accounts for the bias column added above
        self.theta = np.zeros(self.d)  # Start the optimization from theta = 0

    def sigmoid_function(self, z):
        """Return the logistic sigmoid 1 / (1 + exp(-z)), element-wise."""
        return expit(z)

    def cost_function(self, theta):
        """Return the mean log-loss (cross-entropy) of the data under ``theta``.

        The loss is evaluated directly from the logits ``z = X @ theta`` using
        the identities ``-log(sigmoid(z)) == logaddexp(0, -z)`` and
        ``-log(1 - sigmoid(z)) == logaddexp(0, z)``. Taking ``log`` of the
        sigmoid output instead breaks down once the sigmoid saturates to
        exactly 0.0 or 1.0 (``log(0) = -inf`` and ``0 * -inf = nan``), which
        would hand the optimizer a nan/inf objective.
        """
        l = len(self.obs)
        z = np.dot(self.features, theta)
        err = self.obs * np.logaddexp(0, -z) + (1 - self.obs) * np.logaddexp(0, z)
        cost = 1 / l * np.sum(err)
        return cost

    def grad_function(self, theta):
        """Return the gradient of ``cost_function`` with respect to ``theta``."""
        l = len(self.obs)
        pred = self.sigmoid_function(np.dot(self.features, theta))
        grad = 1 / l * np.dot(self.features.T, (pred - self.obs))
        return grad

    def solve(self):
        """Minimize the cost with L-BFGS-B, store the result and return it.

        The optimization starts from the current ``self.theta`` and uses the
        analytic gradient from ``grad_function``.
        """
        result = minimize(fun=self.cost_function,
                          x0=self.theta,
                          method='L-BFGS-B',
                          jac=self.grad_function)

        self.theta = result.x
        return self.theta

    def predict(self):
        """Return 0/1 predictions for the stored features using ``self.theta``.

        The estimated probabilities are printed before being thresholded at
        0.5 (probabilities >= 0.5 map to 1).
        """
        estimation = self.sigmoid_function(np.dot(self.features, self.theta))
        print("Estimation: ")
        print(estimation)
        return (estimation >= 0.5).astype(int)

In [3]:
#IMPORT DATA HERE
features = np.load("feature.npy")
obs = np.load("obs.npy")
# Take the feature count from the loaded matrix instead of hard-coding it, so
# the parameter vector always matches the number of feature columns.
d = features.shape[1]

predictor = logistic_regression(features, obs, d)

In [4]:
#DO NOT CHANGE THIS CELL
# Times a single call to predictor.solve() and prints the elapsed time.

tic = timeit.default_timer()  # clock reading taken just before the solver runs

#Your solver goes here. Do not add any code here.
theta = predictor.solve()

toc = timeit.default_timer()  # clock reading taken right after the solver returns

print(toc - tic)  # difference between the two clock readings

1.0626967499993043


In [5]:
# Report the fitted parameters and how often the thresholded predictions
# agree with the thresholded observations.

print("Theta:", predictor.theta, sep="\n")

# Both sides are binarized at 0.5 before being compared element-wise.
predictions = predictor.predict()
obs_pred = (obs >= 0.5).astype(int)
accuracy = (predictions == obs_pred).mean()
print("Accuracy: ", accuracy * 100, sep="\n")

Theta:
[1.41837156e-05 2.65373485e+00 5.61681367e-01 5.27571120e-01
 7.78975777e-01 2.99964795e+00 3.16434700e-01 1.15935352e+00
 2.40913580e+00 1.09409692e+00 2.05809682e+00]
Estimation: 
[8.98092717e-01 6.34828386e-02 9.99988610e-01 ... 2.12003368e-03
 4.30588385e-05 4.38842396e-01]
Accuracy: 
99.9995
