## Logistic Regression from Scratch

In [None]:
import pandas as pd
import numpy as np

In [147]:
class LogisticRegression:

    def __init__(self, num_iters, learning_rate, tolerance = 1e-6):
        self.num_iters = num_iters
        self.learning_rate = learning_rate
        self.tolerance = tolerance

    def log_likelihood(self, y, preds):
        return np.sum(y * np.log(preds) + (1 - y) * np.log(1 - preds))
        
    def fit(self, X, y):
        # Initialize some weights
        # Typically, initialization can be either 0's or mean's or random values
        
        self.weights = np.zeros(X.shape[1])

        # Compute the initial predictions
        preds = self.predict(X)

        # Compute initial loss
        current_loss = -self.log_likelihood(y, preds)
        prev_loss = float('inf')
        
        # Update the weights
        for steps in range(0, self.num_iters):
            dB = np.dot(X.T, (preds - y)) / y.size

            self.weights -= self.learning_rate * dB 
            print(self.weights)
            # Re-compute the loss
            preds = self.predict(X)
            new_loss = self.log_likelihood(y, preds)

            if abs(prev_loss - current_loss) < self.tolerance:
                break
            
            prev_loss = current_loss


    def predict(self, X):
        # Sigmoid function: 1/(1+e^-z)
        
        z = np.dot(X, self.weights)
        preds = 1/(1 + np.exp(-z))
        return preds
        
    

In [163]:
X_train = np.array([[1,2], [3,4], [5,6], [8, 8]])
y_train = np.array([1, 0, 1])

X_test = np.array([[5,3], [3,2], [5,9]])
y_test = np.array([1,0,0])

In [164]:
model = LogisticRegression(num_iters = 100, learning_rate= 0.02)

In [165]:
model.fit(X_train, y_train)

ValueError: operands could not be broadcast together with shapes (3,) (4,) 

In [166]:
model.predict(X_test)

array([0.5, 0.5, 0.5])

In [167]:
pd.DataFrame(X_train)

Unnamed: 0,0,1
0,1,2
1,3,4
2,5,6
3,8,8
