In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

import numpy as np 
%matplotlib inline

In [None]:

class NumpyClassifier():
    """Evaluation helpers shared by the NumPy-based classifiers.

    Subclasses are expected to provide ``predict(X, **kwargs)``; ``accuracy``
    calls it and compares the result with the true labels.
    """

    def accuracy(self, X_test, y_test, **kwargs):
        """Return the fraction of predictions that equal the true labels.

        Parameters
        ----------
        X_test : array-like
            Inputs, passed unchanged to ``self.predict``.
        y_test : array-like
            True labels, either 1-D or a single-column 2-D array.
        **kwargs
            Extra keyword arguments forwarded to ``self.predict``.

        Returns
        -------
        numpy.float64
            Share of predictions that match their label, between 0 and 1.

        Raises
        ------
        ValueError
            If there are no predictions, or if predictions and labels do not
            line up one-to-one.
        """
        pred = np.asarray(self.predict(X_test, **kwargs))
        if pred.ndim > 1:
            # Multi-column predictions: the label is taken from the first column.
            pred = pred[:, 0]

        target = np.asarray(y_test)
        if target.ndim == 2 and target.shape[1] == 1:
            # Flatten column-vector labels; otherwise (N,) == (N, 1) would
            # broadcast to an (N, N) comparison matrix.
            target = target[:, 0]

        if pred.shape != target.shape:
            raise ValueError(
                f"predictions have shape {pred.shape} but labels have shape {target.shape}"
            )
        if pred.size == 0:
            raise ValueError("cannot compute accuracy on an empty test set")

        return np.mean(pred == target)



def logistic(x):
    """Elementwise logistic sigmoid ``1 / (1 + exp(-x))``, computed stably.

    The textbook expression evaluates ``exp(-x)``, which overflows (and emits
    a RuntimeWarning) for large negative ``x``. Here the exponential is only
    ever taken of a non-positive number, so it stays within [0, 1].

    Parameters
    ----------
    x : scalar or array-like of numbers

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray with the shape of ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x)        x < 0: e^x / (1 + e^x)   (same function)
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays untouched.
    return result[()]


class NumpyLogReg(NumpyClassifier):
    """Binary logistic regression trained with full-batch gradient steps.

    ``fit`` stores the learned weights in ``self.beta``. Index 0 is the weight
    of the constant -1 bias input that ``add_bias`` prepends to every row.
    """

    def add_bias(self, X):
        """Return ``X`` with a constant column of -1 prepended as bias input."""
        X = np.asarray(X)
        return np.c_[np.full(X.shape[0], -1.0), X]

    def fit(self, X_train, t_train, eta, epochs=10):
        """Fit the weights to the training data.

        Parameters
        ----------
        X_train : array-like, N rows (data points) by m columns (features)
        t_train : array-like with N target values; a column vector is accepted
        eta : float
            Step size applied to every update.
        epochs : int, default 10
            Number of passes over the full training set.

        Returns
        -------
        self, so that calls can be chained.

        Raises
        ------
        ValueError
            If the number of targets differs from the number of rows.
        """
        X = self.add_bias(X_train)
        n_samples, n_inputs = X.shape

        # Flatten so an (N, 1) target cannot broadcast against the (N,)
        # output of forward() into an (N, N) matrix.
        targets = np.asarray(t_train).reshape(-1)
        if targets.shape[0] != n_samples:
            raise ValueError(
                f"got {targets.shape[0]} targets for {n_samples} training rows"
            )

        # Weights start at zero on every call, so refitting does not
        # continue from a previous fit.
        self.beta = np.zeros(n_inputs)

        for _ in range(epochs):
            # Mean over the batch of (target - predicted probability) * input.
            step = X.T @ (targets - self.forward(X)) / n_samples
            self.beta += eta * step

        return self

    def forward(self, X):
        """Return the logistic of ``X @ self.beta``; ``X`` must already include the bias column."""
        return logistic(X @ self.beta)

    def predict(self, x, threshold=0.5):
        """Return 1 where the forward output exceeds ``threshold``, else 0 (integer array)."""
        score = self.forward(self.add_bias(x))
        return (score > threshold).astype('int')


In [None]:
# Load scikit-learn's breast-cancer data and hold out a test split (fixed seed).
cancer = load_breast_cancer()

X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, random_state=0)

# Fit the scaler on the training split only and reuse it for the test split,
# so no test-set statistics enter training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Learning rates to compare; each one gets a freshly initialised model.
eta_list = [1, 0.5, 0.1, 0.01, 0.001]
acu_scores = np.zeros(len(eta_list))

for cntr, eta in enumerate(eta_list):
    logreg = NumpyLogReg()
    logreg.fit(X_train=X_train_scaled, t_train=y_train, eta=eta)
    score = acu_scores[cntr] = logreg.accuracy(X_test_scaled, y_test)
    print(f"eta: {eta}, score: {score}")

# The rates span three orders of magnitude, so use a log x-axis with markers;
# on a linear axis the small rates collapse onto each other.
fig, ax = plt.subplots()
ax.plot(eta_list, acu_scores, marker="o")
ax.set_xscale("log")
ax.set_xlabel("Eta")
ax.set_ylabel("Accuracy")
ax.set_title("Test accuracy vs. learning rate")
plt.show()

In [None]:
from sklearn.linear_model import LogisticRegression

def sci_kit_test_acu(X_train, X_test, y_train, y_test):
    """Train scikit-learn's LogisticRegression (default settings) and score it.

    The model is fitted on ``X_train`` / ``y_train``; the return value is the
    fraction of rows in ``X_test`` whose predicted label equals ``y_test``.
    """
    model = LogisticRegression().fit(X_train, y_train)
    predictions = model.predict(X_test)

    n_correct = sum(predictions == y_test)
    return n_correct / len(predictions)

# Accuracy of scikit-learn's LogisticRegression on the same scaled split;
# the value is shown as this cell's output.
sci_kit_test_acu(X_train_scaled, X_test_scaled, y_train, y_test)

