In [1]:
import numpy as np
import pandas as pd

In [2]:
class LogisticRegressor:
    def __init__(self, alpha=0.01, max_iters=100000, threshold=1e-6, N=1000,hidden_layers=1, hidden_layer_size=3, seed=42):
        self.alpha = alpha
        self.max_iters = max_iters
        self.threshold = threshold
        self.N = N
        self.hidden_layers = hidden_layers
        self.hidden_layer_size = hidden_layer_size # not sure what to do with this. 
        self.seed = seed
        self.loss_history = []
        self.accuracy_history = []
        self.trained = False #to be used in predict
        


    def sigmoid(self, t):
        return 1/(1 + np.exp(-t))

    #def relu(self, z):
        #return np.maximum(0, z)

    #def relu_derivative(self, z):
        #return (z > 0).astype(float)
    
    def loss_function(self, y, y_cap):
        epsilon = np.finfo(float).eps
        y_cap = np.clip(y_cap, epsilon, 1 - epsilon)
        return -(y * np.log(y_cap) + (1 - y) * np.log(1 - y_cap))
    
    def fit(self, X, Y):
        np.random.seed(self.seed)
        self.loss_history = []
        self.accuracy_history = []

        
        m = X.shape[0] # samples
        n = X.shape[1] # features

        #init weights and bias
        W1 = np.random.randn(self.hidden_layer_size,n) * 0.01 #random values with mean=0, stdev=0.01 ?
        b1 = np.zeros((self.hidden_layer_size,1))

        W2 = np. random.randn(1,self.hidden_layer_size) * 0.01 
        b2 = np.zeros((1, 1))

        X = np.array(X)
        Y = np.array(Y).reshape(-1, 1)


        stopping = False; loss_prev = 0; iteration = 0; acc = 0
        

        while not stopping:
            #select random data point
            
            i = np.random.choice(np.arange(m))  # pick a random index
            x = X[i].reshape(-1,1)
            y = Y[i, 0]

            #forward pass
            a1 = self.sigmoid(W1 @ x + b1)
            y_cap = (self.sigmoid(W2 @ a1 + b2)).item()
            
            
            loss = self.loss_function(y, y_cap)
            self.loss_history.append(float(loss))
            
            #back propagation
            #output layer
            deltaz = y_cap - y
            delW2 = deltaz * a1.T
            delb2 = deltaz
            #hidden layer
            deltaz1 = (W2.T * deltaz) * (a1 * (1 - a1))
            delW1 = deltaz1 @ x.T    # gradient of hidden weights
            delb1 = deltaz1           # gradient of hidden biases

            #stochatic gradient descent
            W1 = W1 - self.alpha * delW1
            b1 = b1 - self.alpha * delb1
            W2 = W2 - self.alpha * delW2
            b2 = b2 - self.alpha * delb2

            #to store accuracy per iteration
            pred_i = 1 if y_cap >= 0.5 else 0
            self.accuracy_history.append(1.0 if pred_i == y else 0.0)

        
            loss_prev = loss
            iteration += 1
            if iteration >= self.max_iters:
                stopping = True

        #set weights here
        self.W1 = W1
        self.b1 = b1
        self.W2 = W2
        self.b2 = b2
        self.trained = True

    def predict(self,x):
        if (self.trained == False):
            raise ValueError("Not trained uyet!")
        
        #change here
        a1 = self.sigmoid(self.W1 @ x.T + self.b1)
        y_prob = self.sigmoid(self.W2 @ a1 + self.b2)

        Y_pred = (y_prob >= 0.5).astype(int)
        return Y_pred
    
    def score(self, x_test, y_test):
        if (self.trained == False):
            raise ValueError("Not trained uyet!")        
        
        y_pred = self.predict(x_test)
        #running into shape issues so trying this 
        y_pred = y_pred.flatten()
        y_test = y_test.flatten()
        correct = 0
        size = len(y_test)

        for i in range(size):
            if (y_pred[i]==y_test[i]):
                correct+=1

        return correct/size
    

In [3]:

#file_name = r"../datasets/blobs600.csv"
file_name = r"../datasets/circles500.csv"
df1 = pd.read_csv(file_name)
Y = df1['Class'].values
del df1['Class']  
X = df1.values     
X.shape

(500, 2)

In [4]:
from sklearn.model_selection import train_test_split
import numpy as np

def data_split(X, Y, seed=42):
    Y = np.array(Y).reshape(-1,1)

    X_train, X_temp, y_train, y_temp = train_test_split( X,Y,test_size=0.30, random_state=seed, shuffle=True)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp,test_size=0.50, random_state=seed, shuffle=True)

    return X_train, y_train, X_val, y_val, X_test, y_test

In [5]:
import pandas as pd 
hidden_sizes = [2, 3, 5, 10, 20]
datasets = [("Dataset1_blobs600", r"../datasets/blobs600.csv"),("Dataset2_circles500", r"../datasets/circles500.csv")]
results = {}

for name, path in datasets:
    df = pd.read_csv(path)

    Y = df["Class"].values
    del df["Class"]
    X = df.values

    X_train, y_train, X_val, y_val, X_test, y_test = data_split(X, Y, seed=42)

    print(name)
    print("Shapes:", X_train.shape, X_val.shape, X_test.shape)
    results[name] = []
    
    

Dataset1_blobs600
Shapes: (420, 3) (90, 3) (90, 3)
Dataset2_circles500
Shapes: (350, 2) (75, 2) (75, 2)


In [6]:
for h in hidden_sizes :
    lr = LogisticRegressor(alpha=0.01,max_iters=100000,threshold=1e-6,N=1000,hidden_layer_size=h)
    lr.fit(X_train, y_train)

    train_acc = lr.score(X_train, y_train)
    val_acc = lr.score(X_val, y_val)
    test_acc = lr.score(X_test, y_test)

    results[name].append((h, train_acc, val_acc, test_acc))

    print("hidden_nodes=", h,"| train=", train_acc, "| val=", val_acc, "| test=", test_acc)

hidden_nodes= 2 | train= 0.5142857142857142 | val= 0.5066666666666667 | test= 0.4266666666666667
hidden_nodes= 3 | train= 0.5085714285714286 | val= 0.48 | test= 0.41333333333333333
hidden_nodes= 5 | train= 0.5057142857142857 | val= 0.41333333333333333 | test= 0.5066666666666667
hidden_nodes= 10 | train= 0.4857142857142857 | val= 0.49333333333333335 | test= 0.5733333333333334
hidden_nodes= 20 | train= 0.4857142857142857 | val= 0.49333333333333335 | test= 0.5733333333333334


The current implementation does not achieve high accuracy which indicates that training may not be converging properly.
The shallow neural network doesn't clearly outperform logistic regression, while some configurations slightly improve test accuracy, performance is a bit unstable.
Increasing the number of hidden layers don't clearly improve the performance but larger configurations show slight improvements.