In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 


In [3]:
class neuron:
    """A single artificial neuron: ``y = activation(X @ w + b)``.

    Parameters
    ----------
    n_inputs : number of input features; ``w`` has shape ``(n_inputs, 1)``.
    activation : one of ``"Linear"``, ``"Relu"``, ``"Tanh"``, ``"Sigmoid"``.
    Lr : learning rate used by :meth:`step`.

    Typical usage is ``forward`` -> ``backword`` -> ``step``.
    """

    def __init__(self, n_inputs, activation, Lr = 0.01):
        self.w = np.random.randn(n_inputs, 1)
        self.b = np.zeros((1, 1))
        self.lose = []  # mean loss recorded by every backword() call (for viz)
        self.activation = activation
        self.Lr = Lr
        self.n_inputs = n_inputs

    # activation functions
    def sigmoid(self, z):
        """Logistic sigmoid; ``z`` is clipped to [-250, 250] so exp() cannot overflow."""
        return 1 / (1 + np.exp(-np.clip(z, -250, 250)))

    def relu(self, z):
        """Rectified linear unit: element-wise ``max(0, z)``."""
        return np.maximum(0, z)

    def tanh(self, z):
        """Hyperbolic tangent."""
        return np.tanh(z)

    def forward(self, X):
        """Compute the neuron output for a batch ``X`` of shape ``(n_samples, n_inputs)``.

        The input, the pre-activation ``z`` and the output are cached on the
        instance because :meth:`backword` needs them.

        Raises
        ------
        ValueError : if ``self.activation`` is not a supported name.
        """
        self.X = X
        self.z = X @ self.w + self.b

        if self.activation == "Linear":
            self.y_pred = self.z
        elif self.activation == "Relu":
            self.y_pred = self.relu(self.z)
        elif self.activation == "Tanh":
            self.y_pred = self.tanh(self.z)
        elif self.activation == "Sigmoid":
            self.y_pred = self.sigmoid(self.z)
        else:
            raise ValueError(f"Unknown activation: {self.activation!r}")

        return self.y_pred

    def loss(self, los, y_pred, Y_true):
        """Return the element-wise loss between ``y_pred`` and ``Y_true``.

        ``los`` selects the loss: ``"MSE"`` (squared error) or ``"BCE"``
        (binary cross-entropy).

        Raises
        ------
        ValueError : if ``los`` is not a supported name.
        """
        if los == "MSE":
            return (Y_true - y_pred) ** 2
        if los == "BCE":
            # Clip so that log() never receives exactly 0.
            epsilon = 1e-15
            y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
            return -(Y_true * np.log(y_pred) + (1 - Y_true) * np.log(1 - y_pred))
        raise ValueError(f"Unknown loss: {los!r}")

    def _activation_grad(self):
        """Derivative of the activation w.r.t. ``z`` at the last forward pass."""
        if self.activation == "Linear":
            return 1
        if self.activation == "Relu":
            return (self.z > 0).astype(float)
        if self.activation == "Tanh":
            return 1 - self.y_pred ** 2
        if self.activation == "Sigmoid":
            return self.y_pred * (1 - self.y_pred)
        raise ValueError(f"Unknown activation: {self.activation!r}")

    # backword --> calculate gradients
    def backword(self, los, y_pred, Y_true):
        """Compute the gradients of the mean loss w.r.t. ``w`` and ``b``.

        Must be called after :meth:`forward`, whose cached input and
        pre-activation are reused here. The mean loss of the batch is appended
        to ``self.lose``.

        Returns
        -------
        (dL_dw, dL_db) with the same shapes as ``self.w`` and ``self.b``.
        """
        L = self.loss(los, y_pred, Y_true)
        self.lose.append(float(np.mean(L)))

        n_samples = np.shape(y_pred)[0]
        if los == "MSE":
            dL_dy = 2 * (y_pred - Y_true) / n_samples
        else:  # "BCE" -- loss() above already rejected every other name
            epsilon = 1e-15
            clipped = np.clip(y_pred, epsilon, 1 - epsilon)
            dL_dy = (clipped - Y_true) / (clipped * (1 - clipped) * n_samples)

        # chain rule: dL/dz = dL/dy * dy/dz, then z = X @ w + b
        dL_dz = dL_dy * self._activation_grad()
        dL_dw = self.X.T @ dL_dz
        dL_db = np.sum(dL_dz, axis=0, keepdims=True)

        return dL_dw, dL_db

    def step(self, dL_dw, dL_db):
        """Apply one gradient-descent update and return the new ``(w, b)``."""
        self.w = self.w - self.Lr * dL_dw
        self.b = self.b - self.Lr * dL_db

        return self.w, self.b



In [None]:
class DesnsLayer:
    """Fully connected layer: ``y = activation(X @ w + b)``.

    Parameters
    ----------
    n_inputs : number of features coming into the layer.
    n_outputs : number of neurons in the layer.
    activation : one of ``"Linear"``, ``"Relu"``, ``"Tanh"``, ``"Sigmoid"``.
    Lr : learning rate used by :meth:`step`.
    """

    #n_output mean the number of neurons per layer
    def __init__(self, n_inputs, n_outputs, activation, Lr = 0.01):
        # Small initial weights keep z (and therefore the gradients) from
        # blowing up at the start of training.
        self.w = np.random.randn(n_inputs, n_outputs) * 0.1
        self.b = np.zeros((1, n_outputs))
        self.activation = activation
        self.Lr = Lr
        self.lose = []  # scalar loss recorded by every loss() call (for viz)
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs

    #activation functions
    def sigmoid(self, X):
        """Logistic sigmoid; input is clipped to [-250, 250] so exp() cannot overflow."""
        return 1 / (1 + np.exp(-np.clip(X, -250, 250)))

    def relu(self, X):
        """Rectified linear unit: element-wise ``max(0, X)``."""
        return np.maximum(0, X)

    def tanh(self, X):
        """Hyperbolic tangent."""
        return np.tanh(X)

    def forward(self, X):
        """Compute the layer output for a batch ``X`` of shape ``(n_samples, n_inputs)``.

        Caches ``X``, the pre-activation ``z`` and the output for
        :meth:`backword`.

        Raises
        ------
        ValueError : if ``self.activation`` is not a supported name.
        """
        self.X = X
        self.z = X @ self.w + self.b

        if self.activation == "Linear":
            self.y_pred = self.z
        elif self.activation == "Relu":
            self.y_pred = self.relu(self.z)
        elif self.activation == "Tanh":
            self.y_pred = self.tanh(self.z)
        elif self.activation == "Sigmoid":
            self.y_pred = self.sigmoid(self.z)
        else:
            raise ValueError(f"Unknown activation: {self.activation!r}")

        return self.y_pred

    def loss(self, los, y_pred, Y_true):
        """Record the scalar loss and return its gradient w.r.t. ``y_pred``.

        ``los`` is ``"MSE"`` or ``"BCE"``. The mean loss is appended to
        ``self.lose``; the returned gradient is scaled by ``1 / n_samples``
        and is meant to be passed to :meth:`backword`.

        Raises
        ------
        ValueError : if ``los`` is not a supported name.
        """
        n = Y_true.shape[0]
        if los == "MSE":
            L = np.mean((Y_true - y_pred) ** 2)
            self.lose.append(L)
            # d/dy (y_true - y)^2 = 2 * (y - y_true): the sign matters because
            # step() subtracts the gradient.
            dL_dy = (2 / n) * (y_pred - Y_true)
        elif los == "BCE":
            epsilon = 1e-15
            y_pred = np.clip(y_pred, epsilon, 1 - epsilon) #to avoid log(0)
            L = - np.mean(Y_true * np.log(y_pred) + (1 - Y_true) * np.log(1 - y_pred))
            self.lose.append(L)
            # Gradient w.r.t. y (not z): backword() applies the activation
            # derivative itself, so it must not be folded in here.
            dL_dy = (y_pred - Y_true) / (y_pred * (1 - y_pred) * n)
        else:
            raise ValueError(f"Unknown loss: {los!r}")

        return dL_dy

    # backword --> calculate gradients
    def backword(self, dL_dy):
        """Back-propagate ``dL_dy`` through the layer.

        Must be called after :meth:`forward`.

        Returns
        -------
        (dL_dw, dL_db, dL_dx) : gradients w.r.t. the weights, the bias and the
        layer input (the latter is what the previous layer receives).

        Raises
        ------
        ValueError : if ``self.activation`` is not a supported name.
        """
        # activations derivatives
        if self.activation == "Linear":
            dy_dz = 1
        elif self.activation == "Relu":
            dy_dz = (self.z > 0).astype(float)
        elif self.activation == "Tanh":
            dy_dz = 1 - self.y_pred**2
        elif self.activation == "Sigmoid":
            dy_dz = self.y_pred * (1 - self.y_pred)
        else:
            raise ValueError(f"Unknown activation: {self.activation!r}")

        dL_dz = dL_dy * dy_dz
        dL_dw = self.X.T @ dL_dz
        dL_db = np.sum(dL_dz, axis=0, keepdims=True)
        dL_dx = dL_dz @ self.w.T

        return dL_dw, dL_db, dL_dx

    def step(self, dL_dw, dL_db):
        """Apply one gradient-descent update and return the new ``(w, b)``."""
        self.w = self.w - self.Lr * dL_dw
        self.b = self.b - self.Lr * dL_db

        return self.w, self.b

    def viz(self):
        """Draw the input units, the layer's neurons and their connections."""
        plt.figure(figsize=(10, 6))
        # Brain emoji written as an escape so the file encoding cannot garble it.
        plt.title("\U0001F9E0 Neural Layer Architecture", fontsize=14, pad=20)

        # Draw neurons
        for i in range(self.n_inputs):  # Input layer
            plt.scatter(0, i, s=500, c='lightblue', edgecolors='black', zorder=5)
            plt.text(0, i, f'x{i+1}', ha='center', va='center', fontweight='bold')

        for j in range(self.n_outputs):  # Output layer
            plt.scatter(2, j, s=500, c='lightgreen', edgecolors='black', zorder=5)
            plt.text(2, j, f'a{j+1}', ha='center', va='center', fontweight='bold')

            # Draw connections
            for i in range(self.n_inputs):
                plt.plot([0, 2], [i, j], 'gray', alpha=0.3)

        plt.xlim(-0.5, 2.5)
        # Grow the y-range with the layer so units beyond the fifth stay visible.
        plt.ylim(-0.5, max(4.5, max(self.n_inputs, self.n_outputs) - 0.5))
        plt.axis('off')
        plt.text(0, -0.3, f"Input Layer {self.n_inputs}, neurons", ha='center', fontsize=12)
        plt.text(2, -0.3, f"Dense Layer {self.n_outputs}, neurons", ha='center', fontsize=12)
        plt.show()



In [34]:
# Model building: an MLP is a stack of dense layers
class MLP:
    """Multi-layer perceptron: a stack of ``DesnsLayer`` objects applied in order.

    Parameters
    ----------
    n_neuorns_per_layer : sequence of layer widths. Index 0 is the number of
        input features (the input itself is not a trainable layer); every
        following entry creates one dense layer.
    list_activations : activation name for each dense layer. When omitted,
        hidden layers use ``"Relu"`` and the last layer uses ``"Sigmoid"``.

    Raises
    ------
    ValueError : if fewer activations are supplied than there are layers.
    """

    def __init__(self, n_neuorns_per_layer, list_activations=None):
        self.n_neuorns_per_layer = n_neuorns_per_layer
        self.list_activations = list_activations or ["Relu"] * (len(n_neuorns_per_layer) - 2) + ["Sigmoid"]

        # One dense layer per consecutive pair of widths; the first width is
        # only the input size, hence the "- 1".
        n_layers = len(n_neuorns_per_layer) - 1
        if len(self.list_activations) < n_layers:
            raise ValueError(
                f"Expected at least {n_layers} activations, "
                f"got {len(self.list_activations)}"
            )

        self.layers = []  # layer objects, in forward order
        self.lose = []

        for i in range(n_layers):
            layer = DesnsLayer(self.n_neuorns_per_layer[i], self.n_neuorns_per_layer[i + 1], self.list_activations[i])
            self.layers.append(layer)

    def forward(self, X):
        """Run ``X`` through every layer and return the last layer's output.

        The input is kept in ``self.X`` and the final output in
        ``self.previous_output``.
        """
        self.X = X

        # Each layer consumes the output of the one before it.
        self.previous_output = X
        for layer in self.layers:
            self.previous_output = layer.forward(self.previous_output)
        return self.previous_output

    def backward(self, dL_dy):
        """Back-propagate ``dL_dy`` from the last layer to the first.

        Returns
        -------
        list of ``(dL_dw, dL_db)`` tuples, ordered like ``self.layers``.
        """
        parameters_gradient = []
        current_grad = dL_dy
        for layer in reversed(self.layers):
            dL_dw, dL_db, dL_dx = layer.backword(current_grad)
            parameters_gradient.append((dL_dw, dL_db))
            # The gradient w.r.t. this layer's input is the upstream gradient
            # of the previous layer.
            current_grad = dL_dx

        # Collected last-to-first; flip so index i matches self.layers[i].
        return list(reversed(parameters_gradient))

    def step(self, parameters_gradient, Lr=0.01):
        """Gradient-descent update of every layer, in place, with rate ``Lr``."""
        for layer, (dL_dw, dL_db) in zip(self.layers, parameters_gradient):
            layer.w -= Lr * dL_dw
            layer.b -= Lr * dL_db


In [38]:
# Building the classifier
class NeruronNetworkLearning:
    """Binary classifier that trains an ``MLP`` with mini-batch gradient descent.

    Parameters
    ----------
    layer_size : layer widths passed to ``MLP`` (first entry = input features).
    activations : per-layer activation names passed to ``MLP`` (optional).
    Lr : learning rate.
    batch_size : number of samples per mini-batch.

    Per-epoch metrics are stored in ``loss_history`` / ``accuracy_history``
    (training set) and ``eval_loss_history`` / ``eval_acc_history``
    (validation set, only when one is supplied).
    """

    def __init__(self, layer_size, activations = None, Lr = 0.01, batch_size = 32):
        self.mlp = MLP(layer_size, activations)
        self.layer_size = layer_size
        self.activations = activations
        self.batch_size = batch_size
        self.Lr = Lr
        self.loss_history = []
        self.accuracy_history = []
        self.eval_loss_history = []
        self.eval_acc_history = []

    @staticmethod
    def _align_targets(y_pred, y_true):
        """Return ``y_true`` reshaped like ``y_pred`` when both hold the same number of items.

        Without this, 1-D labels of shape (n,) broadcast against (n, 1)
        predictions into an (n, n) array and silently corrupt every metric.
        """
        y_true = np.asarray(y_true)
        if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
            y_true = y_true.reshape(y_pred.shape)
        return y_true

    def binary_cross_entropy(self, y_pred, y_true):
        """Mean binary cross-entropy between probabilities and 0/1 labels."""
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon) #to avoid log(0)
        y_true = self._align_targets(y_pred, y_true)
        L = - np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return L

    #its gradient
    def binary_cross_entropy_grad(self, y_pred, y_true):
        """Gradient of the mean binary cross-entropy w.r.t. ``y_pred``."""
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)  # keeps the denominator non-zero
        y_true = self._align_targets(y_pred, y_true)
        L = (y_pred - y_true) / (y_pred * (1 - y_pred) * len(y_true))
        return L

    def train(self, X_train, y_train, x_val=None, y_val=None, epochs = 1000, verbose = True):
        """Train the network and record per-epoch metrics.

        Parameters
        ----------
        X_train, y_train : training samples and their 0/1 labels.
        x_val, y_val : optional validation set; evaluated after every epoch
            only when both are given.
        epochs : number of passes over the training set.
        verbose : print progress every 100 epochs and on the last epoch.
        """
        n_samples = X_train.shape[0]
        has_val = x_val is not None and y_val is not None

        for epoch in range(epochs):

            # Visit the samples in a fresh random order every epoch; indexing
            # with one permutation keeps every sample paired with its label.
            index = np.random.permutation(n_samples)
            x_shaffeled = X_train[index]
            y_shaffeled = y_train[index]

            #loop to create batches
            for start_idx in range(0, n_samples, self.batch_size):
                end_index = min(start_idx + self.batch_size, n_samples)
                x_batch = x_shaffeled[start_idx : end_index]
                y_batch = y_shaffeled[start_idx: end_index]

                y_pred = self.mlp.forward(x_batch)

                dL_dy = self.binary_cross_entropy_grad(y_pred, y_batch)
                gradiants = self.mlp.backward(dL_dy)

                self.mlp.step(gradiants, self.Lr)

            # Metrics on the whole training set once the epoch's updates are done.
            y_pred_full_train = self.mlp.forward(X_train)
            full_train_loss = self.binary_cross_entropy(y_pred_full_train, y_train)
            full_train_acc = self.accuracy(y_pred_full_train, y_train)

            self.loss_history.append(full_train_loss)
            self.accuracy_history.append(full_train_acc)

            # Validate on data the model was not trained on.
            if has_val:
                y_pred_val = self.mlp.forward(x_val)
                val_loss = self.binary_cross_entropy(y_pred_val, y_val)
                val_acc = self.accuracy(y_pred_val, y_val)

                self.eval_loss_history.append(val_loss)
                self.eval_acc_history.append(val_acc)

            if verbose and (epoch % 100 == 0 or epoch == epochs - 1):
                # Same condition as the validation block above, so val_loss /
                # val_acc are always defined when they are formatted.
                val_info = f" | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%" if has_val else ""
                print(f"Epoch {epoch:4d} | Loss: {full_train_loss:.4f} | Acc: {full_train_acc:.2f}%{val_info}")

    def accuracy(self, y_pred, y_true):
        """Percentage of samples whose thresholded prediction (>= 0.5) equals the label."""
        y_pred = (np.asarray(y_pred) >= 0.5).astype(int)
        y_true = self._align_targets(y_pred, y_true)
        return np.mean(y_pred == y_true) * 100

    def predict(self, X):
        """Make predictions"""
        y_pred = self.mlp.forward(X)
        return (y_pred >= 0.5).astype(int)

    def predict_proba(self, X):
        """Predict probabilities"""
        return self.mlp.forward(X)
