### **Neural Network**

In [None]:
import numpy as np

class NeuralNetwork:
    """
    Fully-connected feed-forward neural network classifier.

    Hidden layers use ReLU and the output layer uses softmax. Training is
    mini-batch gradient descent with a multiplicative learning-rate decay
    applied after every epoch and optional early stopping on validation
    accuracy.
    """

    def __init__(self, layer_sizes=(784, 128, 64, 10), learning_rate=0.3, epochs=15, batch_size=64, decay=0.98, patience=5):
        """
        Initialize the neural network.

        Parameters:
        - layer_sizes: Number of neurons in each layer (input, hidden..., output);
          any sequence with at least two entries. It is copied, so neither the
          default nor the caller's sequence is shared with the instance.
        - learning_rate: Initial learning rate for gradient descent
        - epochs: Number of epochs to train
        - batch_size: Size of mini-batches for training
        - decay: Multiplicative decay factor applied to the learning rate after each epoch
        - patience: Number of epochs with no improvement to wait before early stopping

        Raises:
        - ValueError: if layer_sizes has fewer than two entries
        """
        # Copy into a fresh list: a mutable default (or a caller-owned list) stored
        # by reference would be shared between instances and could be changed
        # behind the network's back.
        layer_sizes = list(layer_sizes)
        if len(layer_sizes) < 2:
            raise ValueError("layer_sizes must contain at least an input and an output layer")

        self.layer_sizes = layer_sizes
        self.initial_lr = learning_rate
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.decay = decay
        self.patience = patience
        self.weights = []
        self.biases = []
        self._init_weights()

    def _init_weights(self):
        """
        Initialize weights using Kaiming initialization and biases with zeros.

        Appends one weight matrix of shape (fan_in, fan_out) and one bias row of
        shape (1, fan_out) per pair of consecutive layers.
        """
        for i in range(len(self.layer_sizes) - 1):
            # Kaiming initialization for ReLU activations:
            # standard normal samples scaled by sqrt(2 / fan_in)
            W = np.random.randn(self.layer_sizes[i], self.layer_sizes[i + 1]) * np.sqrt(2. / self.layer_sizes[i])
            # Biases initialized to zero
            b = np.zeros((1, self.layer_sizes[i + 1]))
            self.weights.append(W)
            self.biases.append(b)

    def _relu(self, x):
        """
        ReLU activation function.
        Returns: max(0, x), element-wise
        """
        return np.maximum(0, x)

    def _relu_derivative(self, x):
        """
        Derivative of ReLU.
        Returns: 1.0 where x > 0, else 0.0
        """
        return (x > 0).astype(float)

    def _softmax(self, x):
        """
        Row-wise softmax activation function.

        The row maximum is subtracted before exponentiating so that np.exp
        cannot overflow; this does not change the result.
        """
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def _forward(self, X):
        """
        Forward pass through the network.

        Parameters:
        - X: Input data, one sample per row

        Returns:
        - activations: list of activations per layer (activations[0] is X,
          activations[-1] is the softmax output)
        - zs: list of pre-activation values (z) per layer
        """
        activations = [X]
        zs = []

        # Hidden layers: affine transform followed by ReLU
        for i in range(len(self.weights) - 1):
            z = activations[-1] @ self.weights[i] + self.biases[i]
            zs.append(z)
            activations.append(self._relu(z))

        # Output layer: affine transform followed by softmax
        z = activations[-1] @ self.weights[-1] + self.biases[-1]
        zs.append(z)
        activations.append(self._softmax(z))

        return activations, zs

    def _backward(self, X, y, activations, zs):
        """
        Backward pass: compute gradients by backpropagation and update the
        weights and biases in place with one gradient-descent step.

        Parameters:
        - X: Input batch
        - y: True labels (class indices; cast to int before use)
        - activations: Activations from forward pass
        - zs: Pre-activations from forward pass
        """
        m = X.shape[0]  # batch size
        y = y.astype(int)

        # convert labels y to one-hot encoding
        y_onehot = np.zeros_like(activations[-1])
        y_onehot[np.arange(m), y] = 1

        # error at the output layer: softmax output minus one-hot target
        delta = activations[-1] - y_onehot

        grads_w = [0] * len(self.weights)
        grads_b = [0] * len(self.biases)

        # gradient for last layer weights and biases, averaged over the batch
        grads_w[-1] = activations[-2].T @ delta / m
        grads_b[-1] = np.sum(delta, axis=0, keepdims=True) / m

        # backpropagate through hidden layers using relu derivative
        for i in range(len(self.weights) - 2, -1, -1):
            delta = (delta @ self.weights[i + 1].T) * self._relu_derivative(zs[i])
            grads_w[i] = activations[i].T @ delta / m
            grads_b[i] = np.sum(delta, axis=0, keepdims=True) / m

        # All gradients are computed before any parameter changes, so every
        # layer's update uses the weights from the forward pass.
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * grads_w[i]
            self.biases[i] -= self.learning_rate * grads_b[i]

    def fit(self, X_train, y_train, X_val=None, y_val=None):
        """
        Train the neural network with mini-batch gradient descent and optional early stopping.

        Validation-based early stopping is only active when both X_val and y_val
        are given. Training then stops as soon as validation accuracy reaches
        0.99, or after `patience` consecutive epochs without a strict
        improvement over the best validation accuracy seen so far.

        Note: self.learning_rate is decayed in place and is not reset here, so a
        second call to fit continues from the already-decayed rate.

        Parameters:
        - X_train: Training features
        - y_train: Training labels
        - X_val: Validation features (optional)
        - y_val: Validation labels (optional)
        """
        best_val_acc = 0
        epochs_no_improve = 0

        for epoch in range(self.epochs):
            # Shuffle training data with a fresh permutation every epoch
            indices = np.arange(X_train.shape[0])
            np.random.shuffle(indices)
            X_shuffled = X_train[indices]
            y_shuffled = y_train[indices]

            # Mini-batch training (the last batch may be smaller than batch_size)
            for start in range(0, X_train.shape[0], self.batch_size):
                end = start + self.batch_size
                X_batch = X_shuffled[start:end]
                y_batch = y_shuffled[start:end]

                activations, zs = self._forward(X_batch)
                self._backward(X_batch, y_batch, activations, zs)

            # Decay learning rate
            self.learning_rate *= self.decay

            # Evaluate on validation set
            if X_val is not None and y_val is not None:
                val_acc = self.score(X_val, y_val)

                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    epochs_no_improve = 0
                else:
                    epochs_no_improve += 1

                if val_acc >= 0.99:
                    print(f"Early stopping: validation accuracy reached {val_acc:.4f} >= 0.99")
                    break

                if epochs_no_improve >= self.patience:
                    print(f"Early stopping: no improvement in validation accuracy for {self.patience} epochs")
                    break

    def predict(self, X):
        """
        Predict class labels for input data.

        Parameters:
        - X: Input features

        Returns:
        - predictions: Predicted class index per row (argmax of the softmax output)
        """
        activations, _ = self._forward(X)
        return np.argmax(activations[-1], axis=1)

    def score(self, X, y):
        """
        Compute accuracy of the model.

        Parameters:
        - X: Input features
        - y: True labels

        Returns:
        - accuracy: fraction of rows whose predicted class equals y
        """
        predictions = self.predict(X)
        return np.mean(predictions == y)

### **Main MNIST**

In [None]:
import pandas as pd


if __name__ == '__main__':
    # Read both MNIST CSV files into plain NumPy arrays (train first, then test).
    data_train, data_test = (
        pd.read_csv(csv_name).to_numpy()
        for csv_name in ("MNIST-train.csv", "MNIST-test.csv")
    )

    # The label is the last column; all other columns are pixel features,
    # normalized by dividing by 255.0.
    X_train, y_train = data_train[:, :-1] / 255.0, data_train[:, -1]
    X_test_full, y_test_full = data_test[:, :-1] / 255.0, data_test[:, -1]

    # First half of the test file becomes the validation set, second half the test set.
    split_idx = len(X_test_full) // 2
    X_val, X_test = X_test_full[:split_idx], X_test_full[split_idx:]
    y_val, y_test = y_test_full[:split_idx], y_test_full[split_idx:]

    # Build the network
    nn = NeuralNetwork(
        layer_sizes=[784, 128, 64, 10],
        learning_rate=0.4,
        epochs=15,
        batch_size=64,
        decay=0.98,
        patience=4,
    )

    # Train with validation-based early stopping
    nn.fit(X_train, y_train, X_val=X_val, y_val=y_val)

    # Report accuracy on the held-out half
    print('Test accuracy:', nn.score(X_test, y_test))

Test accuracy: 0.9912


### **Main Data MB**

In [None]:
from sklearn.preprocessing import StandardScaler

def preproess_labels(y):
    """
    Convert raw labels into binary integer labels.

    Each entry is turned into a string; it maps to 1 when that string
    contains "Fibro" and to 0 otherwise.

    Parameters:
    - y: iterable of raw labels

    Returns:
    - 1-D integer NumPy array with one 0/1 entry per label
    """
    flags = (int("Fibro" in str(raw_label)) for raw_label in y)
    return np.fromiter(flags, dtype=int)

if __name__ == '__main__':
    # NOTE: every name in this cell is MB-specific. Rebinding X_train / y_train / nn
    # here would replace the MNIST arrays that the experiment cells further down
    # pass to plot_graph / test_and_plot_layer_configs (which build 784-input networks).

    # Load data
    mb_data_train = pd.read_csv("MB_data_train.csv").to_numpy()
    # (commented out)
    # mb_data_test = pd.read_csv("MB_data_test.csv").to_numpy()

    # Extract features and labels
    X_train_mb = mb_data_train[:, 1:]  # features
    y_train_mb = preproess_labels(mb_data_train[:, 0])  # labels (first column)

    # (commented out)
    # X_test_mb = mb_data_test[:, 1:]  # features
    # y_test_mb = preproess_labels(mb_data_test[:, 0])  # labels (first column)

    # Standardize features; the scaler is fitted on the training data only
    mb_scaler = StandardScaler()
    X_train_mb = mb_scaler.fit_transform(X_train_mb)
    # (commented out) reuse the training statistics for the test set: transform, not fit_transform
    # X_test_mb = mb_scaler.transform(X_test_mb)

    # Initialize the neural network: input width taken from the data, two output classes
    mb_nn = NeuralNetwork(layer_sizes=[X_train_mb.shape[1], 128, 64, 2])
    # (commented out)
    # mb_nn.fit(X_train_mb, y_train_mb)
    # print('Test accuracy:', mb_nn.score(X_test_mb, y_test_mb))

### **Experiments**

In [None]:
import os
import time
import matplotlib.pyplot as plt

def plot_graph(param_dict, param_name, x_train, y_train, x_test, y_test, x_val, y_val, folder='graphs'):
    """
    Tests different values of a single parameter and plots accuracy and time in one combined graph.

    For every entry of param_dict a fresh NeuralNetwork is built from a fixed set
    of default arguments with `param_name` overridden, trained on the training
    data (with the validation data enabling early stopping), timed, and scored
    on the test data.

    Parameters:
    - param_dict: maps an x-axis label to the parameter value to test
    - param_name: NeuralNetwork constructor argument to override; also used for
      the axis label, the title and the output file name
    - x_train, y_train: training features and labels
    - x_test, y_test: test features and labels used for the reported accuracy
    - x_val, y_val: validation features and labels passed to fit
    - folder: directory the PNG is written to (created if missing)

    Returns:
    - None; the figure is saved as <folder>/<param_name>_combined.png
    """

    accuracies = []
    times = []
    labels = list(param_dict.keys())

    print(f"\n\n--- Testing different values for {param_name} ---\n")

    for label in labels:
        param_value = param_dict[label]
        print(f"\nTesting {param_name} = {param_value}")

        # Default values (rebuilt each iteration so no list is shared between models)
        nn_kwargs = {
            "layer_sizes": [784, 128, 64, 10],
            "learning_rate": 0.1,
            "epochs": 15,
            "batch_size": 64,
            "decay": 0.98
        }

        # Override the tested parameter
        nn_kwargs[param_name] = param_value

        # Create and train the model; only fit() is timed
        nn = NeuralNetwork(**nn_kwargs)

        start_time = time.time()
        nn.fit(x_train, y_train, X_val=x_val, y_val=y_val)
        duration = time.time() - start_time

        acc = nn.score(x_test, y_test)
        accuracies.append(acc)
        times.append(duration)

        print(f"Test Accuracy: {acc:.4f}, Time: {duration:.2f} seconds")

    os.makedirs(folder, exist_ok=True)

    # Plot combined graph: accuracy on the left y-axis, training time on the right
    fig, ax1 = plt.subplots(figsize=(8, 5))

    ax1.set_xlabel(param_name)
    ax1.set_ylabel('Accuracy', color='crimson')
    ax1.plot(labels, accuracies, marker='o', color='crimson', linewidth=2)
    ax1.tick_params(axis='y', labelcolor='crimson')
    ax1.set_ylim(0.97, 1)

    ax2 = ax1.twinx()
    ax2.set_ylabel('Time (seconds)', color='cornflowerblue')
    ax2.plot(labels, times, marker='s', color='cornflowerblue', linewidth=2)
    ax2.tick_params(axis='y', labelcolor='cornflowerblue')

    # Title, tick rotation and grid go on ax1 explicitly. After twinx() the
    # pyplot "current axes" is ax2, whose x-axis is invisible, so plt.xticks /
    # plt.grid would act on tick labels and grid lines that are never drawn.
    ax1.set_title(f"{param_name} vs Accuracy & Time")
    ax1.tick_params(axis='x', labelrotation=45)
    ax1.grid(True, linestyle='--', alpha=0.6)
    fig.tight_layout()

    fig.savefig(os.path.join(folder, f"{param_name.replace(' ', '_')}_combined.png"))
    # Close this specific figure so repeated calls do not accumulate open figures
    plt.close(fig)

def test_and_plot_learning_rates(param_dict, param_name, x_train, y_train, x_test, y_test, x_val, y_val, folder='graphs'):
    """
    Trains one [784, 128, 64, 10] network per learning rate with decay disabled
    (decay=1) and plots the resulting test accuracies.

    Parameters:
    - param_dict: maps an x-axis label to the learning rate to test
    - param_name: text used in the printed messages, axis label, title and file name
    - x_train, y_train: training features and labels
    - x_test, y_test: test features and labels used for the reported accuracy
    - x_val, y_val: validation features and labels passed to fit
    - folder: directory the PNG is written to (created if missing)

    Returns:
    - None; the figure is saved as <folder>/<param_name>_combined.png
    """
    tick_labels = list(param_dict)
    test_scores, elapsed = [], []

    print(f"\n\n--- Testing different values for {param_name} ---\n")

    for tick in tick_labels:
        rate = param_dict[tick]
        print(f"\nTesting {param_name} = {rate}")

        # Fresh model for every learning rate
        model = NeuralNetwork(layer_sizes=[784, 128, 64, 10], learning_rate=rate, decay=1)

        # Only the training call is timed
        t0 = time.time()
        model.fit(x_train, y_train, X_val=x_val, y_val=y_val)
        seconds = time.time() - t0

        score = model.score(x_test, y_test)
        test_scores.append(score)
        elapsed.append(seconds)  # collected but not plotted

        print(f"Test Accuracy: {score:.4f}, Time: {seconds:.2f} seconds")

    os.makedirs(folder, exist_ok=True)

    # Single-axis accuracy plot
    fig, acc_ax = plt.subplots(figsize=(8, 5))
    acc_ax.plot(tick_labels, test_scores, marker='o', color='crimson', linewidth=2)
    acc_ax.set_xlabel(param_name)
    acc_ax.set_ylabel('Accuracy', color='crimson')
    acc_ax.tick_params(axis='y', labelcolor='crimson')
    acc_ax.set_ylim(0.96, 1)

    plt.title(f"{param_name} vs Accuracy")
    plt.xticks(rotation=45)
    plt.grid(True, linestyle='--', alpha=0.6)
    fig.tight_layout()

    file_name = f"{param_name.replace(' ', '_')}_combined.png"
    plt.savefig(os.path.join(folder, file_name))
    plt.close()

def test_and_plot_layer_configs(layer_configs, X_train, y_train, X_test, y_test, X_val, y_val, folder='graphs'):
    """
    Trains one default-parameter network per layer configuration and draws a
    grouped bar chart of test accuracy and elapsed time.

    Parameters:
    - layer_configs: maps an x-axis label to a layer_sizes list
    - X_train, y_train: training features and labels
    - X_test, y_test: test features and labels used for the reported accuracy
    - X_val, y_val: validation features and labels passed to fit
    - folder: directory the PNG is written to (created if missing)

    Returns:
    - None; the figure is saved as <folder>/layer_config.png
    """
    os.makedirs(folder, exist_ok=True)

    labels, accuracies, times = [], [], []

    for name, sizes in layer_configs.items():
        print(f"Testing layer_sizes = {sizes}")

        # The timed span covers both model construction and training
        t_begin = time.time()
        model = NeuralNetwork(layer_sizes=sizes)
        model.fit(X_train, y_train, X_val=X_val, y_val=y_val)
        t_finish = time.time()

        score = model.score(X_test, y_test)

        times.append(t_finish - t_begin)
        accuracies.append(score)
        labels.append(name)

        print(f"Test Accuracy: {score:.4f}, Time: {t_finish - t_begin:.2f} seconds")

    # Two bars per configuration, offset by half a bar width on either side of the tick
    positions = range(len(labels))
    bar_width = 0.4
    left_of_tick = [p - bar_width / 2 for p in positions]
    right_of_tick = [p + bar_width / 2 for p in positions]

    fig, ax1 = plt.subplots(figsize=(10, 6))

    ax1.bar(left_of_tick, accuracies, width=bar_width, color='crimson', label='Accuracy')
    ax2 = ax1.twinx()
    ax2.bar(right_of_tick, times, width=bar_width, color='cornflowerblue', label='Time')

    ax1.set_ylabel('Accuracy', color='crimson')
    ax2.set_ylabel('Time (seconds)', color='cornflowerblue')
    ax1.set_ylim(0.95, 1.0)
    ax1.set_xlabel('Layer Configuration')
    ax1.set_xticks(positions)
    ax1.set_xticklabels(labels, rotation=45)
    plt.title('Accuracy vs Time for Different Layer Configurations')

    # One legend combining the entries of both axes
    acc_handles, acc_names = ax1.get_legend_handles_labels()
    time_handles, time_names = ax2.get_legend_handles_labels()
    plt.legend(acc_handles + time_handles, acc_names + time_names, loc='upper left')

    plt.tight_layout()
    plt.savefig(os.path.join(folder, 'layer_config.png'))
    plt.close()




### **TEST LAYERS DEPTH**

In [None]:
 # ~~~~~~~~~~~~~~~~~~~~~~~TEST LAYERS DEPTH~~~~~~~~~~~~~~~~~~~~~~~

# x-axis label -> network layout with one, two and three hidden layers
layer_sizes = dict(zip(
    ("1", "2", "3"),
    (
        [784, 64, 10],
        [784, 128, 64, 10],
        [784, 256, 128, 64, 10],
    ),
))

plot_graph(layer_sizes, 'layer_sizes', X_train, y_train, X_test, y_test, X_val, y_val)



--- Testing different values for layer_sizes ---


Testing layer_sizes = [784, 64, 10]
Test Accuracy: 0.9830, Time: 34.25 seconds

Testing layer_sizes = [784, 128, 64, 10]
Test Accuracy: 0.9858, Time: 61.45 seconds

Testing layer_sizes = [784, 256, 128, 64, 10]
Early stopping: no improvement in validation accuracy for 5 epochs
Test Accuracy: 0.9870, Time: 58.91 seconds


### **TEST LAYERS CONFIGURATIONS**

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~TEST LAYERS CONFIGURATIONS~~~~~~~~~~~~~~~~~~~~~~~
# Each configuration is labelled with the text of its own list, e.g. "[784, 64, 10]".
layer_configs = {
    str(config): config
    for config in (
        [784, 64, 10],
        [784, 128, 10],
        [784, 256, 10],
        [784, 512, 256, 10],
        [784, 512, 128, 10],
        [784, 512, 64, 10],
        [784, 256, 128, 10],
        [784, 256, 64, 10],
        [784, 128, 64, 10],
        [784, 256, 128, 64, 10],
        [784, 512, 256, 128, 10],
        [784, 512, 256, 128, 64, 10],
    )
}

test_and_plot_layer_configs(layer_configs, X_train, y_train, X_test, y_test, X_val, y_val)

Testing layer_sizes = [784, 64, 10]
Early stopping: no improvement in validation accuracy for 5 epochs
Test Accuracy: 0.9856, Time: 26.04 seconds
Testing layer_sizes = [784, 128, 10]
Test Accuracy: 0.9834, Time: 39.73 seconds
Testing layer_sizes = [784, 256, 10]
Test Accuracy: 0.9894, Time: 63.11 seconds
Testing layer_sizes = [784, 512, 256, 10]
Early stopping: no improvement in validation accuracy for 5 epochs
Test Accuracy: 0.9902, Time: 166.13 seconds
Testing layer_sizes = [784, 512, 128, 10]
Test Accuracy: 0.9894, Time: 137.23 seconds
Testing layer_sizes = [784, 512, 64, 10]
Test Accuracy: 0.9908, Time: 123.56 seconds
Testing layer_sizes = [784, 256, 128, 10]
Test Accuracy: 0.9906, Time: 76.95 seconds
Testing layer_sizes = [784, 256, 64, 10]
Test Accuracy: 0.9902, Time: 73.66 seconds
Testing layer_sizes = [784, 128, 64, 10]
Test Accuracy: 0.9894, Time: 40.85 seconds
Testing layer_sizes = [784, 256, 128, 64, 10]
Test Accuracy: 0.9882, Time: 78.81 seconds
Testing layer_sizes = [784, 

### **TEST EPOCHS**

In [18]:
# ~~~~~~~~~~~~~~~~~~~~~~~TEST EPOCHS~~~~~~~~~~~~~~~~~~~~~~~

# Epoch budgets 5, 10, ..., 30, labelled "<n> epochs"
epochs = {f"{count} epochs": count for count in range(5, 31, 5)}

plot_graph(epochs, 'epochs', X_train, y_train, X_test, y_test, X_val, y_val)



--- Testing different values for epochs ---


Testing epochs = 5
Test Accuracy: 0.9844, Time: 15.16 seconds

Testing epochs = 10
Test Accuracy: 0.9876, Time: 27.22 seconds

Testing epochs = 15
Early stopping: no improvement in validation accuracy for 5 epochs
Test Accuracy: 0.9860, Time: 43.14 seconds

Testing epochs = 20
Early stopping: no improvement in validation accuracy for 5 epochs
Test Accuracy: 0.9868, Time: 46.97 seconds

Testing epochs = 25
Early stopping: no improvement in validation accuracy for 5 epochs
Test Accuracy: 0.9868, Time: 51.88 seconds

Testing epochs = 30
Early stopping: no improvement in validation accuracy for 5 epochs
Test Accuracy: 0.9864, Time: 64.26 seconds


### **TEST BATCH SIZE**

In [19]:
# ~~~~~~~~~~~~~~~~~~~~~~~TEST BATCH SIZE~~~~~~~~~~~~~~~~~~~~~~~

# Mini-batch sizes to compare, labelled "bs=<size>"
batch_sizes = {f"bs={size}": size for size in (32, 64, 128, 256)}

plot_graph(batch_sizes, 'batch_size', X_train, y_train, X_test, y_test, X_val, y_val)



--- Testing different values for batch_size ---


Testing batch_size = 32
Test Accuracy: 0.9886, Time: 55.79 seconds

Testing batch_size = 64
Test Accuracy: 0.9858, Time: 41.90 seconds

Testing batch_size = 128
Test Accuracy: 0.9850, Time: 37.15 seconds

Testing batch_size = 256
Test Accuracy: 0.9818, Time: 32.25 seconds


### **TEST LEARNING RATES**

In [20]:
# ~~~~~~~~~~~~~~~~~~~~~~~TEST LEARNING RATES~~~~~~~~~~~~~~~~~~~~~~~
# Learning rates 0.05, 0.10, ..., 0.95. Dividing an integer number of hundredths
# by 100 yields the same floats as the decimal literals, and str() of each gives
# its label ("0.05", "0.1", ...).
learning_rates = {
    str(hundredths / 100): hundredths / 100
    for hundredths in range(5, 100, 5)
}

plot_graph(learning_rates, 'learning_rate', X_train, y_train, X_test, y_test, X_val, y_val)



--- Testing different values for learning_rate ---


Testing learning_rate = 0.05
Test Accuracy: 0.9842, Time: 44.35 seconds

Testing learning_rate = 0.1
Test Accuracy: 0.9872, Time: 42.38 seconds

Testing learning_rate = 0.15
Early stopping: no improvement in validation accuracy for 5 epochs
Test Accuracy: 0.9852, Time: 20.60 seconds

Testing learning_rate = 0.2
Test Accuracy: 0.9872, Time: 42.53 seconds

Testing learning_rate = 0.25
Test Accuracy: 0.9888, Time: 46.16 seconds

Testing learning_rate = 0.3
Early stopping: no improvement in validation accuracy for 5 epochs
Test Accuracy: 0.9872, Time: 30.32 seconds

Testing learning_rate = 0.35
Test Accuracy: 0.9910, Time: 41.97 seconds

Testing learning_rate = 0.4
Test Accuracy: 0.9900, Time: 44.17 seconds

Testing learning_rate = 0.45
Test Accuracy: 0.9898, Time: 44.86 seconds

Testing learning_rate = 0.5
Test Accuracy: 0.9882, Time: 41.80 seconds

Testing learning_rate = 0.55
Test Accuracy: 0.9866, Time: 42.07 seconds

Testing learn

### **TEST DECAY**

In [21]:
# ~~~~~~~~~~~~~~~~~~~~~~~TEST DECAY~~~~~~~~~~~~~~~~~~~~~~~
# Decay factors to compare; 1.0 keeps the learning rate constant and gets a descriptive label,
# the remaining factors are labelled with their own value.
decay_rates = {
    "1.0 (no decay)": 1.0,
    **{str(factor): factor for factor in (0.99, 0.98, 0.95, 0.9, 0.85, 0.8, 0.7)},
}

plot_graph(decay_rates, 'decay', X_train, y_train, X_test, y_test, X_val, y_val)



--- Testing different values for decay ---


Testing decay = 1.0
Test Accuracy: 0.9886, Time: 42.08 seconds

Testing decay = 0.99
Test Accuracy: 0.9862, Time: 41.31 seconds

Testing decay = 0.98
Test Accuracy: 0.9870, Time: 44.19 seconds

Testing decay = 0.95
Test Accuracy: 0.9878, Time: 41.39 seconds

Testing decay = 0.9
Test Accuracy: 0.9864, Time: 45.08 seconds

Testing decay = 0.85
Test Accuracy: 0.9854, Time: 42.13 seconds

Testing decay = 0.8
Test Accuracy: 0.9852, Time: 45.56 seconds

Testing decay = 0.7
Test Accuracy: 0.9852, Time: 41.55 seconds
