# Creation of an artificial neural network from scratch

This notebook currently contains a very basic implementation of an artificial neural network from scratch. 
The first cell contains

1. A definition of several activation functions and their derivatives.
2. A definition of a loss function and its derivative.
3. A definition of the forward propagation through a single layer
4. A definition of a backward propagation through a single layer


Your task is to make the following extensions to the code in this cell:

1. Add support for an additional activation function.
2. Add the use of a bias in the forward- and backward-propagation. 
As it is implemented now, all solutions have to pass through the origin.

In [34]:
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global random generator so code drawing from np.random is reproducible.
np.random.seed(12345)  # Set initial random seed (good to always do)

# Activation functions; the parameter f selects which activation function is used.
# Added: Sigmoid activation function and its derivative
def activate(a, f="none"):
    """Apply the activation function named by ``f`` to the pre-activation ``a``.

    Recognised names are "ReLU", "softmax" and "sigmoid"; any other name
    (including the default "none") leaves ``a`` unchanged.
    """
    if f == "ReLU":
        return ReLU(a)
    if f == "softmax":
        return softmax(a)
    if f == "sigmoid":
        # Logistic function 1 / (1 + e^-a).
        return 1 / (1 + np.exp(-a))
    # Unknown names and "none" both act as the identity.
    return a


# Derivatives of activation functions
def d_activate(a, f="none"):
    """Return the element-wise derivative of activation ``f`` evaluated at ``a``.

    Parameters:
        a: pre-activation values (the input that was given to the activation).
        f: activation name; "ReLU", "sigmoid", "softmax", or anything else
           for the identity.

    Returns:
        An array shaped like ``a`` for the identity, ReLU and sigmoid, and the
        scalar 1 for softmax.
    """
    if f == "ReLU":
        dy = 1 * (a > 0)
    elif f == "sigmoid":
        # Evaluate the sigmoid once and reuse it: s'(a) = s(a) * (1 - s(a)).
        s = 1 / (1 + np.exp(-a))
        dy = s * (1 - s)
    elif f == "softmax":
        # Softmax is passed through with a unit factor, so the incoming
        # gradient (y_hat - y, scaled) is used directly as the gradient with
        # respect to the pre-activation.
        dy = 1
    else:
        # "none" and any unrecognised name are the identity in the forward
        # pass, so their derivative is 1 everywhere. Previously an
        # unrecognised name left dy unassigned and raised UnboundLocalError.
        dy = np.ones_like(a)

    return dy


# This is the loss for a set of predictions y_hat compared to a set of real values y
def MSE_loss(y_hat, y):
    """Return half of the mean squared difference between ``y_hat`` and ``y``.

    The mean is taken over every element of the difference.
    """
    residual = y_hat - y
    return 0.5 * np.mean(residual * residual)

# This is the derivative of the loss with respect to the predicted value y_hat
def d_MSE_loss(y_hat, y):
    """Return the prediction error ``y_hat - y`` scaled by 1 / (number of rows in ``y``)."""
    scale = 1. / y.shape[0]
    return scale * (y_hat - y)


# Propagate a signal through a layer in a neural network.
# Added: support for the use of a bias
def propagate_forward(w, b, a, f="none"):
    """Compute a layer's output: the activation ``f`` applied to ``a . w + b``.

    Parameters:
        w: weight matrix of the layer.
        b: bias added to the product of ``a`` and ``w``.
        a: input signal to the layer.
        f: name of the activation function handed to ``activate``.
    """
    pre_activation = np.dot(a, w) + b
    return activate(pre_activation, f)

# Calculate the backward gradients that are passed through the layer in the backward pass.
# Returns both the derivative of the loss with respect to the weights w and the input signal a.
# Added: support for a bias
def propagate_backward(w, b, a, dl_dz, f="none"):
    """Back-propagate the gradient ``dl_dz`` through one layer.

    Parameters:
        w, b: the layer's weights and bias.
        a: the input the layer received in the forward pass.
        dl_dz: gradient of the loss with respect to the layer's output.
        f: name of the layer's activation function.

    Returns:
        A tuple ``(dw, db, da)``: gradients with respect to the weights, the
        bias and the layer input, in that order.
    """
    # Recompute the pre-activation so the activation derivative can be
    # evaluated at the same point as in the forward pass.
    pre_activation = np.dot(a, w) + b
    delta = d_activate(pre_activation, f) * dl_dz

    grad_w = np.dot(a.T, delta)
    grad_b = np.sum(delta, axis=0)  # bias is shared across rows, so sum over them
    grad_a = np.dot(delta, w.T)
    return grad_w, grad_b, grad_a

def ReLU(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    rectified = np.maximum(0, x)
    return rectified

def d_ReLU(x):
    """Derivative of ReLU: 1.0 where ``x`` is strictly positive, 0.0 elsewhere."""
    is_positive = x > 0
    return is_positive.astype(float)

def softmax(x):
    """Softmax along the last axis of ``x``.

    The maximum along that axis is subtracted before exponentiating, which
    keeps the exponentials from overflowing without changing the result.
    """
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    normaliser = np.sum(exps, axis=-1, keepdims=True)
    return exps / normaliser

def d_softmax_cross_entropy(x, y):
    """Return the difference ``x - y``.

    NOTE(review): by its name this is the combined softmax + cross-entropy
    gradient, with ``x`` the softmax output and ``y`` the one-hot target;
    confirm against callers.
    """
    difference = x - y
    return difference



## An implementation of a neural network

Below is an implementation of a MLP neural network. This implementation is still lacking several details that are needed for the network to be robust and function well. Your task is to improve it with the following:

1. Add a bias to each neuron's activation that is fine-tuned during training. 
2. Add a function that trains the network using minibatches (the network only trains on a few samples at a time) 
3. Optional: Make use of a validation set in the training function. The model should hence stop training when the loss starts to increase for the validation set.


In [38]:
from sklearn.metrics import confusion_matrix


class NeuralNet(object):
    """A fully connected feed-forward network trained with mini-batch gradient descent.

    Each layer has a weight matrix, a bias vector and an activation name. The
    layers are stored in the parallel lists ``ws``, ``bs`` and ``activations``,
    ordered from the input side to the output side.
    """

    def __init__(self, input_size, num_classes, hidden_neurons, hidden_activations=None, output_activation='softmax'):
        """Create the layer parameters.

        Parameters:
            input_size: number of input features.
            num_classes: number of output units.
            hidden_neurons: sizes of the hidden layers, input side first.
            hidden_activations: activation names for the hidden layers. When
                fewer names than hidden layers are given (or none at all), the
                remaining layers use "none" (identity); previously such layers
                were silently left out of the network. Surplus names are
                ignored.
            output_activation: activation name for the output layer.
        """
        hidden_neurons = list(hidden_neurons)
        # Copy so the caller's list (or a shared default) is never mutated.
        hidden_activations = list(hidden_activations) if hidden_activations is not None else []
        # Pad with identity activations; a negative count yields an empty list.
        hidden_activations += ["none"] * (len(hidden_neurons) - len(hidden_activations))

        self.ws = []
        self.bs = []
        self.activations = []
        self.output_dim = num_classes

        layer_sizes = hidden_neurons + [num_classes]
        layer_activations = hidden_activations[:len(hidden_neurons)] + [output_activation]

        last_size = input_size
        for neurons, activation in zip(layer_sizes, layer_activations):
            # Normal weights scaled by sqrt(2 / fan_in); biases start at zero.
            self.ws.append(np.random.randn(last_size, neurons) * np.sqrt(2 / last_size))
            self.bs.append(np.zeros(neurons))
            self.activations.append(activation)
            last_size = neurons

    def forward(self, x):
        """Propagate the input ``x`` through every layer and return the network output."""
        for w, b, f in zip(self.ws, self.bs, self.activations):
            x = propagate_forward(w, b, x, f)
        return x

    def adjust_weights(self, x, y, alpha=1e-4):
        """Run one gradient-descent step on the batch ``(x, y)``.

        Parameters:
            x: batch of inputs.
            y: matching targets.
            alpha: learning rate.

        Returns:
            The MSE loss of the batch, measured before the update.
        """
        a = x
        layer_inputs = []

        # Forward pass, remembering the input of every layer for the backward pass.
        for w, b, f in zip(self.ws, self.bs, self.activations):
            layer_inputs.append(a)
            a = propagate_forward(w, b, a, f)

        # Loss and its gradient are defined on the network output only, so they
        # are computed once after the forward pass. Doing this per layer
        # compared hidden activations against y, which fails whenever a hidden
        # width cannot be broadcast against the target shape.
        loss = MSE_loss(a, y)
        da = d_MSE_loss(a, y)

        layers = list(zip(self.ws, self.bs, self.activations, layer_inputs))
        for w, b, f, layer_input in reversed(layers):
            # The gradients are computed with the current weights before they
            # are changed; the in-place updates modify the arrays held in
            # self.ws / self.bs.
            dw, db, da = propagate_backward(w, b, layer_input, da, f)
            w -= alpha * dw
            b -= alpha * db

        return loss

    def train_net(self, x, y, batch_size=64, epochs=100, val_split=0.1, verbose=True, alpha=1e-4):
        """Train the network with mini-batches, optionally tracking a validation loss.

        The last ``int(n * val_split)`` samples are held out for validation
        and the remaining leading samples are used for training, in the order
        given (no shuffling is performed).

        Parameters:
            x, y: inputs and targets with samples along the first axis.
            batch_size: maximum number of samples per update; must be >= 1.
            epochs: number of passes over the training samples.
            val_split: fraction of the samples held out for validation.
            verbose: print the losses after every epoch.
            alpha: learning rate forwarded to ``adjust_weights``.

        Returns:
            ``(losses, val_losses)``: the training loss of every batch update
            and the validation loss of every epoch (empty when no samples are
            held out).

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        n = x.shape[0]
        val_size = int(n * val_split)
        train_size = n - val_size

        x_train, x_val = x[:train_size], x[train_size:]
        y_train, y_val = y[:train_size], y[train_size:]

        losses = []
        val_losses = []
        loss = float("nan")  # reported if there are no training samples at all

        for epoch in range(epochs):
            # Step over the training split only. Deriving the batch count from
            # the full sample count could skip trailing training samples or
            # produce an empty batch, which divides by zero in the gradient.
            for start in range(0, train_size, batch_size):
                end = start + batch_size  # slicing clamps the final, shorter batch
                x_batch = x_train[start:end]
                y_batch = y_train[start:end]

                loss = self.adjust_weights(x_batch, y_batch, alpha)
                losses.append(loss)

            if val_size > 0:
                y_val_pred = self.forward(x_val)
                val_loss = MSE_loss(y_val_pred, y_val)
                val_losses.append(val_loss)
                if verbose:
                    print(f"Epoch {epoch + 1}: Training Loss: {loss:.4f}, Validation Loss: {val_loss:.4f}")
            elif verbose:
                print(f"Epoch {epoch + 1}: Training Loss: {loss:.4f}")

        return losses, val_losses

    def accuracy(self, x, y):
        """Return the confusion matrix of the predictions for ``x`` against ``y``.

        Despite its name, this method returns a confusion matrix rather than a
        single accuracy number. Both the network output and ``y`` are reduced
        to class indices with ``argmax`` along axis 1.
        """
        y_pred = np.argmax(self.forward(x), axis=1)
        y_true = np.argmax(y, axis=1)
        return confusion_matrix(y_true, y_pred)
        


# Misc Functions

In [36]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

def load_and_preprocess_dataset(url, feature_cols, target_col, column_names=None, test_size=0.2, standardize=False):
    """Load a header-less CSV file and split it into train/test features and one-hot targets.

    Parameters:
        url: path or URL handed to ``pd.read_csv``.
        feature_cols: names of the columns used as input features.
        target_col: name of the column holding the class label.
        column_names: column names assigned to the file's columns.
        test_size: fraction of the rows placed in the test set.
        standardize: when True, scale the features with a ``StandardScaler``
            fitted on the training rows only. Off by default, so existing
            callers get unscaled features as before.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as NumPy arrays; the targets are
        one-hot encoded floats.
    """
    df = pd.read_csv(url, header=None, names=column_names)

    # Encode the target once on the full data set so train and test always
    # share the same class columns in the same order. Encoding each split
    # separately drops the column of any class missing from that split.
    one_hot = pd.get_dummies(df[target_col], dtype=float)

    # Splitting both frames in one call keeps their rows aligned.
    train_df, test_df, y_train_df, y_test_df = train_test_split(
        df, one_hot, test_size=test_size, random_state=42, stratify=df[target_col]
    )

    X_train = train_df[feature_cols].values
    X_test = test_df[feature_cols].values
    y_train = y_train_df.values
    y_test = y_test_df.values

    if standardize:
        # Fit on the training rows only so no test statistics leak into training.
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

def one_hot_encode(labels):
    """One-hot encode a vector of class labels.

    Each distinct label gets one column, with columns ordered by sorted label
    value. For labels that are exactly 0..K-1 this equals using the label as
    the column index. Other label sets (gaps, an offset, negative values,
    strings) are handled too; previously they raised IndexError or set the
    wrong column.

    Parameters:
        labels: array-like of labels; it is flattened, so shapes ``(n,)`` and
            ``(n, 1)`` both work.

    Returns:
        A float array of shape ``(n, number_of_distinct_labels)``.
    """
    flat = np.asarray(labels).reshape(-1)
    classes, class_index = np.unique(flat, return_inverse=True)

    encoded = np.zeros((flat.size, classes.size))
    # reshape(-1) guards against NumPy versions that return the inverse in the
    # shape of the input rather than flattened.
    encoded[np.arange(flat.size), class_index.reshape(-1)] = 1
    return encoded



## Main programs

This cell should contain your different programs. In this cell you should present:

1. At least 3 programs where the neural network is applied to 3 different datasets.
2. You should also have at least 2 programs where you fine tune 2 hyper-parameters.

In [39]:
def main1():
    """Fit the network to a synthetic linear regression problem and plot the losses.

    The targets are a random linear combination of four normally distributed
    features plus an offset of 0.1 and a small amount of noise.
    """
    np.random.seed(1234) # Set initial random seed (good to always do)
    n = 1000
    d = 4
    k = np.random.randint(0, 10, (d, 1))
    x = np.random.normal(0, 1, (n, d))
    y = np.dot(x, k) + 0.1 + np.random.normal(0, 0.01, (n, 1))

    # This is a regression task with a single output unit, so the output must
    # be linear: a softmax over one unit is always exactly 1. The hidden
    # activations are given explicitly so that both hidden layers are built.
    nn = NeuralNet(d, 1, [18, 12], hidden_activations=["ReLU", "ReLU"], output_activation="none")

    losses, val_losses = nn.train_net(x, y, batch_size=64, epochs=100, val_split=0.1)

    plt.plot(losses)
    # Training loss is recorded per batch and validation loss per epoch, so
    # the validation points are spread evenly along the batch axis.
    plt.plot(np.arange(0, len(losses), len(losses) // len(val_losses)), val_losses)
    plt.title("Loss")
    plt.legend(['Training', 'Validation'])
    plt.show()


def iris():
    """Load the UCI Iris data and return ``(X_train, X_test, y_train, y_test)``.

    The four measurement columns are the features and ``species`` is the target.
    """
    columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
    features = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

    X_train, X_test, y_train, y_test = load_and_preprocess_dataset(
        "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
        features,
        'species',
        column_names=columns,
    )
    return X_train, X_test, y_train, y_test


def wine():
    """Load the UCI Wine data and return ``(X_train, X_test, y_train, y_test)``.

    The first column (``class``) is the target; every other column is a feature.
    """
    columns = ['class', 'alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
    target, *features = columns  # the target comes first, the features follow

    X_train, X_test, y_train, y_test = load_and_preprocess_dataset(
        "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
        features,
        target,
        column_names=columns,
    )
    return X_train, X_test, y_train, y_test


def bc():
    """Load the UCI breast cancer (WDBC) data and return ``(X_train, X_test, y_train, y_test)``.

    The columns are named ``id``, ``diagnosis`` and ``feature_0`` to
    ``feature_29``; ``diagnosis`` is the target and ``id`` is not used.
    """
    features = [f"feature_{i}" for i in range(30)]
    columns = ['id', 'diagnosis'] + [f"feature_{i}" for i in range(30)]

    X_train, X_test, y_train, y_test = load_and_preprocess_dataset(
        "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data",
        features,
        'diagnosis',
        column_names=columns,
    )
    return X_train, X_test, y_train, y_test



def main2():
    """Train one small classifier per data set (Iris, Wine, breast cancer) and print the results.

    The values printed under the "Test accuracy" labels are what
    ``NeuralNet.accuracy`` returns for the test split.
    """
    # (label printed with the result, loaded splits, hidden layer width)
    experiments = [
        ("Test accuracy for Iris dataset:", iris(), 3),
        ("Test accuracy for Wine dataset:", wine(), 3),
        ("Test accuracy for Breast Cancer dataset:", bc(), 2),
    ]

    # Build all networks first so the random weight draws happen in a fixed order.
    networks = [
        NeuralNet(X_train.shape[1], y_train.shape[1], hidden_neurons=[width], hidden_activations=["ReLU"], output_activation='softmax')
        for _, (X_train, _, y_train, _), width in experiments
    ]

    # Train the neural networks
    for network, (_, (X_train, _, y_train, _), _) in zip(networks, experiments):
        network.train_net(X_train, y_train, batch_size=64, epochs=100, val_split=0.1, verbose=False)

    # Evaluate the performance
    results = [
        network.accuracy(X_test, y_test)
        for network, (_, (_, X_test, _, y_test), _) in zip(networks, experiments)
    ]

    for (label, _, _), result in zip(experiments, results):
        print(label, result)

# Run the three-data-set classification demo when this cell is executed.
main2()



Test accuracy for Iris dataset: [[10  0  0]
 [10  0  0]
 [10  0  0]]
Test accuracy for Wine dataset: [[12  0  0]
 [10  4  0]
 [10  0  0]]
Test accuracy for Breast Cancer dataset: [[72  0]
 [18 24]]
