In [1]:
import pandas as pd
import numpy as np

# Sigmoid activation function
def sigmoid(z):
    """Apply the logistic function ``1 / (1 + exp(-z))`` to ``z``.

    The computation uses ``np.exp`` and plain arithmetic, so it works
    element-wise on NumPy arrays as well as on scalars.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

# Function to load and preprocess the dataset
def get_dataset(file_name, training_count, shuffle=True):
    """Load a label-first CSV and return network inputs and encoded targets.

    The file is read with ``pd.read_csv``. Column 0 of every row is used as
    the class label and the remaining columns as feature values.

    Parameters
    ----------
    file_name : str or path-like
        CSV file to read.
    training_count : int
        If greater than 0, only the first ``training_count`` rows (taken
        after the optional shuffle) are kept; otherwise every row is used.
    shuffle : bool, default True
        Shuffle the rows in place with ``np.random.shuffle`` before slicing.

    Returns
    -------
    X_with_bias : ndarray, shape (n_rows, 1 + n_features)
        A leading column of ones (the bias input) followed by the features
        divided by 255.
    Y_onehotEncoded : ndarray
        Result of ``onehot_encode`` applied to the label column.
    """
    rows = pd.read_csv(file_name).to_numpy()

    if shuffle:
        np.random.shuffle(rows)

    # Slice rows once so features and labels are guaranteed to stay aligned.
    if training_count > 0:
        rows = rows[:training_count]

    # Labels are later used as column indices, so they must be integers.
    labels = rows[:, 0].reshape(-1, 1).astype(int)

    # Scale the features only. Assumes 0-255 intensities (implied by the
    # divisor) -- TODO confirm against the data files.
    features = rows[:, 1:] / 255.0

    # Prepend the bias AFTER scaling: the original divided the bias column by
    # 255 too, which turned the constant input into 1/255 instead of 1.
    X_with_bias = np.hstack((np.ones((features.shape[0], 1)), features))

    Y_onehotEncoded = onehot_encode(labels)

    return X_with_bias, Y_onehotEncoded

# One-hot encoding function
def onehot_encode(T, num_classes=10, soft_weights=True):
    """Turn a column of integer class labels into one row of targets each.

    Parameters
    ----------
    T : ndarray, shape (n_samples, 1)
        Integer class indices; ``T[:, 0]`` is used to index the columns.
    num_classes : int, default 10
        Width of every target row.
    soft_weights : bool, default True
        When true the true class gets 0.9 and every other class 0.1;
        otherwise the true class gets 1 and every other class 0.

    Returns
    -------
    ndarray, shape (n_samples, num_classes)
    """
    sample_count = T.shape[0]
    off_value, on_value = (0.1, 0.9) if soft_weights else (0.0, 1.0)

    targets = np.full((sample_count, num_classes), off_value)
    targets[np.arange(sample_count), T[:, 0]] = on_value
    return targets

# Function to generate random weights
def generate_wt(rows, columns):
    """Return a ``(rows, columns)`` array of random initial weights.

    Every entry is drawn independently from the uniform distribution on
    ``[-0.5, 0.5)`` using NumPy's global random state. A single vectorised
    draw replaces the original per-element Python loop; it consumes the
    random stream in the same order, so a seeded run yields the same values.
    """
    return np.random.uniform(-0.5, 0.5, size=(rows, columns))

# Function for forward propagation
def forwardPropagation(X, layer_1_wts, layer_2_wts):
    """Run one sample through the hidden layer and the output layer.

    Parameters
    ----------
    X : ndarray
        Input for a single sample (bias input included). The hidden
        activations are flattened into one row, so this function handles one
        sample per call.
    layer_1_wts : ndarray
        Input-to-hidden weights, right-multiplied onto ``X``.
    layer_2_wts : ndarray
        Hidden-to-output weights with one column per hidden unit plus one for
        the hidden bias; left-multiplied onto the hidden activations.

    Returns
    -------
    h_layer : ndarray, shape (1, n_hidden + 1)
        Hidden activations with a leading 1 for the bias unit.
    o_layer : ndarray, shape (n_outputs, 1)
        Sigmoid activations of the output units.
    """
    # sigmoid is built from NumPy operations and is already element-wise;
    # wrapping it in np.vectorize only added a slow per-element Python loop.
    h = sigmoid(np.matmul(X, layer_1_wts)).reshape(1, -1)

    # Prepend the constant bias activation for the output layer.
    h_layer = np.hstack((np.ones((h.shape[0], 1)), h))

    o_layer = sigmoid(np.dot(layer_2_wts, h_layer.T))

    return h_layer, o_layer

# Function to calculate gradients
def calculateGradient(learningRate, X, output_error, hidden_error, acti_out, act_hid):
    """Build the learning-rate-scaled weight steps for both layers.

    Parameters
    ----------
    learningRate : float
        Factor applied to both results.
    X : ndarray
        Input sample; reshaped to a single row internally.
    output_error : ndarray
        Output-layer error terms, broadcast against ``act_hid``.
    hidden_error : ndarray
        Hidden-layer error terms including the bias unit's entry; it is
        transposed and its first column (the bias unit) is dropped.
    acti_out : any
        Not used by this function; kept for interface compatibility.
    act_hid : ndarray
        Hidden-layer activations (bias included).

    Returns
    -------
    grad_out : ndarray
        ``learningRate * output_error * act_hid`` (broadcast product).
    grad_hid : ndarray
        ``learningRate * X_column * hidden_error_without_bias``.
    """
    # Hidden layer: input as a column times the hidden deltas (bias removed).
    input_column = X.reshape(1, -1).T
    hidden_delta = hidden_error.T[:, 1:]
    grad_hid = learningRate * input_column * hidden_delta

    # Output layer: output deltas times the hidden activations.
    grad_out = learningRate * output_error * act_hid

    return grad_out, grad_hid

# Function to calculate errors during backpropagation
def calculateError2(h_layer, o_layer, y, hidden_to_output_Weight):
    """Compute the error terms of the output and hidden units for one sample.

    Parameters
    ----------
    h_layer : ndarray
        Hidden activations as a single row (bias unit included).
    o_layer : ndarray
        Output activations as a column.
    y : ndarray
        Target values; reshaped to a column internally.
    hidden_to_output_Weight : ndarray
        Hidden-to-output weight matrix.

    Returns
    -------
    error_output_layer : ndarray
        ``o * (1 - o) * (target - o)`` for every output unit.
    error_hidden_layer : ndarray
        ``h * (1 - h)`` times the output errors propagated back through the
        weights, returned as a column.
    """
    target_column = y.reshape(-1, 1)

    # Output units: sigmoid slope o * (1 - o) times the residual.
    output_delta = o_layer * (1 - o_layer) * (target_column - o_layer)

    # Hidden units: send the output deltas back through the weights, then
    # scale by the hidden sigmoid slope.
    back_signal = hidden_to_output_Weight.T.dot(output_delta)
    hidden_delta = h_layer * (1 - h_layer) * back_signal.T

    return output_delta, hidden_delta.T

# Function for prediction
def predict(X, layer_1_wts, layer_2_wts):
    """Run a batch of samples through the network.

    Parameters
    ----------
    X : ndarray, 2-D
        One sample per row (bias input included).
    layer_1_wts : ndarray
        Input-to-hidden weights, right-multiplied onto ``X``.
    layer_2_wts : ndarray
        Hidden-to-output weights with one column per hidden unit plus one for
        the hidden bias.

    Returns
    -------
    h_layer : ndarray, shape (n_samples, n_hidden + 1)
        Hidden activations with a leading column of ones for the bias unit.
    o_layer : ndarray, shape (n_outputs, n_samples)
        Output activations, one column per sample.
    """
    # sigmoid is built from NumPy operations and is already element-wise;
    # wrapping it in np.vectorize only added a slow per-element Python loop.
    h = sigmoid(np.dot(X, layer_1_wts))

    # Prepend the constant bias activation for the output layer.
    h_layer = np.hstack((np.ones((h.shape[0], 1)), h))

    o_layer = sigmoid(np.dot(layer_2_wts, h_layer.T))

    return h_layer, o_layer

# Function to count rows with same argmax values between two arrays
def count_same_argmax_rows(arr1, arr2):
    """Compare the per-row argmax of two score matrices.

    Parameters
    ----------
    arr1, arr2 : array-like, 2-D
        One row per sample, one column per class, with the same number of
        rows.

    Returns
    -------
    same_argmax_rows : integer
        Number of rows whose argmax is identical in both arrays.
    conf_matrix : ndarray of int, shape (n_classes, n_classes)
        ``conf_matrix[i, j]`` counts the rows whose argmax is ``i`` in
        ``arr1`` and ``j`` in ``arr2``. ``n_classes`` is the larger column
        count of the two inputs.
    """
    argmax_arr1 = np.argmax(arr1, axis=1)
    argmax_arr2 = np.argmax(arr2, axis=1)

    # The original called `confusion_matrix`, a name that is never imported
    # or defined in this file, so the matrix is built directly with NumPy.
    num_classes = max(np.shape(arr1)[1], np.shape(arr2)[1])
    conf_matrix = np.zeros((num_classes, num_classes), dtype=int)
    # np.add.at accumulates repeated (row, col) pairs; a plain fancy-indexed
    # `+= 1` would count each distinct pair only once.
    np.add.at(conf_matrix, (argmax_arr1, argmax_arr2), 1)

    same_argmax_rows = np.sum(argmax_arr1 == argmax_arr2)
    return same_argmax_rows, conf_matrix



In [None]:
# Function to train the neural network
def train(learning_rate, momentum, num_epoch, num_neuron_in_hiddenLayer, training_count,
          train_path="/content/drive/MyDrive/MachineLearning/mnist_train.csv",
          test_path="mnist_test.csv"):
    """Train the two-layer sigmoid network with per-sample updates and momentum.

    Parameters
    ----------
    learning_rate : float
        Step size passed to ``calculateGradient``.
    momentum : float
        Fraction of the previous weight update added to the current one.
    num_epoch : int
        Number of passes over the training set.
    num_neuron_in_hiddenLayer : int
        Number of hidden units (the hidden bias unit is added on top).
    training_count : int
        Number of training rows to use; 0 (or less) means every row.
    train_path, test_path : str
        CSV files handed to ``get_dataset``. The defaults are the locations
        the notebook originally hard-coded.

    Returns
    -------
    train_accuracy : list of float
        Per-epoch percentage of training samples classified correctly,
        measured while the weights are being updated.
    test_accuracy : list of float
        Per-epoch percentage of test samples classified correctly.
    confusion_matrix : ndarray or None
        Confusion matrix from the last epoch's test evaluation, called with
        the true labels first and the predictions second. ``None`` when
        ``num_epoch`` is 0.
    """
    hidden_units = num_neuron_in_hiddenLayer

    # Load both sets once. Re-reading the training file every epoch was slow
    # and, with training_count > 0, picked a different random subset each
    # epoch, so the network was not actually limited to training_count rows.
    X, y = get_dataset(train_path, training_count)
    X_test, y_test = get_dataset(test_path, training_count=0, shuffle=False)

    n_samples, n_inputs = X.shape
    n_classes = y.shape[1]

    hidden_wt = generate_wt(hidden_units, n_inputs)
    output_wt = generate_wt(n_classes, hidden_units + 1)

    train_accuracy = []
    test_accuracy = []
    confusion_matrix = None

    for epoch in range(num_epoch):
        print("epoch started := ", epoch)
        correct = 0

        # Momentum history restarts every epoch (as in the original); shapes
        # come from the data instead of the hard-coded 785 and 10.
        prev_hidden_update = np.zeros((n_inputs, hidden_units))
        prev_output_update = np.zeros((n_classes, hidden_units + 1))

        # Visit the samples in a fresh random order each epoch.
        for i in np.random.permutation(n_samples):
            h_layer, o_layer = forwardPropagation(X[i, :], hidden_wt.T, output_wt)

            error_output_layer, error_hidden_layer = calculateError2(
                h_layer, o_layer, y[i, :], output_wt)

            grad_out, grad_hid = calculateGradient(
                learning_rate, X[i, :], error_output_layer, error_hidden_layer,
                o_layer, h_layer)

            # Momentum uses the previous full weight update, not just the
            # previous raw gradient, so past steps keep decaying geometrically.
            output_update = grad_out + momentum * prev_output_update
            hidden_update = grad_hid + momentum * prev_hidden_update

            output_wt = output_wt + output_update
            hidden_wt = hidden_wt + hidden_update.T

            prev_output_update = output_update
            prev_hidden_update = hidden_update

            correct += int(np.argmax(o_layer) == np.argmax(y[i, :]))

        train_accuracy.append(correct * 100 / n_samples)

        # Evaluate on the held-out set with the weights reached this epoch.
        _, y_test_predict = predict(X_test, hidden_wt.T, output_wt)

        # True labels first: the matrix is indexed [arr1 class, arr2 class],
        # so rows are the true labels, matching the heatmap's axis labels.
        total_test, confusion_matrix = count_same_argmax_rows(y_test, y_test_predict.T)
        test_accuracy.append(total_test * 100 / X_test.shape[0])

        print("accuracy_train. ", train_accuracy)
        print("accuracy_test. ", test_accuracy)

    return train_accuracy, test_accuracy, confusion_matrix


# Experiments

In [2]:
# Function to perform Experiment 1
def experiment1():
    """Experiment 1: train with 20, 50 and 100 hidden units and plot the results.

    Every setting uses learning rate 0.1, momentum 0.9, 50 epochs and the
    full training set. Each run produces an accuracy-per-epoch line plot and
    a heatmap of the confusion matrix returned by ``train``; the titles name
    the hidden-unit count so the three runs can be told apart.

    NOTE(review): ``sns`` and ``plt`` are used here but no import for them is
    visible in this file -- presumably seaborn and matplotlib.pyplot; confirm
    they are imported before running on a fresh kernel.
    """
    print("experiment1 started")
    learning_rate = 0.1
    momentum = 0.9
    epoch = 50
    training_count = 0   # 0 means: use every row of the training file

    # The x-axis is the same for every run, so build it once.
    epoch_numbers = list(range(epoch))

    for hid_unit in [20, 50, 100]:
        train_accuracy, test_accuracy, confusion_matrix = train(learning_rate, momentum, epoch, hid_unit, training_count)

        # Training and test accuracy on the same axes.
        sns.lineplot(x=epoch_numbers, y=train_accuracy, label='Train Accuracy')
        sns.lineplot(x=epoch_numbers, y=test_accuracy, label='Validation Accuracy')

        plt.xlabel('Number Of epoch')
        plt.ylabel('Accuracy (%)')
        plt.title(f"Number Of epoch vs Accuracy for {hid_unit} hidden units")
        plt.grid(True)

        plt.legend()
        plt.show()

        # Confusion matrix of the final epoch.
        plt.figure(figsize=(10, 8))
        sns.heatmap(confusion_matrix, annot=True, fmt='d', cmap='Blues', cbar=False)
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        plt.title(f"Confusion Matrix for {hid_unit} hidden units")
        plt.show()


In [3]:
# Function to perform Experiment 2
def experiment2():
    """Experiment 2: train with momentum 0, 0.25 and 0.5 and plot the results.

    Every setting uses learning rate 0.1, 50 epochs, 100 hidden units and the
    full training set. Each run produces an accuracy-per-epoch line plot and
    a heatmap of the confusion matrix returned by ``train``.

    NOTE(review): ``sns`` and ``plt`` are used here but no import for them is
    visible in this file -- presumably seaborn and matplotlib.pyplot; confirm
    they are imported before running on a fresh kernel.
    """
    print("experiment2 started")
    learning_rate = 0.1
    epoch = 50
    hid_unit = 100
    training_count = 0   # 0 means: use every row of the training file

    epoch_axis = list(range(epoch))

    for momentum in (0, 0.25, 0.5):
        train_acc, test_acc, conf_mat = train(learning_rate, momentum, epoch, hid_unit, training_count)

        # Draw the training curve first, then the test curve, on the same axes.
        curves = ((train_acc, 'Train Accuracy'), (test_acc, 'Validation Accuracy'))
        for accuracies, legend_label in curves:
            sns.lineplot(x=epoch_axis, y=accuracies, label=legend_label)

        plt.xlabel('Number Of epoch')
        plt.ylabel('Accuracy (%)')
        plt.title(f"Number Of epoch vs Accuracy for momentum {momentum}")
        plt.grid(True)

        plt.legend()
        plt.show()

        # Confusion matrix of the final epoch on its own, larger figure.
        plt.figure(figsize=(10, 8))
        sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues', cbar=False)
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        plt.title(f"Confusion Matrix for momentum {momentum}")
        plt.show()


In [4]:
# Function to perform Experiment 3
def experiment3():
    """Experiment 3: train on 15000 and 30000 examples and plot the results.

    Every setting uses learning rate 0.1, momentum 0.9, 50 epochs and 100
    hidden units. Each run produces an accuracy-per-epoch line plot and a
    heatmap of the confusion matrix returned by ``train``; the titles name
    the training-set size so the runs can be told apart.

    NOTE(review): ``sns`` and ``plt`` are used here but no import for them is
    visible in this file -- presumably seaborn and matplotlib.pyplot; confirm
    they are imported before running on a fresh kernel.
    """
    print("experiment3 started")
    learning_rate = 0.1
    momentum = 0.9
    epoch = 50
    hid_unit = 100

    # The x-axis is the same for every run, so build it once.
    epoch_numbers = list(range(epoch))

    for training_count in [15000, 30000]:
        train_accuracy, test_accuracy, confusion_matrix = train(learning_rate, momentum, epoch, hid_unit, training_count)

        # Training and test accuracy on the same axes.
        sns.lineplot(x=epoch_numbers, y=train_accuracy, label='Train Accuracy')
        sns.lineplot(x=epoch_numbers, y=test_accuracy, label='Validation Accuracy')

        plt.xlabel('Number Of epoch')
        plt.ylabel('Accuracy (%)')
        plt.title(f"Number Of epoch vs Accuracy for {training_count} training examples")
        plt.grid(True)

        plt.legend()
        plt.show()

        # Confusion matrix of the final epoch.
        plt.figure(figsize=(10, 8))
        sns.heatmap(confusion_matrix, annot=True, fmt='d', cmap='Blues', cbar=False)
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        plt.title(f"Confusion Matrix for {training_count} training examples")
        plt.show()


In [None]:
# Run the three experiments one after another. Each experiment trains one
# network per setting it sweeps, for 50 epochs each, and shows the plots for
# a setting as soon as that training run finishes.
experiment1()
experiment2()
experiment3()