In [16]:
import numpy as np


In [17]:
def sigmoid(x):
    """Numerically stable logistic function 1 / (1 + exp(-x)), applied element-wise.

    The naive formula evaluates ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. Here only ``exp(-|x|)`` is ever
    computed, which always lies in (0, 1], and the result is assembled per sign.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the shape
        of ``x``, holding values in [0, 1].
    """
    z = np.asarray(x)
    if not np.issubdtype(z.dtype, np.floating):
        # Integer / boolean input: promote so the division below is floating point.
        z = z.astype(np.float64)

    decay = np.exp(-np.abs(z))  # never overflows: the exponent is <= 0
    # x >= 0:  1 / (1 + e^-x)             with e^-x == decay
    # x <  0:  e^x / (1 + e^x)            with e^x  == decay
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    # Hand back a scalar (not a 0-d array) when a scalar was passed in.
    return result if result.ndim else result[()]


In [18]:
def sigmoid_derivative(x):
    """Derivative of the sigmoid, expressed in terms of the sigmoid's output.

    ``x`` must already be an activation ``s = sigmoid(z)``, not the
    pre-activation ``z``; the derivative with respect to ``z`` is then
    ``s * (1 - s)``.
    """
    complement = 1 - x
    return x * complement

In [19]:
def cross_entropy(y_pred, y_true):
    # Ensure prediction is not exactly 0 or 1
    y_pred = np.clip(y_pred, 1e-12, 1 - 1e-12)
    # Calculate cross-entropy loss
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))


In [20]:
class MLP:
    """Fully connected network with two sigmoid hidden layers and a sigmoid output.

    The architecture is fixed: 2 inputs -> 10 -> 10 -> 2 outputs. Training is
    full-batch gradient descent. Gradients are summed over the samples (not
    averaged), so the effective step size scales with the number of rows.
    """

    def __init__(self, seed=None):
        """Draw every weight and bias uniformly from [0, 1).

        Args:
            seed: Optional seed for a private random generator, making the
                initial parameters reproducible. With the default ``None``
                the global ``np.random`` state is used, so existing callers
                (including ones that call ``np.random.seed`` themselves)
                see no change.
        """
        # RandomState offers the same ``rand`` method as the ``np.random``
        # module, so both paths share the code below and draw in the same order.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Initialize weights and biases
        self.weights1 = rng.rand(2, 10)
        self.weights2 = rng.rand(10, 10)
        self.weights3 = rng.rand(10, 2)
        self.bias1 = rng.rand(1, 10)
        self.bias2 = rng.rand(1, 10)
        self.bias3 = rng.rand(1, 2)

    def forward(self, X):
        """Run a forward pass and cache every layer's activations.

        Args:
            X: Array of shape (n_samples, 2).

        Returns:
            Output activations of shape (n_samples, 2). The same array is
            stored as ``self.output``; ``self.layer1`` and ``self.layer2``
            hold the hidden activations that ``backward`` reads.
        """
        self.layer1 = sigmoid(np.dot(X, self.weights1) + self.bias1)
        self.layer2 = sigmoid(np.dot(self.layer1, self.weights2) + self.bias2)
        self.output = sigmoid(np.dot(self.layer2, self.weights3) + self.bias3)
        return self.output

    def backward(self, X, y, learning_rate):
        """Backpropagate and apply one gradient-descent step in place.

        Must be called right after ``forward(X)`` with the same ``X``: it
        reads the cached ``self.layer1``, ``self.layer2`` and ``self.output``.

        Args:
            X: Inputs used for the preceding forward pass, shape (n_samples, 2).
            y: Targets with the same shape as ``self.output``.
            learning_rate: Step size applied to the summed gradients.
        """
        # error (pred - label). For a sigmoid output combined with binary
        # cross-entropy this is the gradient w.r.t. the output pre-activation
        # (up to the constant factor introduced by averaging the loss).
        output_layer_error = self.output - y

        # deriv. for weights and biases
        d_weights3 = np.dot(self.layer2.T, output_layer_error)
        d_bias3 = np.sum(output_layer_error, axis=0, keepdims=True)

        # backpropagation: every delta uses the weights from *before* this
        # step's update, which is why all updates are deferred to the end.
        hidden_layer_error2 = np.dot(output_layer_error, self.weights3.T) * sigmoid_derivative(self.layer2)
        d_weights2 = np.dot(self.layer1.T, hidden_layer_error2)
        d_bias2 = np.sum(hidden_layer_error2, axis=0, keepdims=True)

        hidden_layer_error1 = np.dot(hidden_layer_error2, self.weights2.T) * sigmoid_derivative(self.layer1)
        d_weights1 = np.dot(X.T, hidden_layer_error1)
        d_bias1 = np.sum(hidden_layer_error1, axis=0, keepdims=True)

        # Update weights and bias
        self.weights3 -= learning_rate * d_weights3
        self.bias3 -= learning_rate * d_bias3
        self.weights2 -= learning_rate * d_weights2
        self.bias2 -= learning_rate * d_bias2
        self.weights1 -= learning_rate * d_weights1
        self.bias1 -= learning_rate * d_bias1

    def train(self, X_train, y_train, epochs, learning_rate):
        """Fit the network with full-batch gradient descent.

        Args:
            X_train: Training inputs, shape (n_samples, 2).
            y_train: Training targets, shape (n_samples, 2).
            epochs: Number of forward/backward passes over the whole set.
            learning_rate: Step size forwarded to ``backward``.

        Every 10th epoch (starting with epoch 0) the cross-entropy loss is
        printed; it is measured with a fresh forward pass, i.e. after that
        epoch's weight update.
        """
        for epoch in range(epochs):
            # Forward pass
            self.forward(X_train)

            # Backward pass and weight update
            self.backward(X_train, y_train, learning_rate)

            # Print the loss every 10 epochs
            if epoch % 10 == 0:
                loss = cross_entropy(self.forward(X_train), y_train)
                print(f"Epoch {epoch}, Current loss: {loss}")


In [21]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

def one_hot_encode(labels, num_classes):
    """Turn integer class labels into a (len(labels), num_classes) 0/1 matrix.

    Args:
        labels: 1-D array of integer class indices.
        num_classes: Number of columns of the result.

    Returns:
        Float array with a single 1 per row, in the column given by the label.

    Raises:
        ValueError: If any label falls outside ``[0, num_classes - 1]``.
    """
    labels = np.asarray(labels)
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            f"labels must lie in [0, {num_classes - 1}], "
            f"got values in [{labels.min()}, {labels.max()}]"
        )
    encoded = np.zeros((labels.size, num_classes))
    encoded[np.arange(labels.size), labels] = 1
    return encoded


# Load training dataset (last column holds the class label, the rest are features)
train_df = pd.read_excel("./THA3train.xlsx")
X_train = train_df.iloc[:, :-1].values
y_train = train_df.iloc[:, -1].values

# The number of classes is fixed by the training labels and reused for every
# split, so all one-hot targets have the same width even when a split happens
# not to contain the highest class.
num_classes = y_train.max() + 1

# One-hot encoding for two-class classification
y_train_one_hot = one_hot_encode(y_train, num_classes)

# Standardize training features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Load test dataset
test_df = pd.read_excel('./THA3validate.xlsx')
X_test = test_df.iloc[:, :-1].values
y_test = test_df.iloc[:, -1].values

# One-hot encoding (same width as the training targets)
y_test_one_hot = one_hot_encode(y_test, num_classes)

# Standardize with the statistics fitted on the training split only
X_test_scaled = scaler.transform(X_test)

# Print shapes of the datasets
X_train_scaled.shape, y_train_one_hot.shape, X_test_scaled.shape, y_test_one_hot.shape


((410, 2), (410, 2), (82, 2), (82, 2))

In [22]:


# Training hyperparameters
EPOCHS = 1000
LEARNING_RATE = 0.004

# Build a freshly initialised network and fit it on the standardized training split
mlp = MLP()
mlp.train(
    X_train_scaled,
    y_train_one_hot,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
)


Epoch 0, Current loss: 1.3994459094228213
Epoch 10, Current loss: 1.2784985073303383
Epoch 20, Current loss: 0.8107528222535199
Epoch 30, Current loss: 0.6977347376851374
Epoch 40, Current loss: 0.6815081895658498
Epoch 50, Current loss: 0.6739459696635599
Epoch 60, Current loss: 0.6641158296417374
Epoch 70, Current loss: 0.6488122279119047
Epoch 80, Current loss: 0.6246136154265507
Epoch 90, Current loss: 0.5868643487785163
Epoch 100, Current loss: 0.6383424188994323
Epoch 110, Current loss: 0.589674936966094
Epoch 120, Current loss: 0.4819336125657156
Epoch 130, Current loss: 0.36586255054168537
Epoch 140, Current loss: 0.3074197133039063
Epoch 150, Current loss: 0.2868180106882213
Epoch 160, Current loss: 0.27705140145035856
Epoch 170, Current loss: 0.27167221764276844
Epoch 180, Current loss: 0.2684273199151547
Epoch 190, Current loss: 0.26619451441091463
Epoch 200, Current loss: 0.2643977915170976
Epoch 210, Current loss: 0.262737114368835
Epoch 220, Current loss: 0.26105024539558

In [23]:
from sklearn.metrics import accuracy_score, confusion_matrix

def evaluate_model(model, X_test, y_test):
    """Score a trained model on a labelled dataset.

    Args:
        model: Object exposing ``forward(X)`` that returns per-class scores,
            one row per sample.
        X_test: Feature matrix passed to ``model.forward``.
        y_test: One-hot encoded true labels, one row per sample.

    Returns:
        Tuple ``(accuracy, confusion_matrix)`` as computed by scikit-learn.
    """
    scores = model.forward(X_test)

    # Collapse both the scores and the one-hot targets to class indices
    predicted = np.argmax(scores, axis=1)
    actual = np.argmax(y_test, axis=1)

    return accuracy_score(actual, predicted), confusion_matrix(actual, predicted)

# Score the trained network on the held-out validation split
accuracy, conf_matrix = evaluate_model(
    model=mlp,
    X_test=X_test_scaled,
    y_test=y_test_one_hot,
)

# Show accuracy and confusion matrix as the cell output
accuracy, conf_matrix



(0.975609756097561,
 array([[40,  1],
        [ 1, 40]], dtype=int64))

## Part B

First we initialize the model with weights and biases equal to zero, instead of randomizing them.