In [25]:
import numpy as np
import matplotlib.pyplot as plt

def train_test_split(x_data, y_data, split_percentage=0.8, shuffle_train=True):
    x_train, x_test, y_train, y_test = [], [], [], []
    unique_digits = np.unique(y_data)

    for digit in unique_digits:
        digit_indices = np.where(y_data == digit)[0]
        split_point = int(len(digit_indices) * split_percentage)
        train_indices = digit_indices[:split_point]
        test_indices = digit_indices[split_point:]
        x_train.append(x_data[train_indices])
        y_train.append(y_data[train_indices])
        x_test.append(x_data[test_indices])
        y_test.append(y_data[test_indices])

    x_train = np.concatenate(x_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)
    x_test = np.concatenate(x_test, axis=0)
    y_test = np.concatenate(y_test, axis=0)

    if shuffle_train:
        shuffle_indices = np.random.permutation(len(x_train))
        x_train = x_train[shuffle_indices]
        y_train = y_train[shuffle_indices]

    return x_train.T, x_test.T, y_train.T, y_test.T

x_data = np.loadtxt('HW3_datafiles/MNISTnumImages5000_balanced.txt')  
y_data = np.loadtxt('HW3_datafiles/MNISTnumLabels5000_balanced.txt')
x_train, x_test,y_train,y_test = train_test_split(x_data, y_data, split_percentage=0.8, shuffle_train=True)

weights = np.random.randn(200, 784) - 0.5
bias = np.random.randn(200, 1) - 0.5
weights2 = np.random.randn(10, 200) - 0.5
bias2 = np.random.randn(10, 1) - 0.5



In [21]:
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    sig = sigmoid(x)
    return sig * (1 - sig)


In [None]:
def train(x_train, y_train, weights, bias, weights2, bias2, learning_rate=0.01, epochs=1000):
    for epoch in range(epochs):
        # Forward pass
        A = np.dot(weights, x_train) + bias
        A = relu(A)  # Hidden layer with ReLU activation
        A2 = np.dot(weights2, A) + bias2
        A2 = sigmoid(A2)  # Output layer with Sigmoid activation

        # Calculate loss and accuracy
        loss = np.mean((A2 - y_train)**2)  # MSE
        predictions = np.argmax((A2 > 0.5).astype(int),axis=0)  # Threshold at 0.5 for binary classification
        accuracy = np.mean(predictions == y_train)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}, Accuracy: {accuracy * 100:.2f}%")

        # Backpropagation
        # Gradient of MSE Loss with respect to A2
        dA2 = 2 * (A2 - y_train) / len(y_train)
        dA2 *= sigmoid_derivative(A2)  # Apply Sigmoid derivative

        dweights2 = np.dot(dA2, A.T)
        dbias2 = np.sum(dA2, axis=1, keepdims=True)

        # Backpropagate to hidden layer
        dA = np.dot(weights2.T, dA2) * relu_derivative(A)  # Apply ReLU derivative

        dweights = np.dot(dA, x_train.T)
        dbias = np.sum(dA, axis=1, keepdims=True)

        # Update weights and biases
        weights2 -= learning_rate * dweights2
        bias2 -= learning_rate * dbias2
        weights -= learning_rate * dweights
        bias -= learning_rate * dbias


In [24]:
train(x_train, y_train, weights, bias, weights2, bias2, learning_rate=0.01, epochs=1000)

Epoch 1/1000, Loss: 24.4746, Accuracy: 9.99%
Epoch 2/1000, Loss: 24.4438, Accuracy: 9.99%
Epoch 3/1000, Loss: 24.4131, Accuracy: 9.99%
Epoch 4/1000, Loss: 24.3823, Accuracy: 9.99%
Epoch 5/1000, Loss: 24.3516, Accuracy: 9.99%
Epoch 6/1000, Loss: 24.3209, Accuracy: 9.99%
Epoch 7/1000, Loss: 24.2902, Accuracy: 9.99%
Epoch 8/1000, Loss: 24.2596, Accuracy: 9.99%
Epoch 9/1000, Loss: 24.2290, Accuracy: 9.99%
Epoch 10/1000, Loss: 24.1984, Accuracy: 10.00%
Epoch 11/1000, Loss: 24.1678, Accuracy: 10.00%
Epoch 12/1000, Loss: 24.1373, Accuracy: 10.00%
Epoch 13/1000, Loss: 24.1069, Accuracy: 10.00%
Epoch 14/1000, Loss: 24.0765, Accuracy: 9.98%
Epoch 15/1000, Loss: 24.0461, Accuracy: 9.99%
Epoch 16/1000, Loss: 24.0158, Accuracy: 9.99%
Epoch 17/1000, Loss: 23.9856, Accuracy: 9.99%
Epoch 18/1000, Loss: 23.9554, Accuracy: 9.99%
Epoch 19/1000, Loss: 23.9253, Accuracy: 9.98%
Epoch 20/1000, Loss: 23.8952, Accuracy: 9.98%
Epoch 21/1000, Loss: 23.8652, Accuracy: 9.98%
Epoch 22/1000, Loss: 23.8353, Accuracy:

[6 6 6 ... 6 6 6]
