<font size=8>HW4 - Intelligent Systems\
Question 2\
Bekarys Dukenbaev</font>


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Organize the Data

In [None]:
# Plain-text read of the image and label files.
# `data` keeps the raw lines of the image file; `labels` keeps the first
# character of every line of the label file.
# NOTE(review): neither name is referenced again in the cells shown in this
# notebook; the pandas load in the next cell supplies the data that is used.
with open("HW3_datafiles/MNISTnumImages5000_balanced.txt") as image_file:
    data = image_file.readlines()

with open("HW3_datafiles/MNISTnumLabels5000_balanced.txt") as label_file:
    label_lines = label_file.readlines()

labels = [line[0] for line in label_lines]

In [None]:
# Labels: one value per row, read into a single column named 'label'.
label_df = pd.read_csv('HW3_datafiles/MNISTnumLabels5000_balanced.txt', names=['label'])

# Images: tab-separated values without a header row, one image per row.
data_df = pd.read_csv('HW3_datafiles/MNISTnumImages5000_balanced.txt', sep='\t', header=None)

# Attach each image's label as an extra column (aligned on the row index).
data_df['label'] = label_df['label']

In [None]:
# Per-digit split: for every digit 0-9 draw 400 rows at random (fixed seed,
# without replacement) for training and keep that digit's remaining rows
# for testing.
train_dfs, test_dfs = [], []

for digit in range(10):
    rows_for_digit = data_df.loc[data_df['label'] == digit]
    sampled_rows = rows_for_digit.sample(n=400, replace=False, random_state=42)
    held_out_mask = ~rows_for_digit.index.isin(sampled_rows.index)

    train_dfs.append(sampled_rows)
    test_dfs.append(rows_for_digit.loc[held_out_mask])

train_df = pd.concat(train_dfs, axis=0)
test_df = pd.concat(test_dfs, axis=0)

# Separate the label column from the pixel columns.
train_labels = train_df['label']
test_labels = test_df['label']
train_images = train_df.drop(columns=['label'])
test_image = test_df.drop(columns=['label'])

# One-hot encoded labels as plain NumPy arrays.
train_labels_onehot = pd.get_dummies(train_labels).to_numpy()
test_labels_onehot = pd.get_dummies(test_labels).to_numpy()

# Create the Neural Network

In [None]:
class Autoencoder:
    """Single-hidden-layer autoencoder trained by full-batch gradient descent with momentum.

    The hidden layer uses ReLU and the output layer uses a sigmoid. Samples
    are stored column-wise: every data matrix handed to the network has
    shape (input_size, n_samples).
    """

    def __init__(self, input_size, hidden_size, learning_rate, beta, seed=None):
        """Create the network parameters and their momentum buffers.

        Args:
            input_size: Number of input (and therefore output) units.
            hidden_size: Number of hidden units.
            learning_rate: Step size applied to the momentum-smoothed gradients.
            beta: Momentum coefficient of the exponential moving average of
                the gradients.
            seed: Optional seed for the weight initialisation. With the
                default ``None`` the global NumPy random state is used.
        """
        self.W1, self.b1, self.W2, self.b2 = self.initialize_weights(input_size, hidden_size, seed)
        self.V_dW1, self.V_db1, self.V_dW2, self.V_db2 = self.initialize_momentum(input_size, hidden_size)
        self.learning_rate = learning_rate
        self.beta = beta

    def initialize_weights(self, input_size, hidden_size, seed=None):
        """Return (W1, b1, W2, b2): small Gaussian weights and zero biases.

        Args:
            input_size: Number of input/output units.
            hidden_size: Number of hidden units.
            seed: Optional seed; ``None`` draws from the global NumPy state.
        """
        # A dedicated RandomState keeps seeded runs reproducible without
        # touching the global generator; the unseeded path is unchanged.
        rng = np.random if seed is None else np.random.RandomState(seed)
        W1 = rng.randn(hidden_size, input_size) * 0.01
        b1 = np.zeros((hidden_size, 1))
        W2 = rng.randn(input_size, hidden_size) * 0.01
        b2 = np.zeros((input_size, 1))
        return W1, b1, W2, b2

    def initialize_momentum(self, input_size, hidden_size):
        """Return zero-filled momentum buffers shaped like W1, b1, W2, b2."""
        V_dW1 = np.zeros((hidden_size, input_size))
        V_db1 = np.zeros((hidden_size, 1))
        V_dW2 = np.zeros((input_size, hidden_size))
        V_db2 = np.zeros((input_size, 1))
        return V_dW1, V_db1, V_dW2, V_db2

    def relu(self, Z):
        """Element-wise max(0, Z)."""
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        """Element-wise ReLU derivative: 1.0 where Z > 0, else 0.0."""
        return (Z > 0).astype(float)

    def sigmoid(self, Z):
        """Element-wise logistic function, evaluated without overflow.

        Uses sigmoid(z) = exp(-log(1 + exp(-z))) with ``np.logaddexp`` so
        that large negative inputs do not overflow inside ``exp``.
        """
        return np.exp(-np.logaddexp(0, -Z))

    def compute_loss(self, y_true, y_pred):
        """Half the squared error summed over features (axis 0), averaged over samples."""
        return 0.5 * np.mean(np.sum(np.square(y_true - y_pred), axis=0))

    def forward_propagation(self, X):
        """Run the network on X and return (Z1, A1, Z2, A2).

        Z1/Z2 are the pre-activations and A1/A2 the activations of the
        hidden and output layers respectively.
        """
        Z1 = np.dot(self.W1, X) + self.b1
        A1 = self.relu(Z1)
        Z2 = np.dot(self.W2, A1) + self.b2
        A2 = self.sigmoid(Z2)
        return Z1, A1, Z2, A2

    def backward_propagation(self, X, Y, Z1, A1, Z2, A2):
        """Compute gradients, update the momentum buffers and apply one weight update.

        Args:
            X: Input batch, one sample per column.
            Y: Target batch with the same layout as the network output.
            Z1, A1, Z2, A2: Values returned by ``forward_propagation(X)``
                (Z2 is accepted for symmetry but not used).

        The parameters and momentum buffers are modified in place; nothing
        is returned.
        """
        m = X.shape[1]  # number of samples in the batch

        # NOTE(review): this output delta omits the sigmoid derivative
        # A2 * (1 - A2) that differentiating compute_loss through the sigmoid
        # would introduce, i.e. it is the sigmoid/cross-entropy delta rather
        # than the exact gradient of the squared error that compute_loss
        # reports. Left unchanged because switching alters training
        # dynamics -- confirm which objective is intended.
        dZ2 = A2 - Y
        dW2 = np.dot(dZ2, A1.T) / m
        db2 = np.sum(dZ2, axis=1, keepdims=True) / m
        dZ1 = np.dot(self.W2.T, dZ2) * self.relu_derivative(Z1)
        dW1 = np.dot(dZ1, X.T) / m
        db1 = np.sum(dZ1, axis=1, keepdims=True) / m

        # Momentum: exponential moving average of the gradients.
        self.V_dW1 = self.beta * self.V_dW1 + (1 - self.beta) * dW1
        self.V_db1 = self.beta * self.V_db1 + (1 - self.beta) * db1
        self.V_dW2 = self.beta * self.V_dW2 + (1 - self.beta) * dW2
        self.V_db2 = self.beta * self.V_db2 + (1 - self.beta) * db2

        # Gradient step using the smoothed gradients.
        self.W1 -= self.learning_rate * self.V_dW1
        self.b1 -= self.learning_rate * self.V_db1
        self.W2 -= self.learning_rate * self.V_dW2
        self.b2 -= self.learning_rate * self.V_db2

    def train(self, X, Y, X_test, Y_test, epochs, log_every=10):
        """Train on the full batch for ``epochs`` iterations and record losses.

        Args:
            X, Y: Training inputs and targets, one sample per column
                (anything ``np.asarray`` accepts, e.g. a DataFrame).
            X_test, Y_test: Held-out inputs and targets in the same layout.
            epochs: Number of full-batch updates to perform.
            log_every: Record and print the losses every ``log_every``
                epochs (and always on the final epoch).

        Returns:
            (training_losses, test_losses, epochs_recorded) as three lists
            of equal length. The training loss of an epoch is measured
            before that epoch's weight update, the test loss after it.

        Raises:
            ValueError: If ``log_every`` is smaller than 1.
        """
        if log_every < 1:
            raise ValueError("log_every must be a positive integer")

        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float)
        X_test = np.asarray(X_test, dtype=float)
        Y_test = np.asarray(Y_test, dtype=float)

        training_losses = []
        test_losses = []
        epochs_recorded = []

        for epoch in range(epochs):
            Z1, A1, Z2, A2_train = self.forward_propagation(X)
            cost_train = self.compute_loss(Y, A2_train)
            self.backward_propagation(X, Y, Z1, A1, Z2, A2_train)

            if epoch % log_every == 0 or epoch == epochs - 1:
                # The test-set forward pass has no side effects, so it is
                # only evaluated on epochs whose loss is actually recorded.
                _, _, _, A2_test = self.forward_propagation(X_test)
                cost_test = self.compute_loss(Y_test, A2_test)

                epochs_recorded.append(epoch)
                training_losses.append(cost_train)
                test_losses.append(cost_test)
                print(f'Epoch: {epoch}, Training Loss: {cost_train:.4f}, Test Loss: {cost_test:.4f}')

        return training_losses, test_losses, epochs_recorded

    def reconstruct(self, X):
        """Return the network output for X (one sample per column)."""
        _, _, _, A2 = self.forward_propagation(np.asarray(X, dtype=float))
        return A2

# Train the Neural Network

In [None]:
# Hyperparameters
input_size = 784
hidden_size = 150
learning_rate = 0.1
beta = 0.9

# Seed NumPy's global generator so the random weight initialisation is the
# same on every Restart & Run All (the data split already uses random_state=42).
np.random.seed(42)
autoencoder = Autoencoder(input_size, hidden_size, learning_rate, beta)

# The network expects one sample per column, so transpose the image tables.
X_train = train_images.T
X_test = test_image.T

# Train the autoencoder: the targets are the inputs themselves.
epochs = 250
results = autoencoder.train(X_train, X_train, X_test, X_test, epochs)

# Use the autoencoder to reconstruct the test images
reconstructed_images = autoencoder.reconstruct(X_test)

In [None]:
# `results` is (training_losses, test_losses, epochs_recorded); show it as a
# labelled table instead of a raw tuple of lists.
pd.DataFrame({
    'epoch': results[2],
    'training_loss': results[0],
    'test_loss': results[1],
})

# Plot loss time series

In [None]:
# Training and test loss against the epochs at which they were recorded.
# `results` is (training_losses, test_losses, epochs_recorded).
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(results[2], results[0], label='Training Loss')
ax.plot(results[2], results[1], label='Test Loss')
ax.set_title('Training and Test Loss Over Epochs')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

# Test reconstruction through the latent space representation 
(not required, just out of curiosity)

In [None]:
height = 28
width = 28

# For each digit take its first test image and reconstruct it. The digit is
# stored with every example so that a digit without test images cannot shift
# the row/label pairing in the plot below.
examples = []  # (digit, original column vector, reconstructed column vector)
for digit in range(10):
    digit_test_images = test_image[test_labels == digit]
    if digit_test_images.empty:
        continue
    first_image = digit_test_images.iloc[0].values.reshape(-1, 1)
    examples.append((digit, first_image, autoencoder.reconstruct(first_image)))

reconstructed_images = [reconstructed for _, _, reconstructed in examples]

fig, axes = plt.subplots(10, 2, figsize=(10, 20))

# Hide every axis up front so a row without an example stays blank.
for ax in axes.ravel():
    ax.axis('off')

for digit, original, reconstructed in examples:
    # Transpose after reshaping, as in the original cell, to display the
    # images in their upright orientation.
    original_image = original.reshape(height, width).T
    reconstructed_image_reshaped = reconstructed.reshape(height, width).T

    ax = axes[digit, 0]
    ax.imshow(original_image, cmap='gray')
    ax.set_title(f"Original {digit}")

    ax = axes[digit, 1]
    ax.imshow(reconstructed_image_reshaped, cmap='gray')
    ax.set_title(f"Reconstructed {digit}")

plt.tight_layout()
plt.show()

# Visualize a hidden neuron's feature

In [None]:
def visualize_hidden_neuron_feature(W1, neuron_index, subplot_index, n_rows=5, n_cols=4):
    """Draw one hidden neuron's incoming weights as a 28x28 grayscale image.

    Args:
        W1: Weight matrix with one row of 784 input weights per hidden neuron.
        neuron_index: Row of ``W1`` to display.
        subplot_index: 1-based position inside the subplot grid.
        n_rows, n_cols: Shape of the subplot grid (defaults to 5x4).

    The weights are min-max scaled to [0, 255] for display only; ``W1``
    itself is left untouched.
    """
    # Take a copy: slicing and reshaping W1 yield views, so the in-place
    # normalisation below would otherwise overwrite the trained weights.
    neuron_image = np.array(W1[neuron_index, :], dtype=float).reshape(28, 28)

    # Min-max scale to [0, 255].
    neuron_image -= neuron_image.min()  # translate so the minimum value is 0
    peak = neuron_image.max()
    if peak > 0:  # a constant weight row stays all zeros instead of dividing by zero
        neuron_image /= peak  # scale the values to the range [0, 1]
        neuron_image *= 255  # scale the values to the range [0, 255]

    # Cast to an integer data type
    neuron_image = neuron_image.astype(np.uint8)

    # Display the image
    plt.subplot(n_rows, n_cols, subplot_index)
    plt.imshow(neuron_image, cmap='gray')
    plt.title(f"Feature for hidden neuron {neuron_index}")

# Hidden neurons whose features are displayed (20 of them, one per subplot).
neuron_indices = [147, 117, 29, 99, 67, 141, 89, 115, 53, 59, 130, 134, 9, 21, 142, 43, 15, 75, 139, 120]

# One figure holding a subplot for each selected neuron; subplot positions
# are 1-based.
plt.figure(figsize=(12, 10))
for subplot_position, hidden_unit in enumerate(neuron_indices, start=1):
    visualize_hidden_neuron_feature(autoencoder.W1, hidden_unit, subplot_position)

plt.tight_layout()
plt.show()
