In [4]:
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


import numpy as np
import h5py

# Function to load the dataset
def load_dataset(file_path):
    with h5py.File(file_path, 'r') as hdf:
        trX = np.array(hdf.get('trX'))
        trY = np.array(hdf.get('trY'))
        tstX = np.array(hdf.get('tstX'))
        tstY = np.array(hdf.get('tstY'))
    return trX, trY, tstX, tstY

# Load data
file_path = 'data-Mini Project 2.h5'
trX, trY, tstX, tstY = load_dataset(file_path)

# Splitting the training data into training and validation sets
validation_size = int(0.1 * trX.shape[0])
validation_indices = np.random.choice(trX.shape[0], validation_size, replace=False)
training_indices = np.array([i for i in range(trX.shape[0]) if i not in validation_indices])

# Creating training and validation sets
valX = trX[validation_indices]
valY = trY[validation_indices]
trX = trX[training_indices]
trY = trY[training_indices]



# Activation functions and their derivatives

e =  2.718281828459045

def tanh(x):
    return np.tanh(x)

def dtanh(y):
    return 1 - y ** 2

def sigmoid(x):
    """Compute the sigmoid of x - Vectorized implementation."""
    # Clip input values to avoid exp overflow:
    x_clipped = np.clip(x, -500, 500)
    return 1.0 / (1.0 + np.exp(-x_clipped))



def dsigmoid(y):
    return y * (1 - y)

# Cross-entropy loss function
def cross_entropy_loss(predictions, labels):
    predictions = np.clip(predictions, 1e-15, 1 - 1e-15)
    loss = -np.sum(labels * np.log(predictions)) / labels.shape[0]
    return loss

# Forward pass function
def forward_pass(trX, W1h, Whh, Who, N):
    num_samples, num_timesteps, _ = trX.shape
    hidden_states = np.zeros((num_samples, N))
    outputs = np.zeros((num_samples, 6))

    for t in range(num_timesteps):
        Xt = trX[:, t, :]
        Xt_bias = np.hstack((Xt, np.ones((Xt.shape[0], 1))))  # Adding bias term
        hidden_states = np.tanh(np.dot(Xt_bias, W1h.T) + np.dot(hidden_states, Whh.T))
        outputs = sigmoid(np.dot(hidden_states, Who.T))

    return hidden_states, outputs

# Backpropagation Through Time (BPTT) function
def bptt(trX, trY, W1h, Whh, Who, learning_rate, N):
    hidden_states, outputs = forward_pass(trX, W1h, Whh, Who, N)
    training_loss = cross_entropy_loss(outputs, trY)  # Compute training loss
    dL_doutputs = outputs - trY

    dW1h = np.zeros_like(W1h)
    dWhh = np.zeros_like(Whh)
    dWho = np.zeros_like(Who)

    for t in reversed(range(trX.shape[1])):
        dWho += np.dot(dL_doutputs.T, hidden_states)
        dhidden = np.dot(dL_doutputs, Who) * dtanh(hidden_states)  # dtanh for derivative of tanh

        for bptt_step in reversed(range(max(0, t - backprop_truncate), t+1)):
            dWhh += np.dot(dhidden.T, hidden_states)
            Xt_bias = np.hstack((trX[:, bptt_step, :], np.ones((trX.shape[0], 1))))
            dW1h += np.dot(dhidden.T, Xt_bias)
            dhidden = np.dot(dhidden, Whh) * dtanh(hidden_states)  # dtanh for derivative of tanh

    W1h -= learning_rate * dW1h
    Whh -= learning_rate * dWhh
    Who -= learning_rate * dWho

    return W1h, Whh, Who, training_loss






# Initialize weights
N = 50  # Number of neurons in the hidden layer
input_size = trX.shape[2]  # Number of input features
output_size = 6  # Number of output classes

W1h = np.random.uniform(-0.1, 0.1, (N, input_size + 1))  # Input to hidden weights, including bias
Whh = np.random.uniform(-0.1, 0.1, (N, N))  # Hidden to hidden weights
Who = np.random.uniform(-0.1, 0.1, (output_size, N))  # Hidden to output weights

# Hyperparameters
learning_rate = 0.05
epochs = 50
mini_batch_size = 10
backprop_truncate = trX.shape[1]  # Assuming we backpropagate through the entire sequence

# Function to create mini-batches
def create_mini_batches(X, Y, batch_size):
    mini_batches = []
    num_complete_batches = X.shape[0] // batch_size
    for i in range(num_complete_batches):
        X_batch = X[i * batch_size:(i + 1) * batch_size, :]
        Y_batch = Y[i * batch_size:(i + 1) * batch_size, :]
        mini_batches.append((X_batch, Y_batch))
    if X.shape[0] % batch_size != 0:
        X_batch = X[num_complete_batches * batch_size:X.shape[0], :]
        Y_batch = Y[num_complete_batches * batch_size:Y.shape[0], :]
        mini_batches.append((X_batch, Y_batch))
    return mini_batches

# Preparing the mini-batches for training
train_mini_batches = create_mini_batches(trX, trY, mini_batch_size)

# Training the network using BPTT
training_losses = []  # Initialize a list to store training losses
training_accuracies = []


# Function to calculate accuracy
def calculate_accuracy(X, Y, W1h, Whh, Who, N):
    _, outputs = forward_pass(X, W1h, Whh, Who, N)
    predicted_labels = np.argmax(outputs, axis=1)
    true_labels = np.argmax(Y, axis=1)
    correct_predictions = np.sum(predicted_labels == true_labels)
    accuracy = (correct_predictions / len(true_labels)) * 100
    return accuracy




for epoch in range(epochs):
    np.random.shuffle(train_mini_batches)  # Shuffle mini-batches at the beginning of each epoch
    for X_batch, Y_batch in train_mini_batches:
        W1h, Whh, Who, training_loss = bptt(X_batch, Y_batch, W1h, Whh, Who, learning_rate, N)
        training_losses.append(training_loss)  # Append the training loss
        
    epoch_accuracy = calculate_accuracy(trX, trY, W1h, Whh, Who, N)
    training_accuracies.append(epoch_accuracy)
    print(f"Epoch {epoch+1}/{epochs}, Training Accuracy: {epoch_accuracy:.2f}%")    
        

# Calculate accuracy on test data
test_accuracy = calculate_accuracy(tstX, tstY, W1h, Whh, Who, N)
print(f"Test Accuracy: {test_accuracy}%")


  dhidden = np.dot(dhidden, Whh) * dtanh(hidden_states)  # dtanh for derivative of tanh


Epoch 1/50, Training Accuracy: 16.26%
Epoch 2/50, Training Accuracy: 16.26%
Epoch 3/50, Training Accuracy: 16.26%
Epoch 4/50, Training Accuracy: 16.26%
Epoch 5/50, Training Accuracy: 16.26%
Epoch 6/50, Training Accuracy: 16.26%
Epoch 7/50, Training Accuracy: 16.26%
Epoch 8/50, Training Accuracy: 16.26%
Epoch 9/50, Training Accuracy: 16.26%
Epoch 10/50, Training Accuracy: 16.26%
Epoch 11/50, Training Accuracy: 16.26%
Epoch 12/50, Training Accuracy: 16.26%
Epoch 13/50, Training Accuracy: 16.26%
Epoch 14/50, Training Accuracy: 16.26%
Epoch 15/50, Training Accuracy: 16.26%
Epoch 16/50, Training Accuracy: 16.26%
Epoch 17/50, Training Accuracy: 16.26%
Epoch 18/50, Training Accuracy: 16.26%
Epoch 19/50, Training Accuracy: 16.26%
Epoch 20/50, Training Accuracy: 16.26%
Epoch 21/50, Training Accuracy: 16.26%
Epoch 22/50, Training Accuracy: 16.26%
Epoch 23/50, Training Accuracy: 16.26%
Epoch 24/50, Training Accuracy: 16.26%
Epoch 25/50, Training Accuracy: 16.26%
Epoch 26/50, Training Accuracy: 16