In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt

In [2]:
# Load and open the file containing the data.
# Read-only mode ('r'): this notebook only reads from the file, so there is no
# reason to request write access ('r+'), which needs write permission on the
# file and would allow the dataset to be modified by accident.
myFile = h5py.File('data-Mini Project 2.h5', 'r')

# List the top-level keys stored in the .h5 file
print(f"Keys: {myFile.keys()}")

Keys: <KeysViewHDF5 ['trX', 'trY', 'tstX', 'tstY']>


In [3]:
# Show the one-line summary that h5py prints for every top-level entry
for key in myFile.keys():
    entry = myFile[key]
    print(entry)

<HDF5 dataset "trX": shape (3000, 150, 3), type "<f8">
<HDF5 dataset "trY": shape (3000, 6), type "<f8">
<HDF5 dataset "tstX": shape (600, 150, 3), type "<f8">
<HDF5 dataset "tstY": shape (600, 6), type "<f8">


In [4]:
# Extract the data from the file as numpy arrays

def load_dataset(h5_file, name):
    """Read the dataset ``name`` from ``h5_file`` into an in-memory numpy array.

    The file is indexed directly (``h5_file[name]``) instead of using
    ``h5_file.get(name)``: ``get`` returns ``None`` for a missing key, which
    ``np.array`` would silently turn into a 0-d object array, whereas indexing
    raises ``KeyError`` right where the problem is.

    Prints the array's shape and dtype, then returns the array.
    """
    array = np.array(h5_file[name])
    print(f"{name} shape: {array.shape}, dtype: {array.dtype}")
    return array


trX = load_dataset(myFile, 'trX')    # trX is the training data
trY = load_dataset(myFile, 'trY')    # trY is the training labels
tstX = load_dataset(myFile, 'tstX')  # tstX is the test data
tstY = load_dataset(myFile, 'tstY')  # tstY is the test labels

trX shape: (3000, 150, 3), dtype: float64
trY shape: (3000, 6), dtype: float64
tstX shape: (600, 150, 3), dtype: float64
tstY shape: (600, 6), dtype: float64


In [5]:
# close the .h5 file (the arrays extracted above are independent numpy copies)
myFile.close()

# Seed numpy's global random generator so that the shuffles below, and any
# later np.random draws (e.g. the weight initialisation in RNN.__init__), are
# repeatable after Restart Kernel -> Run All.
SEED = 42
np.random.seed(SEED)

# shuffle the training and the test data; samples and labels are indexed with
# the same permutation so every sequence keeps its own label
indexes = np.random.permutation(trX.shape[0])
trX = trX[indexes]
trY = trY[indexes]

indexes = np.random.permutation(tstX.shape[0])
tstX = tstX[indexes]
tstY = tstY[indexes]

In [6]:
# tanh activation function for the hidden layer
def tanh_activation(x):
    """Return the element-wise hyperbolic tangent of ``x``."""
    activated = np.tanh(x)
    return activated

# tanh derivative
def tanh_derivative(x):
    """Return d/dx tanh(x) = 1 - tanh(x)^2, evaluated element-wise.

    ``x`` is the pre-activation value: tanh is applied to it inside this
    function, so do not pass an already-activated output.
    """
    tanh_x = np.tanh(x)
    return 1.0 - tanh_x ** 2

# sigmoid activation function for the output layer
def sigmoid_activation(x):
    """Return the element-wise logistic sigmoid 1 / (1 + exp(-x)).

    Evaluated as exp(-log(1 + exp(-x))) using ``np.logaddexp``, which is the
    same function mathematically but never forms exp(-x) directly. The naive
    expression overflows (RuntimeWarning, inf intermediate) for large negative
    ``x``; this form returns the correct limits 0 and 1 without overflow while
    keeping full relative precision for small outputs.
    """
    return np.exp(-np.logaddexp(0.0, -x))

# sigmoid derivative
def sigmoid_derivative(x):
    """Return x * (1 - x), element-wise.

    This equals the sigmoid's derivative only when ``x`` is already the
    sigmoid *output*; no sigmoid is applied inside this function.
    """
    complement = 1 - x
    return x * complement


In [7]:
class RNN:
    """Single-layer recurrent network for sequence classification.

    Model (column-vector form, h_0 = 0):

        h_t = tanh(Whh @ h_{t-1} + W1h @ [x_t; 1])
        p_t = sigmoid(Who @ [h_t; 1])

    The prediction for a sequence is the output at its last time step, and the
    loss is the summed binary cross-entropy over the output units.

    Attributes:
        trX, tstX: input sequences with a constant-1 bias feature appended,
            shape (samples, time_steps, n_in + 1).
        trY, tstY: target vectors, shape (samples, n_out).
        Whh: recurrent weights, shape (N, N).
        W1h: input-to-hidden weights incl. bias column, shape (N, n_in + 1).
        Who: hidden-to-output weights incl. bias column, shape (n_out, N + 1).
        history: one dict per epoch of the last ``backpropagation`` call with
            the keys "epoch", "loss" and "accuracy".
    """

    def __init__(self, trX, trY, tstX, tstY, N, learning_rate, mini_batch_size, num_epochs):
        """Store the data (with bias feature) and draw the initial weights.

        Args:
            trX: training sequences, shape (samples, time_steps, n_in).
            trY: training targets, shape (samples, n_out).
            tstX: test sequences, same layout as ``trX``.
            tstY: test targets, same layout as ``trY``.
            N: number of hidden units.
            learning_rate: gradient-descent step size.
            mini_batch_size: default batch size used by ``backpropagation``.
            num_epochs: number of passes over the training data.

        Raises:
            ValueError: if ``trX`` is not 3-D or ``trY`` is not 2-D.
        """
        trX = np.asarray(trX, dtype=float)
        trY = np.asarray(trY, dtype=float)
        if trX.ndim != 3 or trY.ndim != 2:
            raise ValueError("trX must be 3-D (samples, time, features) and trY 2-D (samples, outputs)")

        # layer sizes come from the data instead of being hard-coded
        self.n_in = trX.shape[2]
        self.n_out = trY.shape[1]

        # initialize the data; the inputs get a constant-1 bias feature
        self.trX = self._with_bias(trX)
        self.trY = trY
        self.tstX = self._with_bias(tstX)
        self.tstY = np.asarray(tstY, dtype=float)

        # initialize the hyperparameters
        self.N = N
        self.learning_rate = learning_rate
        self.mini_batch_size = mini_batch_size
        self.num_epochs = num_epochs

        # initialize the weights; the hidden bias is the last column of W1h
        # (it multiplies the constant-1 input feature), so Whh is square
        self.Whh = np.random.uniform(-0.1, 0.1, (self.N, self.N))           # N x N
        self.W1h = np.random.uniform(-0.1, 0.1, (self.N, self.n_in + 1))    # N x (n_in+1)
        self.Who = np.random.uniform(-0.1, 0.1, (self.n_out, self.N + 1))   # n_out x (N+1)

        self.history = []

    @staticmethod
    def _sigmoid(z):
        """Logistic sigmoid written so that exp never overflows."""
        return np.exp(-np.logaddexp(0.0, -z))

    def _with_bias(self, X):
        """Return ``X`` with a trailing constant-1 feature, adding it if absent."""
        X = np.asarray(X, dtype=float)
        n_features = X.shape[-1]
        if n_features == self.n_in + 1:
            return X
        if n_features == self.n_in:
            ones = np.ones(X.shape[:-1] + (1,))
            return np.concatenate((X, ones), axis=-1)
        raise ValueError(
            f"expected {self.n_in} features (or {self.n_in + 1} with bias), got {n_features}"
        )

    def _run_hidden(self, X):
        """Run the recurrence over a batch.

        ``X`` has shape (batch, time_steps, n_in + 1). Returns a list of
        ``time_steps + 1`` arrays of shape (batch, N); entry 0 is the zero
        initial state and entry t the activated state after step t.
        """
        state = np.zeros((X.shape[0], self.N))
        states = [state]
        for t in range(X.shape[1]):
            # row-vector form of tanh(Whh @ h_{t-1} + W1h @ x_t)
            state = np.tanh(state @ self.Whh.T + X[:, t, :] @ self.W1h.T)
            states.append(state)
        return states

    def _readout(self, H):
        """Map hidden states (batch, N) to output probabilities (batch, n_out)."""
        H_aug = np.concatenate((H, np.ones((H.shape[0], 1))), axis=1)
        return self._sigmoid(H_aug @ self.Who.T)

    # forward pass
    def forward_pass(self, x):
        """Run one sequence through the network.

        Args:
            x: a single sequence of shape (time_steps, n_in), or
               (time_steps, n_in + 1) if the bias feature is already appended.

        Returns:
            (hidden_states, preds): ``hidden_states`` is a list of
            ``time_steps + 1`` arrays of shape (N, 1), starting with the zero
            initial state; ``preds`` is a list of ``time_steps`` arrays of
            shape (n_out, 1) holding the output at every time step.

        Raises:
            ValueError: if ``x`` is not 2-D or has the wrong feature count.
        """
        x = self._with_bias(x)
        if x.ndim != 2:
            raise ValueError("forward_pass expects a single 2-D sequence (time_steps, features)")
        states = self._run_hidden(x[np.newaxis])
        hidden_states = [state.T for state in states]
        preds = [self._readout(state).T for state in states[1:]]
        return hidden_states, preds

    def predict(self, X):
        """Return last-time-step output probabilities, shape (batch, n_out).

        ``X`` is a batch (batch, time_steps, features) or a single 2-D
        sequence; the bias feature is appended when missing.
        """
        X = self._with_bias(X)
        if X.ndim == 2:
            X = X[np.newaxis]
        return self._readout(self._run_hidden(X)[-1])

    # multi category cross entropy loss function
    def loss_function(self, y, pred):
        """Summed binary cross-entropy between targets ``y`` and outputs ``pred``.

        ``pred`` is clipped away from exactly 0 and 1 so that a saturated
        output gives a large finite loss instead of inf/nan from log(0).
        """
        eps = 1e-12
        pred = np.clip(pred, eps, 1.0 - eps)
        loss = np.sum(y * np.log(pred) + (1 - y) * np.log(1 - pred))
        return -loss

    def _batch_gradients(self, x_batch, y_batch):
        """Backpropagation through time for one mini-batch.

        The loss is the cross-entropy of the last-time-step output, averaged
        over the batch.

        Returns:
            (loss, probs, grads): mean loss, the (batch, n_out) output
            probabilities, and a dict with the gradients of the mean loss
            w.r.t. "Whh", "W1h" and "Who".
        """
        batch = x_batch.shape[0]
        states = self._run_hidden(x_batch)
        last_aug = np.concatenate((states[-1], np.ones((batch, 1))), axis=1)
        probs = self._sigmoid(last_aug @ self.Who.T)
        loss = self.loss_function(y_batch, probs) / batch

        # sigmoid + cross-entropy: dLoss/d(output pre-activation) = pred - target
        d_out = (probs - y_batch) / batch
        grads = {
            "Whh": np.zeros_like(self.Whh),
            "W1h": np.zeros_like(self.W1h),
            "Who": d_out.T @ last_aug,
        }

        # gradient w.r.t. the last hidden state (drop the output bias column)
        d_state = d_out @ self.Who[:, :self.N]
        for t in range(x_batch.shape[1], 0, -1):
            # tanh'(a_t) expressed through the stored activation h_t
            d_pre = d_state * (1.0 - states[t] ** 2)
            grads["Whh"] += d_pre.T @ states[t - 1]
            grads["W1h"] += d_pre.T @ x_batch[:, t - 1, :]
            # propagate to the previous hidden state
            d_state = d_pre @ self.Whh
        return loss, probs, grads

    # backpropagation
    def backpropagation(self, batch_size=None, verbose=True):
        """Train with mini-batch gradient descent for ``num_epochs`` epochs.

        Args:
            batch_size: samples per update; defaults to ``mini_batch_size``.
            verbose: print loss and training accuracy after every epoch.

        The per-epoch mean loss and training accuracy (measured on each batch
        before its update) are stored in ``self.history``.

        Raises:
            ValueError: if ``batch_size`` < 1 or there is no training data.
        """
        if batch_size is None:
            batch_size = self.mini_batch_size
        batch_size = int(batch_size)
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        n_samples = self.trX.shape[0]
        if n_samples == 0:
            raise ValueError("no training samples")

        self.history = []
        for epoch in range(self.num_epochs):
            total_loss = 0.0
            correct = 0
            for start in range(0, n_samples, batch_size):
                x_batch = self.trX[start:start + batch_size]
                y_batch = self.trY[start:start + batch_size]

                loss, probs, grads = self._batch_gradients(x_batch, y_batch)

                # update the weights and biases
                self.Whh -= self.learning_rate * grads["Whh"]
                self.W1h -= self.learning_rate * grads["W1h"]
                self.Who -= self.learning_rate * grads["Who"]

                total_loss += loss * x_batch.shape[0]
                correct += int(np.sum(np.argmax(probs, axis=1) == np.argmax(y_batch, axis=1)))

            mean_loss = total_loss / n_samples
            accuracy = correct / n_samples
            self.history.append({"epoch": epoch, "loss": mean_loss, "accuracy": accuracy})
            if verbose:
                print(f"Epoch: {epoch}, Loss: {mean_loss:.4f}, Accuracy: {accuracy:.4f}")
            
            

In [None]:
# initialize the network
N = 50
learning_rate = 0.05
mini_batch_size = 30
num_epochs = 50
rnn = RNN(trX, trY, tstX, tstY, N, learning_rate, mini_batch_size, num_epochs)

# Smoke-test the forward pass on one training sequence. Use the copy stored on
# the network (rnn.trX): it carries the appended bias feature that the N x 4
# input weight matrix W1h multiplies, whereas the raw trX[0] has only 3
# features per time step.
# y holds the hidden states, p the per-time-step output predictions.
y, p = rnn.forward_pass(rnn.trX[0])
print(y[0].shape)
print(p[0].shape)

