In [157]:
import numpy as np
import h5py
import matplotlib.pyplot as plt

In [158]:
# Load and open the file containing the data
myFile = h5py.File('data-Mini Project 2.h5', 'r+')

# List all groups in the .h5 file
print(f"Keys: {myFile.keys()}")

Keys: <KeysViewHDF5 ['trX', 'trY', 'tstX', 'tstY']>


In [159]:
# print the information about the keys in the data
for key in myFile.keys():
    print(myFile[key])

<HDF5 dataset "trX": shape (3000, 150, 3), type "<f8">
<HDF5 dataset "trY": shape (3000, 6), type "<f8">
<HDF5 dataset "tstX": shape (600, 150, 3), type "<f8">
<HDF5 dataset "tstY": shape (600, 6), type "<f8">


In [160]:
# Extract the data from the file as numpy arrays

n1 = myFile.get('trX')  # trX is the training data
trX = np.array(n1)
print(f"trX shape: {trX.shape}, dtype: {trX.dtype}")

n1 = myFile.get('trY')  # trY is the training labels
trY = np.array(n1)
print(f"trY shape: {trY.shape}, dtype: {trY.dtype}")

n1 = myFile.get('tstX') # tstX is the test data
tstX = np.array(n1)
print(f"tstX shape: {tstX.shape}, dtype: {tstX.dtype}")

n1 = myFile.get('tstY') # tstY is the test labels
tstY = np.array(n1)
print(f"tstY shape: {tstY.shape}, dtype: {tstY.dtype}")

trX shape: (3000, 150, 3), dtype: float64
trY shape: (3000, 6), dtype: float64
tstX shape: (600, 150, 3), dtype: float64
tstY shape: (600, 6), dtype: float64


In [161]:
myFile.close()

In [162]:
# add the average of each example for each sensor over 150 time steps as the 151st time step

# trX = np.concatenate((trX, np.mean(trX, axis=1, keepdims=True)), axis=1)
# print(f"trX shape: {trX.shape}, dtype: {trX.dtype}")

# tstX = np.concatenate((tstX, np.mean(tstX, axis=1, keepdims=True)), axis=1)
# print(f"tstX shape: {tstX.shape}, dtype: {tstX.dtype}")


In [163]:
# hidden layer activation function
def tanh_activation(x):
    return np.tanh(x)

# sigmoid activation function
def sigmoid_activation(x):
    return 1 / (1 + np.exp(-x))


In [164]:
class RNN:
    def __init__(self, trX, trY, tstX, tstY, N, learning_rate, mini_batch_size, num_epochs):
        # initialize the data
        self.trX = trX  # 3000 x 150 x 3
        self.trY = trY  # 3000 x 6
        self.tstX = tstX    # 600 x 150 x 3
        self.tstY = tstY    # 600 x 6
        
        # add the bias to the data
        self.trX = np.concatenate((self.trX, np.ones((self.trX.shape[0], self.trX.shape[1], 1))), axis=2)   # 3000 x 150 x 4
        self.tstX = np.concatenate((self.tstX, np.ones((self.tstX.shape[0], self.tstX.shape[1], 1))), axis=2)   # 600 x 150 x 4

        # initialize the hyperparameters
        self.N = N
        self.learning_rate = learning_rate
        self.mini_batch_size = mini_batch_size
        self.num_epochs = num_epochs

        # initialize the weights and biases
        self.Whh = np.random.uniform(-0.1, 0.1, (self.N, self.N))   # N x N
        self.W1h = np.random.uniform(-0.1, 0.1, (self.N, 3+1))  # N x 4
        self.Who = np.random.uniform(-0.1, 0.1, (6, self.N+1))  # 6 x (N+1)

    # forward pass
    def forward_pass(self, x):
        # x is a 150x4 vector where the first 3 elements are the sensor data and the last element is the bias with 150 time steps
        # initialize hidden layer
        h = np.zeros((self.N, 1))   # N x 1
        # initialize hidden layer output
        h_out = np.zeros((self.N, 1))   # N x 1
        # initialize output layer
        y = np.zeros((6, 1))

        # loop over the time steps
        for t in range(x.shape[0]):
            # update the hidden layer
            ara = np.matmul(self.W1h, x[t].reshape(-1, 1))
            h = np.matmul(self.Whh, h) + ara
            # update the hidden layer output
            h_out = tanh_activation(h)
            # add the bias to the hidden layer output
            h_out = np.concatenate((h_out, np.ones((1, 1))), axis=0)
            # update the output layer
            y = np.matmul(self.Who, h_out)
            # apply the sigmoid activation function to the output layer
            y = sigmoid_activation(y)

        return y
    
    def cross_entropy_loss(self, y, pred):
        # calculate the cross entropy loss
        loss = -np.sum(np.multiply(y, np.log(pred)) + np.multiply((1 - y), np.log(1 - pred)))
        return loss

    # backpropagation
    def backpropagation(self, x, y, pred):
        # initialize the gradients
        dW1h = np.zeros_like(self.W1h)
        dWhh = np.zeros_like(self.Whh)
        dWho = np.zeros_like(self.Who)
        
        # initialize the hidden layer
        h = np.zeros((self.N, 1))
        
        
        
        for t in reversed(range(x.shape[0])):
            
            

In [None]:
# backward pass
    def backward_pass(self, x, y, pred):
        # initialize the error in the output layer
        delta_o = np.zeros((6, 1))
        # initialize the error in the hidden layer
        delta_h = np.zeros((self.N, 1))

        # loop over the time steps
        for t in reversed(range(x.shape[1])):
            # calculate the error in the output layer
            delta_o = (pred[:, t].reshape(-1, 1) - y[:, t].reshape(-1, 1)) * pred[:, t].reshape(-1, 1) * (1 - pred[:, t].reshape(-1, 1))
            # calculate the error in the hidden layer
            delta_h = np.dot(self.Who.T) * delta_o + np.dot(self.Whh.T, delta_h)

# Backpropagation Through Time (BPTT) function
def bptt(trX, trY, W1h, Whh, Who, learning_rate, N):
    hidden_states, outputs = forward_pass(trX, W1h, Whh, Who, N)
    training_loss = cross_entropy_loss(outputs, trY)  # Compute training loss
    dL_doutputs = outputs - trY

    dW1h = np.zeros_like(W1h)
    dWhh = np.zeros_like(Whh)
    dWho = np.zeros_like(Who)

    for t in reversed(range(trX.shape[1])):
        dWho += np.dot(dL_doutputs.T, hidden_states)
        dhidden = np.dot(dL_doutputs, Who) * dtanh(hidden_states)  # dtanh for derivative of tanh

        for bptt_step in reversed(range(max(0, t - backprop_truncate), t+1)):
            dWhh += np.dot(dhidden.T, hidden_states)
            Xt_bias = np.hstack((trX[:, bptt_step, :], np.ones((trX.shape[0], 1))))
            dW1h += np.dot(dhidden.T, Xt_bias)
            dhidden = np.dot(dhidden, Whh) * dtanh(hidden_states)  # dtanh for derivative of tanh

    W1h -= learning_rate * dW1h
    Whh -= learning_rate * dWhh
    Who -= learning_rate * dWho

    return W1h, Whh, Who, training_loss

In [170]:
# initialize the network
N = 50
learning_rate = 0.05
mini_batch_size = 30
num_epochs = 50
rnn = RNN(trX, trY, tstX, tstY, N, learning_rate, mini_batch_size, num_epochs)
y = rnn.forward_pass(rnn.trX[453])
print(f"y: {y}, shape: {y.shape}")



y: [[0.50517189]
 [0.48755945]
 [0.51946932]
 [0.50987327]
 [0.52375783]
 [0.46712384]], shape: (6, 1)


In [90]:
# implement the training loop and test the network on the test data
# initialize the training loss
tr_loss = np.zeros((num_epochs, 1))
# initialize the test loss
tst_loss = np.zeros((num_epochs, 1))

# initialize the network
N = 50
learning_rate = 0.05
mini_batch_size = 30
num_epochs = 50
rnn = RNN(trX, trY, tstX, tstY, N, learning_rate, mini_batch_size, num_epochs)

# loop over the epochs
for epoch in range(num_epochs):
    # initialize the training loss for this epoch
    tr_loss_epoch = 0
    # initialize the test loss for this epoch
    tst_loss_epoch = 0

    # loop over the training examples
    for i in range(trX.shape[0]):
        # forward pass
        y_hat = rnn.forward_pass(trX[i, :, :])
        # backward pass
        rnn.backward_pass(trX[i, :, :], trY[i, :, :], y_hat)
        # update the weights and biases
        Who = Who - learning_rate * rnn.delta_o * rnn.h.T
        W1h = W1h - learning_rate * rnn.delta_h * rnn.h_out.T
        Whh = Whh - learning_rate * rnn.delta_h * rnn.h.T

        # calculate the training loss for this example
        tr_loss_epoch = tr_loss_epoch + np.sum(np.square(trY[i, :, :] - y_hat))
        # calculate the test loss for this example
        tst_loss_epoch = tst_loss_epoch + np.sum(np.square(tstY[i, :, :] - y_hat))

    # calculate the average training loss for this epoch
    tr_loss[epoch] = tr_loss_epoch / trX.shape[0]
    # calculate the average test loss for this epoch
    tst_loss[epoch] = tst_loss_epoch / tstX.shape[0]

    # print the training and test loss for this epoch
    print(f"Epoch: {epoch}, Training Loss: {tr_loss[epoch]}, Test Loss: {tst_loss[epoch]}")

ValueError: shapes (50,4) and (150,1) not aligned: 4 (dim 1) != 150 (dim 0)