In [1]:
# Import libraries
import numpy as np 
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Prepare Dataset
# load data

# relative path to npz files
path = 'Measurements'
file_name = 'output_batch_%d.npz'

# Training
# how many frames to load
frames_num = 10
data_frames = []
data_forces = []
for i in range(frames_num):
    file_path = os.path.join(path,file_name %i)
    data = np.load(file_path)
    data_frames.append(data['frames'])
    data_forces.append(data['forces'])

combined_frames = np.concatenate(data_frames, axis=0)
combined_forces = np.concatenate(data_forces, axis=0)

# Test
# file_path = os.path.join(path, file_name %11)
# test_data = np.load(file_path)

In [3]:
# train test split. Size of train data is 10/11 and size of test data is 1/11 
# features_train, features_test, targets_train, targets_test = train_test_split(combined_frames,
#                                                                              combined_forces[:,2],
#                                                                              test_size = (1/11),
#                                                                              random_state = 42) 
# train_test_split NOT RECOMMENDED, because it shuffles temporally dependent data
features_train = combined_frames
targets_train = combined_forces[:,2]
# features_test = test_data['frames']
# targets_test = test_data['forces']
# targets_test = targets_test[:,2]

# create feature and targets tensor for train set. As you remember we need variable to accumulate gradients. Therefore first we create tensor, then we will create variable
featuresTrain = torch.from_numpy(features_train)
targetsTrain = torch.from_numpy(targets_train)

# create feature and targets tensor for test set. 
# featuresTest = torch.from_numpy(features_test)
# targetsTest = torch.from_numpy(targets_test)

In [4]:
# Create RNN Model
class RNNModel(nn.Module):
    def __init__(self, input_channels, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        
        # Convolutional layers
        self.conv1 = nn.Conv2d(input_channels, 9, kernel_size = 5, stride = 1, padding = 2)
        self.pool = nn.MaxPool2d(kernel_size = 4, stride = 4)
        self.conv2 = nn.Conv2d(9, 18, kernel_size = 5, stride = 1, padding = 2) # If needed
        
        # RNN
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.RNN(18 * 16 * 30, hidden_dim, layer_dim, batch_first=True, nonlinearity='relu')
        
        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        # Convolutional layers
        x = self.pool(nn.ELU()(self.conv1(x)))
        x = self.pool(nn.ELU()(self.conv2(x))) # If needed
        
        # Reshape for RNN
        x = torch.reshape(x, (-1, 1, 18 * 16 * 30))  # Reshape to (batch_size, seq_len, input_size)
        print("Size of x:", x.size())  # Print size of x
        
        # RNN
        h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        #print("Size of h0:", h0.size())  # Print size of h0 - Debugging - Obsolete
        out, hn = self.rnn(x, h0)
        
        # Output layer
        out = self.fc(out[:, -1, :]) 
        return out


In [5]:
# batch_size, epoch and iteration
batch_size = 1000
num_epochs = 10

# Pytorch train and test sets
train = TensorDataset(featuresTrain,targetsTrain)
# test = TensorDataset(featuresTest,targetsTest)

# data loader
train_loader = DataLoader(train, batch_size = batch_size, shuffle = False)
# test_loader = DataLoader(test, batch_size = batch_size, shuffle = False)
    
# Create RNN
input_channels = 3  # RGB channels
hidden_dim = 100  # hidden layer dimension
layer_dim = 1     # number of hidden layers
output_dim = 1   # output dimension

model = RNNModel(input_channels, hidden_dim, layer_dim, output_dim)

# Define your loss function
error = nn.MSELoss()

# Define your optimizer
learning_rate = 0.0001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [6]:
# Training
seq_dim = 50 # Consider the whole batch to be temporally correlated
loss_list = []
iteration_list = []
accuracy_list = []
count = 0
for epoch in range(num_epochs):
    print('~~~BEGINNING OF DATASET~~~')
    for i, (images, labels) in enumerate(train_loader):
        images = images.float()
        # print(images.shape)  # Add this line to check the shape of images - Debugging purposes
        images = images.permute(0, 3, 1, 2)
            
        # Clear gradients
        optimizer.zero_grad()
        
        # Forward propagation
        outputs = model(images)
        outputs = torch.squeeze(outputs)
        
        # Calculate loss
        loss = error(outputs, labels.float())
        
        # Backpropagation
        loss.backward()
        
        # Update parameters
        optimizer.step()
        
        count += 1
            
        # Store loss and iteration
        loss_list.append(loss.data)
        # Print Loss
        if count % 1 == 0: # for now print for every iteration
            print('Iteration: {}  Loss: {}'.format(count, loss.data.item()))

~~~BEGINNING OF DATASET~~~
Size of x: torch.Size([1000, 1, 8640])
Iteration: 1  Loss: 1681.076171875
Size of x: torch.Size([1000, 1, 8640])
Iteration: 2  Loss: 141.35411071777344
Size of x: torch.Size([1000, 1, 8640])
Iteration: 3  Loss: 145.57406616210938
Size of x: torch.Size([1000, 1, 8640])
Iteration: 4  Loss: 607.0349731445312
Size of x: torch.Size([1000, 1, 8640])
Iteration: 5  Loss: 654.3024291992188
Size of x: torch.Size([1000, 1, 8640])
Iteration: 6  Loss: 386.0529479980469
Size of x: torch.Size([1000, 1, 8640])
Iteration: 7  Loss: 117.34428405761719
Size of x: torch.Size([1000, 1, 8640])
Iteration: 8  Loss: 5.088362216949463
Size of x: torch.Size([1000, 1, 8640])
Iteration: 9  Loss: 37.02680587768555
Size of x: torch.Size([1000, 1, 8640])
Iteration: 10  Loss: 162.85350036621094
~~~BEGINNING OF DATASET~~~
Size of x: torch.Size([1000, 1, 8640])
Iteration: 11  Loss: 277.4777526855469
Size of x: torch.Size([1000, 1, 8640])
Iteration: 12  Loss: 259.69415283203125
Size of x: torch.

In [7]:
print(loss_list)
print(np.sqrt(np.mean(loss_list[-10:])))

[tensor(1681.0762), tensor(141.3541), tensor(145.5741), tensor(607.0350), tensor(654.3024), tensor(386.0529), tensor(117.3443), tensor(5.0884), tensor(37.0268), tensor(162.8535), tensor(277.4778), tensor(259.6942), tensor(181.8954), tensor(73.6506), tensor(12.5093), tensor(4.8440), tensor(33.5562), tensor(74.4556), tensor(114.4628), tensor(96.4539), tensor(56.1740), tensor(28.4306), tensor(6.4673), tensor(2.1124), tensor(10.4384), tensor(29.2021), tensor(43.7554), tensor(45.6422), tensor(25.5744), tensor(18.1954), tensor(10.0412), tensor(2.3739), tensor(4.8093), tensor(16.0103), tensor(23.8507), tensor(22.4510), tensor(15.6177), tensor(9.0392), tensor(5.2935), tensor(2.3339), tensor(7.2790), tensor(9.9613), tensor(12.0886), tensor(8.1535), tensor(5.5378), tensor(4.2463), tensor(2.4785), tensor(2.7514), tensor(5.0480), tensor(5.2774), tensor(3.9240), tensor(4.2950), tensor(3.1567), tensor(3.0832), tensor(2.1783), tensor(2.4627), tensor(3.3121), tensor(4.8103), tensor(2.1949), tensor(3.4

In [8]:
print(outputs) # Small sanity check

tensor([-2.0124e+00, -1.6255e+00, -2.4482e+00, -2.2931e+00, -2.1744e+00,
        -1.8728e+00, -2.1315e+00, -1.9730e+00, -1.7552e+00, -1.6828e+00,
        -1.6514e+00, -2.0475e+00, -2.1053e+00, -1.4348e+00, -2.5052e+00,
        -1.6313e+00, -1.8625e+00, -2.6759e+00, -1.5475e+00, -1.6707e+00,
        -1.3077e+00, -1.9264e+00, -1.5310e+00, -1.3943e+00, -2.7603e+00,
        -1.6588e+00, -1.5918e+00, -2.4011e+00, -2.9571e+00, -2.4022e+00,
        -2.2389e+00, -2.9246e+00, -2.0929e+00, -1.0412e+00, -2.5827e+00,
        -1.9896e+00, -1.9685e+00, -2.5351e+00, -1.8889e+00, -2.0059e+00,
        -1.1405e+00, -1.8913e+00, -1.6616e+00, -2.4287e+00, -2.0257e+00,
        -1.3838e+00, -1.6378e+00, -2.4027e+00, -1.6009e+00, -1.6436e+00,
        -1.9559e+00, -1.5582e+00, -1.6805e+00, -2.5315e+00, -2.2803e+00,
        -3.1635e+00, -1.6446e+00, -2.0440e+00, -1.4603e+00, -2.2316e+00,
        -1.1591e+00, -1.2438e+00, -2.4220e+00, -1.8457e+00, -1.8852e+00,
        -2.0594e+00, -2.0096e+00, -1.8593e+00, -2.1

In [9]:
print(labels.float())

tensor([-1.0797, -0.9949, -0.9238, -0.8581, -0.7988, -0.7402, -0.6864, -0.6413,
        -0.6007, -0.5637, -0.5303, -0.5034, -0.4781, -0.4516, -0.4438, -0.4478,
        -0.4525, -0.4622, -0.4775, -0.4831, -0.4824, -0.4824, -0.4840, -0.4829,
        -0.4796, -0.4784, -0.4745, -0.4723, -0.4696, -0.4686, -0.4738, -0.4907,
        -0.5179, -0.5654, -0.6264, -0.6943, -0.7702, -0.8488, -0.9312, -1.0187,
        -1.1152, -1.2284, -1.3486, -1.4846, -1.6458, -1.8335, -2.0371, -2.2583,
        -2.4803, -2.6908, -2.8747, -3.0401, -3.1835, -3.2991, -3.3773, -3.4140,
        -3.4004, -3.3335, -3.2181, -3.0745, -2.9110, -2.7374, -2.5585, -2.3805,
        -2.1968, -2.0178, -1.8431, -1.6839, -1.5378, -1.3989, -1.2665, -1.1307,
        -0.9906, -0.8688, -0.7793, -0.7269, -0.7026, -0.7065, -0.7217, -0.7390,
        -0.7422, -0.7402, -0.7296, -0.7031, -0.6639, -0.6237, -0.5844, -0.5494,
        -0.5165, -0.4928, -0.4744, -0.4658, -0.4598, -0.4682, -0.4804, -0.4992,
        -0.5257, -0.5651, -0.6205, -0.69

Update (5 epochs): Semi-automated data loading procedure and additional convolutional-pooling layer to the CNN, reducing the output nodes that are fed to RNN by a factor of 10. Learning rate reduced tenfold (1e-4). Run over 5 epochs shows significant improvement. Root of average MSE in last epoch is 2.07, at that instance it seems it could be further lowered by learning.
Note: Assuming values are equally distributed between -3 and 0, a random force predictor would have the metric above equal to 1.5 . Below 1.5 is needed in a test setting means so that the force predictor is relevant. Still, significant improvement must be noted.

Update (10 epochs): Average RMSE of 1.43 after 10 epochs, plateau reached but (in theory) better than a random guesser. Improvements to be considered: sequence length (e.g. 100 frames), reducing input resolution, adding 1 more conv&pool layer, increasing hidden state size of RNN.