# Convolutional LSTM for action prediction

- Keep track of the best validation value and save checkpoints with run information (autosave)
- Clean up naming (how models are saved, how configs are saved, how runs are called)
- Confusion matrix of the results / comparison with a baseline LSTM (that only takes in actions)
- wandb / TensorBoard integration (also for gradient information?)
- Hydra integration for hyperparameters?

### Hyperparameters

In [11]:
# preprocessing
seq_length = 20  # number of consecutive time steps in each sliding window
training_set_size = 0.67  # fraction of the windows used for training; the rest is held out

# lstm configuration
input_size = 960  # features per time step fed to the LSTM (re-assigned in later cells)
hidden_size = 20  # width of the LSTM hidden state
num_layers = 1  # number of stacked LSTM layers
num_classes = 1  # size of the output of the final linear layer

# training
num_epochs = 2000  # number of full-batch optimisation steps
learning_rate = 0.01  # learning rate handed to the Adam optimiser

### Imports

In [2]:
import pickle
import hydra
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
from torchinfo import summary

from tqdm.auto import tqdm

# own
import common.action as action
import common.world as world
import common.plot as plot
import common.preprocess as preprocess
import common.nets as nets
import common.train as train
import common.tools as tools

  if LooseVersion(mpl.__version__) >= "3.0":
  other = LooseVersion(other)
  if not hasattr(tensorboard, '__version__') or LooseVersion(tensorboard.__version__) < LooseVersion('1.15'):


### Load datasets

In [3]:
def _load_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Three recorded data sets, one pickle file each.
oracle_data = _load_pickle('datasets/oracle_data.pickle')
oracle_reversed_data = _load_pickle('datasets/oracle_reversed_data.pickle')
oracle_random_data = _load_pickle('datasets/oracle_random_data.pickle')

### Preprocess data

In [4]:
def sliding_windows(dataset, seq_length):
    """Cut a recording into fixed-length input windows with one target action each.

    Args:
        dataset: Mapping with an ``'actions'`` sequence and an
            ``'observations'`` sequence of NumPy image arrays. Each image is
            permuted with ``(2, 0, 1)``, i.e. its last axis is moved to the
            front (presumably height x width x channel input -- TODO confirm).
        seq_length: Number of consecutive time steps per window.

    Returns:
        A tuple ``(x_actions, x_imgs, y_actions)``: a NumPy array of action
        windows (every action wrapped in a one-element list), a float tensor
        of stacked image windows, and a NumPy array of target actions.

    NOTE(review): the target of the window starting at index ``s`` is the
    action at ``s + seq_length + 1``, so the action at ``s + seq_length`` is
    skipped for that window -- confirm this one-step gap is intended.
    """
    # Wrap every action in its own list so each time step has a feature axis.
    wrapped_actions = [[act] for act in dataset['actions']]

    # One float tensor per observation, last axis moved to the front.
    frames = torch.stack([
        torch.from_numpy(obs).float().permute(2, 0, 1)
        for obs in dataset['observations']
    ])

    # Start index of every window that still has a target available.
    window_starts = range(len(wrapped_actions) - seq_length - 1)

    action_windows = [wrapped_actions[s:s + seq_length] for s in window_starts]
    image_windows = [frames[s:s + seq_length] for s in window_starts]
    targets = [wrapped_actions[s + seq_length + 1] for s in window_starts]

    return np.array(action_windows), torch.stack(image_windows), np.array(targets)

# Build action windows, image windows and target actions from the oracle recording.
x_acts, x_imgs, y_acts = sliding_windows(oracle_data, seq_length)

In [5]:
# Kept only so that cells which still reference `Variable` keep working;
# it is a deprecated no-op wrapper around tensors in current PyTorch.
from torch.autograd import Variable

# Ordered split (no shuffling): the first `training_set_size` fraction of the
# windows is used for training, the remaining windows for validation.
train_size = int(len(y_acts) * training_set_size)
test_size = len(y_acts) - train_size

# (full) data set, converted once to float32 tensors
dataX_acts = torch.as_tensor(x_acts, dtype=torch.float32)
dataX_imgs = torch.as_tensor(x_imgs, dtype=torch.float32)
dataY_acts = torch.as_tensor(y_acts, dtype=torch.float32)

# training set (slices of the full tensors; they share memory with them)
trainX_acts = dataX_acts[:train_size]
trainX_imgs = dataX_imgs[:train_size]
trainY_acts = dataY_acts[:train_size]

# validation set
testX_acts = dataX_acts[train_size:]
testX_imgs = dataX_imgs[train_size:]
testY_acts = dataY_acts[train_size:]

In [6]:
class LSTM(nn.Module):
    """LSTM followed by a linear head applied to the final hidden state.

    Args:
        num_classes: Size of the output produced by the linear head.
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_length: Optional window length, stored as an attribute only.
            When omitted, a module-level ``seq_length`` is used if one
            exists (the previous behaviour); otherwise ``None`` is stored.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers,
                 seq_length=None):
        super().__init__()

        if seq_length is None:
            # Backward compatibility: this class used to read the module-level
            # global directly and raised NameError when it was not defined.
            seq_length = globals().get('seq_length')

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one prediction per input sequence.

        Args:
            x: Tensor of shape ``(batch, seq, input_size)``
                (the LSTM is built with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        # No initial state is passed, so nn.LSTM starts from zeros on every
        # call, created on the same device and with the same dtype as `x`.
        _, (h_n, _) = self.lstm(x)

        # h_n is (num_layers, batch, hidden_size). Only the top layer feeds the
        # head; flattening all layers into the batch axis would yield
        # num_layers * batch rows when num_layers > 1.
        return self.fc(h_n[-1])

In [7]:
class CNN(nn.Module):
    """Convolutional feature extractor applied to every frame of a sequence.

    Three 5x5 convolutions with ``seq_length``, ``2 * seq_length`` and
    ``3 * seq_length`` output channels; 2x2 max pooling follows the second
    and third convolution.
    """

    def __init__(self, seq_length):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=seq_length,
                               kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=seq_length,
                               out_channels=seq_length * 2, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=seq_length * 2,
                               out_channels=seq_length * 3, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, i):
        """Encode a batch of image sequences into per-step feature vectors.

        Args:
            i: Tensor indexed as ``(batch, step, channel, height, width)``.

        Returns:
            Tensor of shape ``(batch, step, features)`` where ``features`` is
            the flattened output of the last pooling layer.
        """
        batch, steps = i.shape[0], i.shape[1]
        channels, height, width = i.shape[2], i.shape[3], i.shape[4]

        # Fold batch and step axes together so Conv2d sees ordinary 4-D input.
        frames = i.reshape(-1, channels, height, width)

        hidden = F.relu(self.conv1(frames))  # no pooling after the first conv
        hidden = self.pool(F.relu(self.conv2(hidden)))
        hidden = self.pool(F.relu(self.conv3(hidden)))

        # Restore the (batch, step) axes and flatten everything else.
        return hidden.view(batch, steps, -1)

In [19]:
cnn = CNN(seq_length)

# This cell only checks shapes, so the forward passes run without building an
# autograd graph over the whole training set.
with torch.no_grad():
    features = cnn(trainX_imgs)

# Read the LSTM feature size from the CNN output instead of hard-coding it.
input_size = features.size(-1)

lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

with torch.no_grad():
    outputs = lstm(features)

print('CNN input shape:', trainX_imgs.size())
print('CNN output shape:', features.size())

print('LSTM input shape:', features.size())
print('LSTM output shape:', outputs.size())

print('Label shape:', trainY_acts.size())

CNN input shape: torch.Size([786, 20, 3, 32, 32])
CNN output shape: torch.Size([786, 20, 960])
LSTM input shape: torch.Size([786, 20, 960])
LSTM output shape: torch.Size([786, 1])
Label shape: torch.Size([786, 1])


### Network training

In [21]:
# Actions-only baseline: the LSTM is rebuilt with one input feature per time
# step and trained on the action windows; the CNN is not used in this cell.
input_size = 1
lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

criterion = torch.nn.MSELoss() # MSELoss treats the action as a regression target; CrossEntropyLoss would treat it as a class label
# Disabled joint CNN + LSTM optimisation, kept for reference:
#params = list(cnn.parameters()) + list(lstm.parameters())
#optimizer = torch.optim.Adam(params, lr=learning_rate)
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

# Full-batch training: each epoch is one optimiser step over the whole
# training set (likely too slow once image features are included).
for epoch in range(num_epochs):
    # Disabled image-feature path, kept for reference.
    # NOTE(review): `net_cnn` is not defined in the visible cells; the CNN
    # instance created earlier is named `cnn`.
    #features = net_cnn(trainX_imgs)
    #features = features #+ 0.01*trainX_acts
    features = trainX_acts
    outputs = lstm(features)
    optimizer.zero_grad()
    
    # compute the training loss
    loss = criterion(outputs, trainY_acts)
    
    loss.backward()
    
    optimizer.step()
    # report the loss every 200 epochs
    if epoch % 200 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

Epoch: 0, loss: 3.16080
Epoch: 200, loss: 0.14590
Epoch: 400, loss: 0.13812


KeyboardInterrupt: 

In [None]:
# Second training cell: reuses the existing `lstm` (its weights are not reset
# here) with a freshly created loss and Adam optimiser.
criterion = torch.nn.MSELoss() # MSELoss treats the action as a regression target; CrossEntropyLoss would treat it as a class label
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

# Train the model, one full-batch optimiser step per epoch
for epoch in range(num_epochs):
    outputs = lstm(trainX_acts) # we do not need to pass around the states since we train full batch
    optimizer.zero_grad()
    
    # compute the training loss
    loss = criterion(outputs, trainY_acts)
    
    loss.backward()
    
    optimizer.step()
    # report the loss every 200 epochs
    if epoch % 200 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

### Plotting results

In [None]:
from matplotlib import pyplot as plt

# Predict on the full data set (training and validation windows). This is
# inference only, so no autograd graph is recorded.
lstm.eval()
with torch.no_grad():
    train_predict = lstm(dataX_acts)

# `.detach()` replaces the discouraged `.data` attribute; `.cpu()` is a no-op
# for CPU tensors and keeps the conversion valid if the model runs elsewhere.
data_predict = train_predict.detach().cpu().numpy()
dataY_plot = dataY_acts.detach().cpu().numpy()

# Disabled inverse scaling, kept for reference (`sc` is not defined in the
# visible cells):
#data_predict = sc.inverse_transform(data_predict)
#dataY_plot = sc.inverse_transform(dataY_plot)

# Dashed red line marks where the training windows end and validation begins.
plt.axvline(x=train_size, c='r', linestyle='--')

# Targets are drawn with large markers and predictions with small ones, so
# both stay visible where they overlap.
plt.plot(dataY_plot, 'o', markersize=10)
plt.plot(data_predict, 'o', markersize=5)
plt.suptitle('Time-Series Prediction')
plt.show()

### Initialize models

In [None]:
# https://stackoverflow.com/questions/52138290/how-can-we-define-one-to-one-one-to-many-many-to-one-and-many-to-many-lstm-ne

# enable model to work with different sequence lengths
# wandb and tensorboard integration

# write model summary into runs folder

### Tensorboard integrations

In [None]:
# Load the TensorBoard notebook extension and open it on the `runs` log directory.
%load_ext tensorboard
%tensorboard --logdir=runs

In [None]:
# hydra integration for hyperparameters
import datetime
import os

import hydra
from omegaconf import OmegaConf


# Clear any global Hydra state left by an earlier run so this cell can be
# executed repeatedly in the same kernel.
hydra.core.global_hydra.GlobalHydra.instance().clear()

hydra.initialize(version_base=None, config_path='conf') # assumes the config files live in the `conf` folder next to this notebook
cfg = hydra.compose(config_name='config')

print(cfg)
print(cfg.params.lr)

print(OmegaConf.to_yaml(cfg))

# Save a timestamped snapshot of the composed config next to the run logs.
# NOTE(review): the ISO timestamp contains ':' (not a valid filename character
# on Windows) and the file has no `.yaml` extension -- revisit when the naming
# scheme is cleaned up.
local_time = datetime.datetime.now().isoformat()
name = local_time + '_configs'
# OmegaConf.save does not create missing directories.
os.makedirs("runs", exist_ok=True)
OmegaConf.save(cfg, "runs/"+name)

### Train model

In [None]:
# track best validation value
# save model during training

# Show every entry of the model's state_dict together with its tensor size.
# NOTE(review): `DualInput_model` is not defined in the visible cells --
# confirm where it is created before running this cell.
print("Model's state_dict:")
model_state = DualInput_model.state_dict()
for entry_name, entry in model_state.items():
    print(entry_name, "\t", entry.size())

# Show every entry of the optimizer's state_dict.
print("\nOptimizer's state_dict:")
optimizer_state = optimizer.state_dict()
for entry_name, entry in optimizer_state.items():
    print(entry_name, "\t", entry)

### Plot loss and accuracy curves

In [None]:
# look into confusion matrix