# Convolutional LSTM for action prediction

- keep track of best validation value + Save checkpoints with information (autosave)
- clean naming (of how models are saved, configs are saved, runs are called)
- confusion matrix of result / comparison with baseline LSTM (that only takes in actions)
- wandb/ tensorboard integration (also for gradient information?)
- hydra integration for hyperparameters?

### Imports

In [None]:
import pickle
import hydra
import datetime
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.autograd import Variable
from torchinfo import summary

from tqdm.auto import tqdm

from omegaconf import OmegaConf

# own
import common.action as action
import common.world as world
import common.plot as plot
import common.preprocess as preprocess
import common.nets as nets
import common.train as train
import common.tools as tools

### Hyperparameters

In [7]:
# input size is 960 or 1
train_imgs = False

# preprocessing
seq_length = 20
training_set_size = 0.67

# lstm configuration
hidden_size = 20
num_layers = 1
num_classes = 4
if train_imgs:
    input_size = 960
else:
    input_size = 1
# training
num_epochs = 2000
learning_rate = 0.01
optimizer_type = 'Adam'

In [17]:
# hydra integration for hyperparameters
hydra.core.global_hydra.GlobalHydra.instance().clear()

hydra.initialize(version_base=None, config_path='conf') # Assume the configuration file is in the current folder
cfg = hydra.compose(config_name='config')
# Can be used in the following way: cfg.params.learning_rate 
print(OmegaConf.to_yaml(cfg))

params:
  train_imgs: false
  seq_length: 20
  batch_size: 128
  training_set_size: 0.67
  hidden_size: 20
  num_layers: 1
  num_classes: 4
  input_size: 1
  num_epochs: 2000
  learning_rate: 0.01
  optimizer_type: Adam
  save_model: false
  save_plots: false



### Load datasets

In [9]:
with open('datasets/oracle_data.pickle', 'rb') as handle:
    oracle_data = pickle.load(handle)

with open('datasets/oracle_reversed_data.pickle', 'rb') as handle:
    oracle_reversed_data = pickle.load(handle)

with open('datasets/oracle_random_data.pickle', 'rb') as handle:
    oracle_random_data = pickle.load(handle)

### Preprocess data

In [10]:
# preprocess with sequence length
x_acts, x_imgs, y_acts = preprocess.sliding_windows(oracle_data, seq_length)
# data, train, test split
data, train, test = preprocess.split(x_acts, x_imgs, y_acts, training_set_size)
dataX_acts, dataX_imgs, dataY_acts = data
trainX_acts, trainX_imgs, trainY_acts = train
testX_acts, testX_imgs, testY_act = test

### Initialize models

In [13]:
cnn = nets.CNN(seq_length)
lstm = nets.LSTM(num_classes, input_size, hidden_size, num_layers, seq_length)

if train_imgs:
    features = cnn(trainX_imgs)
else:
    features = trainX_acts

outputs = lstm(features)

print('CNN input shape:', trainX_imgs.size())
print('CNN output shape:', features.size())

print('LSTM input shape:', features.size())
print('LSTM output shape:', outputs.size())

print('Label shape:', trainY_acts.size())

print('SUMMARY CNN \n', summary(cnn, trainX_imgs.size()), '\n')
print('SUMMARY LSTM \n', summary(lstm, (outputs.size(), ((2, 10), (2, 10,)))))

CNN input shape: torch.Size([786, 20, 3, 32, 32])
CNN output shape: torch.Size([786, 20, 1])
LSTM input shape: torch.Size([786, 20, 1])
LSTM output shape: torch.Size([786, 4])
Label shape: torch.Size([786, 4])
SUMMARY CNN 
Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      [786, 20, 960]            --
├─Conv2d: 1-1                            [15720, 20, 28, 28]       1,520
├─Conv2d: 1-2                            [15720, 40, 24, 24]       20,040
├─MaxPool2d: 1-3                         [15720, 40, 12, 12]       --
├─Conv2d: 1-4                            [15720, 60, 8, 8]         60,060
├─MaxPool2d: 1-5                         [15720, 60, 4, 4]         --
Total params: 81,620
Trainable params: 81,620
Non-trainable params: 0
Total mult-adds (G): 260.61
Input size (MB): 193.17
Forward/backward pass size (MB): 5352.35
Params size (MB): 0.33
Estimated Total Size (MB): 5545.84



TypeError: rand() received an invalid combination of arguments - got (tuple, tuple), but expected one of:
 * (tuple of ints size, *, torch.Generator generator, tuple of names names, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (tuple of ints size, *, torch.Generator generator, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (tuple of ints size, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (tuple of ints size, *, tuple of names names, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)


### Tensorboard integrations

In [None]:
from torch.utils.tensorboard import SummaryWriter

%load_ext tensorboard
%tensorboard --logdir=runs

### Network training

In [None]:
# wandb and tensorboard integration
# write model summary/ parameter settings into runs folder

# track best validation value
# save model during training

# full batch training likely is too slow (?)
"""
# Print model's state_dict
print("Model's state_dict:")
for param_tensor in DualInput_model.state_dict():
    print(param_tensor, "\t", DualInput_model.state_dict()[param_tensor].size())

# Print optimizer's state_dict
print("\nOptimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])
"""

In [20]:
if train_imgs:
    params = list(cnn.parameters()) + list(lstm.parameters())
else:
    params = lstm.parameters()

# MSELoss - regression, CrossEntropyLoss for labels or BCEWithLogitsLoss
criterion = torch.nn.CrossEntropyLoss()

if optimizer_type == 'Adam':
    optimizer = torch.optim.Adam(params, lr=learning_rate)

num_epochs = 10

# save current conf infos
#local_time = str(datetime.datetime.now().isoformat())
#name = local_time + '_configs'
#OmegaConf.save(cfg, "runs/"+name)

with tqdm(total=num_epochs, unit =" Episode", desc ="Progress") as pbar:
    
    # tracking results
    train_loss_lst, test_loss_lst = [], []
    train_acc_lst, test_acc_lst = [], []
    test_best_acc = 100
    writer = SummaryWriter()

    for epoch in range(num_epochs):
        
        train_loss, test_loss = 0, 0
        
        # choose training style
        if train_imgs:
            features = cnn(trainX_imgs)
        else:
            features = trainX_acts
            
        # model
        outputs = lstm(features)
        optimizer.zero_grad()

        # loss + optimize
        loss = criterion(outputs, trainY_acts)
        loss.backward()
        optimizer.step()
        
        with torch.no_grad():
            pass
                
        # display
        if epoch % 1 == 0:
            print("Epoch: %d - Train Loss: %1.3f" % (epoch, loss.item()))
        
        # tensorboard logs
        
        # plotting logs
            
        pbar.update(1)
    
    writer.close()
    print('Finished Training')

Progress:   0%|          | 0/10 [00:00<?, ? Episode/s]

Epoch: 0 - Train Loss: 0.487
Epoch: 1 - Train Loss: 0.490
Epoch: 2 - Train Loss: 0.468
Epoch: 3 - Train Loss: 0.467
Epoch: 4 - Train Loss: 0.458
Epoch: 5 - Train Loss: 0.444
Epoch: 6 - Train Loss: 0.437
Epoch: 7 - Train Loss: 0.431
Epoch: 8 - Train Loss: 0.421
Epoch: 9 - Train Loss: 0.411
Finished Training


### Plotting results

In [None]:
def recode_one_hot(data_predict):
    new_data_predict = []
    for i in data_predict:
        if str(i) != '[0. 0. 0. 0.]':
            new_data_predict.append(np.where(i==1)[0][0])
        else:
            new_data_predict.append(None)
    return new_data_predict

In [None]:
from matplotlib import pyplot as plt

lstm.eval()

train_imgs = True
if train_imgs:
    features = cnn(trainX_imgs)
    features = features #+ 0.01*trainX_acts
    train_predict = lstm(features)
else:
    train_predict = lstm(trainX_acts)

t = Variable(torch.Tensor([0]))  # threshold
train_predict = (train_predict > t).float() * 1

data_predict = train_predict.data.numpy()
dataY_plot = dataY_acts.data.numpy()
 
new_data_predict = recode_one_hot(data_predict)
new_dataY_plot = [np.where(r==1)[0][0] for r in dataY_plot]

with plt.style.context('ggplot'):
    plt.figure(figsize=(12, 7))
    plt.rcParams.update({'font.size': 16})
    plt.axvline(int(len(y_acts) * training_set_size), c='r', linestyle='--')

    plt.plot(new_dataY_plot, 'o', color='slategray', markersize=10, label="Ground truth")
    plt.plot(new_data_predict, 'o', markersize=5, label="Predicted")
    
    plt.legend(loc="lower right")
    plt.title('Action sequence prediction')
    plt.xlabel('Sequence')
    plt.ylabel('Action')
    plt.show()

### Plot loss and accuracy curves

In [None]:
# look into confusion matrix