In [1]:
import torch
import os
from torch.utils.data import DataLoader
import torch.optim as optim
import numpy as np
import argparse
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
from pytorch_model_summary import summary
%load_ext tensorboard
%matplotlib notebook

In [11]:
%load_ext autoreload
%autoreload 2

from unit_vector_skeleton import *
from model import *
from train import *
from test import *
from visualization import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Parameters

In [3]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fixed seed so that stochastic steps driven by the torch / numpy global RNGs
# (e.g. DataLoader shuffling) repeat across "Restart Kernel -> Run All".
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

lr = 0.001        # learning rate handed to train(lr=...)
epochs = 30       # n_epochs for the main (non-fine-tuning) training run
seq_len = 10      # passed as input_n to the datasets, train() and test()
pred_len = 10     # passed as output_n to the datasets, train() and test()
batch_size = 64   # DataLoader batch size, also the batch dim of the summary dummy input
input_size = 51   # Model(input_size=...) and last dim of the summary dummy input

## Normalized datasets and dataloaders

In [4]:
# Build the three dataset splits (0, 1, 2) with identical window lengths.
# The list is evaluated left to right, so the splits are still read in the
# order 0 -> 1 -> 2.
dataset_h36m_train, dataset_h36m_test, dataset_extra = [
    Unit_Vector_Skeleton_Dataset(input_n=seq_len, output_n=pred_len, split=split_id)
    for split_id in (0, 1, 2)
]

Reading preprocessed subject 1, action walking, subaction 1
Reading preprocessed subject 1, action walking, subaction 2
Reading preprocessed subject 1, action eating, subaction 1
Reading preprocessed subject 1, action eating, subaction 2
Reading preprocessed subject 1, action smoking, subaction 1
Reading preprocessed subject 1, action smoking, subaction 2
Reading preprocessed subject 1, action discussion, subaction 1
Reading preprocessed subject 1, action discussion, subaction 2
Reading preprocessed subject 1, action directions, subaction 1
Reading preprocessed subject 1, action directions, subaction 2
Reading preprocessed subject 1, action greeting, subaction 1
Reading preprocessed subject 1, action greeting, subaction 2
Reading preprocessed subject 1, action phoning, subaction 1
Reading preprocessed subject 1, action phoning, subaction 2
Reading preprocessed subject 1, action posing, subaction 1
Reading preprocessed subject 1, action posing, subaction 2
Reading preprocessed subject 1

Reading preprocessed subject 11, action walking, subaction 1
Reading preprocessed subject 11, action walking, subaction 2
Reading preprocessed subject 11, action eating, subaction 1
Reading preprocessed subject 11, action eating, subaction 2
Reading preprocessed subject 11, action smoking, subaction 1
Reading preprocessed subject 11, action smoking, subaction 2
Reading preprocessed subject 11, action discussion, subaction 1
Reading preprocessed subject 11, action discussion, subaction 2
Reading preprocessed subject 11, action directions, subaction 1
Reading preprocessed subject 11, action directions, subaction 2
Reading preprocessed subject 11, action greeting, subaction 1
Reading preprocessed subject 11, action greeting, subaction 2
Reading preprocessed subject 11, action phoning, subaction 1
Reading preprocessed subject 11, action phoning, subaction 2
Reading preprocessed subject 11, action posing, subaction 1
Reading preprocessed subject 11, action posing, subaction 2
Reading prepro

In [5]:
# One DataLoader per split, all configured identically.
# NOTE(review): the test/extra loaders also shuffle and drop the last partial
# batch, so evaluation never sees every sample -- confirm this is intended.
train_loader, test_loader, extra_loader = [
    DataLoader(
        split_dataset,
        batch_size=batch_size,
        shuffle=True,
        pin_memory=True,
        drop_last=True,
    )
    for split_dataset in (dataset_h36m_train, dataset_h36m_test, dataset_extra)
]

# Model with longer 1d convolution

In [7]:
# Throw-away instance (left on the CPU) used only to print the layer and
# parameter summary for the configuration with kernel_x=input_size, kernel_y=5.
model = Model(
    kernel_type=0,
    kernel_x=input_size,
    kernel_y=5,
    num_layers=5,
    hidden_dim=64,
    input_size=input_size,
    seq_len=seq_len,
    pred_len=pred_len,
    time_hid=32,
    joints_hid=128,
)
# The summary is traced with an all-zero (batch_size, seq_len, input_size) input.
print(
    summary(
        model,
        torch.zeros((batch_size, seq_len, input_size)),
        show_hierarchical=True,
    )
)

-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Linear-1        [64, 10, 64]           3,328           3,328
       ConvBlock-2        [64, 10, 64]         115,090         115,090
       ConvBlock-3        [64, 10, 64]         115,090         115,090
       ConvBlock-4        [64, 10, 64]         115,090         115,090
       ConvBlock-5        [64, 10, 64]         115,090         115,090
       ConvBlock-6        [64, 10, 64]         115,090         115,090
          Conv1d-7        [64, 10, 64]             110             110
          Linear-8        [64, 10, 51]           3,315           3,315
Total params: 582,203
Trainable params: 582,203
Non-trainable params: 0
-----------------------------------------------------------------------



Model(
  (encoder): Linear(in_features=51, out_features=64, bias=True), 3,328 params
  (conv_blocks): Sequential(
    (0): ConvBlock(
      (n

## Train model

In [8]:
# Fresh model with the same hyper-parameters as the summary above, this time
# placed on the training device.
model_1d_long = Model(
    kernel_type=0,
    kernel_x=input_size,
    kernel_y=5,
    num_layers=5,
    hidden_dim=64,
    input_size=input_size,
    seq_len=seq_len,
    pred_len=pred_len,
    time_hid=32,
    joints_hid=128,
).to(device)

# Full training run; train() prints the run directory it saves into.
train(
    model_1d_long,
    train_loader,
    test_loader,
    device,
    lr=lr,
    n_epochs=epochs,
    root='./runs',
    input_n=seq_len,
    output_n=pred_len,
    autoreg=False,
)

Save data of the run in: ./runs\exp5
Run epoch: 0


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:49<00:00, 12.41it/s]


Run epoch: 1


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:28<00:00, 13.67it/s]


Run epoch: 2


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:29<00:00, 13.59it/s]


Run epoch: 3


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:27<00:00, 13.74it/s]


Run epoch: 4


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:33<00:00, 13.32it/s]


Run epoch: 5


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:28<00:00, 13.63it/s]


Run epoch: 6


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:26<00:00, 13.77it/s]


Run epoch: 7


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:29<00:00, 13.57it/s]


Run epoch: 8


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:22<00:00, 14.03it/s]


Run epoch: 9


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:21<00:00, 14.14it/s]


Run epoch: 10


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:48<00:00, 12.43it/s]


Run epoch: 11


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:28<00:00, 13.68it/s]


Run epoch: 12


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:21<00:00, 14.11it/s]


Run epoch: 13


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:28<00:00, 13.68it/s]


Run epoch: 14


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:27<00:00, 13.71it/s]


Epoch 00015: reducing learning rate of group 0 to 3.0000e-04.
Run epoch: 15


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:22<00:00, 14.08it/s]


Run epoch: 16


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:26<00:00, 13.80it/s]


Run epoch: 17


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:26<00:00, 13.79it/s]


Run epoch: 18


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:24<00:00, 13.93it/s]


Epoch 00019: reducing learning rate of group 0 to 9.0000e-05.
Run epoch: 19


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:22<00:00, 14.04it/s]


Run epoch: 20


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:25<00:00, 13.84it/s]


Run epoch: 21


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:23<00:00, 13.97it/s]


Epoch 00022: reducing learning rate of group 0 to 2.7000e-05.
Run epoch: 22


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:24<00:00, 13.90it/s]


Run epoch: 23


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:26<00:00, 13.82it/s]


Run epoch: 24


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:24<00:00, 13.95it/s]


Epoch 00025: reducing learning rate of group 0 to 8.1000e-06.
Run epoch: 25


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:28<00:00, 13.67it/s]


Run epoch: 26


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:24<00:00, 13.90it/s]


Run epoch: 27


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:25<00:00, 13.84it/s]


Epoch 00028: reducing learning rate of group 0 to 2.4300e-06.
Run epoch: 28


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:28<00:00, 13.68it/s]


Run epoch: 29


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:27<00:00, 13.73it/s]


## Load saved model

In [9]:
# Rebuild the architecture the checkpoint was trained with, then restore its weights.
model_1d_long = Model(kernel_type=0, kernel_x=input_size, kernel_y=5, num_layers=5, hidden_dim=64,
                 input_size=input_size, seq_len=seq_len, pred_len=pred_len, time_hid=32, joints_hid=128).to(device)

# Build the path portably: a literal "...\model.pt" contains the invalid escape
# sequence "\m" and only resolves to the intended file on Windows.
checkpoint_path = os.path.join("runs", "1d_long", "model.pt")

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
model_1d_long.load_state_dict(torch.load(checkpoint_path, map_location=device))

# The summary is produced on the CPU with a CPU dummy input; .to('cpu') moves the
# module in place, so it has to be moved back to `device` before evaluation.
print(summary(model_1d_long.to('cpu'), torch.zeros((batch_size, seq_len, input_size)), show_hierarchical=True))

-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Linear-1        [64, 10, 64]           3,328           3,328
       ConvBlock-2        [64, 10, 64]         115,090         115,090
       ConvBlock-3        [64, 10, 64]         115,090         115,090
       ConvBlock-4        [64, 10, 64]         115,090         115,090
       ConvBlock-5        [64, 10, 64]         115,090         115,090
       ConvBlock-6        [64, 10, 64]         115,090         115,090
          Conv1d-7        [64, 10, 64]             110             110
          Linear-8        [64, 10, 51]           3,315           3,315
Total params: 582,203
Trainable params: 582,203
Non-trainable params: 0
-----------------------------------------------------------------------



Model(
  (encoder): Linear(in_features=51, out_features=64, bias=True), 3,328 params
  (conv_blocks): Sequential(
    (0): ConvBlock(
      (n

## Test and visualize

In [10]:
# Evaluate on all three splits; the model is moved back to `device` first
# because the preceding summary cell left it on the CPU.
train_loss, test_loss, extra_loss, train_seq, test_seq, extra_seq = test(
    model=model_1d_long.to(device),
    train_loader=train_loader,
    test_loader=test_loader,
    extra_loader=extra_loader,
    dev=device,
    input_n=seq_len,
    output_n=pred_len,
)

# Report the per-split losses returned by test().
print("Train loss:", train_loss)
print("Valid loss:", test_loss)
print("Extra loss:", extra_loss)

Train loss: 0.034228477627038956
Valid loss: 0.04928528144955635
Extra loss: 0.15251636505126953


# Fine-tune the model in autoregressive mode

In [17]:
# Build the smaller-kernel architecture (kernel_x=7, kernel_y=3) and initialise it
# from the previously saved "1d" run so it can be fine-tuned.
model_1d_tune = Model(kernel_type=0, kernel_x=7, kernel_y=3, num_layers=5, hidden_dim=64,
                 input_size=input_size, seq_len=seq_len, pred_len=pred_len, time_hid=32, joints_hid=128).to(device)

# Build the path portably: a literal "...\model.pt" contains the invalid escape
# sequence "\m" and only resolves to the intended file on Windows.
checkpoint_path = os.path.join("runs", "1d", "model.pt")

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
model_1d_tune.load_state_dict(torch.load(checkpoint_path, map_location=device))

# The summary is produced on the CPU with a CPU dummy input; .to('cpu') moves the
# module in place, so it has to be moved back to `device` before training.
print(summary(model_1d_tune.to('cpu'), torch.zeros((batch_size, seq_len, input_size)), show_hierarchical=True))

-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Linear-1        [64, 10, 64]           3,328           3,328
       ConvBlock-2        [64, 10, 64]          54,162          54,162
       ConvBlock-3        [64, 10, 64]          54,162          54,162
       ConvBlock-4        [64, 10, 64]          54,162          54,162
       ConvBlock-5        [64, 10, 64]          54,162          54,162
       ConvBlock-6        [64, 10, 64]          54,162          54,162
          Conv1d-7        [64, 10, 64]             110             110
          Linear-8        [64, 10, 51]           3,315           3,315
Total params: 277,563
Trainable params: 277,563
Non-trainable params: 0
-----------------------------------------------------------------------



Model(
  (encoder): Linear(in_features=51, out_features=64, bias=True), 3,328 params
  (conv_blocks): Sequential(
    (0): ConvBlock(
      (n

## Train model

In [19]:
# Fine-tune the loaded weights for 10 epochs with autoreg=True.
# The model is moved back to `device` because the loading cell left it on the CPU.
train(
    model_1d_tune.to(device),
    train_loader,
    test_loader,
    device,
    lr=lr,
    n_epochs=10,
    root='./runs',
    input_n=seq_len,
    output_n=pred_len,
    autoreg=True,
)

Save data of the run in: ./runs\exp7
Run epoch: 0


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [12:52<00:00,  3.69it/s]


Run epoch: 1


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [14:59<00:00,  3.17it/s]


Run epoch: 2


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [16:58<00:00,  2.80it/s]


Run epoch: 3


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [16:36<00:00,  2.86it/s]


Run epoch: 4


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [18:37<00:00,  2.55it/s]


Run epoch: 5


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [21:52<00:00,  2.17it/s]


Run epoch: 6


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [22:00<00:00,  2.16it/s]


Epoch 00007: reducing learning rate of group 0 to 3.0000e-04.
Run epoch: 7


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [21:58<00:00,  2.16it/s]


Run epoch: 8


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [22:27<00:00,  2.11it/s]


Run epoch: 9


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [24:18<00:00,  1.95it/s]


# Reusing model with 1d convolution for longer prediction using sequence-to-sequence prediction

In [12]:
# Same three splits as before, but with output_n=25 instead of pred_len.
# The list is evaluated left to right, so the splits are read in order 0 -> 1 -> 2.
dataset_h36m_train_25, dataset_h36m_test_25, dataset_extra_25 = [
    Unit_Vector_Skeleton_Dataset(input_n=seq_len, output_n=25, split=split_id)
    for split_id in (0, 1, 2)
]

Reading preprocessed subject 1, action walking, subaction 1
Reading preprocessed subject 1, action walking, subaction 2
Reading preprocessed subject 1, action eating, subaction 1
Reading preprocessed subject 1, action eating, subaction 2
Reading preprocessed subject 1, action smoking, subaction 1
Reading preprocessed subject 1, action smoking, subaction 2
Reading preprocessed subject 1, action discussion, subaction 1
Reading preprocessed subject 1, action discussion, subaction 2
Reading preprocessed subject 1, action directions, subaction 1
Reading preprocessed subject 1, action directions, subaction 2
Reading preprocessed subject 1, action greeting, subaction 1
Reading preprocessed subject 1, action greeting, subaction 2
Reading preprocessed subject 1, action phoning, subaction 1
Reading preprocessed subject 1, action phoning, subaction 2
Reading preprocessed subject 1, action posing, subaction 1
Reading preprocessed subject 1, action posing, subaction 2
Reading preprocessed subject 1

Reading preprocessed subject 11, action walking, subaction 2
Reading preprocessed subject 11, action eating, subaction 1
Reading preprocessed subject 11, action eating, subaction 2
Reading preprocessed subject 11, action smoking, subaction 1
Reading preprocessed subject 11, action smoking, subaction 2
Reading preprocessed subject 11, action discussion, subaction 1
Reading preprocessed subject 11, action discussion, subaction 2
Reading preprocessed subject 11, action directions, subaction 1
Reading preprocessed subject 11, action directions, subaction 2
Reading preprocessed subject 11, action greeting, subaction 1
Reading preprocessed subject 11, action greeting, subaction 2
Reading preprocessed subject 11, action phoning, subaction 1
Reading preprocessed subject 11, action phoning, subaction 2
Reading preprocessed subject 11, action posing, subaction 1
Reading preprocessed subject 11, action posing, subaction 2
Reading preprocessed subject 11, action purchases, subaction 1
Reading prep

In [13]:
# One DataLoader per 25-frame split, all configured identically.
# NOTE(review): the test/extra loaders also shuffle and drop the last partial
# batch, so evaluation never sees every sample -- confirm this is intended.
train_loader_25, test_loader_25, extra_loader_25 = [
    DataLoader(
        split_dataset,
        batch_size=batch_size,
        shuffle=True,
        pin_memory=True,
        drop_last=True,
    )
    for split_dataset in (dataset_h36m_train_25, dataset_h36m_test_25, dataset_extra_25)
]

## Load saved model

In [15]:
# Rebuild the fine-tuned architecture and restore the weights saved under "1d_tune".
model_1d_tune = Model(kernel_type=0, kernel_x=7, kernel_y=3, num_layers=5, hidden_dim=64,
                 input_size=input_size, seq_len=seq_len, pred_len=pred_len, time_hid=32, joints_hid=128).to(device)

# Build the path portably: a literal "...\model.pt" contains the invalid escape
# sequence "\m" and only resolves to the intended file on Windows.
checkpoint_path = os.path.join("runs", "1d_tune", "model.pt")

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
model_1d_tune.load_state_dict(torch.load(checkpoint_path, map_location=device))

# The summary is produced on the CPU with a CPU dummy input; .to('cpu') moves the
# module in place, so it has to be moved back to `device` before evaluation.
print(summary(model_1d_tune.to('cpu'), torch.zeros((batch_size, seq_len, input_size)), show_hierarchical=True))

-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Linear-1        [64, 10, 64]           3,328           3,328
       ConvBlock-2        [64, 10, 64]          54,162          54,162
       ConvBlock-3        [64, 10, 64]          54,162          54,162
       ConvBlock-4        [64, 10, 64]          54,162          54,162
       ConvBlock-5        [64, 10, 64]          54,162          54,162
       ConvBlock-6        [64, 10, 64]          54,162          54,162
          Conv1d-7        [64, 10, 64]             110             110
          Linear-8        [64, 10, 51]           3,315           3,315
Total params: 277,563
Trainable params: 277,563
Non-trainable params: 0
-----------------------------------------------------------------------



Model(
  (encoder): Linear(in_features=51, out_features=64, bias=True), 3,328 params
  (conv_blocks): Sequential(
    (0): ConvBlock(
      (n

## Test and visualize

In [20]:
# Evaluate the fine-tuned model on the 25-frame loaders with autoreg=True.
# output_n=25 matches the output_n the *_25 datasets were built with.
train_loss, test_loss, extra_loss, train_seq, test_seq, extra_seq = test(
    model=model_1d_tune.to(device),
    train_loader=train_loader_25,
    test_loader=test_loader_25,
    extra_loader=extra_loader_25,
    dev=device,
    input_n=seq_len,
    output_n=25,
    autoreg=True,
)

# Report the per-split losses returned by test().
print("Train loss:", train_loss)
print("Valid loss:", test_loss)
print("Extra loss:", extra_loss)

Train loss: 0.07261426001787186
Valid loss: 0.09933522343635559
Extra loss: 0.18682482838630676


# Model trained for 5 epochs

In [21]:
# Throw-away instance (left on the CPU) used only to print the layer and
# parameter summary for the kernel_x=7, kernel_y=3 configuration.
model = Model(
    kernel_type=0,
    kernel_x=7,
    kernel_y=3,
    num_layers=5,
    hidden_dim=64,
    input_size=input_size,
    seq_len=seq_len,
    pred_len=pred_len,
    time_hid=32,
    joints_hid=128,
)
# The summary is traced with an all-zero (batch_size, seq_len, input_size) input.
print(
    summary(
        model,
        torch.zeros((batch_size, seq_len, input_size)),
        show_hierarchical=True,
    )
)

-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Linear-1        [64, 10, 64]           3,328           3,328
       ConvBlock-2        [64, 10, 64]          54,162          54,162
       ConvBlock-3        [64, 10, 64]          54,162          54,162
       ConvBlock-4        [64, 10, 64]          54,162          54,162
       ConvBlock-5        [64, 10, 64]          54,162          54,162
       ConvBlock-6        [64, 10, 64]          54,162          54,162
          Conv1d-7        [64, 10, 64]             110             110
          Linear-8        [64, 10, 51]           3,315           3,315
Total params: 277,563
Trainable params: 277,563
Non-trainable params: 0
-----------------------------------------------------------------------



Model(
  (encoder): Linear(in_features=51, out_features=64, bias=True), 3,328 params
  (conv_blocks): Sequential(
    (0): ConvBlock(
      (n

## Train model

In [22]:
# Fresh kernel_x=7 / kernel_y=3 model placed on the training device.
model_1d_5e = Model(
    kernel_type=0,
    kernel_x=7,
    kernel_y=3,
    num_layers=5,
    hidden_dim=64,
    input_size=input_size,
    seq_len=seq_len,
    pred_len=pred_len,
    time_hid=32,
    joints_hid=128,
).to(device)

# Short 5-epoch run with autoreg=False; train() prints the run directory it saves into.
train(
    model_1d_5e,
    train_loader,
    test_loader,
    device,
    lr=lr,
    n_epochs=5,
    root='./runs',
    input_n=seq_len,
    output_n=pred_len,
    autoreg=False,
)

Save data of the run in: ./runs\exp7
Run epoch: 0


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:42<00:00, 12.78it/s]


Run epoch: 1


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:41<00:00, 12.83it/s]


Run epoch: 2


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:38<00:00, 13.00it/s]


Run epoch: 3


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:44<00:00, 12.68it/s]


Run epoch: 4


100%|██████████████████████████████████████████████████████████████████████████████| 2847/2847 [03:42<00:00, 12.79it/s]


## Load saved model

In [24]:
# Rebuild the architecture of the 5-epoch run and restore the weights saved under "1d_5e".
model_1d_5e = Model(kernel_type=0, kernel_x=7, kernel_y=3, num_layers=5, hidden_dim=64,
                 input_size=input_size, seq_len=seq_len, pred_len=pred_len, time_hid=32, joints_hid=128).to(device)

# Build the path portably: a literal "...\model.pt" contains the invalid escape
# sequence "\m" and only resolves to the intended file on Windows.
checkpoint_path = os.path.join("runs", "1d_5e", "model.pt")

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
model_1d_5e.load_state_dict(torch.load(checkpoint_path, map_location=device))

# The summary is produced on the CPU with a CPU dummy input; .to('cpu') moves the
# module in place, so it has to be moved back to `device` before evaluation.
print(summary(model_1d_5e.to('cpu'), torch.zeros((batch_size, seq_len, input_size)), show_hierarchical=True))

-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Linear-1        [64, 10, 64]           3,328           3,328
       ConvBlock-2        [64, 10, 64]          54,162          54,162
       ConvBlock-3        [64, 10, 64]          54,162          54,162
       ConvBlock-4        [64, 10, 64]          54,162          54,162
       ConvBlock-5        [64, 10, 64]          54,162          54,162
       ConvBlock-6        [64, 10, 64]          54,162          54,162
          Conv1d-7        [64, 10, 64]             110             110
          Linear-8        [64, 10, 51]           3,315           3,315
Total params: 277,563
Trainable params: 277,563
Non-trainable params: 0
-----------------------------------------------------------------------



Model(
  (encoder): Linear(in_features=51, out_features=64, bias=True), 3,328 params
  (conv_blocks): Sequential(
    (0): ConvBlock(
      (n

## Test and visualize

In [25]:
# Evaluate the 5-epoch model on all three splits; it is moved back to `device`
# first because the preceding summary cell left it on the CPU.
train_loss, test_loss, extra_loss, train_seq, test_seq, extra_seq = test(
    model=model_1d_5e.to(device),
    train_loader=train_loader,
    test_loader=test_loader,
    extra_loader=extra_loader,
    dev=device,
    input_n=seq_len,
    output_n=pred_len,
)

# Report the per-split losses returned by test().
print("Train loss:", train_loss)
print("Valid loss:", test_loss)
print("Extra loss:", extra_loss)

Train loss: 0.045284055173397064
Valid loss: 0.05008590593934059
Extra loss: 0.14274415373802185
