### **Video Prediction Project - Lab CudaVision**
* Amit Kumar Rana
* Dhagash Desai
* Lina Hashem


### Table of Contents

* [1. Import Required Modules](#sec_1)
* [2. Datasets](#sec_2)
    * [2.1 Moving MNIST](#sec_2_1)
    * [2.2 KTH](#sec_2_2)
    * [2.3 Train/Test Dotaloader Statistics](#sec_2_3)
* [3. Models Architectures](#sec_3)
    * [3.1 Resnet18-style Encoder-Decoder](#sec_3_1)
    * [3.2 VGG19-style Encoder-Decoder](#sec_3_2)
    * [3.3 DCGAN-style Encoder-Decoder](#sec_3_3)
    * [3.4 ConvLSTM ](#sec_3_4)
    * [3.5 Models #Parameters Statistics](#sec_3_5)
* [4. Training Experiments](#sec_4)
    * [Section 2.1](#sec_4_1)
    * [Section 2.2](#sec_4_2)
* [5. Results](#sec_5)
    * [5.1 Results on MMNIST](#sec_5_1)
        * [5.1.1 Visualize Best Metrices](#sec_5_1_1)
        * [5.1.2 Visualize Output for Random Samples](#sec_5_1_2)
        * [5.1.3 Models Comparison](#sec_5_1_3)
    * [5.2 Results on KTH ](#sec_5_2)
        * [5.2.1 Visualize Best Metrices](#sec_5_2_1)
        * [5.2.2 Visualize Output for Random Samples](#sec_5_2_2)
        * [5.2.3 Models Comparison](#sec_5_2_3)
* [6. References](#sec_6)

### **1. Importing Required Module <a class="anchor" id="sec_1"></a>**

In [22]:
import os
import torch
from prettytable import PrettyTable
from utils.visualizations import save_grid_batch, save_gif_batch
from utils.utils import eval_dataset, count_model_params
from models.resnet import Resnet18Encoder, Resnet18Decoder
from models.vgg import VGGEncoder, VGGDecoder
from models.dcgan import DCGANEncoder, DCGANDecoder
from models.predictorLSTM import predictor

%load_ext autoreload
%autoreload 2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### **2. Datasets <a class="anchor" id="sec_2"></a>**

In [None]:
#instert directory tree

####  **2.1 Moving MNIST <a class="anchor" id="sec_2_1"></a>**

In [2]:
mmnist_test_loader = eval_dataset(dataset = "MMNIST", batch_size=20)


In [None]:
sample_batch = next(iter(mmnist_test_loader))
print("batch shape: (seq_len, batch_size, channels, height, width")
sample_batch.shape

In [None]:
save_grid_batch(sample_batch, nsamples=5, text = "mmnist_test", show = True)

In [None]:
save_gif_batch(sample_batch, nsamples =5, text = "mmnist_test", show =True)

####  **2.2 KTH <a class="anchor" id="sec_2_2"></a>**

In [None]:
kth_test_loader = eval_dataset(dataset = "KTH", batch_size=20)

In [None]:
sample_batch = next(iter(mmnist_test_loader))
print("batch shape: (seq_len, batch_size, channels, height, width")
sample_batch.shape

In [None]:
save_grid_batch(sample_batch, nsamples=5, text = "kth_test", show = True)

In [None]:
save_gif_batch(sample_batch, nsamples =5, text = "kth_test", show =True)

### **3. Models Architectures <a class="anchor" id="sec_3"></a>**

####  **3.1 Resnet18-style Encoder-Decoder <a class="anchor" id="sec_3_1"></a>**

In [26]:
resnet_encoder = Resnet18Encoder()
resnet_encoder = resnet_encoder.to(device)
resnet_encoder


Resnet18Encoder(
  (c1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.2, inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (layer1): Sequential(
    (0): ResidualBlockEncoder(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (lrelu): LeakyReLU(negative_slope=0.2, inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResidualBlockEncoder(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, ep

In [27]:
resnet_decoder = Resnet18Decoder(skip_connection=False)
resnet_decoder = resnet_decoder.to(device)
resnet_decoder

Resnet18Decoder(
  (layer3): Sequential(
    (0): ResidualBlockDecoder(
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (lrelu): LeakyReLU(negative_slope=0.2, inplace=True)
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResidualBlockDecoder(
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (lrelu): LeakyReLU(negative_slope=0.2, inplace=True)
      (conv1): ConvTranspose2d(256, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_runni

####  **3.2 VGG19-style Encoder-Decoder <a class="anchor" id="sec_3_2"></a>**

In [18]:
vgg_encoder = VGGEncoder()
vgg_encoder = vgg_encoder.to(device)
vgg_encoder

VGGEncoder(
  (vgg_block1): Sequential(
    (0): ConvBlock(
      (module): Sequential(
        (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.2, inplace=True)
      )
    )
    (1): ConvBlock(
      (module): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.2, inplace=True)
      )
    )
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block2): Sequential(
    (0): ConvBlock(
      (module): Sequential(
        (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.2

In [19]:
vgg_decoder = VGGDecoder(skip_connection=False)
vgg_decoder = vgg_decoder.to(device)
vgg_decoder

VGGDecoder(
  (vgg_block_dec3): Sequential(
    (0): ConvBlock(
      (module): Sequential(
        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.2, inplace=True)
      )
    )
    (1): ConvBlock(
      (module): Sequential(
        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.2, inplace=True)
      )
    )
    (2): ConvTransposeBlock(
      (module): Sequential(
        (0): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.2, inplace=True)
      )
    )
  )
  (vgg_block_dec2): Sequential(
    (0): ConvBlock(


####  **3.3 DCGAN-style Encoder-Decoder <a class="anchor" id="sec_3_3"></a>**

In [20]:
dcgan_encoder = DCGANEncoder()
dcgan_encoder = dcgan_encoder.to(device)
dcgan_encoder

DCGANEncoder(
  (block1): Sequential(
    (0): ConvBlock(
      (module): Sequential(
        (0): Conv2d(1, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.2, inplace=True)
      )
    )
    (1): ConvBlock(
      (module): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.2, inplace=True)
      )
    )
  )
  (block2): Sequential(
    (0): ConvBlock(
      (module): Sequential(
        (0): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.2, inplace=True)
      )
    )
    (1): ConvBlock(
      (module): Sequential(
        (0):

In [21]:
dcgan_decoder = DCGANDecoder(skip_connection=False)
dcgan_decoder = dcgan_decoder.to(device)
dcgan_decoder

DCGANDecoder(
  (block3): Sequential(
    (0): ConvTransposeBlock(
      (module): Sequential(
        (0): ConvTranspose2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.2, inplace=True)
      )
    )
    (1): ConvTransposeBlock(
      (module): Sequential(
        (0): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.2, inplace=True)
      )
    )
  )
  (block2): Sequential(
    (0): ConvTransposeBlock(
      (module): Sequential(
        (0): ConvTranspose2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.2, inplace=True)
      )
    

####  **3.4 ConvLSTM <a class="anchor" id="sec_3_4"></a>**

In [23]:
predictor_lstm = predictor(batch_size= 20, device = device, mode="zeros", num_layers=2)
predictor_lstm

predictor(
  (convlstm1): predictor_lstm(
    (conv_lstms): ModuleList(
      (0): ConvLSTMCell(
        (conv): Conv2d(128, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      )
      (1): ConvLSTMCell(
        (conv): Conv2d(128, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      )
    )
  )
  (convlstm2): predictor_lstm(
    (conv_lstms): ModuleList(
      (0): ConvLSTMCell(
        (conv): Conv2d(256, 512, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      )
      (1): ConvLSTMCell(
        (conv): Conv2d(256, 512, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      )
    )
  )
  (convlstm3): predictor_lstm(
    (conv_lstms): ModuleList(
      (0): ConvLSTMCell(
        (conv): Conv2d(512, 1024, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      )
      (1): ConvLSTMCell(
        (conv): Conv2d(512, 1024, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      )
    )
  )
)

####  **3.5 Models Paramters Statistics <a class="anchor" id="sec_3_5"></a>**

In [28]:
resnet_encoder_params = count_model_params(resnet_encoder)
resnet_decoder_params = count_model_params(resnet_decoder)
resnet_decoder_skip = Resnet18Decoder(skip_connection=True).to(device)
resnet_decoder_skip_params = count_model_params(resnet_decoder_skip)

vgg_encoder_params = count_model_params(vgg_encoder)
vgg_decoder_params = count_model_params(vgg_decoder)
vgg_decoder_skip = VGGDecoder(skip_connection=True).to(device)
vgg_decoder_skip_params = count_model_params(vgg_decoder_skip)

dcgan_encoder_params = count_model_params(dcgan_encoder)
dcgan_decoder_params = count_model_params(dcgan_decoder)
dcgan_decoder_skip = DCGANDecoder(skip_connection=True).to(device)
dcgan_decoder_skip_params = count_model_params(dcgan_decoder_skip)

convlstm_2_params = count_model_params(predictor_lstm)
predictor_lstm_3 = predictor(batch_size= 20, device = device, mode="zeros", num_layers=3)
convlstm_3_params = count_model_params(predictor_lstm_3)


resnet_params_2 = count_model_params(resnet_encoder) + count_model_params(resnet_decoder) + count_model_params(predictor_lstm)

In [30]:
params_table = PrettyTable()
params_table.field_names = ["Model Type", "#layers in ConvLSTM", "Skip Connection", "num parametes"]
params_table.add_row(["Resnet18", 2, False, resnet_encoder_params+resnet_decoder_params+convlstm_2_params])
params_table.add_row(["Resnet18", 2, True, resnet_encoder_params+resnet_decoder_skip_params+convlstm_2_params])
params_table.add_row(["Resnet18", 3, False, resnet_encoder_params+resnet_decoder_params+convlstm_3_params])
params_table.add_row(["Resnet18", 3, True, resnet_encoder_params+resnet_decoder_skip_params+convlstm_3_params])

params_table.add_row(["VGG19", 2, False, vgg_encoder_params+vgg_decoder_params+convlstm_2_params])
params_table.add_row(["VGG19", 2, True, vgg_encoder_params+vgg_decoder_skip_params+convlstm_2_params])
params_table.add_row(["VGG19", 3, False, vgg_encoder_params+vgg_decoder_params+convlstm_3_params])
params_table.add_row(["VGG19", 3, True, vgg_encoder_params+vgg_decoder_skip_params+convlstm_3_params])

params_table.add_row(["DCGAN", 2, False, dcgan_encoder_params+dcgan_decoder_params+convlstm_2_params])
params_table.add_row(["DCGAN", 2, False, dcgan_encoder_params+dcgan_decoder_skip_params+convlstm_2_params])
params_table.add_row(["DCGAN", 3, False, dcgan_encoder_params+dcgan_decoder_params+convlstm_3_params])
params_table.add_row(["DCGAN", 3, False, dcgan_encoder_params+dcgan_decoder_skip_params+convlstm_3_params])

params_table

Model Type,#layers in ConvLSTM,Skip Connection,num parametes
Resnet18,2,False,42891969
Resnet18,2,True,52484097
Resnet18,3,False,60441025
Resnet18,3,True,70033153
VGG19,2,False,39042113
VGG19,2,True,40410177
VGG19,3,False,56591169
VGG19,3,True,57959233
DCGAN,2,False,38148097
DCGAN,2,False,39516161


### **5. Results <a class="anchor" id="sec_5"></a>**

#### **5.1 Results on MMNIST <a class="anchor" id="sec_5_1"></a>**

##### **5.1.1 Visualizae Best Metrices <a class="anchor" id="sec_5_1_1"></a>**

In [None]:
saved_model_path = "/home/dhagash/Projects/video-prediction/checkpoints/mmnist_resnet_2_lpips_resnet_convlstm/model_90.pth"
evaluation = eval(model_path = saved_model_path)

In [None]:
evaluation.visualize_best_metrices()

##### **5.1.2 Visualizae Output for Random Samples <a class="anchor" id="sec_5_1_2"></a>**

##### **5.1.3 Models Comparison <a class="anchor" id="sec_5_1_3"></a>**

In [None]:
saved_model_path = "/home/dhagash/Projects/video-prediction/checkpoints/mmnist_resnet_2_lpips_resnet_convlstm/model_90.pth"
evaluation = eval(model_path = saved_model_path)

In [None]:
evaluation.visualize_best_metrices()

#### **5.2 Results on KTH <a class="anchor" id="sec_5_2"></a>**

##### **5.2.1 Visualizae Best Metrices <a class="anchor" id="sec_5_2_1"></a>**

##### **5.2.2 Visualizae Output for Random Samples <a class="anchor" id="sec_5_2_2"></a>**

##### **5.2.3 Models Comparison <a class="anchor" id="sec_5_2_3"></a>**

### **6. References <a class="anchor" id="sec_6"></a>**