In [1]:
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from random import seed
from sklearn import preprocessing
# Seed every RNG this notebook can draw from. Seeding only Python's `random`
# leaves the torch generator (used for the model's weight initialisation)
# and NumPy's global generator unseeded, so results would differ between runs.
seed(1)
np.random.seed(1)
torch.manual_seed(1)

In [18]:
class conv_lstm(nn.Module):
    """Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d front end feeding a stacked LSTM.

    The pooled feature map (batch, channels, height, width) is turned into a
    sequence for the LSTM by using each column as one time step whose feature
    vector is that column's ``channels * height`` values.

    NOTE(review): the previous version handed the 4-D feature map directly to
    ``nn.LSTM``, which only accepts 2-D/3-D input, so ``forward`` could not
    run. Using the width as the time axis is an assumption (it is the layout
    for which the default of 50 features equals 10 channels x 5 rows) --
    confirm against the reference paper.

    Args:
        lr: stored as ``self.lr``; not used anywhere inside this class.
        input_channels: number of channels of the input frames.
        batch_size: stored as ``self.batch_size``; ``forward`` takes the
            actual batch size from its input instead.
        lstm_hidden_size: hidden size of every LSTM layer.
        lstm_layer_size: number of stacked LSTM layers.
        lstm_input_size: number of features per time step fed to the LSTM.
            It must equal ``10 * (H - 3)`` for input frames of height ``H``
            (3x3 convolution and a 2-row pooling window, both with stride 1
            and no padding), i.e. the default of 50 matches ``H == 8``.
    """

    def __init__(self, lr, input_channels = 1, batch_size = 1, lstm_hidden_size = 512, lstm_layer_size = 2,
                 lstm_input_size = 50):
        super(conv_lstm, self).__init__()
        self.lr = lr
        self.input_channels = input_channels
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.batch_size = batch_size
        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_layer_size = lstm_layer_size
        self.lstm_input_size = lstm_input_size

        conv_out_channels = 10
        self.conv_layer = nn.Conv2d(in_channels = self.input_channels, out_channels = conv_out_channels,
                                    kernel_size = 3, stride = 1)
        # Uniform init in [-b, b] with b = 1/sqrt(weight.size(0)); for Conv2d
        # dimension 0 of the weight is the number of output channels.
        init_bound = float(1.0 / np.sqrt(self.conv_layer.weight.size(0)))
        nn.init.uniform_(self.conv_layer.weight, -init_bound, init_bound)
        nn.init.uniform_(self.conv_layer.bias, -init_bound, init_bound)
        self.conv_layer_batchNorm = nn.BatchNorm2d(conv_out_channels)
        # TODO: experiment with max pooling vs. average pooling, and check the
        # paper on whether to pool at all before the LSTM.
        self.pool = nn.MaxPool2d((2,4), stride=1)

        self.lstm_layer = nn.LSTM(input_size = self.lstm_input_size,
                                  hidden_size = self.lstm_hidden_size,
                                  num_layers  = self.lstm_layer_size,
                                  batch_first = True,
                                  # Inter-layer dropout has no effect on a single
                                  # layer and only makes PyTorch emit a warning.
                                  dropout = 0.1 if self.lstm_layer_size > 1 else 0.0)

        self.to(self.device)

    def forward(self, frames):
        """Run the convolutional front end and the LSTM on a batch of frames.

        Args:
            frames: tensor of shape (batch, input_channels, height, width),
                located on the same device as the model.

        Returns:
            ``(out, hn)`` as returned by ``nn.LSTM`` with ``batch_first=True``:
            ``out`` is (batch, width - 5, lstm_hidden_size) and ``hn`` is
            (lstm_layer_size, batch, lstm_hidden_size).

        Raises:
            ValueError: if ``frames`` is not 4-D, or if the pooled feature map
                does not yield ``lstm_input_size`` features per time step.
        """
        if frames.dim() != 4:
            raise ValueError("frames must have shape (batch, channels, height, width), got "
                             + str(tuple(frames.shape)))

        x = self.conv_layer(frames)
        x = self.conv_layer_batchNorm(x)
        x = F.relu(x)
        x = self.pool(x)

        # (batch, channels, height, width) -> (batch, width, channels * height)
        x = x.permute(0, 3, 1, 2).flatten(start_dim = 2)
        if x.size(-1) != self.lstm_layer.input_size:
            raise ValueError("LSTM expects " + str(self.lstm_layer.input_size)
                             + " features per time step but the pooled feature map provides "
                             + str(x.size(-1)) + "; adjust lstm_input_size or the frame height")

        # nn.LSTM initialises h_0 and c_0 to zeros (on the input's device and
        # dtype) when no state is passed, so explicit zero tensors are not needed.
        out, (hn, _) = self.lstm_layer(x)

        # TODO: might need a fully connected layer on top; check with the paper.
        return out, hn
        
        
        
        

In [19]:
# Instantiate the network; only `lr` is supplied, every other constructor
# argument keeps its default value.
model = conv_lstm(lr=1e-3)

In [20]:
# Bare expression as the last line of the cell: the notebook displays the
# module's repr (its registered sub-modules and their settings).
model

conv_lstm(
  (conv_layer): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv_layer_batchNorm): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=(2, 4), stride=1, padding=0, dilation=1, ceil_mode=False)
  (lstm_layer): LSTM(50, 512, num_layers=2, batch_first=True, dropout=0.1)
)