In [1]:
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from random import seed
from sklearn import preprocessing
# Seed every random number generator imported above, not just Python's
# `random`: the layers defined below are initialised from torch's RNG, so
# seeding `random` alone leaves the model weights different on every run.
np.random.seed(1)
torch.manual_seed(1)
seed(1)  # kept last: it returns None, so the cell produces no stray output

In [5]:
class siamese(nn.Module):
    """Convolutional front-end followed by two stacked LSTMs and a small MLP head.

    Pipeline: two Conv2d -> BatchNorm2d -> ReLU -> AvgPool2d stages, then
    ``lstm_layer`` -> ``lstm_layer1``, then ``fc_layer`` -> LayerNorm -> ReLU ->
    ``fc_layer1`` which emits 10 features per LSTM batch entry.

    Args:
        lr: Stored on the instance; not used inside this class.
        input_channels: Number of channels of the input frames.
        batch_size: Stored on the instance; ``forward`` derives the actual
            batch size from the tensor it receives.
        lstm_hidden_size: Hidden width of both LSTMs and of ``fc_layer``.
        lstm_layer_size: Number of stacked layers inside each LSTM.
    """

    @staticmethod
    def _uniform_init(layer):
        """Fill ``layer``'s weight and bias from U(-b, b), b = 1/sqrt(weight.size(0))."""
        bound = 1.0/np.sqrt(layer.weight.data.size()[0])
        layer.weight.data.uniform_(-bound, bound)
        layer.bias.data.uniform_(-bound, bound)

    def __init__(self, lr=0.001, input_channels = 1, batch_size = 1, lstm_hidden_size = 512, lstm_layer_size = 2):
        super(siamese, self).__init__()
        self.lr = lr
        self.input_channels = input_channels
        # Device chosen once at construction; parameters are moved there below.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.batch_size = batch_size
        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_layer_size = lstm_layer_size

        # --- first convolutional stage ---
        self.conv_layer = nn.Conv2d(in_channels = self.input_channels, out_channels = 10,
                                    kernel_size = 3, stride = 1)
        self._uniform_init(self.conv_layer)
        self.conv_layer_batchNorm = nn.BatchNorm2d(10)
        # TODO: experiment with max pooling vs. average pooling.
        # TODO: open question - pool before the LSTM or feed the conv output
        # directly? Refer to the paper.
        self.pool = nn.AvgPool2d((2,4), stride=1)

        # --- second convolutional stage ---
        self.conv_layer1 = nn.Conv2d(in_channels = 10, out_channels = 10,
                                     kernel_size = 2, stride = 1)
        self._uniform_init(self.conv_layer1)
        self.conv_layer_batchNorm1 = nn.BatchNorm2d(10)
        self.pool1 = nn.AvgPool2d((2,2), stride=1)  # TODO: experiment with maxpool/avgpool

        # --- recurrent stage ---
        # input_size = 3 must equal the last dimension of the squeezed conv
        # output, so it depends entirely on the input frame size.
        self.lstm_layer = nn.LSTM(input_size = 3,
                                  hidden_size = self.lstm_hidden_size,
                                  num_layers  = self.lstm_layer_size,
                                  batch_first = True,
                                  dropout = 0.2)

        # Consumes the output of lstm_layer, whose feature width is
        # lstm_hidden_size (previously hard-coded to 512, which broke any
        # non-default hidden size).
        self.lstm_layer1 = nn.LSTM(input_size = self.lstm_hidden_size,
                                   hidden_size = self.lstm_hidden_size,
                                   num_layers  = self.lstm_layer_size,
                                   batch_first = True,
                                   dropout = 0.05)

        # --- fully connected head ---
        self.fc_layer = nn.Linear(self.lstm_hidden_size, self.lstm_hidden_size)
        self._uniform_init(self.fc_layer)

        # 10 output units: the length of the produced feature vector.
        self.fc_layer1 = nn.Linear(self.lstm_hidden_size, 10)
        self._uniform_init(self.fc_layer1)

        self.ln = nn.LayerNorm(self.lstm_hidden_size)

        self.to(self.device)

    def forward(self, frames):
        """Encode ``frames`` into a 10-dimensional feature vector per LSTM batch entry.

        Args:
            frames: Input tensor for ``conv_layer``; after both conv/pool stages
                and ``squeeze()`` it must be 3-D with a last dimension of 3.

        Returns:
            Tuple ``(fc_out, out, hn)``: the head output, the full output
            sequence of ``lstm_layer1``, and the final hidden state of
            ``lstm_layer1``.
        """
        x = F.relu(self.conv_layer_batchNorm(self.conv_layer(frames)))
        x = self.pool(x)

        x = F.relu(self.conv_layer_batchNorm1(self.conv_layer1(x)))
        x = self.pool1(x)

        # NOTE(review): squeeze() drops *every* size-1 dimension, so which axis
        # ends up as the LSTM batch axis depends on the input shape (with a
        # single input frame the batch axis itself is removed) - confirm intent.
        x = x.squeeze()
        batch_size = x.size(0)

        # Zero initial states allocated from x so they always share its device
        # and dtype, even if the module was moved after construction.
        state_shape = (self.lstm_layer_size, batch_size, self.lstm_hidden_size)
        hidden_state = x.new_zeros(state_shape)
        cell_state = x.new_zeros(state_shape)

        out, _ = self.lstm_layer(x, (hidden_state, cell_state))
        out, (hn, cn) = self.lstm_layer1(out, (hidden_state, cell_state))

        # Keep only the last time step of the sequence for the head.
        modified_out = out[:, -1, :]
        fc_out = self.fc_layer(modified_out)
        fc_out = self.ln(fc_out)
        fc_out = F.relu(fc_out)
        fc_out = self.fc_layer1(fc_out)
        return fc_out, out, hn

In [6]:
class siamese1(nn.Module):
    """Variant of the conv -> stacked-LSTM -> MLP encoder with partly default init.

    Same layer layout as the other encoder in this notebook, but here
    ``conv_layer`` and ``fc_layer1`` keep PyTorch's default initialisation;
    only ``conv_layer1`` and ``fc_layer`` are re-initialised uniformly.

    Args:
        lr: Stored on the instance; not used inside this class.
        input_channels: Number of channels of the input frames.
        batch_size: Stored on the instance; ``forward`` derives the actual
            batch size from the tensor it receives.
        lstm_hidden_size: Hidden width of both LSTMs and of ``fc_layer``.
        lstm_layer_size: Number of stacked layers inside each LSTM.
    """

    @staticmethod
    def _uniform_init(layer):
        """Fill ``layer``'s weight and bias from U(-b, b), b = 1/sqrt(weight.size(0))."""
        bound = 1.0/np.sqrt(layer.weight.data.size()[0])
        layer.weight.data.uniform_(-bound, bound)
        layer.bias.data.uniform_(-bound, bound)

    def __init__(self, lr=0.001, input_channels = 1, batch_size = 1, lstm_hidden_size = 512, lstm_layer_size = 2):
        super(siamese1, self).__init__()
        self.lr = lr
        self.input_channels = input_channels
        # Device chosen once at construction; parameters are moved there below.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.batch_size = batch_size
        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_layer_size = lstm_layer_size

        # --- first convolutional stage (default PyTorch initialisation) ---
        self.conv_layer = nn.Conv2d(in_channels = self.input_channels, out_channels = 10,
                                    kernel_size = 3, stride = 1)
        self.conv_layer_batchNorm = nn.BatchNorm2d(10)
        # TODO: experiment with max pooling vs. average pooling.
        # TODO: open question - pool before the LSTM or feed the conv output
        # directly? Refer to the paper.
        self.pool = nn.AvgPool2d((2,4), stride=1)

        # --- second convolutional stage ---
        self.conv_layer1 = nn.Conv2d(in_channels = 10, out_channels = 10,
                                     kernel_size = 2, stride = 1)
        self._uniform_init(self.conv_layer1)
        self.conv_layer_batchNorm1 = nn.BatchNorm2d(10)
        self.pool1 = nn.AvgPool2d((2,2), stride=1)  # TODO: experiment with maxpool/avgpool

        # --- recurrent stage ---
        # input_size = 3 must equal the last dimension of the squeezed conv
        # output, so it depends entirely on the input frame size.
        self.lstm_layer = nn.LSTM(input_size = 3,
                                  hidden_size = self.lstm_hidden_size,
                                  num_layers  = self.lstm_layer_size,
                                  batch_first = True,
                                  dropout = 0.2)

        # Consumes the output of lstm_layer, whose feature width is
        # lstm_hidden_size (previously hard-coded to 512, which broke any
        # non-default hidden size).
        self.lstm_layer1 = nn.LSTM(input_size = self.lstm_hidden_size,
                                   hidden_size = self.lstm_hidden_size,
                                   num_layers  = self.lstm_layer_size,
                                   batch_first = True,
                                   dropout = 0.05)

        # --- fully connected head ---
        self.fc_layer = nn.Linear(self.lstm_hidden_size, self.lstm_hidden_size)
        self._uniform_init(self.fc_layer)

        # 10 output units: the length of the produced feature vector.
        # Left with PyTorch's default initialisation.
        self.fc_layer1 = nn.Linear(self.lstm_hidden_size, 10)

        self.ln = nn.LayerNorm(self.lstm_hidden_size)

        self.to(self.device)

    def forward(self, frames):
        """Encode ``frames`` into a 10-dimensional feature vector per LSTM batch entry.

        Args:
            frames: Input tensor for ``conv_layer``; after both conv/pool stages
                and ``squeeze()`` it must be 3-D with a last dimension of 3.

        Returns:
            Tuple ``(fc_out, out, hn)``: the head output, the full output
            sequence of ``lstm_layer1``, and the final hidden state of
            ``lstm_layer1``.
        """
        x = F.relu(self.conv_layer_batchNorm(self.conv_layer(frames)))
        x = self.pool(x)

        x = F.relu(self.conv_layer_batchNorm1(self.conv_layer1(x)))
        x = self.pool1(x)

        # NOTE(review): squeeze() drops *every* size-1 dimension, so which axis
        # ends up as the LSTM batch axis depends on the input shape (with a
        # single input frame the batch axis itself is removed) - confirm intent.
        x = x.squeeze()
        batch_size = x.size(0)

        # Zero initial states allocated from x so they always share its device
        # and dtype, even if the module was moved after construction.
        state_shape = (self.lstm_layer_size, batch_size, self.lstm_hidden_size)
        hidden_state = x.new_zeros(state_shape)
        cell_state = x.new_zeros(state_shape)

        out, _ = self.lstm_layer(x, (hidden_state, cell_state))
        out, (hn, cn) = self.lstm_layer1(out, (hidden_state, cell_state))

        # Keep only the last time step of the sequence for the head.
        modified_out = out[:, -1, :]
        fc_out = self.fc_layer(modified_out)
        fc_out = self.ln(fc_out)
        fc_out = F.relu(fc_out)
        fc_out = self.fc_layer1(fc_out)
        return fc_out, out, hn

In [7]:
# Build one instance of each network variant with its default
# hyper-parameters; each constructor moves its own parameters to CUDA when
# available and to the CPU otherwise.
model_siamese, model_siamese1 = siamese(), siamese1()