### Pip installs

In [2]:
!apt-get install --no-install-recommends ffmpeg && pip install ffmpeg scikit-video

Reading package lists... Done
Building dependency tree       
Reading state information... Done
ffmpeg is already the newest version (7:3.4.8-0ubuntu0.2).
0 upgraded, 0 newly installed, 0 to remove and 21 not upgraded.
Collecting ffmpeg
  Downloading https://files.pythonhosted.org/packages/f0/cc/3b7408b8ecf7c1d20ad480c3eaed7619857bf1054b690226e906fdf14258/ffmpeg-1.4.tar.gz
Collecting scikit-video
[?25l  Downloading https://files.pythonhosted.org/packages/b1/a6/c69cad508139a342810ae46e946ebb3256aa6e42f690d901bb68f50582e3/scikit_video-1.1.11-py2.py3-none-any.whl (2.3MB)
[K     |████████████████████████████████| 2.3MB 14.4MB/s 
Building wheels for collected packages: ffmpeg
  Building wheel for ffmpeg (setup.py) ... [?25l[?25hdone
  Created wheel for ffmpeg: filename=ffmpeg-1.4-cp36-none-any.whl size=6083 sha256=681034ddf4e20d5973b2c3f932cf3bffdc0228fd6b219cb1051e7c67b7f3fbf9
  Stored in directory: /root/.cache/pip/wheels/b6/68/c3/a05a35f647ba871e5572b9bbfc0b95fd1c6637a2219f959e7a
Suc

## Import Packages

In [3]:
import warnings
warnings.filterwarnings('ignore')
import cv2
import torch
import torchvision
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as t
import torchvision.models as pretrained_model
import torchvision.datasets as ds
from torchvision.utils import make_grid,save_image
from torch.utils.data import DataLoader,Dataset
import torch.nn.init as init
from torch.autograd import Variable
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import skvideo.io
import os
from glob import glob
import random
from math import exp
import imageio
import pickle


In [4]:
torch.manual_seed(2805)
np.random.seed(1310)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dataset Processing


In [5]:
%matplotlib inline
def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    #inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


    

#### Extract Images after every 2 frames

In [6]:
'''
UCF101 Dataset Preparation 
Source: https://github.com/xudejing/video-clip-order-prediction/blob/master/datasets/ucf101.py
'''
class UCF101Dataset(Dataset):
    """UCF101 dataset for recognition. The class index start from 0.

    Args:
        root_dir (string): Directory with videos and splits.
        train (bool): train split or test split.
        clip_len (int): number of frames in clip, 
        transforms_ (object): composed transforms which takes in PIL image and output tensors.
        test_sample_num： number of clips sampled from a video. 1 for clip accuracy.
    """

    def __init__(self, root_dir, annot_dir, clip_len, split='1', train=True, transforms_=None):
        self.root_dir = root_dir
        self.annot_dir= annot_dir
        self.clip_len = clip_len
        self.split = split
        self.train = train
        self.transformation = transforms_
        class_idx_path = os.path.join(self.annot_dir, 'classInd.txt')
        self.class_idx2label = pd.read_csv(class_idx_path, header=None, sep=' ').set_index(0)[1]
        self.class_label2idx = pd.read_csv(class_idx_path, header=None, sep=' ').set_index(1)[0]

        if self.train:
            train_split_path = os.path.join(self.annot_dir, 'trainlist0' + self.split + '.txt')
            self.train_split = pd.read_csv(train_split_path, header=None, sep=' ')[0]
        else:
            test_split_path = os.path.join(self.annot_dir, 'testlist0' + self.split + '.txt')
            self.test_split = pd.read_csv(test_split_path, header=None)[0]
        print('Use split' + self.split)

    def __len__(self):
        if self.train:
            return len(self.train_split)
        else:
            return len(self.test_split)

    def __getitem__(self, idx):
        """
        Returns:
            clip (tensor): [time x channel  x height x width]
            class_idx (tensor): class index, [0-100]
        """
        if self.train:
            videoname = self.train_split[idx]
        else:
            videoname = self.test_split[idx]
        
        class_idx = self.class_label2idx[videoname[:videoname.find('/')]]
        filename = os.path.join(self.root_dir, videoname)
        videodata = skvideo.io.vread(filename)
        length, height, width, channel = videodata.shape
        
        if self.train:
            frames_all = []
            for i in np.linspace(self.clip_len/2,self.clip_len*2.5,self.clip_len):
                frame_start = int(i)
                frame = videodata[frame_start: frame_start + 1].squeeze(0)
                if self.transformation:
                    frame_transform = self.transformation(frame)  # tensor [C x H x W]
                    frames_all.append(frame_transform)
                else:
                    frames_all.append(frame)
            clip = torch.stack(frames_all)

            return clip, torch.tensor(int(class_idx))
        
        else:
            frames_all = []
            for i in np.linspace(self.clip_len/2,self.clip_len*2.5,self.clip_len):
                frame_start = int(i)
                frame = videodata[frame_start: frame_start + 1].squeeze(0)
                if self.transformation:
                    frame_transform = self.transformation(frame)  # tensor [C x H x W]
                    frames_all.append(frame_transform)
                else:
                    frames_all.append(frame)
            clip = torch.stack(frames_all)

            return clip, torch.tensor(int(class_idx))


#### Create Test Data GIF

In [19]:
def gt_resizer(gt):
    gt_resized=[]
    for i in range(gt.size(1)):
        frame = gt[:,i,:,:,:]
        resized_frame = F.interpolate(frame,size=(112,112), mode = 'bilinear')
        gt_resized.append(resized_frame)
    gt_clip = torch.stack(gt_resized,1)
    
    return gt_clip


def create_gif(clips, gif_file_name):
    images = []
    for i in range(3):
        img_file_name = "/content/drive/My Drive/CudaLAB/P R O J E C T/gifs/images/img.jpg"
        img = clips[i,:,:,:]
        save_image(img,img_file_name)
        images.append(imageio.imread(img_file_name))
    
    imageio.mimsave(gif_file_name,images,duration=0.5)
    print("GIF has been saved.")

def gen_gif(ground_truth,batch_idx,prediction=False):
    print(ground_truth.shape)
    for i in range(ground_truth.size(0)):
        clip = ground_truth[i,:,:,:,:]
        if prediction:
            gt_gif_saving_path = os.path.join("/content/drive/My Drive/CudaLAB/P R O J E C T/gifs_predicted",\
                                          "batch_%d_predicted_%d.gif"%(batch_idx,i))
        else: 
            gt_gif_saving_path = os.path.join("/content/drive/My Drive/CudaLAB/P R O J E C T/gifs",\
                                          "batch_%d_gt_%d.gif"%(batch_idx,i))
        create_gif(clip,gt_gif_saving_path)


## Pre-trained Resnet

In [8]:
class PreTrainedResnet(nn.Module):
    def __init__(self):
        super(PreTrainedResnet,self).__init__()
        self.resnet = pretrained_model.resnet18(pretrained= True)
        self.set_forward_hook()
    
    def hook(self,module, input, output):
        setattr(module, "_value_hook", output)

    def set_forward_hook(self):
        for name,module in self.resnet.named_modules():
            for i in range(5):
                if(name== 'layer'+str(i)):
                    module.register_forward_hook(self.hook)
        self.resnet.conv1.register_forward_hook(self.hook) # Register hook in that 7x7 conv at the beginning         

    def forward(self,x):
        res_y = self.resnet(x)
        output_list =[]
        ## Get the value from the hook of 7x7 conv layer
        out_conv = self.resnet.conv1._value_hook
        output_list.append(out_conv)
        for name,module in self.resnet.named_modules():
            for i in range(5):
                if(name== 'layer'+str(i)):
                    name_out = module._value_hook
                    output_list.append(name_out)
        return output_list


## Convolutional GRU

In [9]:
# https://github.com/jacobkimmel/pytorch_convgru/blob/master/convgru.py
class ConvGRUCell(nn.Module):
    def __init__(self, in_ch, out_ch, kernel_size):
        super().__init__()
        self.input_size  = in_ch
        self.hidden_size = out_ch
        self.kernelSize  = kernel_size
        self.padding     = self.kernelSize // 2
        
        self.reset_gate  = nn.Conv2d(in_channels= self.input_size + self.hidden_size, 
                                     out_channels= self.hidden_size, kernel_size= self.kernelSize, padding= self.padding)
        self.update_gate = nn.Conv2d(in_channels= self.input_size + self.hidden_size, 
                                     out_channels= self.hidden_size, kernel_size= self.kernelSize, padding= self.padding)
        self.output_gate = nn.Conv2d(in_channels= self.input_size + self.hidden_size, 
                                     out_channels= self.hidden_size, kernel_size= self.kernelSize, padding= self.padding) # check input channel

        ## Initialize weight by orthogonal initializer
        init.orthogonal_(self.reset_gate.weight)
        init.orthogonal_(self.update_gate.weight)
        init.orthogonal_(self.output_gate.weight)

        init.constant_(self.reset_gate.bias,0.)
        init.constant_(self.update_gate.bias, 0.)
        init.constant_(self.output_gate.bias, 0.)


    def forward(self,input_data,prev_state=None):
        self.batch_size = input_data.size(0)
        self.height = input_data.size(2)
        self.width = input_data.size(3)
        if prev_state is None:
            prev_state = Variable(torch.zeros((self.batch_size,self.hidden_size,self.height,self.width))).to(device)

        # [batch, channel, height, width]
        stacked_data = torch.cat([input_data, prev_state],dim =1).to(device)
        update = torch.sigmoid(self.update_gate(stacked_data))
        reset = torch.sigmoid(self.reset_gate(stacked_data))
        candidate = torch.cat([input_data, prev_state * reset],dim=1)
        output = torch.tanh(self.output_gate(candidate))
        new_state = prev_state * (1 - update) + output * update

        return new_state


class ConvGruModel(nn.Module):
    def __init__(self,input_ch, output_ch, kernel_sizes,num_layer):
        super(ConvGruModel,self).__init__()
        self.input_ch = input_ch
        ## If output_ch and kernel_sizes are list, their length==num_layer
        if type(output_ch) != list:
            self.output_ch = [output_ch]* num_layer       
            
        else: 
            assert len(output_ch) == num_layer,' `output_ch` must have the same length as num_layer'
            self.output_ch = output_ch

        if type(kernel_sizes) != list:
            self.kernel_sizes = [kernel_sizes] * num_layer
        else:
            assert len(kernel_sizes) == num_layer, ' `kernel_sizes` must have the same length as num_layer'
            self.kernel_sizes = kernel_sizes

        self.num_layer = num_layer
        cells=[]
        ## Create the ConvGRU Model ##
        for i in range(self.num_layer):
            if i == 0: 
                in_ch = self.input_ch
            else:
                in_ch = self.output_ch[i-1]
            cell = ConvGRUCell(in_ch, self.output_ch[i], self.kernel_sizes[i]).to(device)
            name = "ConvGRUCell_"+str(i).zfill(2)
            setattr(self,name,cell)

            cells.append(getattr(self,name))
        self.cells = cells

    def forward(self,x, hidden=None ):
        if not hidden: 
            hidden =[None]* self.num_layer
        updated_hidden = []
        x_data = x

        for i in range(self.num_layer):
            cell = self.cells[i]
            hidden_cell = hidden[i]
            out_hidden_cell = cell(x_data,hidden_cell)
            
            updated_hidden.append(out_hidden_cell)
            x_data = out_hidden_cell
        
        return updated_hidden



## Location Dependent Convolution

In [10]:
class LocationDependentConv(nn.Conv2d): #locationAwareConv2d
    def __init__(self,gradient,width,height,in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
        super().__init__(in_channels, out_channels, kernel_size, stride=stride, padding=(kernel_size-2)//2+1, dilation=dilation, groups=groups, bias=bias)
        self.locationBias=nn.Parameter(torch.zeros(width,height,3)).to(device) #w->width, h-> height
        self.locationEncode=torch.tensor(torch.ones(width,height,3)).to(device)
        if gradient:
            for i in range(width):
                self.locationEncode[i,:,1]=self.locationEncode[:,i,0]=(i/float(width-1))
    def forward(self,inputs):
        b=self.locationBias*self.locationEncode
        out = super().forward(inputs)+b[:,:,0]+b[:,:,1]+b[:,:,2]
        return out



## Intermediate Module of the Model

In [11]:
def intermediate_phase(in_data,LDC=True):
    if LDC: 
        loc_conv = LocationDependentConv(True, in_data.size(2),in_data.size(3), in_data.size(1),64,1).to(device)
        out1 = loc_conv(in_data)
        
    else:
        ##1x1 conv
        conv1 = nn.Conv2d(in_channels=64, out_channels=64,kernel_size=1).to(device)
        out1 = conv1(in_data)

    conv_gru_model = ConvGruModel(input_ch= in_data.size(1), output_ch=64 , kernel_sizes=[3,5,7], num_layer=3).to(device)
    
    convgru_out =conv_gru_model(in_data)
    final_cg_out = convgru_out[-1]  
    output = torch.cat([out1,final_cg_out],dim=1)
    
    return output


## Refinement Module

In [13]:
class RefinementModule(nn.Module):
    def __init__(self):
        super(RefinementModule,self).__init__()
        self.processing_layer= nn.Sequential(nn.ReLU(), nn.Conv2d(in_channels=512,out_channels=1024,kernel_size=3,padding=1),
                                             nn.PixelShuffle(2),
                                             nn.Conv2d(in_channels=256,out_channels=64,kernel_size=3, padding=1))
        
    def forward(self,x):
        out = self.processing_layer(x)
        return out


## Our Model


In [14]:
class OurModel(nn.Module):
    def __init__(self):
        super(OurModel,self).__init__()
        self.myResNet = PreTrainedResnet().to(device)
        self.refine_res_out = RefinementModule().to(device)
        self.decoder = nn.Sequential(nn.ReLU(), nn.BatchNorm2d(192),
                                        nn.Conv2d(in_channels=192,out_channels=1024,kernel_size=3,padding=1),
                                        nn.PixelShuffle(2), 
                                        nn.Conv2d(in_channels=256,out_channels=64,kernel_size=3, padding=1))
        
        
        self.last = nn.Conv2d(in_channels=192,out_channels=3,kernel_size=1)

    def forward(self,x):
        model_output = []
        #print("x.shape: ",x.shape) # torch.Size([8, 3, 3, 224, 224])

        for i in range(x.size(1)): 
            x_new = x[:,i,:,:,:]
            #print("Input shape: ", x_new.shape)   # torch.Size([3, 3, 224, 224])    
            ### Pre-trained ResNet
            res_output = self.myResNet(x_new)
            
            #res_output = res_output
            r1,r2,r3,r4,r5 = res_output 
            #print(r1.shape)# r1.shape:  torch.Size([3, 64, 112, 112]), 
            # r2.shape:  torch.Size([3, 64, 56, 56])            
            #r3.shape:  torch.Size([3, 128, 28, 28])
            #r4.shape:  torch.Size([3, 256, 14, 14])
            #r5.shape:  torch.Size([3, 512, 7, 7])
            
            ### Convolutional GRU
            cg1 = intermediate_phase(r1,False) 
            cg2 = intermediate_phase(r2,False) 
            cg3 = intermediate_phase(r3) 
            cg4 = intermediate_phase(r4) 

            ### Decoder of the Network
            tail1 = self.refine_res_out(r5)
        
            concat_data = torch.cat([tail1,cg4],dim=1)
            tail2 = self.decoder(concat_data) 
        
            concat_data2 = torch.cat([tail2,cg3],dim=1)     
            tail3 = self.decoder(concat_data2) 
        
            concat_data3 = torch.cat([tail3,cg2],dim=1) 
            tail4 = self.decoder(concat_data3) 
  
            concat_data4 = torch.cat([tail4,cg1], dim=1)       
            out = self.last(concat_data4)

            model_output.append(out) 
        
        prediction = torch.stack(model_output,1)
        

        return prediction



## UTILITIES



##### Loss Functions


In [15]:
#######     SSIM Loss    #######
def gaussian(window_size, sigma):
    gauss = torch.Tensor([exp(-(x - window_size//2)**2/float(2*sigma**2)) for x in range(window_size)])
    return gauss/gauss.sum()

def create_window(window_size, channel):
    _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
    _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
    window = torch.Tensor(_2D_window.expand(channel, 1, window_size, window_size).contiguous())
    return window

def _ssim(img1, img2, window, window_size, channel, size_average = True):
    mu1 = F.conv2d(img1, window, padding = window_size//2, groups = channel)
    mu2 = F.conv2d(img2, window, padding = window_size//2, groups = channel)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1*mu2

    sigma1_sq = F.conv2d(img1*img1, window, padding = window_size//2, groups = channel) - mu1_sq
    sigma2_sq = F.conv2d(img2*img2, window, padding = window_size//2, groups = channel) - mu2_sq
    sigma12 = F.conv2d(img1*img2, window, padding = window_size//2, groups = channel) - mu1_mu2

    C1 = 0.01**2
    C2 = 0.03**2

    ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2))

    if size_average:
        return ssim_map.mean()
    else:
        return ssim_map.mean(1).mean(1).mean(1)

class SSIM(torch.nn.Module):
    def __init__(self, window_size = 11, size_average = True):
        super(SSIM, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.channel = 1
        self.window = create_window(window_size, self.channel)

    def forward(self, img1, img2):
        (_, channel, _, _) = img1.size()

        if channel == self.channel and self.window.data.type() == img1.data.type():
            window = self.window
        else:
            window = create_window(self.window_size, channel)
            
            if img1.is_cuda:
                window = window.cuda(img1.get_device())
            window = window.type_as(img1)
            
            self.window = window
            self.channel = channel


        return _ssim(img1, img2, window, self.window_size, channel, self.size_average)

def ssim(img1, img2, window_size = 11, size_average = True):
    (_, channel, _, _) = img1.size()
    window = create_window(window_size, channel)
    
    if img1.is_cuda:
        window = window.cuda(img1.get_device())
    window = window.type_as(img1)
    
    return _ssim(img1, img2, window, window_size, channel, size_average)


########        DSSIM Loss      ###########

def dssim_loss(ground_truth, prediction): 
    dssim_val=0
    ssim_loss = SSIM(window_size=11)
    ## For Loop over every Batch
    batch_size = ground_truth.size(0)
    for i in range(batch_size):
        ssim_val = ssim_loss(ground_truth[i,:,:,:,:], prediction[i,:,:,:,:])
        dssim_loss = ((1-ssim_val)/2)
        dssim_val += dssim_loss
    dssim_val /= batch_size

    return dssim_val

def loss_manager(ground_truth,prediction,l1_weight=1, l2_weight=1, dssim_weight=1):
    l1_loss = nn.L1Loss()
    l1_loss_val = l1_loss(ground_truth,prediction) 
    l1_weighted_loss = l1_loss_val * l1_weight

    l2_loss = nn.MSELoss()
    l2_loss_val = l2_loss(ground_truth,prediction) 
    l2_weighted_loss = l2_loss_val * l2_weight

    dssim_loss_val = dssim_loss(ground_truth, prediction) 
    dssim_weighted_loss = dssim_loss_val * dssim_weight

    total_loss = l1_weighted_loss + l2_weighted_loss + dssim_weighted_loss

    loss_dict = {'l1_loss': l1_loss_val.item(), 'l2_loss': l2_loss_val.item(),
                 'dssim_loss':dssim_loss_val.item()}

    return total_loss , loss_dict


## Functions :- train and test

In [16]:
def train(model,optimizer,training_data ,epoch):
    model.train()
    train_loss_f_path="/content/drive/My Drive/CudaLAB/P R O J E C T/loss/train_loss02.txt"
    loss_dict_keys = ['l1_loss','l2_loss','dssim_loss']
    train_sep_loss = {key:[] for key in loss_dict_keys}
    train_epoch_loss=[]

    for i in range(epoch):
        total = len(training_data.dataset)
        total_loss = 0
        for batch_idx,(clips,label) in enumerate(training_data):
            optimizer.zero_grad()
            clips = clips.to(device)            
            #print(len(clips)) # batch_size -> 8
            clips_input = clips[:,0:3,:,:,:] ## Take first 3 frames as input
            #print("clips_input.shape: ",clips_input.shape) # torch.Size([8, 3, 3, 224, 224])
            clips_truth = clips[:,1:4,:,:,:] ## Select last 3 frames as ground truth to compare
            #print("clips_truth.shape: ", clips_truth.shape) # torch.Size([8, 3, 3, 224, 224])
            ground_truth =gt_resizer(clips_truth).to(device)
            
            out = our_model(clips_input)
            loss,loss_dict = loss_manager(ground_truth,out,l1_weight=1, l2_weight=0.5, dssim_weight=2)

            ## Append the 'train_sep_loss' dict
            train_sep_loss['l1_loss'].append(loss_dict['l1_loss'])
            train_sep_loss['l2_loss'].append(loss_dict['l2_loss'])
            train_sep_loss['dssim_loss'].append(loss_dict['dssim_loss'])


            total_loss += loss.item()
            loss.backward()
            optimizer.step()
            
            if (batch_idx% 100==0):
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                i, batch_idx, int(total/len(clips)),
                100. * batch_idx / len(training_data), loss.item() / len(clips)))
        
        epoch_loss = total_loss / batch_idx
        print("Epoch: {}, Loss: {:6f}".format(i, epoch_loss))  
        train_epoch_loss.append(epoch_loss)
        
        ## Save Loss for Plotting in a txt file [in "append" mode]
        with open(train_loss_f_path,'a') as f:
            f.write(str(epoch_loss)+ '\n')

        ## Save Checkpoint
        model_saving_path = os.path.join("/content/drive/My Drive/CudaLAB/P R O J E C T/new_saved_models","model_0_%d.pth" % (i+22))
        torch.save({'epoch': epoch, 'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),'loss': loss.item() / len(clips),
                    }, model_saving_path)

    return train_epoch_loss, train_sep_loss

def test(model, testloader):
    model.eval()
    total = len(testloader.dataset)
    print("Total Number of examples in Test Dataset: ", total)
    loss_dict_keys = ['l1_loss','l2_loss','dssim_loss']
    test_sep_loss = {key:[] for key in loss_dict_keys}
    total_loss = 0
    with torch.no_grad():
        for batch_idx,data in enumerate(testloader):
            clips, labels = data
            clips = clips.to(device)  
            clips_input = clips[:,0:3,:,:,:] ## Take first 3 frames as input
            #print("clips_input.shape: ",clips_input.shape)
            clips_truth = clips[:,1:4,:,:,:] ## Select last 3 frames as ground truth to compare
            #print("clips_truth.shape: ", clips_truth.shape)
            ground_truth =gt_resizer(clips_truth).to(device)
            predicted_clip = our_model(clips_input)
            test_loss,test_loss_dict = loss_manager(ground_truth,predicted_clip,l1_weight=1, l2_weight=0.5, dssim_weight=2)
            ## Append the 'test_sep_loss' dict
            test_sep_loss['l1_loss'].append(test_loss_dict['l1_loss'])
            test_sep_loss['l2_loss'].append(test_loss_dict['l2_loss'])
            test_sep_loss['dssim_loss'].append(test_loss_dict['dssim_loss'])
            
            total_loss += test_loss.item()
            
            if (batch_idx ==73):
                gen_gif(ground_truth,batch_idx)
                gen_gif(predicted_clip,batch_idx,True)
                print("Now you can terminate the program")
            
        test_all_loss=total_loss / batch_idx
    
    return test_all_loss,test_sep_loss

   

## Create DataLoaders

In [17]:
train_transformations = t.Compose([t.ToPILImage(),
                             t.Resize((224,224)),
                             t.RandomHorizontalFlip(),
                             t.ToTensor(),
                             t.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

test_transformations = t.Compose([t.ToPILImage(),
                             t.Resize((224,224)),
                             t.ToTensor(),
                             t.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

data_dir = "/content/drive/My Drive/CudaLAB/P R O J E C T/UCF-101"
annotation_dir = "/content/drive/My Drive/CudaLAB/P R O J E C T/data/ucfLabel"
bs = 16
train_dataset = UCF101Dataset(data_dir,annotation_dir,4,transforms_=train_transformations)
train_dataloader = DataLoader(train_dataset, batch_size=bs,num_workers=32)
test_dataset = UCF101Dataset(data_dir,annotation_dir,4,train=False, transforms_=test_transformations)
test_dataloader = DataLoader(test_dataset, batch_size=bs,num_workers=32)


Use split1
Use split1


## Training 

In [None]:
'''
our_model = OurModel().to(device)
lr = 0.0001
optimizer = optim.Adam(our_model.parameters(),lr)
loss, loss_dict = train(our_model, optimizer, train_dataloader,epoch=5)
'''

#### Training from saved Checkpoints

In [None]:
saved_model_path= "/content/drive/My Drive/CudaLAB/P R O J E C T/new_saved_models/model_0_21.pth"

our_model = OurModel().to(device)
lr = 0.0001
optimizer = optim.Adam(our_model.parameters(),lr)
checkpoint = torch.load(saved_model_path)
our_model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

train_loss, train_loss_dict = train(our_model, optimizer, train_dataloader,epoch=5)

Epoch: 0, Loss: 1.390611
Epoch: 1, Loss: 1.381736
Epoch: 2, Loss: 1.383765
Epoch: 3, Loss: 1.376829
Epoch: 4, Loss: 1.375644


In [None]:
print(train_loss)


[1.3906108499973413, 1.3817360404153798, 1.383764588392821, 1.3768290333299829, 1.375643541148845]


## Validation

#### Inference from saved Checkpoints

In [20]:
our_model = OurModel().to(device)
saved_model_path= "/content/drive/My Drive/CudaLAB/P R O J E C T/new_saved_models/model_0_26.pth"
checkpoint = torch.load(saved_model_path)
our_model.load_state_dict(checkpoint['model_state_dict'])
test_all_loss,test_sep_loss = test(our_model,test_dataloader)

Total Number of examples in Test Dataset:  3783
torch.Size([16, 3, 3, 112, 112])
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
torch.Size([16, 3, 3, 112, 112])
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
GIF has been saved.
Now you can terminate the program


In [22]:
mean_sep_loss= {k: sum(v) / float(len(v)) for k, v in test_sep_loss.items()}
print("Average of each loss: ",mean_sep_loss)

print("Test Loss: ",test_all_loss)
print("Weighted Average of Test Loss: ", test_all_loss/3)

Average of each loss:  {'l1_loss': 0.9134166155183365, 'l2_loss': 1.1786907830821813, 'dssim_loss': 0.43543484467494337}
Test Loss:  2.383689466169325
Weighted Average of Test Loss:  0.794563155389775
