In [1]:
import torch
import torchvision
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import matplotlib.pyplot as plt
from torch.utils.data.sampler import SubsetRandomSampler
import math
import UNet

# Network Details:
![Architecture](img/Arch.png)
## <font color='red' >Flow Computation Network:</font>
* U-Net Architecture (in_channels = 6, out_Channels = 4)
* input I0 , I1
* output F0->1 , F1->0
* Takes two input images I0 and I1 and jointly predicts the forward optical flow F0→1 and the backward optical flow F1→0 between them.
        
## <font color='red' >Arbitrary-time flow interpolation:</font>
* U-Net Architecture (in_channels = 20, out_Channels = 5)
* inputs I1 , g(I1,Ft->1) , Ft->1 , Ft->0 , g(I0,Ft->0) , I0
* outputs I1 , Vt<-1 , ΔFt->1 , ΔFt->0 , Vt<-0 , I0

### I(t) is computed from the arbitrary-time flow interpolation outputs

# <font color='red' >Loss Function:</font>

## <center><font color='blue' > L = λr lr + λp lp + λw lw + λs ls </font></center>
* lr: Reconstruction loss, which models how good the reconstruction of the intermediate frames is
* lp: Perceptual loss to preserve details of the predictions, and make interpolated frames sharper
* lw: Warping loss to model the quality of the computed optical flow
* ls: Smoothness loss to encourage neighboring pixels to have similar flow values
* λr = 0.8 , λp = 0.005 , λw = 0.4 , λs = 1 

    

In [4]:
# Global configuration shared by the data pipeline.
batch_size = 6

# Per-channel statistics handed to transforms.Normalize. Because std is all
# ones, the normalisation step only shifts each channel by its mean.
mean = [0.429, 0.431, 0.397]
std = [1, 1, 1]

# Convert each image to a tensor first, then normalise it channel-wise.
data_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)]
)

# TODO: the datasets are not defined yet, so the loaders below stay disabled
# until the three placeholders are filled in.
#train_dataset =
#validation_dataset =
#test_dataset =
#train_dataloader = torch.utils.data.DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
#validation_dataloader = torch.utils.data.DataLoader(validation_dataset,batch_size=batch_size,shuffle=True)
#test_dataloader = torch.utils.data.DataLoader(test_dataset,batch_size=batch_size,shuffle=True)

In [2]:
# Flow computation network (in_channels = 6, out_channels = 4 per the
# network notes in this notebook).
flow_model = UNet.UNet(6, 4)

# Arbitrary-time flow interpolation network (in_channels = 20,
# out_channels = 5 per the network notes in this notebook).
arb_time_flow = UNet.UNet(20, 5)

In [3]:
# Criterion for the reconstruction term (mean absolute error).
recon_loss = nn.L1Loss()
# Criterion for the perceptual term (mean squared error); presumably applied
# to the VGG features extracted below -- confirm at the call site.
percep_loss = nn.MSELoss()

# Feature extractor made of the first 22 layers of VGG16's `features` stack,
# i.e. everything up to and including the conv4_3 convolution.
# NOTE(review): `pretrained=True` is commented out, so these weights look to be
# randomly initialised -- confirm that is intended before using them in a loss.
vgg16_model = torchvision.models.vgg16()  # (pretrained=True)
vgg16_conv_4_3 = nn.Sequential(*vgg16_model.features[:22])

# Freeze the extractor so the optimizer never updates its weights.
for parameter in vgg16_conv_4_3.parameters():
    parameter.requires_grad_(False)

# Bare expression: displays the truncated network as the cell output.
vgg16_conv_4_3

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(

In [None]:
#np.linspace(0.125, 0.875, 7)
def get_intermediate_flow(F0_1,F1_0,weigths,index):
    """Approximate the flows from an intermediate time t back to both end frames.

    With t = weigths[index]:

        Ft_0 = -(1 - t) * t * F0_1 + t**2 * F1_0
        Ft_1 = (1 - t)**2 * F0_1 - (1 - t) * t * F1_0

    Args:
        F0_1: forward flow (frame 0 -> frame 1); anything that supports
            multiplication by a scalar and addition/subtraction.
        F1_0: backward flow (frame 1 -> frame 0); same kind of object as F0_1.
        weigths: indexable collection of time positions; each entry must be
            convertible to float because it is passed to math.pow.
            Presumably values in (0, 1), e.g. np.linspace(0.125, 0.875, 7)
            -- confirm against the caller.
        index: position in `weigths` of the time step to evaluate.

    Returns:
        Tuple (Ft_0, Ft_1).
    """
    t = weigths[index]
    one_minus_t = 1 - t
    # Mixed coefficient (1 - t) * t appears in both formulas.
    cross = one_minus_t * t

    Ft_0 = (-cross * F0_1) + (math.pow(t, 2) * F1_0)
    Ft_1 = (math.pow(one_minus_t, 2) * F0_1) - (cross * F1_0)
    return Ft_0, Ft_1
def get_intermediate_image():
    """Placeholder for the intermediate-frame synthesis step.

    TODO: not implemented yet; takes no arguments and always returns None.
    """
    return None