In [1]:
import torch
import numpy as np
import torchvision.models as models
import Custom as CustomDataset
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.autograd import Variable

"""
Do not run this code block without cuda()

Load in vgg16, 
"""
vgg16 = models.vgg16(pretrained=True).cuda()
#vgg16 = models.vgg16(pretrained=True)
customDataloader = CustomDataset.main() #for dataloader

for param in vgg16.parameters():
    param.requires_grad = False
    # Replace the last fully-connected layer
    # Parameters of newly constructed modules have requires_grad=True by default
vgg16_fcn = vgg16.features 
vgg16_fcn.cuda()




~~~| Custom.py Execution |~~~
Loaded dataset
~~~| Custom.py Complete |~~~



Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(

In [2]:
def getDataPoint(element, index):
    scores = []
    for frame in range(len(element['video'][index])):
        video = element['video'][index]
        #plt.imshow(video[frame].numpy())
        #plt.show(block=False)
        
        #print(data['video'][seg][frame].shape)
        #print(element['scores'][frame][index])
        scores.append(int(element['scores'][frame][index]))
    points = np.zeros((7,3), dtype=int)
    frames = np.add(*np.indices((7, 3)))
    if (1 in scores):
        boundary = scores.index(1)
        for i in range(7):
            for j in range(3):
                frames[i][j] = i + j
                if (i + j == boundary):
                    points[i][j] = 1
    return points.tolist(), frames.tolist(), video, index

def parseVideoMatrix(vid, positions):
    vid = vid.numpy()
    newData = np.zeros((7, 3, 3, 224,224))
    count = 0
    for i in positions:
        frames = np.zeros((3, 3, 224, 224))
        for j in range(len(i)):
            #Models expect 3xHxW
            #Current format before swaps, WxHx3
            frames[j,...] = np.swapaxes(vid[i[j]], 0, 2)
        newData[count,...] = frames
        count += 1
    return newData
    
def parseViewVids(vid, positions):
    vid = vid.numpy()
    newData = np.zeros((7, 3, 224, 224,3))
    count = 0
    for i in positions:
        frames = np.zeros((3, 224, 224, 3))
        for j in range(len(i)):
            #Models expect 3xHxW
            #Current format before swaps, WxHx3
            frames[j,...] = vid[i[j]]
        newData[count,...] = frames
        count += 1
    return newData

def viewVideo(vid):
    for i in vid:
        for j in i:
            plt.imshow(j)
            plt.show(block=False)
            

"""
Reading in data to get single batch for training
"""
for batch_i, data in enumerate(customDataloader): 
        for i in range(len(data['video'])):
            currSegment = data['video'][i]
            showFrame = data['video'][i][0]
            #plt.imshow(showFrame.numpy())
            #plt.show(block=False)
            
            batchData = data
        break
        

In [6]:
scoreList, frameNums, vidData, index = getDataPoint(batchData, 3)
dataItem = parseVideoMatrix(vidData, frameNums)

#Can be commented out if not wanting to view
#viewItem = parseViewVids(vidData, frameNums)
#viewVideo(viewItem)
#print(scoreList)
#print(frameNums)

dataItem = np.asarray(dataItem)
dataItem = torch.tensor(dataItem)
"""
for elem in dataItem:
    out = vgg16_fcn(elem.float().cuda())
    print(out.shape)
"""
batchOne = dataItem[0:4]
scoreOnes = np.squeeze(scoreList[0:4])
batchTwo = dataItem[3:]
scoreTwo = np.squeeze(scoreList[3:])
print(batchOne.shape)
print(scoreOnes)
print(batchTwo.shape)
print(scoreTwo)

torch.Size([4, 3, 3, 224, 224])
[[0 0 0]
 [0 0 0]
 [0 0 1]
 [0 1 0]]
torch.Size([4, 3, 3, 224, 224])
[[0 1 0]
 [1 0 0]
 [0 0 0]
 [0 0 0]]


In [50]:
"""
import torch.nn as nn
from torch.nn.parameter import Parameter
class MyModel(nn.Module):
    def __init__(self, inputDim, outputDim):
        super(MyModel, self).__init__()
        vgg16 = models.vgg16(pretrained=True)
        #vgg16 = models.vgg16(pretrained=True).cuda()
        for param in vgg16.parameters():
            param.requires_grad = False
            # Replace the last fully-connected layer
            # Parameters of newly constructed modules have requires_grad=True by default
        self.vgg16_fcn = vgg16.features
        #self.vgg16_fcn.cuda()
        self.lstm = torch.nn.LSTM(inputDim, outputDim, 1, True, True, 0.5);
        self.fc = nn.Linear(outputDim, 1)
        self.flatten_parameters()
    
    def flatten_parameters(self):
        self.lstm.flatten_parameters()
        
    def forward(self, inDim, x):
        newBatch = []
        for t in range(len(x[1])):
            newBatch.append(self.vgg16_fcn(x[:,t,:,:,:].float()))
        grad = False
        
        #4 x 3 x (512 x 7 x 7)
        vggOut = torch.stack(newBatch, 1).detach_()
        print(vggOut.shape)
        
        #4 x 3 x 25088
        test = vggOut.view((4,3,-1))
        print(test.shape)
        
        #Output from LSTM 4 x 3 x 256
        lstmOut, _ = self.lstm(test)
        print(lstmOut.shape)
        
        #Output from Fully Connected Layer 4 x 3 x 1
        fcOut = self.fc(lstmOut)
        #print(fcOut)
        return fcOut
                
                
model = MyModel(512*7*7, 256)
print(batchOne.shape)
print(batchTwo.shape)
print(batchOne[:,0,:,:,:].shape)

out = np.squeeze(model(512*7*7, batchOne))
print(out.shape)
print(out)
out2 = np.squeeze(model(512*7*7, batchTwo))
print(out2.shape)
print(out2)
"""
        

  "num_layers={}".format(dropout, num_layers))


torch.Size([4, 3, 3, 224, 224])
torch.Size([4, 3, 3, 224, 224])
torch.Size([4, 3, 224, 224])
torch.Size([4, 3, 512, 7, 7])
torch.Size([4, 3, 25088])
torch.Size([4, 3, 256])
torch.Size([4, 3])
tensor([[ 0.1549,  0.2016,  0.1307],
        [ 0.1839,  0.1291,  0.1011],
        [ 0.1060,  0.0818, -0.0234],
        [ 0.0690, -0.0328, -0.2472]], grad_fn=<SqueezeBackward0>)
torch.Size([4, 3, 512, 7, 7])
torch.Size([4, 3, 25088])
torch.Size([4, 3, 256])
torch.Size([4, 3])
tensor([[ 0.0690, -0.0328, -0.2472],
        [-0.0291, -0.2410, -0.2251],
        [-0.2034, -0.2088, -0.1241],
        [-0.1392, -0.0747, -0.0169]], grad_fn=<SqueezeBackward0>)


In [9]:
"""
USING SIGMOID
"""

import torch.nn as nn
from torch.nn.parameter import Parameter
class MyModel(nn.Module):
    def __init__(self, inputDim, outputDim):
        super(MyModel, self).__init__()
        vgg16 = models.vgg16(pretrained=True)
        #vgg16 = models.vgg16(pretrained=True).cuda()
        for param in vgg16.parameters():
            param.requires_grad = False
            # Replace the last fully-connected layer
            # Parameters of newly constructed modules have requires_grad=True by default
        self.vgg16_fcn = vgg16.features
        #self.vgg16_fcn.cuda()
        self.lstm = torch.nn.LSTM(inputDim, outputDim, 1, True, True, 0.5);
        self.fc = nn.Linear(outputDim, 1)
        self.flatten_parameters()
        self.sigmoid = nn.Sigmoid()
    
    def flatten_parameters(self):
        self.lstm.flatten_parameters()
        
    def forward(self, inDim, x):
        newBatch = []
        for t in range(len(x[1])):
            newBatch.append(self.vgg16_fcn(x[:,t,:,:,:].float()))
        grad = False
        
        #4 x 3 x (512 x 7 x 7)
        vggOut = torch.stack(newBatch, 1).detach_()
        print(vggOut.shape)
        
        #4 x 3 x 25088
        test = vggOut.view((4,3,-1))
        print(test.shape)
        
        #Output from LSTM 4 x 3 x 256
        lstmOut, _ = self.lstm(test)
        print(lstmOut.shape)
        
        #Output from Fully Connected Layer 4 x 3 x 1
        #fcOut = self.fc(lstmOut)
        #print(fcOut)
        #return fcOut
        
        sigOut = self.sigmoid(self.fc(lstmOut))
        print(sigOut)
        return sigOut

            

In [8]:
class LossFunction(nn.Module):
    def __init__(self):
        super(LossFunction, self).__init__()
        loss = nn.BCELoss()
    def forward(self, estimate, groundTruth):
        return loss(estimate, groundTruth)


        

In [12]:
model = MyModel(512*7*7, 256)
#print(batchOne.shape)
#print(batchTwo.shape)
data = [batchOne, batchTwo]
#Ground Truth
scoreOnes = np.squeeze(scoreList[0:4])
scoreTwo = np.squeeze(scoreList[3:])
GT = [scoreOnes, scoreTwo]

loss = LossFunction()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

num_epochs = 100
for epoch in range(num_epochs):
    for i in range(len(data)):
        out = np.squeeze(model(512*7*7, data[i]))
        error = loss(out, GT[i])
        optimizer.zero_grad()
        error.backward()        
        optimizer.step()
    if epoch % 5 == 0:
        print('epoch %d:' % epoch, error.item())

torch.Size([4, 3, 512, 7, 7])
torch.Size([4, 3, 25088])
torch.Size([4, 3, 256])
tensor([[[0.5313],
         [0.5442],
         [0.5598]],

        [[0.5347],
         [0.5545],
         [0.5751]],

        [[0.5419],
         [0.5707],
         [0.5713]],

        [[0.5597],
         [0.5624],
         [0.5556]]], grad_fn=<SigmoidBackward>)


RecursionError: maximum recursion depth exceeded while calling a Python object