In [1]:
import torch
import os
from models.FrameNet import MultiResFrameNet, FrameNet
from data import VideoDataset, FrameDataset
from torchvision import transforms
import pandas as pd

In [14]:
# Hyper-parameters handed to FrameNet(config) and to the DataLoader (batch_size).
config = dict(
    # Size of the classifier output (Linear out_features in the FrameNet repr).
    n_classes=5,
    # Optimisation settings; not consumed anywhere in the visible cells.
    batch_size=1024,
    lr=1e-3,
    gradient_clip_val=0.5,
    num_epochs=50,
    # Channel widths of the five conv layers: 3 -> 96 -> 256 -> 384 -> 384 -> 256,
    # matching the Conv2d in/out channels shown in the FrameNet repr.
    cnn1_in=3,
    cnn2_in=96,
    cnn3_in=256,
    cnn4_in=384,
    cnn5_in=384,
    cnn5_out=256,
    # in_features of the final Linear layer.
    linear_in=4096,
    dropout=0.5,
    # NOTE(review): the repr shows 11x11 and 5x5 kernels on conv1/conv2, so this
    # presumably only applies to the later 3x3 convs — confirm in models/FrameNet.
    kernel_size=3,
)

In [13]:
# Scratch conv stack used only to check the output shape for a 224x224 RGB input.
x = torch.rand(1, 3, 224, 224)
cnn = torch.nn.Sequential(
    torch.nn.Conv2d(3, 32, kernel_size=5, stride=3),  # 224 -> 74
    torch.nn.MaxPool2d(2),                            # 74  -> 37
    torch.nn.Conv2d(32, 64, kernel_size=5),           # 37  -> 33
    torch.nn.MaxPool2d(2),                            # 33  -> 16
    torch.nn.Conv2d(64, 128, kernel_size=3),          # 16  -> 14
)
# Last expression of the cell: displays the output shape, (1, 128, 14, 14).
cnn(x).shape

torch.Size([1, 128, 14, 14])

In [15]:
# Build a FrameNet from `config`; as the cell's last expression, its module repr is displayed.
FrameNet(config)

FrameNet(
  (cnn): ConvLayer(
    (conv1): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4), padding=(1, 1))
    (batchnorm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (batchnorm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv4): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv5): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pooling): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (classifier): Classifier(
    (fc_net): Sequential(
      (0): Dropout(p=0.5, inplace=False)
      (1): Linear(in_features=4096, out_features=5, bias=True)
    )
  )
)

In [5]:
# Random input of shape (1, 3, 178, 178) for a forward-pass smoke test.
# NOTE(review): 178x178 differs from the 170x170 Resize used in `data_transforms` —
# confirm which input size the model's linear_in=4096 is meant for.
x = torch.rand(1,3,178, 178)
# Fresh FrameNet built from `config`; reuses the name `cnn` that the scratch
# torch.nn.Sequential cell also assigns.
cnn = FrameNet(config)

In [6]:
# Forward pass on the random input; the saved output is a single row of 5 values.
cnn(x)

tensor([[ 0.1673, -0.0653, -0.1619, -0.0608,  0.3181]],
       grad_fn=<AddmmBackward0>)

In [2]:
# Preprocessing pipeline passed to FrameDataset / VideoDataset:
# convert to tensor, nearest-neighbour resize to 170x170, then normalise.
data_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(
            size=(170, 170),
            interpolation=transforms.functional.InterpolationMode.NEAREST,
        ),
        transforms.Normalize(
            (0.4914, 0.4822, 0.4465),
            (0.2023, 0.1994, 0.2010),
        ),
    ]
)

In [5]:
# Frame-level test split: read the 'frame' and 'label' columns of the CSV,
# wrap them in a FrameDataset, and serve shuffled batches of config['batch_size'].
test_file = '/home/gaurangajitk/DL/data/sports-video-data/test_images.csv'
testset = FrameDataset(
    pd.read_csv(test_file, usecols=['frame', 'label']),
    data_transforms,
)
test_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=config['batch_size'],
    shuffle=True,
    num_workers=1,
)

In [6]:
# Pull one batch from the frame test loader to check its layout.
first = next(iter(test_loader))
# Shape of the batch's first element; the saved output shows [1024, 3, 170, 170].
first[0].shape

torch.Size([1024, 3, 170, 170])

In [5]:
# Video-level test split: read the 'video' and 'label' columns of the CSV,
# wrap them in a VideoDataset, and serve one shuffled item per batch.
video_file = '/home/gaurangajitk/DL/data/sports-video-data/test_videos.csv'
videoset = VideoDataset(
    pd.read_csv(video_file, usecols=['video', 'label']),
    data_transforms,
)
video_loader = torch.utils.data.DataLoader(
    videoset,
    batch_size=1,
    shuffle=True,
    num_workers=1,
)

In [6]:
# Fetch a single batch from the video loader. This reuses the name `first`,
# which the frame-loader cell also assigns.
first = next(iter(video_loader))

In [7]:
# Take item 0 of the batch's first element; the saved output shows a (3, 170, 170) tensor.
image = first[0][0]
image.shape

torch.Size([3, 170, 170])

In [9]:
# Centre-crop the sample frame to 80x80 and display the resulting shape.
#
# The crop gets its own name instead of rebinding `data_transforms`. That name holds
# the full ToTensor/Resize/Normalize pipeline that the dataset cells pass to
# FrameDataset and VideoDataset; overwriting it here would make any later re-run of
# those cells silently build datasets with only the crop applied.
center_crop = transforms.CenterCrop(80)
image = center_crop(image)
image.shape

torch.Size([3, 80, 80])