In [11]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
from matplotlib import pyplot as plt
import cv2



# 3x3 kernel whose rows are all [1, 0, -1]: the weighted sum over a 3x3
# window is its left column minus its right column.
# NOTE: this name shadows Python's builtin `filter` for the rest of the notebook.
filter = np.array([[1, 0, -1]] * 3)




# Open the clip and apply `filter` to the first channel of every 100th frame,
# showing each filter response as a grayscale image.
cap = cv2.VideoCapture("D:\\Dataset\\archive\\UCF101\\UCF-101\\ApplyLipstick\\v_ApplyLipstick_g01_c02.avi")
if not cap.isOpened():
    # Make a wrong path / missing codec visible instead of silently showing nothing.
    print("Could not open the video file; no frames will be processed.")
frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

kernel_rows, kernel_cols = filter.shape

try:
    for frame in range(frames):
        ret, img = cap.read()

        # Check the read result BEFORE touching `img`: it is None when the read fails.
        if not ret:
            break

        # Frames still have to be read sequentially, but only every 100th one
        # is filtered and displayed.
        if frame % 100 != 0:
            continue

        # "Valid" sliding window with stride 1: one output pixel per position
        # where the kernel fits entirely inside the frame.
        out_rows = img.shape[0] - kernel_rows + 1
        out_cols = img.shape[1] - kernel_cols + 1

        # Apply filter on first channel of image. Cast to a signed integer type
        # first so the negative kernel weights cannot wrap around in the
        # frame's original (typically unsigned 8-bit) dtype.
        channel = img[:, :, 0].astype(int)

        # Sum of shifted, weighted copies of the channel:
        #   response[r, c] = sum_{dr, dc} filter[dr, dc] * channel[r + dr, c + dc]
        # This equals np.sum(window * filter) for every window, without a
        # Python-level loop over pixels.
        response = sum(
            filter[dr, dc] * channel[dr:dr + out_rows, dc:dc + out_cols]
            for dr in range(kernel_rows)
            for dc in range(kernel_cols)
        )

        # Keep the 3-channel integer layout of `filtered_image` (same response
        # replicated in each channel).
        filtered_image = np.full((out_rows, out_cols, 3), 0).astype(int)
        filtered_image[:, :, :] = response[:, :, np.newaxis]

        # Display a single 2-D channel: for (H, W, 3) input imshow treats the
        # data as RGB, ignores vmin/vmax and clips values outside 0..255, which
        # would discard all negative responses.
        plt.imshow(filtered_image[:, :, 0], cmap="gray", vmin=filtered_image.min(), vmax=filtered_image.max())
        plt.title(f"Frame {frame}")
        plt.show()
finally:
    # Always free the underlying file/decoder handle, even if a frame fails.
    cap.release()
    



In [8]:
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """Small convolutional classifier for 3-channel images.

    Architecture: two 3x3 convolutions -> 2x2 max-pool -> two 5x5 convolutions
    -> 2x2 max-pool -> three fully connected layers. All convolutions use
    padding that preserves the spatial size, so only the two pooling layers
    shrink the feature maps (each halves height and width, rounding down).

    Args:
        input_size: Spatial size of the input images, either an ``int`` (square
            images) or a ``(height, width)`` pair. Defaults to ``(512, 512)``,
            which yields 128 x 128 feature maps in front of the classifier.
        num_classes: Number of output logits. Defaults to 8.

    Raises:
        ValueError: If the input is too small to survive both pooling layers.
    """

    def __init__(self, input_size=(512, 512), num_classes=8):
        # Initialize super class
        super().__init__()

        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size

        # Two stride-2 pools with floor rounding reduce each side to size // 4.
        pooled_height, pooled_width = height // 4, width // 4
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                "input_size must be at least 4 x 4, got {} x {}".format(height, width)
            )

        # Define architecture of CNN
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=5, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=5, out_channels=10, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5, padding=2)
        self.conv4 = nn.Conv2d(in_channels=20, out_channels=30, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier: input is 30 feature maps of pooled_height x pooled_width
        # values (128 x 128 for the default 512 x 512 input).
        self.fc1 = nn.Linear(30 * pooled_height * pooled_width, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, 3, height, width)`` whose spatial
                size matches the ``input_size`` given at construction.
        """
        # Feature extraction
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))

        # Flatten and feed to classifier
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    

In [9]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU
# so this cell also runs on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = CNN()
net = net.to(device)
# Bare last expression: the notebook displays the module's layer summary.
net

CNN(
  (conv1): Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv4): Conv2d(20, 30, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=491520, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=8, bias=True)
)