In [3]:
# Commonly used CV tools
import os
import cv2     # for capturing videos
import math   # for mathematical operations
import pickle
import matplotlib.pyplot as plt    # for plotting the images
%matplotlib inline
import pandas as pd
import numpy as np    # for mathematical operations
from skimage.transform import resize   # for resizing images
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score
from glob import glob
from tqdm import tqdm

# for model architectures
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
import torch.utils.data as data
import torchvision
from torch.autograd import Variable
from functions import *


### Read file names into the train dataframe

In [6]:
# Read the list of training videos. Each non-empty line of the file is one
# entry of the form "<ClassName>/<file>.avi <label>".
# The context manager guarantees the file handle is closed, even on error.
with open("trainlist01.txt", "r") as f:
    # Skip blank lines (such as the one produced by a trailing newline) rather
    # than unconditionally dropping the last row, which would discard a real
    # video whenever the file does not end with a newline.
    videos = [line.rstrip('\n') for line in f if line.strip()]

# creating a dataframe having video names
train = pd.DataFrame({'video_name': videos})
train.head()

Unnamed: 0,video_name
0,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c01.avi 1
1,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c02.avi 1
2,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c03.avi 1
3,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c04.avi 1
4,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c05.avi 1


### Get the tagnames from folder names

In [15]:
# The tag (action class) of a video is the folder part of its entry, i.e.
# everything before the first '/'.
train_video_tag = [
    train['video_name'][row].split('/')[0] for row in range(len(train))
]

# attach the tags as a new column and preview the result
train['tag'] = train_video_tag
train.head()

Unnamed: 0,video_name,tag
0,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c01.avi 1,ApplyEyeMakeup
1,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c02.avi 1,ApplyEyeMakeup
2,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c03.avi 1,ApplyEyeMakeup
3,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c04.avi 1,ApplyEyeMakeup
4,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c05.avi 1,ApplyEyeMakeup


### Create test data frame and corresponding tags

In [14]:
# Read the list of test videos, one "<ClassName>/<file>.avi" entry per
# non-empty line. The context manager closes the file handle even on error.
with open("testlist01.txt", "r") as f:
    # Skip blank lines (such as the one produced by a trailing newline) rather
    # than unconditionally dropping the last row, which would discard a real
    # video whenever the file does not end with a newline.
    videos = [line.rstrip('\n') for line in f if line.strip()]

# creating a dataframe having video names
test = pd.DataFrame({'video_name': videos})

# creating tags for test videos: the folder part of each entry (the text
# before the first '/') is the action class
test_video_tag = [name.split('/')[0] for name in videos]

test['tag'] = test_video_tag
test.head()

Unnamed: 0,video_name,tag
0,ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c01.avi,ApplyEyeMakeup
1,ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c02.avi,ApplyEyeMakeup
2,ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c03.avi,ApplyEyeMakeup
3,ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c04.avi,ApplyEyeMakeup
4,ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c05.avi,ApplyEyeMakeup


## Create a new folder 'train_1' to contain extracted frames
Use `cap.get()` from `cv2` to read properties of the video capture, such as its frame rate and the index of the current frame.

In [57]:
# storing the frames from training videos
# cv2.imwrite does not create missing directories (the write simply fails),
# so make sure the output folder exists before extracting anything.
os.makedirs('train_1', exist_ok=True)

for i in tqdm(range(train.shape[0])):
    count = 0
    videoFile = train['video_name'][i]
    # entries look like "<ClassName>/<file>.avi <label>"; keep only the path
    videoPath = videoFile.split(' ')[0]
    cap = cv2.VideoCapture('UCF-101/' + videoPath)
    try:
        frameRate = cap.get(cv2.CAP_PROP_FPS)  # get frames per second
        # tqdm.write prints the message without corrupting the progress bar
        tqdm.write(f'The video is taking at {frameRate} frames per second')

        # Keep one frame every floor(fps) frames. Clamp the step to at least 1
        # so a reported rate below 1 fps cannot cause a modulo by zero.
        frameStep = max(1, math.floor(frameRate))

        while cap.isOpened():
            # index of the frame that the next read() will return
            frameId = cap.get(cv2.CAP_PROP_POS_FRAMES)
            ret, frame = cap.read()
            if not ret:
                # end of stream (or a decoding failure): stop with this video
                break
            if frameId % frameStep == 0:
                # storing the frames in a new folder named train_1
                filename = 'train_1/' + videoPath.split('/')[1] + "_frame%d.jpg" % count
                count += 1
                cv2.imwrite(filename, frame)
    finally:
        # always free the capture, even if reading or writing raised
        cap.release()

100%|██████████| 9537/9537 [06:39<00:00, 23.86it/s]


## Next, create a `.csv` file that contains paths to these images as well as their `class`

In [64]:
# get the names of all the images
# glob returns matches in arbitrary order; sort them so the generated CSV is
# identical from one run to the next.
images = sorted(glob('train_1/*.jpg'))
train_image = []
train_class = []
for path in tqdm(images):
    # create the image name; os.path.basename copes with whichever path
    # separator glob returns on the current platform
    name = os.path.basename(path)
    train_image.append(name)
    # create the class of this image, the activity name: frames are named
    # "v_<ClassName>_gXX_cYY.avi_frameN.jpg", so it is the second '_' field
    train_class.append(name.split('_')[1])

# storing the images and their class in a dataframe
train_data = pd.DataFrame({'image': train_image, 'class': train_class})

# save dataframe into `.csv` file
train_data.to_csv('UCF-101/train_new.csv', header=True, index=False)

100%|██████████| 73844/73844 [00:00<00:00, 600840.34it/s]


# Training most basic video classification model

## Here we will consider the most basic architecture: a 3D-CNN with a very light base architecture, VGG-16
We have stored our training image names and their corresponding classes in a dataframe.
Now we just need to:
* Define model architecture
* Train and validate performance using unseen data
* Hyper-parameter tuning
* Upgrade model capability and repeat process for better accuracy