**Build Video Classification Model Using Deep Learning**




Import Necessary Libraries

In [None]:
import math   # for mathematical operations
import os
from glob import glob

import cv2
import keras
import matplotlib.pyplot as plt    # for plotting the images
import numpy as np    # for mathematical operations
import pandas as pd
import tensorflow as tf
from keras.applications.vgg16 import VGG16
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, InputLayer, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import Sequential
from keras.preprocessing import image   # for preprocessing the images
from keras.utils import np_utils
from PIL import Image
from scipy import stats as s
from skimage.transform import resize   # for resizing images
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

%matplotlib inline

Connect to drive

In [None]:
# Mount Google Drive so that the project files under /content/drive are reachable
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


storing the name of training videos in dataframe

In [None]:
# Read the list of training videos, one entry per line in the form
# "<Class>/<video file> <numeric label>".
# The context manager closes the file as soon as it has been read.
with open("/content/drive/MyDrive/project2/trainlist.txt", "r") as f:
    # splitlines() yields no trailing empty entry and blank lines are skipped,
    # so no row has to be dropped afterwards (the last video is kept even when
    # the file does not end with a newline)
    videos = [line for line in f.read().splitlines() if line.strip()]

# creating a dataframe having video names
train = pd.DataFrame({'video_name': videos})
train.head()

Unnamed: 0,video_name
0,BodyWeightSquats/v_BodyWeightSquats_g08_c01.avi 1
1,BodyWeightSquats/v_BodyWeightSquats_g08_c02.avi 1
2,BodyWeightSquats/v_BodyWeightSquats_g08_c03.avi 1
3,BodyWeightSquats/v_BodyWeightSquats_g08_c04.avi 1
4,BodyWeightSquats/v_BodyWeightSquats_g09_c01.avi 1


storing the name of testing videos in dataframe

In [None]:
# Read the list of test videos, one "<Class>/<video file>" entry per line.
# The context manager closes the file as soon as it has been read.
with open("/content/drive/MyDrive/project2/testlist.txt", "r") as f:
    # splitlines() yields no trailing empty entry and blank lines are skipped,
    # so no row has to be dropped afterwards (the last video is kept even when
    # the file does not end with a newline)
    videos = [line for line in f.read().splitlines() if line.strip()]

# creating a dataframe having video names
test = pd.DataFrame({'video_name': videos})
test.head()

Unnamed: 0,video_name
0,BodyWeightSquats/v_BodyWeightSquats_g01_c01.avi
1,BodyWeightSquats/v_BodyWeightSquats_g01_c02.avi
2,BodyWeightSquats/v_BodyWeightSquats_g01_c03.avi
3,BodyWeightSquats/v_BodyWeightSquats_g01_c04.avi
4,BodyWeightSquats/v_BodyWeightSquats_g02_c01.avi


Creating tags for both training and testing videos

In [None]:
# The tag of a video is the part of its name in front of the first '/',
# i.e. the class folder it is listed under.

# tags for the training videos
train_video_tag = [name.split('/')[0] for name in train['video_name']]
train['tag'] = train_video_tag

# tags for the test videos
test_video_tag = [name.split('/')[0] for name in test['video_name']]
test['tag'] = test_video_tag

In [None]:
# storing the frames from training videos
TRAIN_VIDEO_DIR = '/content/drive/MyDrive/project2/Train/'
TRAIN_FRAME_DIR = '/content/drive/MyDrive/project2/train_1/'

# make sure the output folder exists before any frame is written into it
os.makedirs(TRAIN_FRAME_DIR, exist_ok=True)

for i in tqdm(range(train.shape[0])):
    count = 0
    videoFile = train['video_name'][i]
    # entries look like "<Class>/<file>.avi <label>": keep only the file name
    video_name = videoFile.split(' ')[0].split('/')[1]
    cap = cv2.VideoCapture(TRAIN_VIDEO_DIR + video_name)   # capturing the video from the given path
    # Keep every floor(fps)-th frame, i.e. roughly one frame per second.
    # A reported frame rate below 1 would floor to 0 and make the modulo below
    # fail, so fall back to a step of 1 in that case.
    frame_step = math.floor(cap.get(cv2.CAP_PROP_FPS)) or 1
    try:
        while cap.isOpened():
            frameId = cap.get(cv2.CAP_PROP_POS_FRAMES)   # index of the frame about to be read
            ret, frame = cap.read()
            if not ret:
                break
            if frameId % frame_step == 0:
                # storing the frames in a new folder named train_1
                filename = TRAIN_FRAME_DIR + video_name + "_frame%d.jpg" % count
                count += 1
                cv2.imwrite(filename, frame)
    finally:
        # release the capture even if reading or writing a frame fails
        cap.release()

100%|██████████| 185/185 [01:22<00:00,  2.24it/s]


In [None]:
# getting the names of all the images
# glob() returns paths in a filesystem-dependent order; sorting them makes the
# row order of the CSV (and therefore the seeded train/validation split that is
# computed from it) reproducible between runs
images = sorted(glob("/content/drive/MyDrive/project2/train_1/*.jpg"))

# image name = last path component, e.g. "v_<Class>_g08_c01.avi_frame0.jpg"
train_image = [path.split('/')[-1] for path in tqdm(images)]
# class of the image = second '_'-separated field of the image name
train_class = [name.split('_')[1] for name in train_image]

# storing the images and their class in a dataframe
train_data = pd.DataFrame({'image': train_image, 'class': train_class})

# converting the dataframe into csv file
train_data.to_csv('/content/drive/MyDrive/project2/train_new.csv',header=True, index=False)

100%|██████████| 897/897 [00:00<00:00, 523850.00it/s]


Training the video classification model

In [None]:
# load the frame index (image name + class) and preview its first five rows
train_csv_path = '/content/drive/MyDrive/project2/train_new.csv'
train = pd.read_csv(train_csv_path)
train.head()

Unnamed: 0,image,class
0,v_BodyWeightSquats_g08_c01.avi_frame0.jpg,BodyWeightSquats
1,v_BodyWeightSquats_g08_c01.avi_frame1.jpg,BodyWeightSquats
2,v_BodyWeightSquats_g08_c01.avi_frame2.jpg,BodyWeightSquats
3,v_BodyWeightSquats_g08_c01.avi_frame3.jpg,BodyWeightSquats
4,v_BodyWeightSquats_g08_c02.avi_frame0.jpg,BodyWeightSquats


storing earlier extracted frames as a NumPy array

In [None]:
# folder holding the frames extracted from the training videos
frame_dir = '/content/drive/MyDrive/project2/train_1/'

# read every frame listed in the index, resized to 224x224, as a float array
# scaled to the range [0, 1]
train_image = []
for frame_name in tqdm(train['image']):
    pixels = image.img_to_array(
        image.load_img(frame_dir + frame_name, target_size=(224,224,3))
    )
    train_image.append(pixels / 255)

# stack the per-frame arrays into one array
X = np.array(train_image)

# shape of the array
X.shape

100%|██████████| 897/897 [00:05<00:00, 157.70it/s]


(897, 224, 224, 3)

creating validation set

In [None]:
# the class label of every frame is the target
y = train['class']

# hold out 20% of the frames for validation; stratifying on the label keeps the
# class proportions equal in both parts, and the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# creating dummies of target variable for train and validation set
# An explicit float dtype keeps the encoding numeric regardless of the pandas
# version (the default dtype of get_dummies differs between versions).
y_train = pd.get_dummies(y_train, dtype='float32')
# Align the validation columns with the training columns so that both targets
# always have the same classes in the same order, even if a class were missing
# from the validation part.
y_test = pd.get_dummies(y_test, dtype='float32').reindex(columns=y_train.columns, fill_value=0.0)

For this particular dataset, we will be using the VGG-16 pre-trained model. Let’s create a base model of the pre-trained model:

In [None]:
# VGG16 with ImageNet weights and without its classification layers
# (include_top=False), used as the base model
base_model = VGG16(
    include_top=False,
    weights='imagenet',
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


Extract features from the frames using this pre-trained model.

In [None]:
# run the training frames through the base model; X_train now holds the
# extracted features instead of the raw frames
train_features = base_model.predict(X_train)
X_train = train_features
X_train.shape

(717, 7, 7, 512)

In [None]:
# run the validation frames through the base model; X_test now holds the
# extracted features instead of the raw frames
validation_features = base_model.predict(X_test)
X_test = validation_features
X_test.shape

(180, 7, 7, 512)

We will now train a fully connected network on top of these extracted features. This network takes its input as a single dimension, so we reshape each 7×7×512 feature map into a one-dimensional vector of 25088 values.

In [None]:
# reshaping the training as well as validation features in single dimension
# The number of rows is taken from the arrays themselves instead of being
# hard-coded, so the cell keeps working when the number of frames changes.
X_train = X_train.reshape(X_train.shape[0], 7*7*512)
X_test = X_test.reshape(X_test.shape[0], 7*7*512)

Normalize the extracted features by dividing them by the largest value in the training set. This helps the model converge faster.

In [None]:
# scaling the extracted features by the largest value in the training set
# (the same divisor is applied to the validation features so both share one scale)
# The divisor gets its own name so that Python's builtin max() is not shadowed.
train_feature_max = X_train.max()
X_train = X_train/train_feature_max
X_test = X_test/train_feature_max

In [None]:
# shape of images
X_train.shape

(717, 25088)

Creating architecture

In [None]:
# classifier trained on the flattened features (25088 values per frame):
# four ReLU hidden layers, each followed by 50% dropout, then a 2-way softmax
model = Sequential([
    Dense(1024, activation='relu', input_shape=(25088,)),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax'),
])

Create a checkpoint callback that saves the model whenever the validation loss improves

In [None]:
# checkpoint callback: writes the model to 'weight.hdf5' each time the
# validation loss reaches a new minimum, so the file always holds the best epoch
mcp_save = ModelCheckpoint('weight.hdf5', save_best_only=True, monitor='val_loss', mode='min')

In [None]:
# configure training: categorical cross-entropy loss, Adam optimizer,
# accuracy reported as metric
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# fit the classifier on the training features for 200 epochs, validating on the
# held-out features after every epoch; the checkpoint callback runs each epoch
model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=200,
    validation_data=(X_test, y_test),
    callbacks=[mcp_save],
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x7f94f29b07d0>

Evaluating our Video Classification Model

In [None]:
# VGG16 with ImageNet weights and without its classification layers, used to
# extract features from the test frames
base_model = VGG16(
    include_top=False,
    weights='imagenet',
)

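This is the pre-trained base model. It is used to extract features from the test frames, and the classifier that runs on top of those features is rebuilt below.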

In [None]:
#defining the model architecture
# It has to match the architecture used for training layer by layer, otherwise
# the saved weights cannot be loaded into it.
model = Sequential()
model.add(Dense(1024, activation='relu', input_shape=(25088,)))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

# A freshly built model has random weights; restore the best weights that the
# checkpoint callback saved during training so the evaluation uses the trained model.
model.load_weights('weight.hdf5')

In [None]:
# configure the rebuilt model with the same loss, optimizer and metric as
# during training; run_eagerly=True makes Keras execute it eagerly
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    run_eagerly=True,
)

Creating the test data

In [None]:
# getting the test list (one "<Class>/<video file>" entry per line)
# The context manager closes the file as soon as it has been read.
with open("/content/drive/MyDrive/project2/testlist.txt", "r") as f:
    # splitlines() yields no trailing empty entry and blank lines are skipped,
    # so no row has to be dropped afterwards (the last video is kept even when
    # the file does not end with a newline)
    videos = [line for line in f.read().splitlines() if line.strip()]

# creating the dataframe
test = pd.DataFrame({'video_name': videos})
test_videos = test['video_name']
test.head()

Unnamed: 0,video_name
0,BodyWeightSquats/v_BodyWeightSquats_g01_c01.avi
1,BodyWeightSquats/v_BodyWeightSquats_g01_c02.avi
2,BodyWeightSquats/v_BodyWeightSquats_g01_c03.avi
3,BodyWeightSquats/v_BodyWeightSquats_g01_c04.avi
4,BodyWeightSquats/v_BodyWeightSquats_g02_c01.avi


In [None]:
# creating the tags
# Read the frame index from the project2 folder, which is where this notebook
# writes train_new.csv and where the training section reads it from.
train = pd.read_csv('/content/drive/MyDrive/project2/train_new.csv')
y = train['class']
# one column per class; the column names are used to translate a predicted
# class index back into a class name
y = pd.get_dummies(y)

Generating predictions for test videos

In [None]:
# creating two lists to store predicted and actual tags
predict = []
actual = []

TEST_VIDEO_DIR = '/content/drive/MyDrive/project2/Test/'
TEMP_FRAME_DIR = '/content/drive/MyDrive/Project/temp/'

# make sure the scratch folder exists before frames are written into it
os.makedirs(TEMP_FRAME_DIR, exist_ok=True)

# for loop to extract frames from each test video
for i in tqdm(range(test_videos.shape[0])):
    count = 0
    videoFile = test_videos[i]
    # entries look like "<Class>/<file>.avi": keep only the file name
    video_name = videoFile.split(' ')[0].split('/')[1]
    cap = cv2.VideoCapture(TEST_VIDEO_DIR + video_name)   # capturing the video from the given path
    # Keep every floor(fps)-th frame, i.e. roughly one frame per second.
    # A reported frame rate below 1 would floor to 0 and make the modulo below
    # fail, so fall back to a step of 1 in that case.
    frame_step = math.floor(cap.get(cv2.CAP_PROP_FPS)) or 1

    # removing all other files from the temp folder so that only the frames of
    # the current video are picked up below
    for stale_file in glob(TEMP_FRAME_DIR + '*'):
        os.remove(stale_file)

    try:
        while cap.isOpened():
            frameId = cap.get(cv2.CAP_PROP_POS_FRAMES)   # index of the frame about to be read
            ret, frame = cap.read()
            if not ret:
                break
            if frameId % frame_step == 0:
                # storing the frames of this particular video in temp folder
                filename = TEMP_FRAME_DIR + "_frame%d.jpg" % count
                count += 1
                cv2.imwrite(filename, frame)
    finally:
        # release the capture even if reading or writing a frame fails
        cap.release()

    # reading all the frames from temp folder
    frame_paths = glob(TEMP_FRAME_DIR + "*.jpg")
    if not frame_paths:
        # without frames there is nothing to classify; fail with a clear message
        # instead of an obscure error from the model
        raise RuntimeError("no frames could be extracted from test video " + video_name)

    prediction_images = []
    for frame_path in frame_paths:
        img = image.load_img(frame_path, target_size=(224,224,3))
        img = image.img_to_array(img)
        img = img/255
        prediction_images.append(img)

    # converting all the frames for a test video into numpy array
    prediction_images = np.array(prediction_images)
    # extracting features using pre-trained model
    prediction_images = base_model.predict(prediction_images)
    # converting features in one dimensional array
    prediction_images = prediction_images.reshape(prediction_images.shape[0], 7*7*512)
    # NOTE(review): during training the features were additionally divided by
    # the maximum of the training features before fitting; that scaling is not
    # applied here - confirm whether the same divisor should be used.

    # predicted class index of every frame: position of the largest softmax
    # output. (Thresholding the softmax output instead would give a one-hot
    # matrix rather than class indices.)
    prediction = np.argmax(model.predict(prediction_images), axis=1)
    # the video gets the class predicted for most of its frames
    # (on a tie the lowest class index wins)
    most_common_class = np.bincount(prediction, minlength=len(y.columns)).argmax()
    predict.append(y.columns.values[most_common_class])
    # appending the actual tag of the video (second '_'-separated field of the file name)
    actual.append(videoFile.split('/')[1].split('_')[1])


100%|██████████| 66/66 [03:28<00:00,  3.16s/it]


Evaluating the model

In [None]:
# checking the accuracy of the predicted tags
# share of test videos whose predicted tag equals the actual tag, in percent
# (arguments in the documented order: true labels first, predictions second)
accuracy_score(actual, predict)*100
