In [1]:
import os
import re
import math
import cv2
import pandas as pd
import numpy as np

from tqdm import tqdm
from glob import glob
from tools.settings import *
from keras.models import Sequential
from keras.preprocessing import image
from keras.layers import Dense, Dropout
from keras.applications.vgg16 import VGG16
from tensorflow.keras.utils import to_categorical
from tools.train_val_test_spliter import split

In [2]:
# To split dataset if already splits folder already exits no need to run it
split()

Splitting the ginen dataset into Train Test=0.3 Validation=0.2
Done


In [3]:
train = pd. read_csv(os.path.join(dataset_path, "train.csv"))
test = pd. read_csv(os.path.join(dataset_path, "test.csv"))
val = pd. read_csv(os.path.join(dataset_path, "val.csv"))

In [4]:
train.head()

Unnamed: 0,Video_url,action
0,H:/THESIS/dataset\slap/slap_130.mp4,slap
1,H:/THESIS/dataset\punch/punch_41.mp4,punch
2,H:/THESIS/dataset\kick/KICK_126.mp4,kick
3,H:/THESIS/dataset\kick/kick_168.mp4,kick
4,H:/THESIS/dataset\kick/KICK_155.mp4,kick


In [5]:
test.head()

Unnamed: 0,Video_url,action
0,H:/THESIS/dataset\slap/Slap_87.mp4,slap
1,H:/THESIS/dataset\kick/KICK_151.mp4,kick
2,H:/THESIS/dataset\punch/punch_183.mp4,punch
3,H:/THESIS/dataset\punch/punch_20.mp4,punch
4,H:/THESIS/dataset\punch/punch_155.mp4,punch


In [6]:
val.head()

Unnamed: 0,Video_url,action
0,H:/THESIS/dataset\punch/punch_163.mp4,punch
1,H:/THESIS/dataset\punch/punch_171.wmv,punch
2,H:/THESIS/dataset\slap/slap_16.mp4,slap
3,H:/THESIS/dataset\punch/punch_113.mp4,punch
4,H:/THESIS/dataset\kick/kick_97.mp4,kick


In [7]:
print(train.shape)
print(test.shape)
print(val.shape)

(283, 2)
(166, 2)
(111, 2)


In [8]:
def convert_to_frame(data, folder_name):
    '''
    Generated filenames format dataset_path/folder_name/video_name_frame{number}_action.jpg
    '''
    directory = os.path.join(dataset_path, folder_name)
    if not os.path.exists(directory):
        os.makedirs(directory)
    for i in tqdm(range(data.shape[0])):
        video_file = data['Video_url'][i]
        action = data['action'][i]
        video_name_list = video_file.split('/')[-1].split('.')
        video_name_list = video_name_list[:-1]
        video_name = ""
        for n in video_name_list:
            video_name += n
        # capturing the video from the given path
        capture = cv2.VideoCapture(video_file) 
        #frame rate
        frame_rate = capture.get(5)
        count = 0
        while(capture.isOpened()):
            #current frame number
            frame_id = capture.get(1) 
            read_correctly, frame = capture.read()
            if not read_correctly:
                break
            if (frame_id % math.floor(frame_rate) == 0):
                # storing the frames in a new folder named train_1
                filename = directory + "/" + video_name + "_frame{}_".format(count) + action +".jpg"
                count += 1
                cv2.imwrite(filename, frame)
        capture.release()
    print("Successfully Converted")

In [9]:
convert_to_frame(train, train_frames_path_name)

100%|████████████████████████████████████████████████████████████████████████████████| 283/283 [03:03<00:00,  1.54it/s]

Successfully Converted





In [10]:
convert_to_frame(val, val_frames_path_name)

100%|████████████████████████████████████████████████████████████████████████████████| 111/111 [01:13<00:00,  1.51it/s]

Successfully Converted





In [12]:
def create_paths_csv(directory, file_name):
    images = os.listdir(directory)
    images_path_list = []
    images_action_list = [] 
    for image in images:
        images_path_list.append(directory + image)
        images_action_list.append(image.split('.')[0].split('_')[-1])
    df = pd.DataFrame()
    df['image'] = images_path_list
    df['action'] = images_action_list
    print(os.path.join(dataset_path, file_name+'.csv'))
    df.to_csv(os.path.join(dataset_path, file_name+'.csv'), index=False)

In [13]:
create_paths_csv(train_frames_path, train_frames_path_name)

H:/THESIS/dataset\train_frames.csv


In [14]:
create_paths_csv(val_frames_path, val_frames_path_name)

H:/THESIS/dataset\val_frames.csv


In [15]:
train_image = pd.read_csv(os.path.join(dataset_path, 'train_frames.csv'))
train_image.head()

Unnamed: 0,image,action
0,H:/THESIS/dataset\train_frames\kick_02_frame0_...,kick
1,H:/THESIS/dataset\train_frames\kick_02_frame1_...,kick
2,H:/THESIS/dataset\train_frames\kick_04_frame0_...,kick
3,H:/THESIS/dataset\train_frames\kick_04_frame1_...,kick
4,H:/THESIS/dataset\train_frames\kick_04_frame2_...,kick


In [16]:
print(train_image.shape)

(1164, 2)


In [17]:
val_image = pd.read_csv(os.path.join(dataset_path, 'val_frames.csv'))
val_image.head()

Unnamed: 0,image,action
0,H:/THESIS/dataset\val_frames\kick_103_frame0_k...,kick
1,H:/THESIS/dataset\val_frames\kick_103_frame1_k...,kick
2,H:/THESIS/dataset\val_frames\kick_103_frame2_k...,kick
3,H:/THESIS/dataset\val_frames\kick_104_frame0_k...,kick
4,H:/THESIS/dataset\val_frames\kick_104_frame1_k...,kick


In [18]:
print(val_image.shape)

(479, 2)


In [19]:
action_values = list(train_image['action'].unique())
action_values

['kick', 'punch', 'slap']

In [20]:
def create_class_columns(df):
    for value in action_values:
        df[value] = np.where(df['action'].str.contains(value), 1, 0)
    df.drop('action', axis='columns', inplace=True)

In [21]:
create_class_columns(train_image)
train_image.head()

Unnamed: 0,image,kick,punch,slap
0,H:/THESIS/dataset\train_frames\kick_02_frame0_...,1,0,0
1,H:/THESIS/dataset\train_frames\kick_02_frame1_...,1,0,0
2,H:/THESIS/dataset\train_frames\kick_04_frame0_...,1,0,0
3,H:/THESIS/dataset\train_frames\kick_04_frame1_...,1,0,0
4,H:/THESIS/dataset\train_frames\kick_04_frame2_...,1,0,0


In [22]:
create_class_columns(val_image)
val_image.head()

Unnamed: 0,image,kick,punch,slap
0,H:/THESIS/dataset\val_frames\kick_103_frame0_k...,1,0,0
1,H:/THESIS/dataset\val_frames\kick_103_frame1_k...,1,0,0
2,H:/THESIS/dataset\val_frames\kick_103_frame2_k...,1,0,0
3,H:/THESIS/dataset\val_frames\kick_104_frame0_k...,1,0,0
4,H:/THESIS/dataset\val_frames\kick_104_frame1_k...,1,0,0


In [23]:
def convert_to_array_and_split(image_data):
    image_value = []
    for i in tqdm(range(image_data.shape[0])):
        img = image.load_img(image_data['image'][i], target_size=(224,224,3))
        img = image.img_to_array(img)
        # normalizing the pixel value
        img = img / 255
        image_value.append(img)

    X = np.array(image_value)
    y = image_data
    y.drop('image', axis='columns', inplace=True)
    return X, y

In [24]:
X_train, y_train = convert_to_array_and_split(train_image)
print(X_train.shape)

100%|██████████████████████████████████████████████████████████████████████████████| 1164/1164 [00:12<00:00, 90.71it/s]


(1164, 224, 224, 3)


In [25]:
X_val, y_val = convert_to_array_and_split(val_image)
print(X_val.shape)

100%|████████████████████████████████████████████████████████████████████████████████| 479/479 [00:05<00:00, 93.35it/s]


(479, 224, 224, 3)


In [26]:
y_train.head()

Unnamed: 0,kick,punch,slap
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0


In [27]:
y_val.head()

Unnamed: 0,kick,punch,slap
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0


In [28]:
'''This model was trained on a dataset that has 1,000 classes. 
include_top = False will remove the last layer of this model so that we can tune it as per our need.
'''
base_model = VGG16(weights='imagenet', include_top=False)

In [29]:
# extracting features for training frames
X_train = base_model.predict(X_train)
X_train.shape

(1164, 7, 7, 512)

In [30]:
X_val = base_model.predict(X_val)
X_val.shape

(479, 7, 7, 512)

In [31]:
# reshaping the training as well as validation frames in single dimension
X_train = X_train.reshape(1164, 7*7*512)
X_val = X_val.reshape(479, 7*7*512)

In [32]:
# normalizing the pixel values
max_pixel = X_train.max()
X_train = X_train / max_pixel
X_val = X_val / max_pixel
print(X_train.shape)
print(X_val.shape)

(1164, 25088)
(479, 25088)


In [33]:
# The input shape will be 25,088
model = Sequential()
model.add(Dense(1024, activation='relu', input_shape=(25088,)))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1024)              25691136  
_________________________________________________________________
dropout (Dropout)            (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 128)               3

In [34]:
# defining a function to save the weights of best model
from keras.callbacks import ModelCheckpoint
mcp_weight = ModelCheckpoint('weight.hdf5', save_best_only=True, monitor='val_loss', mode='min')

In [35]:
model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), callbacks=[mcp_weight], batch_size=128)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x2398030ae50>

In [36]:
model.load_weights("weight.hdf5")
# model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1024)              25691136  
_________________________________________________________________
dropout (Dropout)            (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 128)               3

In [37]:
from scipy import stats as s
predict = []
actual = []
if not os.path.exists(test_frames_path):
    os.makedirs(test_frames_path)

for i in tqdm(range(test.shape[0])):
    video_file = test['Video_url'][i]
    action = test['action'][i]
    video_name_list = video_file.split('/')[-1].split('.')
    video_name_list = video_name_list[:-1]
    video_name = ""
    for n in video_name_list:
        video_name += n
    # capturing the video from the given path
    capture = cv2.VideoCapture(video_file) 
    #frame rate
    frame_rate = capture.get(5)
    count = 0
    files = glob(test_frames_path + '/*')
    #removing all files from folder
    for f in files:
        os.remove(f)
    while(capture.isOpened()):
        #current frame number
        frame_id = capture.get(1) 
        read_correctly, frame = capture.read()
        if not read_correctly:
            break
        if (frame_id % math.floor(frame_rate) == 0):
            # storing the frames in a new folder named train_1
            filename = test_frames_path + "/" + video_name + "_frame{}_".format(count) + action +".jpg"
            count += 1
            cv2.imwrite(filename, frame)
    capture.release()
    
    # reading all the frames from temp folder
    images = glob(test_frames_path + '/*.jpg')
    prediction_images = []
    for i in range(len(images)):
        img = image.load_img(images[i], target_size=(224,224,3))
        img = image.img_to_array(img)
        img = img / 255
        prediction_images.append(img)
        
    # converting all the frames for a test video into numpy array
    prediction_images = np.array(prediction_images)
    # extracting features using pre-trained model
    prediction_images = base_model.predict(prediction_images)
    # converting features in one dimensional array
    prediction_images = prediction_images.reshape(prediction_images.shape[0], 7*7*512)
    # predicting tags for each array
    prediction = np.argmax(model.predict(prediction_images), axis=-1)
    # appending the mode of predictions in predict list to assign the tag to the video
    predict.append(y_train.columns.values[s.mode(prediction)[0][0]])
    # appending the actual tag of the video
    actual.append(action)

100%|████████████████████████████████████████████████████████████████████████████████| 166/166 [03:18<00:00,  1.20s/it]


In [38]:
y_train.columns.values[0]

'kick'

In [39]:
from sklearn.metrics import accuracy_score
accuracy_score(predict, actual)*100

69.27710843373494

In [63]:
# for i in range(0, len(predict)):
#     print(predict[i] + " " + actual[i])

In [None]:
#CNN Model