In [1]:
import os
import re
import math
import cv2
import pandas as pd
import numpy as np

from tqdm import tqdm
from glob import glob
from tools.settings import *
from keras.models import Sequential
from keras.preprocessing import image
from keras.layers import Dense, Dropout
from keras.applications.vgg16 import VGG16
from tensorflow.keras.utils import to_categorical
from tools.train_val_test_spliter import split

In [2]:
# Split the dataset into train / test / validation sets.
# Skip this cell if the split folders already exist.
split()

Splitting the ginen dataset into Train Test=0.3 Validation=0.2
Done


In [3]:
# Load the video manifests (video path + action label) for each split.
train, test, val = (
    pd.read_csv(os.path.join(dataset_path, csv_name))
    for csv_name in ("train.csv", "test.csv", "val.csv")
)

In [4]:
# Preview the training manifest.
train.head()

Unnamed: 0,Video_url,action
0,H:/THESIS/dataset\punch/punch_14.mp4,punch
1,H:/THESIS/dataset\kick/kick_117.mp4,kick
2,H:/THESIS/dataset\punch/punch_149.mp4,punch
3,H:/THESIS/dataset\kick/kick_164.mp4,kick
4,H:/THESIS/dataset\kick/KICK_127.mp4,kick


In [5]:
# Preview the test manifest.
test.head()

Unnamed: 0,Video_url,action
0,H:/THESIS/dataset\slap/slap_15.mp4,slap
1,H:/THESIS/dataset\punch/punch_134.mp4,punch
2,H:/THESIS/dataset\punch/punch_47.mp4,punch
3,H:/THESIS/dataset\kick/kick_101.mp4,kick
4,H:/THESIS/dataset\kick/kick_180.wmv,kick


In [6]:
# Preview the validation manifest.
val.head()

Unnamed: 0,Video_url,action
0,H:/THESIS/dataset\punch/punch_195.mp4,punch
1,H:/THESIS/dataset\punch/Punch_87.mp4,punch
2,H:/THESIS/dataset\punch/Punch_67.mp4,punch
3,H:/THESIS/dataset\kick/kick_51.mp4,kick
4,H:/THESIS/dataset\slap/Slap_169.mp4,slap


In [7]:
# (rows, columns) of the train, test and validation manifests, one per line.
print(train.shape, test.shape, val.shape, sep="\n")

(283, 2)
(166, 2)
(111, 2)


In [10]:
def convert_to_frame(data, folder_name):
    '''
    Extract frames from every video listed in ``data`` and save them as JPEGs.

    A frame is kept whenever its index is a multiple of the (floored) frame
    rate, i.e. roughly one frame per second of video.

    Generated filenames format
    dataset_path/folder_name/video_name_frame{number}_action.jpg, where
    ``video_name`` is the file's base name with the extension and any dots
    removed (so the first '.' in the generated name is the one before "jpg").

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'Video_url' and 'action'.
    folder_name : str
        Sub-folder of ``dataset_path`` that receives the frames; it is created
        when missing.
    '''
    directory = os.path.join(dataset_path, folder_name)
    os.makedirs(directory, exist_ok=True)

    # Iterate over the column values directly so the function does not depend
    # on the frame having a 0..n-1 index.
    videos = zip(data['Video_url'], data['action'])
    for video_file, action in tqdm(videos, total=data.shape[0]):
        # The manifests mix '/' and '\\' separators, so split on both.
        base_name = re.split(r'[\\/]', video_file)[-1]
        video_name = os.path.splitext(base_name)[0].replace('.', '')

        # capturing the video from the given path
        capture = cv2.VideoCapture(video_file)
        try:
            frame_rate = capture.get(cv2.CAP_PROP_FPS)
            # The reported rate can be 0 (or below 1) when it is unknown;
            # flooring it would make the modulo below divide by zero, so fall
            # back to keeping every frame in that case.
            frame_step = math.floor(frame_rate) if frame_rate >= 1 else 1
            count = 0
            while capture.isOpened():
                # index of the frame that the next read() returns
                frame_id = capture.get(cv2.CAP_PROP_POS_FRAMES)
                read_correctly, frame = capture.read()
                if not read_correctly:
                    break
                if frame_id % frame_step == 0:
                    filename = os.path.join(
                        directory,
                        "{}_frame{}_{}.jpg".format(video_name, count, action),
                    )
                    count += 1
                    cv2.imwrite(filename, frame)
        finally:
            # release the handle even if reading or writing a frame fails
            capture.release()
    print("Successfully Converted")

In [11]:
# Extract frames from the training videos.
convert_to_frame(train, train_frames_path_name)

100%|████████████████████████████████████████████████████████████████████████████████| 283/283 [02:53<00:00,  1.63it/s]

Successfully Converted





In [12]:
# Extract frames from the validation videos.
convert_to_frame(val, val_frames_path_name)

100%|████████████████████████████████████████████████████████████████████████████████| 111/111 [01:08<00:00,  1.62it/s]

Successfully Converted





In [13]:
def create_paths_csv(directory, file_name):
    '''
    Index the frame files found in ``directory`` and save the index as a CSV.

    The CSV is written to dataset_path/{file_name}.csv (the path is printed)
    and has two columns:
      * 'image'  - path of the frame file inside ``directory``
      * 'action' - label parsed from the file name: the last '_'-separated
                   token of the part before the first '.'

    Parameters
    ----------
    directory : str
        Folder containing the extracted frames.
    file_name : str
        Name of the CSV to create, without the '.csv' extension.
    '''
    # Sorted so the row order does not depend on the file system's listing order.
    frame_names = sorted(os.listdir(directory))
    df = pd.DataFrame({
        # os.path.join works whether or not ``directory`` ends with a separator.
        'image': [os.path.join(directory, name) for name in frame_names],
        'action': [name.split('.')[0].split('_')[-1] for name in frame_names],
    })
    csv_path = os.path.join(dataset_path, file_name + '.csv')
    print(csv_path)
    df.to_csv(csv_path, index=False)

In [14]:
# Index the extracted training frames; the printed line is the CSV that was written.
create_paths_csv(train_frames_path, train_frames_path_name)

H:/THESIS/dataset\train_frames.csv


In [15]:
# Index the extracted validation frames; the printed line is the CSV that was written.
create_paths_csv(val_frames_path, val_frames_path_name)

H:/THESIS/dataset\val_frames.csv


In [16]:
# Load the training frame index. The file name is built from
# train_frames_path_name (the value create_paths_csv was called with) instead of
# being hard-coded, so reading and writing keep pointing at the same file.
train_image = pd.read_csv(os.path.join(dataset_path, train_frames_path_name + '.csv'))
train_image.head()

Unnamed: 0,image,action
0,H:/THESIS/dataset\train_frames\kick_01_frame0_...,kick
1,H:/THESIS/dataset\train_frames\kick_01_frame1_...,kick
2,H:/THESIS/dataset\train_frames\kick_01_frame2_...,kick
3,H:/THESIS/dataset\train_frames\kick_01_frame3_...,kick
4,H:/THESIS/dataset\train_frames\kick_02_frame0_...,kick


In [17]:
# (number of frames, number of columns) in the training frame index.
print(train_image.shape)

(1732, 2)


In [18]:
# Load the validation frame index. The file name is built from
# val_frames_path_name (the value create_paths_csv was called with) instead of
# being hard-coded, so reading and writing keep pointing at the same file.
val_image = pd.read_csv(os.path.join(dataset_path, val_frames_path_name + '.csv'))
val_image.head()

Unnamed: 0,image,action
0,H:/THESIS/dataset\val_frames\kick_01_frame0_ki...,kick
1,H:/THESIS/dataset\val_frames\kick_01_frame1_ki...,kick
2,H:/THESIS/dataset\val_frames\kick_01_frame2_ki...,kick
3,H:/THESIS/dataset\val_frames\kick_01_frame3_ki...,kick
4,H:/THESIS/dataset\val_frames\kick_07_frame0_ki...,kick


In [19]:
# (number of frames, number of columns) in the validation frame index.
print(val_image.shape)

(819, 2)


In [20]:
# Distinct action labels found in the training frames; each becomes a class column.
action_values = train_image['action'].unique().tolist()
action_values

['kick', 'punch', 'slap']

In [21]:
def create_class_columns(df, values=None):
    '''
    One-hot encode the 'action' column of ``df`` in place.

    For every label a new 0/1 column named after the label is added, then the
    'action' column is dropped. Labels are compared for exact equality, so a
    label that happens to be a substring of another one (or that contains
    regex metacharacters) cannot set more than one class column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'action' column; modified in place.
    values : iterable of str, optional
        Labels to create columns for. Defaults to the notebook-level
        ``action_values`` list.

    Returns
    -------
    None
    '''
    if values is None:
        values = action_values
    for value in values:
        df[value] = np.where(df['action'] == value, 1, 0)
    df.drop('action', axis='columns', inplace=True)

In [22]:
# One-hot encode the training labels in place (one 0/1 column per action, 'action' dropped).
create_class_columns(train_image)
train_image.head()

Unnamed: 0,image,kick,punch,slap
0,H:/THESIS/dataset\train_frames\kick_01_frame0_...,1,0,0
1,H:/THESIS/dataset\train_frames\kick_01_frame1_...,1,0,0
2,H:/THESIS/dataset\train_frames\kick_01_frame2_...,1,0,0
3,H:/THESIS/dataset\train_frames\kick_01_frame3_...,1,0,0
4,H:/THESIS/dataset\train_frames\kick_02_frame0_...,1,0,0


In [23]:
# One-hot encode the validation labels in place (one 0/1 column per action, 'action' dropped).
create_class_columns(val_image)
val_image.head()

Unnamed: 0,image,kick,punch,slap
0,H:/THESIS/dataset\val_frames\kick_01_frame0_ki...,1,0,0
1,H:/THESIS/dataset\val_frames\kick_01_frame1_ki...,1,0,0
2,H:/THESIS/dataset\val_frames\kick_01_frame2_ki...,1,0,0
3,H:/THESIS/dataset\val_frames\kick_01_frame3_ki...,1,0,0
4,H:/THESIS/dataset\val_frames\kick_07_frame0_ki...,1,0,0


In [24]:
def convert_to_array_and_split(image_data):
    '''
    Load the frames listed in ``image_data`` and separate inputs from labels.

    Every file in the 'image' column is loaded, resized to 224x224, converted
    to an array and divided by 255.

    Parameters
    ----------
    image_data : pandas.DataFrame
        Must contain an 'image' column with file paths; all remaining columns
        are treated as labels. The frame is NOT modified, so the calling cell
        can safely be re-run.

    Returns
    -------
    X : numpy.ndarray
        Stacked image arrays, one entry per row of ``image_data``.
    y : pandas.DataFrame
        Copy of ``image_data`` without the 'image' column.
    '''
    image_value = []
    # Iterate over the column values so a non-default index does not matter.
    for image_path in tqdm(image_data['image'], total=image_data.shape[0]):
        # target_size is (height, width); the channel count is not part of it.
        img = image.load_img(image_path, target_size=(224, 224))
        img = image.img_to_array(img)
        # normalizing the pixel value
        img = img / 255
        image_value.append(img)

    X = np.array(image_value)
    # drop() returns a new frame, leaving the caller's DataFrame untouched.
    y = image_data.drop(columns='image')
    return X, y

In [25]:
# Load the training frames into an array and split off the label columns.
X_train, y_train = convert_to_array_and_split(train_image)
print(X_train.shape)

100%|█████████████████████████████████████████████████████████████████████████████| 1732/1732 [00:16<00:00, 102.05it/s]


(1732, 224, 224, 3)


In [26]:
# Load the validation frames into an array and split off the label columns.
X_val, y_val = convert_to_array_and_split(val_image)
print(X_val.shape)

100%|███████████████████████████████████████████████████████████████████████████████| 819/819 [00:07<00:00, 107.09it/s]


(819, 224, 224, 3)


In [27]:
# Training labels: one indicator column per action.
y_train.head()

Unnamed: 0,kick,punch,slap
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0


In [28]:
# Validation labels: one indicator column per action.
y_val.head()

Unnamed: 0,kick,punch,slap
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0


In [29]:
# VGG16 with ImageNet weights (a dataset that has 1,000 classes).
# include_top=False leaves out the fully-connected classifier layers at the top
# of the network, so our own classifier can be put on top of its features.
base_model = VGG16(weights='imagenet', include_top=False)

In [30]:
# extracting features for training frames
X_train = base_model.predict(X_train)
X_train.shape

(1732, 7, 7, 512)

In [31]:
# Extract VGG16 features for the validation frames.
# NOTE: X_val is overwritten with the features, so re-running this cell would
# feed the features (not the images) back into base_model.
X_val = base_model.predict(X_val)
X_val.shape

(819, 7, 7, 512)

In [32]:
# reshaping the training as well as validation frames in single dimension
X_train = X_train.reshape(1732, 7*7*512)
X_val = X_val.reshape(819, 7*7*512)

In [33]:
# normalizing the pixel values
max_pixel = X_train.max()
X_train = X_train / max_pixel
X_val = X_val / max_pixel
print(X_train.shape)
print(X_val.shape)

(1732, 25088)
(819, 25088)


In [34]:
# The input shape will be 25,088
model = Sequential()
model.add(Dense(1024, activation='relu', input_shape=(25088,)))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1024)              25691136  
_________________________________________________________________
dropout (Dropout)            (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 128)               3

In [35]:
# defining a function to save the weights of best model
from keras.callbacks import ModelCheckpoint
mcp_weight = ModelCheckpoint('weight.hdf5', save_best_only=True, monitor='val_loss', mode='min')

In [36]:
# Train the classifier on the extracted features, validating on the validation
# features after every epoch; mcp_weight checkpoints the best epoch.
model.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val), callbacks=[mcp_weight], batch_size=128)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x2578a6d45b0>

In [37]:
# Restore the checkpoint saved by mcp_weight (the epoch with the lowest val_loss).
model.load_weights("weight.hdf5")
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1024)              25691136  
_________________________________________________________________
dropout (Dropout)            (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 128)               3

In [None]:
from scipy import stats as s

# Video-level evaluation: for every test video, extract frames into a temporary
# folder, classify each frame and assign the most frequent frame prediction to
# the video.
predict = []          # predicted label per evaluated video
actual = []           # ground-truth label per evaluated video (same order as predict)
skipped_videos = []   # videos that produced no frame and were therefore not evaluated

# Class names in the column order of y_train, i.e. the order of the model outputs.
class_names = y_train.columns.values

os.makedirs(test_frames_path, exist_ok=True)

# Iterate over the column values so a non-default index on ``test`` does not matter.
for video_file, action in tqdm(zip(test['Video_url'], test['action']), total=test.shape[0]):
    # base name without extension (dots removed)
    video_name = "".join(video_file.split('/')[-1].split('.')[:-1])

    # removing all files from the temp folder so only this video's frames are in it
    for stale_file in glob(test_frames_path + '/*'):
        os.remove(stale_file)

    # capturing the video from the given path
    capture = cv2.VideoCapture(video_file)
    try:
        frame_rate = capture.get(cv2.CAP_PROP_FPS)
        # The reported rate can be 0 (or below 1) when it is unknown; flooring
        # it would make the modulo below divide by zero, so keep every frame then.
        frame_step = math.floor(frame_rate) if frame_rate >= 1 else 1
        count = 0
        while capture.isOpened():
            # index of the frame that the next read() returns
            frame_id = capture.get(cv2.CAP_PROP_POS_FRAMES)
            read_correctly, frame = capture.read()
            if not read_correctly:
                break
            if frame_id % frame_step == 0:
                # storing the frames in the temp folder
                filename = test_frames_path + "/" + video_name + "_frame{}_".format(count) + action + ".jpg"
                count += 1
                cv2.imwrite(filename, frame)
    finally:
        capture.release()

    # reading all the frames from temp folder
    images = glob(test_frames_path + '/*.jpg')
    if not images:
        # Nothing could be decoded (e.g. the file did not open). Predicting on an
        # empty batch would raise, so record the video and move on; predict and
        # actual stay aligned because neither gets an entry.
        tqdm.write("No frames extracted from {} - skipped".format(video_file))
        skipped_videos.append(video_file)
        continue

    # converting all the frames for a test video into one numpy array
    prediction_images = np.array([
        image.img_to_array(image.load_img(frame_path, target_size=(224, 224))) / 255
        for frame_path in images
    ])
    # extracting features using pre-trained model
    prediction_images = base_model.predict(prediction_images)
    # converting features in one dimensional array per frame
    prediction_images = prediction_images.reshape(prediction_images.shape[0], -1)
    # The classifier was trained on features divided by max_pixel, so the test
    # features must be scaled the same way before predicting.
    prediction_images = prediction_images / max_pixel
    # predicting the class index for each frame
    prediction = np.argmax(model.predict(prediction_images), axis=-1)
    # Most frequent class index. Depending on the SciPy version, mode() returns
    # either a 1-element array or a scalar; atleast_1d handles both.
    majority_class = np.atleast_1d(s.mode(prediction)[0])[0]
    predict.append(class_names[majority_class])
    # appending the actual tag of the video
    actual.append(action)

 98%|██████████████████████████████████████████████████████████████████████████████▌ | 163/166 [03:33<00:03,  1.17s/it]

In [None]:
# Name of the first label column, i.e. the class that prediction index 0 maps to.
y_train.columns.values[0]

In [None]:
from sklearn.metrics import accuracy_score
# Video-level accuracy in percent. accuracy_score takes (y_true, y_pred), so the
# ground-truth labels are passed first.
accuracy_score(actual, predict)*100

In [63]:
# Debug helper (disabled): uncomment to print "predicted actual" for every evaluated video.
# for i in range(0, len(predict)):
#     print(predict[i] + " " + actual[i])

In [None]:
#CNN Model