In [5]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing import image
import numpy as np
import pandas as pd
from tqdm import tqdm
from keras.applications.resnet_v2 import ResNet101V2
import cv2
import math
import os
from glob import glob
from scipy import stats as s
from sklearn.externals import joblib 

In [6]:
# Headless ImageNet-pretrained ResNet101V2 used as a fixed feature extractor for RGB frames.
# Its output is later flattened to 7*7*2048 values per 224x224 frame.
base_model_RGB = ResNet101V2(include_top=False, weights='imagenet')

In [7]:
# Classification head for the RGB stream.
# Input: one flattened backbone feature vector (100352 = 7*7*2048 values) per frame.
# Output: a 51-way softmax, one probability per action class.
model_RGB = Sequential([
    Dense(1024, activation='relu', input_shape=(100352,)),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(51, activation='softmax'),
])

In [8]:
# Restore the trained parameters of the RGB head from disk.
# The layer stack defined for model_RGB must match the one the weights were saved from.
model_RGB.load_weights(filepath="../Models/weightResNet101V2.hdf5")

In [9]:
# Compile the RGB head (categorical cross-entropy, Adam, accuracy metric).
model_RGB.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

In [10]:
# Second headless ImageNet-pretrained ResNet101V2, used as the feature extractor
# for the optical-flow images (kept separate from the RGB backbone).
base_model_OF = ResNet101V2(include_top=False, weights='imagenet')

In [11]:
# Classification head for the optical-flow stream; same layer stack as the RGB head.
# Input: one flattened backbone feature vector (100352 = 7*7*2048 values) per flow image.
# Output: a 51-way softmax, one probability per action class.
model_OF = Sequential([
    Dense(1024, activation='relu', input_shape=(100352,)),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(51, activation='softmax'),
])

In [12]:
# Restore the trained parameters of the optical-flow head from disk.
# The layer stack defined for model_OF must match the one the weights were saved from.
model_OF.load_weights(filepath="../Models/weightResNet101V2_OF.hdf5")

In [13]:
# Compile the optical-flow head (categorical cross-entropy, Adam, accuracy metric).
model_OF.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

In [14]:
# Index the test set: every sub-folder of `location` is one class, and every file
# inside it is one video, recorded as "<class folder>/<file name>".
location = '../data/test'

# Class names in sorted order (one per sub-folder).
classes = sorted(os.listdir(location))

# Relative video paths, grouped by class and sorted within each class.
videos = [
    class_dir + '/' + video_file
    for class_dir in classes
    for video_file in sorted(os.listdir(location + '/' + class_dir))
]

test = pd.DataFrame({'video_name': videos})
print(classes)
test_videos = test['video_name']
test.head()

['brush_hair', 'cartwheel', 'catch', 'chew', 'clap', 'climb', 'climb_stairs', 'dive', 'draw_sword', 'dribble', 'drink', 'eat', 'fall_floor', 'fencing', 'flic_flac', 'golf', 'handstand', 'hit', 'hug', 'jump', 'kick', 'kick_ball', 'kiss', 'laugh', 'pick', 'pour', 'pullup', 'punch', 'push', 'pushup', 'ride_bike', 'ride_horse', 'run', 'shake_hands', 'shoot_ball', 'shoot_bow', 'shoot_gun', 'sit', 'situp', 'smile', 'smoke', 'somersault', 'stand', 'swing_baseball', 'sword', 'sword_exercise', 'talk', 'throw', 'turn', 'walk', 'wave']


Unnamed: 0,video_name
0,brush_hair/April_09_brush_hair_u_nm_np1_ba_goo...
1,brush_hair/April_09_brush_hair_u_nm_np1_ba_goo...
2,brush_hair/April_09_brush_hair_u_nm_np1_ba_goo...
3,brush_hair/Blonde_being_brushed_brush_hair_f_n...
4,brush_hair/Blonde_being_brushed_brush_hair_u_c...


In [15]:
# Build the class-name lookup: one-hot encode the training labels so that the
# columns of `y` list the class names in the order produced by get_dummies.
train = pd.read_csv('../data/train.csv')
y = pd.get_dummies(train['class'])

In [16]:
# Two-stream inference over the test videos.
# For each video: sample RGB frames and optical-flow images into a scratch folder,
# classify every sampled image with its stream's network, then label the video with
# the class that received the most per-image votes across both streams.

# creating two lists to store predicted and actual tags
predict = []
actual = []

# Scratch folder that holds the sampled frames of the video currently being processed.
TEMP_DIR = '../data/temp'


def _clear_temp_dir(temp_dir):
    """Delete every file in `temp_dir` so frames of a previous video are not re-read."""
    for stale_path in glob(temp_dir + '/*'):
        os.remove(stale_path)


def _sampling_step(frame_rate):
    """Return the frame-sampling interval for `frame_rate` as an int that is at least 1.

    A rate below 1 floors to 0, which would raise ZeroDivisionError in the
    `frame_id % step` test; clamping keeps such videos processable.
    """
    return max(1, math.floor(frame_rate))


def _save_rgb_frames(video_path, temp_dir):
    """Write every `floor(fps)`-th frame of `video_path` into `temp_dir` as a JPEG."""
    cap = cv2.VideoCapture(video_path)
    try:
        step = _sampling_step(cap.get(cv2.CAP_PROP_FPS))
        saved = 0
        while cap.isOpened():
            # Index of the frame that the next read() call will return.
            frame_id = cap.get(cv2.CAP_PROP_POS_FRAMES)
            ret, frame = cap.read()
            if not ret:
                break
            if frame_id % step == 0:
                cv2.imwrite(temp_dir + '/' + "_frame%d.jpg" % saved, frame)
                saved += 1
    finally:
        # Release the capture even if decoding or writing fails.
        cap.release()


def _save_flow_frames(video_path, temp_dir):
    """Write optical-flow images for every `floor(fps / 2)`-th frame into `temp_dir`.

    Flow is computed with Farneback's method and rendered as an image whose hue
    encodes direction and whose value encodes normalised magnitude. Nothing is
    written when the first frame cannot be read.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        step = _sampling_step(cap.get(cv2.CAP_PROP_FPS) / 2)
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        ret, first_frame = cap.read()
        if not ret:
            return
        # NOTE(review): the reference frame is never advanced, so flow is always
        # measured against the FIRST frame rather than the previous sample. Kept
        # unchanged because the trained weights presumably expect this - confirm
        # against the extraction code used for training.
        reference_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
        hsv = np.zeros_like(first_frame)
        hsv[..., 1] = 255  # full saturation

        frames_read = 1
        while frames_read < length:
            frame_id = cap.get(cv2.CAP_PROP_POS_FRAMES)
            ret, frame = cap.read()
            frames_read += 1
            if not ret:
                break
            if frame_id % step == 0:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                flow = cv2.calcOpticalFlowFarneback(reference_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
                mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
                hsv[..., 0] = ang * 180 / np.pi / 2
                hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
                flow_bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
                # The frame index keeps file names unique within one video.
                cv2.imwrite(temp_dir + '/' + "_flow%d.jpg" % int(frame_id), flow_bgr)
    finally:
        cap.release()


def _load_frames(temp_dir):
    """Load every JPEG in `temp_dir` as a 224x224 array scaled to [0, 1]; returns one stacked array."""
    frames = []
    for frame_path in glob(temp_dir + '/*.jpg'):
        img = image.load_img(frame_path, target_size=(224, 224))
        frames.append(image.img_to_array(img) / 255)
    return np.array(frames)


def _predict_frame_classes(base_model, head_model, frames):
    """Return the predicted class index of each image in `frames`.

    `base_model` extracts features, which are flattened to 7*7*2048 values per image
    and fed to `head_model`. An empty `frames` array yields an empty result.
    """
    if len(frames) == 0:
        return np.empty(0, dtype=np.int64)
    features = base_model.predict(frames)
    features = features.reshape(features.shape[0], 7*7*2048)
    # `Sequential.predict_classes` no longer exists in newer Keras releases;
    # argmax over the softmax output gives the same class indices.
    return np.argmax(head_model.predict(features), axis=-1)


# Classify every test video.
for videoFile in tqdm(test_videos):
    folder_name, clip_name = videoFile.split(' ')[0].split('/')[:2]
    video_path = '../data/test/' + folder_name + '/' + clip_name

    # RGB stream
    _clear_temp_dir(TEMP_DIR)
    _save_rgb_frames(video_path, TEMP_DIR)
    prediction_RGB = _predict_frame_classes(base_model_RGB, model_RGB, _load_frames(TEMP_DIR))

    # Optical-flow stream
    _clear_temp_dir(TEMP_DIR)
    _save_flow_frames(video_path, TEMP_DIR)
    prediction_OF = _predict_frame_classes(base_model_OF, model_OF, _load_frames(TEMP_DIR))

    # Majority vote over the per-image predictions of both streams.
    prediction = np.concatenate((prediction_RGB, prediction_OF))
    if prediction.size == 0:
        raise RuntimeError("no frames could be read from " + video_path)
    # bincount().argmax() returns the most frequent class index and, on ties, the
    # smallest one - the same choice scipy.stats.mode makes.
    predict.append(y.columns.values[np.bincount(prediction).argmax()])

    # appending the actual tag of the video (its class folder name)
    actual.append(videoFile.split('/')[0])

100%|██████████| 1530/1530 [3:10:56<00:00,  5.36s/it]  


In [17]:
# Overall accuracy of the per-video predictions, as a percentage.
from sklearn.metrics import accuracy_score
accuracy_score(y_true=actual, y_pred=predict) * 100

28.169934640522875

In [18]:
# Per-class precision, recall and F1 of the video-level predictions.
from sklearn import metrics
report_text = metrics.classification_report(actual, predict)
print(report_text)

  'precision', 'predicted', average, warn_for)


                precision    recall  f1-score   support

    brush_hair       0.20      0.43      0.27        30
     cartwheel       0.23      0.10      0.14        30
         catch       0.64      0.47      0.54        30
          chew       0.30      0.60      0.40        30
          clap       0.00      0.00      0.00        30
         climb       0.21      0.63      0.31        30
  climb_stairs       0.52      0.40      0.45        30
          dive       0.35      0.20      0.26        30
    draw_sword       0.34      0.47      0.39        30
       dribble       0.71      0.80      0.75        30
         drink       0.05      0.03      0.04        30
           eat       0.18      0.57      0.28        30
    fall_floor       0.23      0.27      0.25        30
       fencing       0.53      0.30      0.38        30
     flic_flac       0.25      0.07      0.11        30
          golf       0.36      0.83      0.50        30
     handstand       0.15      0.20      0.17  

In [19]:
# Confusion matrix of the video-level predictions: rows are actual classes,
# columns are predicted classes.
cm = metrics.confusion_matrix(y_true=actual, y_pred=predict)
print(cm)

[[13  0  0 ...  0  0  0]
 [ 1  3  0 ...  0  0  0]
 [ 1  0 14 ...  0  0  0]
 ...
 [ 3  0  0 ...  0  3  0]
 [ 1  0  0 ...  0  7  0]
 [ 1  1  0 ...  0  0  0]]


In [20]:
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of the confusion matrix `cm`, titled with the overall accuracy.
plt.figure(figsize=(9,9))
# The cells are integer counts, so annotate them as integers instead of 3-decimal floats.
sns.heatmap(cm, annot=True, fmt="d", linewidths=.5, square=True, cmap="Blues_r");
plt.ylabel("Actual label");
plt.xlabel("Predicted label");
# accuracy_score takes (y_true, y_pred); the value itself does not depend on the order.
all_sample_title="Accuracy score: {0}".format(metrics.accuracy_score(actual, predict)*100)
# The trailing semicolon keeps the Text(...) repr out of the cell output.
plt.title(all_sample_title,size=15);

Text(0.5, 1, 'Accuracy score: 28.169934640522875')