In [1]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing import image
import numpy as np
import pandas as pd
from tqdm import tqdm
from keras.applications.resnet_v2 import ResNet101V2
from keras.applications.inception_v3 import InceptionV3
import cv2
import math
import os
from glob import glob
from scipy import stats as s
from sklearn.externals import joblib 

Using TensorFlow backend.


In [2]:
# ImageNet-pretrained ResNet101V2 with its classification top removed;
# the prediction loop uses it only to turn frames into convolutional features.
base_model_ResNet101V2 = ResNet101V2(
    include_top=False,
    weights='imagenet',
)

In [3]:
# Classification head that sits on top of the flattened ResNet101V2 features:
# Dense 1024 -> 512 -> 256 -> 128 (ReLU), each followed by 50% dropout,
# then a 51-way softmax. Layers are added in exactly this order so the saved
# weights line up when they are loaded.
model_ResNet101V2 = Sequential()
# 100352 == 7*7*2048, the flattened feature length the prediction loop feeds in.
model_ResNet101V2.add(Dense(1024, activation='relu', input_shape=(100352,)))
model_ResNet101V2.add(Dropout(0.5))
for hidden_units in (512, 256, 128):
    model_ResNet101V2.add(Dense(hidden_units, activation='relu'))
    model_ResNet101V2.add(Dropout(0.5))
model_ResNet101V2.add(Dense(51, activation='softmax'))

In [4]:
# Restore the previously trained head weights from disk (path is relative to
# the notebook's working directory).
model_ResNet101V2.load_weights(
    "../Models/weightResNet101V2.hdf5"
)

In [5]:
# Compile the head with the multi-class cross-entropy loss, the Adam optimizer
# and accuracy as the reported metric.
model_ResNet101V2.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [6]:
# ImageNet-pretrained InceptionV3 with its classification top removed;
# the prediction loop uses it only to turn frames into convolutional features.
base_model_InceptionV3 = InceptionV3(
    weights='imagenet',
    include_top=False,
)

In [7]:
# Classification head that sits on top of the flattened InceptionV3 features:
# Dense 1024 -> 512 -> 256 -> 128 (ReLU), each followed by 50% dropout,
# then a 51-way softmax. Layers are added in exactly this order so the saved
# weights line up when they are loaded.
model_InceptionV3 = Sequential()
# 51200 == 5*5*2048, the flattened feature length the prediction loop feeds in.
model_InceptionV3.add(Dense(1024, activation='relu', input_shape=(51200,)))
model_InceptionV3.add(Dropout(0.5))
for hidden_units in (512, 256, 128):
    model_InceptionV3.add(Dense(hidden_units, activation='relu'))
    model_InceptionV3.add(Dropout(0.5))
model_InceptionV3.add(Dense(51, activation='softmax'))

In [8]:
# Restore the previously trained head weights from disk (path is relative to
# the notebook's working directory).
model_InceptionV3.load_weights(
    "../Models/weightInceptionV3.hdf5"
)

In [9]:
# Compile the head with the multi-class cross-entropy loss, the Adam optimizer
# and accuracy as the reported metric.
model_InceptionV3.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Index the test set: each entry under `location` is taken as a class folder,
# and each file inside it becomes one "<class>/<file>" row in `test`.
location = '../data/test'
classes = sorted(os.listdir(location))
videos = [
    class_dir + '/' + clip
    for class_dir in classes
    for clip in sorted(os.listdir(location + '/' + class_dir))
]

test = pd.DataFrame()
test['video_name'] = videos
print(classes)

# Series of relative video paths consumed by the prediction loop.
test_videos = test['video_name']
test.head()

['brush_hair', 'cartwheel', 'catch', 'chew', 'clap', 'climb', 'climb_stairs', 'dive', 'draw_sword', 'dribble', 'drink', 'eat', 'fall_floor', 'fencing', 'flic_flac', 'golf', 'handstand', 'hit', 'hug', 'jump', 'kick', 'kick_ball', 'kiss', 'laugh', 'pick', 'pour', 'pullup', 'punch', 'push', 'pushup', 'ride_bike', 'ride_horse', 'run', 'shake_hands', 'shoot_ball', 'shoot_bow', 'shoot_gun', 'sit', 'situp', 'smile', 'smoke', 'somersault', 'stand', 'swing_baseball', 'sword', 'sword_exercise', 'talk', 'throw', 'turn', 'walk', 'wave']


Unnamed: 0,video_name
0,brush_hair/April_09_brush_hair_u_nm_np1_ba_goo...
1,brush_hair/April_09_brush_hair_u_nm_np1_ba_goo...
2,brush_hair/April_09_brush_hair_u_nm_np1_ba_goo...
3,brush_hair/Blonde_being_brushed_brush_hair_f_n...
4,brush_hair/Blonde_being_brushed_brush_hair_u_c...


In [11]:
# One-hot encode the training labels. Only the resulting column order is used
# later: it maps a predicted class index back to a class name.
train = pd.read_csv('../data/train.csv')
y = pd.get_dummies(train['class'])

In [12]:
# Per-video predicted and ground-truth class names; the two lists stay
# index-aligned so they can be passed straight to the sklearn metrics.
predict = []
actual = []
# Videos from which no frame could be extracted. They are left out of both
# lists above (and reported after the loop) instead of aborting the whole run.
skipped_videos = []

TEST_DIR = '../data/test'
TEMP_DIR = '../data/temp'


def extract_frames(video_path, out_dir):
    """Dump sampled frames of one video into ``out_dir`` as JPEG files.

    ``out_dir`` is created if missing and emptied first, so afterwards it
    holds frames of this video only. A frame is kept whenever its index is a
    multiple of the integer part of the reported FPS (about one per second).

    Args:
        video_path: Path of the video file to decode.
        out_dir: Scratch directory that receives ``_frame<N>.jpg`` files.

    Returns:
        Number of frames selected for writing.
    """
    os.makedirs(out_dir, exist_ok=True)
    for stale_file in glob(os.path.join(out_dir, '*')):
        os.remove(stale_file)

    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Some containers report 0 (or NaN) FPS; fall back to keeping every
        # frame rather than dividing by zero in the modulo below.
        step = int(fps) if fps >= 1 else 1
        count = 0
        while cap.isOpened():
            # Index of the frame that the next read() will return.
            frame_id = cap.get(cv2.CAP_PROP_POS_FRAMES)
            ret, frame = cap.read()
            if not ret:
                break
            if frame_id % step == 0:
                cv2.imwrite(os.path.join(out_dir, '_frame%d.jpg' % count), frame)
                count += 1
    finally:
        # Release the decoder even if reading or writing a frame fails.
        cap.release()
    return count


def load_frames(frame_dir):
    """Load every JPEG in ``frame_dir`` as a 224x224 RGB array scaled to [0, 1].

    Returns:
        Array of shape (n_frames, 224, 224, 3); empty when there are no frames.
    """
    frame_paths = sorted(glob(os.path.join(frame_dir, '*.jpg')))
    frames = [
        image.img_to_array(image.load_img(path, target_size=(224, 224))) / 255
        for path in frame_paths
    ]
    return np.array(frames)


def predict_frame_classes(base_model, head_model, frames):
    """Return the predicted class index of every frame.

    Frames go through the convolutional ``base_model``, the features are
    flattened per frame, and ``head_model`` scores them. The arg-max over the
    softmax output replaces ``Sequential.predict_classes``, which newer Keras
    releases no longer provide.
    """
    features = base_model.predict(frames)
    features = features.reshape(features.shape[0], -1)
    return np.argmax(head_model.predict(features), axis=-1)


for video_name in tqdm(test_videos):
    # Frames take a round-trip through JPEG files on disk.
    # NOTE(review): presumably this mirrors how the training frames were
    # produced; confirm before replacing it with in-memory decoding.
    extract_frames(os.path.join(TEST_DIR, video_name), TEMP_DIR)
    test_images = load_frames(TEMP_DIR)

    if len(test_images) == 0:
        # Unreadable or empty video: nothing to vote on.
        skipped_videos.append(video_name)
        continue

    # Pool the per-frame votes of both networks and take the most frequent
    # class index. bincount(...).argmax() picks the smallest index on ties,
    # the same tie-break scipy.stats.mode uses.
    votes = np.concatenate((
        predict_frame_classes(base_model_ResNet101V2, model_ResNet101V2, test_images),
        predict_frame_classes(base_model_InceptionV3, model_InceptionV3, test_images),
    ))
    predict.append(y.columns.values[np.bincount(votes).argmax()])

    # Ground truth is the class folder the video lives in.
    actual.append(video_name.split('/')[0])

if skipped_videos:
    print('Skipped %d video(s) with no readable frames: %s'
          % (len(skipped_videos), skipped_videos))

100%|██████████| 1530/1530 [44:17<00:00,  1.32s/it] 


In [13]:
# Percentage of test videos whose predicted label equals the folder-derived
# label. Arguments follow sklearn's (y_true, y_pred) order, matching the
# classification report and confusion matrix cells; plain accuracy is
# symmetric, so the value is unchanged.
from sklearn.metrics import accuracy_score
accuracy_score(actual, predict)*100

37.84313725490196

In [14]:
# Per-class precision / recall / F1 of the video-level predictions.
from sklearn import metrics

report = metrics.classification_report(actual, predict)
print(report)

  'precision', 'predicted', average, warn_for)


                precision    recall  f1-score   support

    brush_hair       0.28      0.50      0.36        30
     cartwheel       0.00      0.00      0.00        30
         catch       0.44      0.47      0.45        30
          chew       0.22      0.60      0.32        30
          clap       0.12      0.10      0.11        30
         climb       0.37      0.77      0.49        30
  climb_stairs       0.48      0.70      0.57        30
          dive       0.29      0.33      0.31        30
    draw_sword       0.43      0.67      0.52        30
       dribble       0.56      0.93      0.70        30
         drink       0.08      0.30      0.13        30
           eat       0.24      0.50      0.32        30
    fall_floor       0.00      0.00      0.00        30
       fencing       0.50      0.27      0.35        30
     flic_flac       0.44      0.13      0.21        30
          golf       0.33      0.90      0.48        30
     handstand       0.23      0.43      0.30  

In [15]:
# Confusion matrix of the video-level predictions: first argument is the
# ground truth, second the predictions.
cm = metrics.confusion_matrix(y_true=actual, y_pred=predict)
print(cm)

[[15  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]
 [ 0  0 14 ...  0  0  0]
 ...
 [ 3  0  0 ...  0  3  2]
 [ 2  0  0 ...  0  8  1]
 [ 1  0  0 ...  0  0  1]]


In [16]:
import matplotlib.pyplot as plt
import seaborn as sns

# With no explicit `labels`, confusion_matrix orders rows/columns by the sorted
# union of the labels present in either list; rebuild that order for the ticks.
cm_labels = sorted(set(actual) | set(predict))

# A 51x51 matrix with per-cell annotations is illegible on a 9x9 inch canvas.
fig, ax = plt.subplots(figsize=(20, 20))
# Cells hold integer counts, so annotate with "d" rather than ".3f".
sns.heatmap(cm, annot=True, fmt="d", linewidths=.5, square=True, cmap="Blues_r",
            xticklabels=cm_labels, yticklabels=cm_labels, ax=ax)
ax.set_ylabel("Actual label")
ax.set_xlabel("Predicted label")
all_sample_title = "Accuracy score: {0}".format(metrics.accuracy_score(actual, predict)*100)
ax.set_title(all_sample_title, size=15);

Text(0.5, 1, 'Accuracy score: 37.84313725490196')