In [None]:
# Mount Google Drive at /content/drive. The later cells read the videos from,
# and write features, weights and the submission file to, paths under it.

from google.colab import drive
drive.mount('/content/drive')

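# Versions previously noted here: Keras 2.3.1, TensorFlow 2.2.0rc3. The pins below (Keras 2.2.4, TensorFlow 1.13.1) are what this notebook installs.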
!pip install keras==2.2.4
!pip install tensorflow==1.13.1

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Base Model Preparation [VGG-16].

from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing.image import img_to_array
from keras.optimizers import SGD
from keras import backend as K
import numpy as np
import cv2

# Select the 'tf' image dimension ordering in the Keras backend before the
# network is built.
K.set_image_dim_ordering('tf')

# VGG-16 with ImageNet weights and without its top (classifier) layers. In this
# notebook it is only called through `predict` in extract_vgg16_features.
Base_model = VGG16(include_top=False, weights='imagenet')
# NOTE(review): the model is never trained here, so this compile step is
# presumably unnecessary for inference — confirm before removing.
Base_model.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=['accuracy'])

def extract_vgg16_features(video_path):
  """Sample one frame per second from a video and encode each with VGG-16.

  Frames are read at timestamps 0 s, 1 s, 2 s, ... until a read fails or the
  sampled frame is pixel-identical to the previous sample (presumably the
  decoder returning the last frame again once the seek passes the end of the
  video).

  Args:
    video_path: path of the video file to open with OpenCV.

  Returns:
    A NumPy array with one row per sampled frame, each row being the flattened
    output of `Base_model`. If no frame could be read, the result is an empty
    1-D array.

  Note:
    Frames are converted from OpenCV's BGR order to RGB before
    `preprocess_input`, which expects RGB input. Features cached before this
    fix were computed with swapped channels, so regenerate them rather than
    mixing old and new features.
  """
  vidcap = cv2.VideoCapture(video_path)
  features = []
  prev = None
  second = 0
  try:
    while True:
      # Seek by timestamp so that exactly one frame per second is sampled.
      vidcap.set(cv2.CAP_PROP_POS_MSEC, second * 1000)
      success, frame = vidcap.read()
      if not success:
        break
      img = cv2.resize(frame, (224, 224), interpolation=cv2.INTER_AREA)
      # Stop as soon as the sample repeats the previous one exactly.
      if prev is not None and np.array_equal(img, prev):
        break
      # OpenCV decodes to BGR; the VGG-16 preprocessing expects RGB.
      rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
      batch = preprocess_input(np.expand_dims(img_to_array(rgb), axis=0))
      features.append(Base_model.predict(batch).ravel())
      prev = img
      second += 1
  finally:
    # Always free the decoder handle, even if prediction raises.
    vidcap.release()
  return np.array(features)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Instructions for updating:
Colocations handled automatically by placer.


In [None]:
# Extract the features from all given data using the pre-trained VGG-16 model.

from tqdm import tqdm
import os

def get_label(cls_name):
  """Map a class folder name to its integer label.

  The name is matched by substring, in this order: "Diving" -> 0,
  "Jumping" -> 1, "Basketball" -> 2, "Tennis" -> 3. The first keyword found
  wins; a name containing none of them gets label 4.
  """
  keywords = ("Diving", "Jumping", "Basketball", "Tennis")
  for index, keyword in enumerate(keywords):
    if keyword in cls_name:
      return index
  # Fallback label for every other folder name.
  return len(keywords)

TRAIN_DIR = "/content/drive/My Drive/Video Action Recognition/Training_set/Training"
TEST_DIR = "/content/drive/My Drive/Video Action Recognition/Testing_set"


def _object_array(rows, columns=None):
  """Pack `rows` into a NumPy object array, one Python object per cell.

  Filling a pre-allocated object array keeps every per-video feature matrix
  as its own element. Plain `np.array(rows)` would instead try to merge the
  shapes: it fails on ragged input in recent NumPy versions and silently
  builds a dense array when all videos happen to have the same length.

  Args:
    rows: list of items to store.
    columns: None for a 1-D array holding each item as-is; otherwise the
      number of leading fields of each item to spread over a 2-D array.
  """
  shape = (len(rows),) if columns is None else (len(rows), columns)
  packed = np.empty(shape, dtype=object)
  for index, row in enumerate(rows):
    if columns is None:
      packed[index] = row
    else:
      for col in range(columns):
        packed[index, col] = row[col]
  return packed


(X_train, y_train, X_test) = ([], [], [])

print("Starting, Extract Train Features..")
for folder in tqdm(os.listdir(TRAIN_DIR)):
  folder_path = os.path.join(TRAIN_DIR, folder)
  # Every video in a class folder shares the folder's label.
  label = get_label(folder)
  for vid in tqdm(os.listdir(folder_path)):
    X_train.append(extract_vgg16_features(os.path.join(folder_path, vid)))
    y_train.append(label)
print("Ending, Train Features are Extracted..")

print("Starting, Extract Test Features..")
for vid in tqdm(os.listdir(TEST_DIR)):
  # Keep the file name next to the features; predictions are keyed by it later.
  X_test.append((extract_vgg16_features(os.path.join(TEST_DIR, vid)), vid))
print("Ending, Test Features are Extracted..")

# Videos have different frame counts, so the features are stored as object
# arrays: X_train is 1-D, X_test is (num_videos, 2) with columns
# (features, file name).
X_train = _object_array(X_train)
y_train = np.array(y_train)
X_test = _object_array(X_test, columns=2)
np.save('/content/drive/My Drive/Video Action Recognition/Xtrain.npy', X_train)
np.save('/content/drive/My Drive/Video Action Recognition/Ytrain.npy', y_train)
np.save('/content/drive/My Drive/Video Action Recognition/Xtest.npy', X_test)

print("Done..")

  0%|          | 0/5 [00:00<?, ?it/s]
  0%|          | 0/154 [00:00<?, ?it/s][A

Starting, Extract Train Features..



  1%|          | 1/154 [00:02<05:58,  2.35s/it][A
  1%|▏         | 2/154 [00:06<07:16,  2.87s/it][A
  2%|▏         | 3/154 [00:08<06:47,  2.70s/it][A
  3%|▎         | 4/154 [00:12<07:47,  3.12s/it][A
  3%|▎         | 5/154 [00:15<07:46,  3.13s/it][A
  4%|▍         | 6/154 [00:19<07:44,  3.14s/it][A
  5%|▍         | 7/154 [00:22<07:41,  3.14s/it][A
  5%|▌         | 8/154 [00:25<07:39,  3.15s/it][A
  6%|▌         | 9/154 [00:29<08:15,  3.42s/it][A
  6%|▋         | 10/154 [00:33<08:21,  3.49s/it][A
  7%|▋         | 11/154 [00:36<08:05,  3.39s/it][A
  8%|▊         | 12/154 [00:40<08:31,  3.60s/it][A
  8%|▊         | 13/154 [00:44<08:47,  3.74s/it][A
  9%|▉         | 14/154 [00:48<08:58,  3.85s/it][A
 10%|▉         | 15/154 [00:52<09:03,  3.91s/it][A
 10%|█         | 16/154 [00:56<09:05,  3.95s/it][A
 11%|█         | 17/154 [00:59<08:10,  3.58s/it][A
 12%|█▏        | 18/154 [01:02<07:30,  3.31s/it][A
 12%|█▏        | 19/154 [01:04<07:03,  3.13s/it][A
 13%|█▎        | 20/

Ending, Train Features are Extracted..
Starting, Extract Test Features..


100%|██████████| 134/134 [09:44<00:00,  4.36s/it]


Ending, Test Features are Extracted..
Done..


In [None]:
# Preprocessing..
import numpy as np


def _fit_frame_count(frame_features, target_frames, feature_dim):
  """Return `frame_features` cut or cyclically repeated to `target_frames` rows.

  Longer sequences keep their first `target_frames` rows. Shorter ones are
  extended by repeating their rows from the start (row r of the result is row
  r % frames of the input). A sequence with no frames has nothing to repeat
  and becomes an all-zero (target_frames, feature_dim) array. The input dtype
  is kept, so padded sequences are no longer upcast to float64.
  """
  frames = frame_features.shape[0]
  if frames == 0:
    return np.zeros((target_frames, feature_dim))
  if frames == target_frames:
    return frame_features
  return frame_features[np.arange(target_frames) % frames]


# Load Features.
# NOTE: allow_pickle=True unpickles arbitrary objects; only load files written
# by this notebook's own feature-extraction cell.
X_train = np.load('/content/drive/My Drive/Video Action Recognition/Xtrain.npy', allow_pickle=True)
y_train = np.load('/content/drive/My Drive/Video Action Recognition/Ytrain.npy', allow_pickle=True)
X_test = np.load('/content/drive/My Drive/Video Action Recognition/Xtest.npy', allow_pickle=True)

# Drop training videos that produced no frames. They are detected here instead
# of deleting a hard-coded index, which silently becomes wrong whenever the
# directory listing order or the dataset changes.
has_frames = np.array([x.shape[0] > 0 for x in X_train], dtype=bool)
X_train = X_train[has_frames]
y_train = y_train[has_frames]

# Equalize #Frames in all Videos: every sequence is brought to the length of
# the longest training video.
num_input_tokens = X_train[0].shape[1]
expected_frames = max(x.shape[0] for x in X_train)

# Train Set
for i in range(len(X_train)):
  X_train[i] = _fit_frame_count(X_train[i], expected_frames, num_input_tokens)

# Test Set (each entry is a (features, file name) pair)
for i in range(len(X_test)):
  X_test[i][0] = _fit_frame_count(X_test[i][0], expected_frames, num_input_tokens)

# Plain Python lists are what the batching and prediction cells consume.
Xtrain = list(X_train)
Ytrain = list(y_train)
Xtest = list(X_test)


In [None]:
# LSTM Classifier Creation..

from keras.layers import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from keras import backend as K
from sklearn.model_selection import train_test_split

# Same backend image-ordering call as in the feature-extraction cell, repeated
# so this cell does not depend on that one having run.
K.set_image_dim_ordering('tf')

# Training hyper-parameters.
BATCH_SIZE = 126     # samples per yielded batch
NUM_EPOCHS = 31      # passes over the training batches
VERBOSE = 1          # Keras progress verbosity
HIDDEN_UNITS = 800   # LSTM state size

def generate_batch(x_samples, y_samples):
  """Yield (inputs, targets) batches of BATCH_SIZE samples, forever.

  Batches are consecutive, non-overlapping slices taken in order. Samples
  beyond the last full batch are not yielded. After the last batch the
  generator starts over from the first sample.

  When there are fewer than BATCH_SIZE samples, a single smaller batch with
  all of them is yielded on every pass. The previous version computed zero
  batches in that case and spun forever without yielding.

  Args:
    x_samples: sliceable sequence of equally-shaped input arrays.
    y_samples: sliceable sequence of targets aligned with `x_samples`.

  Yields:
    Tuples `(x_batch, y_batch)` of NumPy arrays.

  Raises:
    ValueError: on first iteration, if `x_samples` is empty.
  """
  num_samples = len(x_samples)
  if num_samples == 0:
    raise ValueError("generate_batch needs at least one sample")
  num_batches = max(1, num_samples // BATCH_SIZE)
  while True:
    for batch_idx in range(num_batches):
      start = batch_idx * BATCH_SIZE
      end = start + BATCH_SIZE
      # Targets are converted too, so a plain list is never handed to Keras.
      yield np.array(x_samples[start:end]), np.asarray(y_samples[start:end])


# Sequence classifier: an LSTM reads the per-frame feature vectors (any number
# of frames, `num_input_tokens` values each) and returns only its final
# output, which a dense head turns into probabilities over the 5 classes.
classifier_layers = [
  LSTM(units=HIDDEN_UNITS, input_shape=(None, num_input_tokens), return_sequences=False, dropout=0.5),
  Dense(700, activation='relu'),
  Dropout(0.5),
  Dense(5),
  Activation('softmax'),
]
model = Sequential(classifier_layers)

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 800)               82844800  
_________________________________________________________________
dense_1 (Dense)              (None, 700)               560700    
_________________________________________________________________
dropout_1 (Dropout)          (None, 700)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 3505      
_________________________________________________________________
activation_1 (Activation)    (None, 5)                 0         
Total params: 83,409,005
Trainable params: 83,409,005
Non-trainable params: 0
_________

In [None]:
# Training..

# One-hot encode under a new name: re-encoding `Ytrain` in place would turn
# the already-encoded targets into a 3-D array if this cell were run twice.
Ytrain_onehot = np_utils.to_categorical(Ytrain, 5)

# The samples were collected one class folder after another, so consecutive
# slices would give batches made of almost a single class. Shuffle once with a
# fixed seed. Batch composition then stays the same in every epoch, and the
# samples left over after the last full batch are never used.
shuffle_order = np.random.RandomState(42).permutation(len(Xtrain))
Xtrain_shuffled = [Xtrain[i] for i in shuffle_order]
Ytrain_shuffled = Ytrain_onehot[shuffle_order]

# No validation split is used; the whole training set feeds the generator.
train_gen = generate_batch(Xtrain_shuffled, Ytrain_shuffled)
train_num_batches = len(Xtrain_shuffled) // BATCH_SIZE

weight_file_path = '/content/drive/My Drive/Video Action Recognition/Weights/-weights1.h5'
best_weight_file_path = '/content/drive/My Drive/Video Action Recognition/Weights/-weights1-best.h5'

# Track the training loss. The default monitor is `val_loss`, which does not
# exist without validation data, so `save_best_only` never saved anything. The
# best epoch goes to its own file so the final save below cannot overwrite it.
checkpoint = ModelCheckpoint(filepath=best_weight_file_path, monitor='loss',
                             save_best_only=True, save_weights_only=True)
history = model.fit_generator(generator=train_gen, steps_per_epoch=train_num_batches,
                              epochs=NUM_EPOCHS,
                              verbose=VERBOSE,
                              callbacks=[checkpoint])

# Weights after the last epoch (the state the in-memory `model` is left in).
model.save_weights(weight_file_path)

Instructions for updating:
Use tf.cast instead.
Epoch 1/31
Epoch 2/31




Epoch 3/31
Epoch 4/31
Epoch 5/31
Epoch 6/31
Epoch 7/31
Epoch 8/31
Epoch 9/31
Epoch 10/31
Epoch 11/31
Epoch 12/31
Epoch 13/31
Epoch 14/31
Epoch 15/31
Epoch 16/31
Epoch 17/31
Epoch 18/31
Epoch 19/31
Epoch 20/31
Epoch 21/31
Epoch 22/31
Epoch 23/31
Epoch 24/31
Epoch 25/31
Epoch 26/31
Epoch 27/31
Epoch 28/31
Epoch 29/31
Epoch 30/31
Epoch 31/31


In [None]:
# Testing..

import csv

# To score previously saved weights instead of the in-memory model, call
# model.load_weights(<weights file>) before predicting.

NUM_TEST_VIDEOS = 126
SUBMISSION_PATH = '/content/drive/My Drive/Video Action Recognition/submit.csv'

# Index the (features, file name) pairs by file name so each expected video is
# looked up directly instead of scanning the whole list once per video.
features_by_name = {entry[1]: entry[0] for entry in Xtest}

# Sort Videos From 1 to 126, skipping numbers with no matching file.
New_Xtest = []
test_name = []
for i in range(1, NUM_TEST_VIDEOS + 1):
  name = "test_image (" + str(i) + ").mpg"
  if name in features_by_name:
    New_Xtest.append(features_by_name[name])
    test_name.append(name)

print(len(New_Xtest))

# Predictions: one video at a time, taking the class with the highest score.
vid_name = list(test_name)
vid_pred = []
for features in New_Xtest:
  scores = model.predict(np.array([features]))[0]
  vid_pred.append(int(np.argmax(scores)))

# Write the whole file in one go. Mode 'w' replaces any earlier submission;
# the former per-row append mode added a second header and duplicate rows each
# time the cell was re-run.
with open(SUBMISSION_PATH, 'w', newline='') as file:
  writer = csv.writer(file)
  writer.writerow(['Video', 'Label'])
  writer.writerows(zip(vid_name, vid_pred))


126
