In [2]:
import os
import glob
import random
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [7]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
gpus = tf.config.list_physical_devices('GPU')
tf.config.set_visible_devices(gpus[0], 'GPU')
# tf.config.LogicalDeviceConfiguration(memory_limit=1024)

In [5]:
if 1:
    !pip install scikit-video==1.1.11
import skvideo.io

Collecting scikit-video==1.1.11
  Downloading scikit_video-1.1.11-py2.py3-none-any.whl (2.3 MB)
[K     |████████████████████████████████| 2.3 MB 5.3 MB/s 
Installing collected packages: scikit-video
Successfully installed scikit-video-1.1.11


In [13]:
classes = [
    'walking',
    'jogging',
    'running',
    'boxing',
    'handwaving',
    'handclapping',
]

dataset = []
data_root = '/content/drive/MyDrive/Colab_Notebooks/CV/L7/'
for cls in classes:
    print('Processing class: {}'.format(cls))
    for fpath in glob.glob(os.path.join(data_root, cls, '*.avi')):
        cls_idx = classes.index(cls)
        dataset.append((fpath, cls_idx))

Processing class: walking
Processing class: jogging
Processing class: running
Processing class: boxing
Processing class: handwaving
Processing class: handclapping


In [14]:
SUBSET_LEN = 200
random.shuffle(dataset)
dataset_train = dataset[:SUBSET_LEN]
dataset_test=dataset[SUBSET_LEN:int(SUBSET_LEN*1.1)]
print('Dataset samples (subset):', len(dataset_train))

Dataset samples (subset): 200


In [27]:
weight_decay=0.001
wd=tf.keras.regularizers.L2(weight_decay)

In [31]:
model = tf.keras.Sequential([
    tf.keras.layers.ConvLSTM2D(64, (3, 3),strides=3,padding='same',kernel_regularizer=wd,  activation='relu'),
    tf.keras.layers.LayerNormalization(),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(64, activation='relu',kernel_regularizer=wd),
     tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(6, activation=None),
])

In [32]:
NUM_EPOCHS = 3
LEARNING_RATE = 0.00001

model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
    optimizer=tf.keras.optimizers.Adam(LEARNING_RATE))


In [33]:
global_step = 0
for ep in range(NUM_EPOCHS):
  res=[]
  for iter, (fpath, label) in enumerate(dataset_train):
      videodata = skvideo.io.vread(fpath)
      videodata = videodata.astype(np.float32) / 255.
      videodata=tf.image.resize(videodata, (60,80))
      videodata= np.mean(videodata, axis=3, keepdims=True)
      x = videodata[None, ...]
    
      y = np.array(label)[None, ...]

      loss_value = model.train_on_batch(x, y)
      res.append(loss_value)


      if iter % 40 == 0:
        res_test=[]

        for (fpath_test, label_test) in dataset_test:

          videodata_test = skvideo.io.vread(fpath_test)
          videodata_test = videodata_test.astype(np.float32) / 255.
          videodata_test=tf.image.resize(videodata_test, (60,80))
          videodata_test= np.mean(videodata_test, axis=3, keepdims=True)
          x_test = videodata_test[None, ...]    
          y_test = np.array(label_test)[None, ...]    
          loss_value_test=model.test_on_batch(x_test,y_test)
          res_test.append(loss_value_test)

        print(f'[{ep}/{NUM_EPOCHS}][{iter}/{len(dataset)}] Loss = {round(loss_value,4)} Val_Loss= {round(np.mean(res_test),4)}')
  print(f'{ep} epoch_loss_mean: ', np.mean(res))

[0/3][0/599] Loss = 1.6768 Val_Loss= 1.9116
[0/3][40/599] Loss = 2.006 Val_Loss= 1.9105
[0/3][80/599] Loss = 2.614 Val_Loss= 1.9103
[0/3][120/599] Loss = 1.9727 Val_Loss= 1.8986
[0/3][160/599] Loss = 1.8228 Val_Loss= 1.8921
0 epoch_loss_mean:  1.907724906206131
[1/3][0/599] Loss = 1.8194 Val_Loss= 1.8808
[1/3][40/599] Loss = 1.9764 Val_Loss= 1.8785
[1/3][80/599] Loss = 2.4484 Val_Loss= 1.8795
[1/3][120/599] Loss = 1.5212 Val_Loss= 1.8709
[1/3][160/599] Loss = 1.6603 Val_Loss= 1.8738
1 epoch_loss_mean:  1.8865015083551406
[2/3][0/599] Loss = 1.9076 Val_Loss= 1.8681
[2/3][40/599] Loss = 1.7923 Val_Loss= 1.8675
[2/3][80/599] Loss = 1.7948 Val_Loss= 1.87
[2/3][120/599] Loss = 2.1192 Val_Loss= 1.8644
[2/3][160/599] Loss = 1.7684 Val_Loss= 1.866
2 epoch_loss_mean:  1.8690517675876617


In [None]:
fpath_test=dataset_test[0][0]
label_test=dataset_test[0][1]
video_test = skvideo.io.vread(fpath_test)
video_test = video_test.astype(np.float32) / 255.
video_test=tf.image.resize(video_test, (60,80))
video_test= np.mean(video_test, axis=3, keepdims=True)
x_test = video_test[None, ...]    

In [36]:
out = model(x_test)[0]
cls_pred = np.argmax(out.numpy())

In [39]:
cls_pred, label_test

(3, 3)