In [1]:
# TensorFlow and TF-Hub modules.
from absl import logging

import tensorflow as tf
import tensorflow_hub as hub
# from tensorflow_docs.vis import embed

logging.set_verbosity(logging.ERROR)

# Some modules to help with reading the UCF101 dataset.
import random
import re
import os
import tempfile
import ssl
import cv2
import numpy as np

# Some modules to display an animation using imageio.
import imageio
from IPython import display

from urllib import request  # requires python3

import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

import keras
from keras.src.applications.xception import Xception
from keras.applications.xception import preprocess_input

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler

In [2]:
# Utilities to fetch videos from UCF101 dataset
# Base URL: list_ucf_videos() downloads the index page from it and
# fetch_ucf_video() joins video file names onto it.
UCF_ROOT = "https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/"
# Cache for list_ucf_videos(); stays None until that function fills it in.
_VIDEO_LIST = None
# Temporary directory created when this cell runs; fetch_ucf_video() stores
# downloaded videos in it.
_CACHE_DIR = tempfile.mkdtemp()
# As of July 2020, crcv.ucf.edu doesn't use a certificate accepted by the
# default Colab environment anymore.
# NOTE(review): this context skips TLS certificate verification for every
# request that uses it; it relies on a private (underscore-prefixed) ssl helper.
unverified_context = ssl._create_unverified_context()


def list_ucf_videos():
  """Lists videos available in UCF101 dataset.

  The index page at UCF_ROOT is downloaded on the first call (and again on
  later calls for as long as no video names have been found). The sorted,
  de-duplicated names are cached in the module-level _VIDEO_LIST.

  Returns:
    A new list of file names matching ``v_*.avi``; mutating it does not
    affect the cache.
  """
  global _VIDEO_LIST
  if not _VIDEO_LIST:
    # Close the HTTP response deterministically instead of leaving it open.
    with request.urlopen(UCF_ROOT, context=unverified_context) as response:
      index = response.read().decode("utf-8")
    # Raw string: "\w" and "\." are regex escapes, not Python string escapes.
    videos = re.findall(r"(v_[\w_]+\.avi)", index)
    _VIDEO_LIST = sorted(set(videos))
  return list(_VIDEO_LIST)


def fetch_ucf_video(video):
  """Fetches a video and caches it in the local filesystem.

  Args:
    video: File name of the video; it is joined onto UCF_ROOT to build the
      download URL and onto _CACHE_DIR to build the local path.

  Returns:
    Path of the cached copy. If that path already exists, nothing is
    downloaded.
  """
  cache_path = os.path.join(_CACHE_DIR, video)
  if not os.path.exists(cache_path):
    urlpath = request.urljoin(UCF_ROOT, video)
    print("Fetching %s => %s" % (urlpath, cache_path))
    # Context managers make sure both the HTTP response and the cache file
    # are closed (and the file flushed) before the path is returned.
    with request.urlopen(urlpath, context=unverified_context) as response:
      data = response.read()
    with open(cache_path, "wb") as cache_file:
      cache_file.write(data)
  return cache_path


# Utilities to open video files using CV2
def crop_center_square(frame):
  """Returns the largest centred square region of `frame`.

  Only the first two axes (rows, columns) are cropped; any remaining axes
  are left untouched.
  """
  height, width = frame.shape[0:2]
  side = min(height, width)
  top = (height // 2) - (side // 2)
  left = (width // 2) - (side // 2)
  rows = slice(top, top + side)
  cols = slice(left, left + side)
  return frame[rows, cols]


def load_video(path, max_frames=20, resize=(224, 224), augment=True):
  """Loads up to `max_frames` frames from a video file.

  Every frame is centre-cropped to a square, resized to `resize`, reordered
  from OpenCV's BGR channel order to RGB and, when `augment` is true, passed
  through the notebook-level `datagen.random_transform`. Clips with fewer
  than `max_frames` readable frames are padded with all-zero frames so the
  result always has `max_frames` entries.

  Args:
    path: Path of the video file, handed to cv2.VideoCapture.
    max_frames: Number of frames in the returned clip.
    resize: (width, height) passed to cv2.resize.
    augment: Whether to run `datagen.random_transform` on each frame.
      Defaults to True, which is what this function always did; pass False
      for data that should not be augmented (e.g. evaluation clips).

  Returns:
    np.ndarray with the stacked frames divided by 255.0.
  """
  # Resolve the augmenter outside the try block: `datagen` is created in a
  # later cell, and a missing definition should fail loudly instead of being
  # swallowed by the best-effort handler below.
  transform = datagen.random_transform if augment else None

  cap = cv2.VideoCapture(path)
  frames = []
  try:
    while cap.isOpened() and len(frames) < max_frames:
      ret, frame = cap.read()
      if not ret:
        break

      frame = crop_center_square(frame)
      frame = cv2.resize(frame, resize)
      frame = frame[:, :, [2, 1, 0]]  # BGR -> RGB

      if transform is not None:
        # NOTE(review): a new random transform is drawn for every frame, so
        # frames of the same clip are not augmented consistently.
        frame = transform(frame)

      frames.append(frame)
  except Exception as e:
    # Best effort: keep whatever was decoded so far, but say which file failed.
    print(f"load_video: error while reading {path}: {e}")
  finally:
    # Release the capture handle even when decoding raised.
    cap.release()

  missing = max_frames - len(frames)
  if missing > 0:
    if frames:
      blank = np.zeros_like(frames[0])
    else:
      # Nothing could be decoded: there is no frame to copy the shape from,
      # so build the blank frame from `resize` (cv2 sizes are width, height).
      print(f"load_video: no frames could be read from {path}; returning an all-zero clip")
      blank = np.zeros((resize[1], resize[0], 3))
    frames += [blank] * missing

  return np.array(frames) / 255.0


# def video_to_frames(video_path, img_size, sequence_length):
#     cap = cv2.VideoCapture(video_path)
#     frames = []
#     while cap.isOpened():
#         ret, frame = cap.read()
#         if not ret:
#             break
#         frame = cv2.resize(frame, img_size)
#         frames.append(frame)
#         if len(frames) == sequence_length:
#             break
#     cap.release()

#     if len(frames) < sequence_length:
#         return None  # Ignore short videos

#     return np.array(frames)


def to_gif(images):
  """Writes `images` to './animation.gif'.

  The input is multiplied by 255, clipped to [0, 255] and cast to uint8
  before being handed to imageio.mimsave with duration=40.

  Returns:
    Whatever imageio.mimsave returns for a file target.
  """
  scaled = images * 255
  frames_u8 = np.clip(scaled, 0, 255).astype(np.uint8)
  return imageio.mimsave('./animation.gif', frames_u8, duration=40)


# List files and ignore .DS_Store if on a Mac
def list_files(directory):
    """Returns the non-hidden entries of `directory`, sorted by name.

    Entries whose name starts with "." (for example macOS ``.DS_Store``) are
    skipped. The result is sorted because os.listdir returns entries in
    arbitrary order, and load_dataset turns the position of each class folder
    in this list into its integer label.

    Args:
        directory: Path of the directory to list.

    Returns:
        A sorted list of entry names (not full paths).
    """
    return sorted(name for name in os.listdir(directory) if not name.startswith('.'))


def video_to_frames(video_path, img_size=(64, 64), sequence_length=30):
    """Reads the first `sequence_length` frames of a video, preprocessed for Xception.

    Each decoded frame is converted from BGR to RGB, resized to 224x224, cast
    to float32, given a leading batch axis and passed through
    `preprocess_input`.

    Args:
        video_path: Path of the video file, handed to cv2.VideoCapture.
        img_size: Currently unused; frames are always resized to 224x224.
            NOTE(review): confirm whether this parameter should drive the
            resize instead of the hard-coded size.
        sequence_length: Number of frames to collect.

    Returns:
        np.ndarray stacking the processed frames (each of shape
        (1, 224, 224, 3)), or None when the video yields fewer than
        `sequence_length` frames.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # `frame` is already a decoded image array, so it cannot go through
            # keras.utils.load_img (which takes a path or BytesIO). Do the
            # equivalent conversion directly on the array: RGB order, target
            # size, float32 as img_to_array would produce.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            x = cv2.resize(rgb, (224, 224)).astype("float32")
            x = np.expand_dims(x, axis=0)
            x = preprocess_input(x)
            frames.append(x)
            if len(frames) == sequence_length:
                break
    finally:
        # Release the capture handle even if decoding or preprocessing raised.
        cap.release()

    if len(frames) < sequence_length:
        return None  # Ignore short videos

    return np.array(frames)


# Get paths and labels
def load_dataset(folder_path, print_path=False):
    """Collects video paths and labels from a folder with one sub-folder per class.

    Class folders are visited in sorted name order and the position of a
    folder in that order is its integer label, so two directory trees with the
    same class folders get the same label mapping.

    NOTE(review): load_dataset is defined a second time further down in this
    notebook; when the cells are run in order, the later definition replaces
    this one.

    Args:
        folder_path: Directory whose visible sub-directories are the classes.
        print_path: If true, print an ``i/n<TAB>path`` progress line per video.

    Returns:
        (paths, true_labels, activities): three parallel lists holding, for
        every video, its path, its integer class label and its class folder
        name.
    """
    # os.listdir order is arbitrary, so sort explicitly; also skip stray files
    # that are not class folders (listing them would raise).
    classes = sorted(
        name for name in list_files(folder_path)
        if os.path.isdir(os.path.join(folder_path, name))
    )
    paths, true_labels, activities = [], [], []

    for label, activity in enumerate(classes):
        activity_folder = os.path.join(folder_path, activity)
        files = sorted(list_files(activity_folder))
        num_files = len(files)
        for current, video_file in enumerate(files, start=1):
            video_path = os.path.join(activity_folder, video_file)
            paths.append(video_path)
            true_labels.append(label)
            activities.append(activity)

            if print_path:
                print(f'{current}/{num_files}\t{video_path}')

    return paths, true_labels, activities


Build model

In [14]:
# Number of target classes: sizes the softmax layer in built_model and the
# one-hot label encoding.
num_classes = 12

# Augmentations
# Random per-image transforms; load_video calls datagen.random_transform on
# every frame it reads.
# NOTE(review): load_video is defined in an earlier cell but looks this object
# up at call time, so this cell has to run before any video is loaded.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

In [22]:
def built_model(print_details=True, n_classes=None, input_shape=(20, 224, 224, 3),
                learning_rate=0.0001):
    """Builds and compiles a classifier head on top of the TF-Hub I3D Kinetics-400 model.

    The I3D 'default' signature is wrapped in a non-trainable Keras layer and
    followed by dropout and a softmax Dense layer.

    Args:
        print_details: If true, print model.summary().
        n_classes: Number of output classes. Defaults to the notebook-level
            `num_classes`.
        input_shape: Shape of one clip as (frames, height, width, channels).
            The default matches load_video's defaults (20 frames of 224x224).
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled tf.keras.Model using categorical cross-entropy (so labels
        are expected one-hot encoded) and reporting accuracy.
    """
    if n_classes is None:
        n_classes = num_classes

    # Load i3d-kinetics-400 model
    i3d = hub.load("https://tfhub.dev/deepmind/i3d-kinetics-400/1").signatures['default']

    class I3DModelLayer(tf.keras.layers.Layer):
        """Keras layer that calls the I3D signature and returns its 'default' output."""

        def __init__(self, i3d_model, **kwargs):
            super().__init__(**kwargs)
            self.i3d_model = i3d_model

        def call(self, inputs):
            return self.i3d_model(rgb_input=inputs)['default']

    # Freeze layers
    # NOTE(review): `i3d` is a SavedModel signature rather than a Keras layer,
    # so this attribute is presumably ignored by Keras; the wrapper layer below
    # is therefore marked non-trainable explicitly as well.
    i3d.trainable = False

    inputs = tf.keras.Input(shape=input_shape)
    i3d_layer = I3DModelLayer(i3d, trainable=False)
    x = i3d_layer(inputs)
    x = tf.keras.layers.Dropout(0.2)(x)
    outputs = tf.keras.layers.Dense(n_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs, outputs)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    if print_details:
        model.summary()

    return model


Load dataset

In [23]:
# Get paths and labels
def load_dataset(folder_path, print_path=False):
    """Collects video paths and labels from a folder with one sub-folder per class.

    Class folders are visited in sorted name order and the position of a
    folder in that order is its integer label, so two directory trees with the
    same class folders (e.g. the train and test splits) get the same label
    mapping.

    NOTE(review): a function with this name is also defined in an earlier
    cell; when the cells are run in order, this definition replaces it.

    Args:
        folder_path: Directory whose visible sub-directories are the classes.
        print_path: If true, print an ``i/n<TAB>path`` progress line per video.

    Returns:
        (paths, true_labels, activities): three parallel lists holding, for
        every video, its path, its integer class label and its class folder
        name.
    """
    # os.listdir order is arbitrary, so sort explicitly; also skip stray files
    # that are not class folders (listing them would raise).
    classes = sorted(
        name for name in list_files(folder_path)
        if os.path.isdir(os.path.join(folder_path, name))
    )
    paths, true_labels, activities = [], [], []

    for label, activity in enumerate(classes):
        activity_folder = os.path.join(folder_path, activity)
        files = sorted(list_files(activity_folder))
        num_files = len(files)
        for current, video_file in enumerate(files, start=1):
            video_path = os.path.join(activity_folder, video_file)
            paths.append(video_path)
            true_labels.append(label)
            activities.append(activity)

            if print_path:
                print(f'{current}/{num_files}\t{video_path}')

    return paths, true_labels, activities


def make_dataset(paths, labels, **load_kwargs):
    """Loads every video in `paths` and pairs it with its label.

    One ``i/n<TAB>path`` progress line is printed per video.

    Args:
        paths: Sequence of video file paths.
        labels: Sequence of labels, indexable in parallel with `paths`.
        **load_kwargs: Extra keyword arguments forwarded unchanged to
            load_video (for example ``max_frames`` or ``resize``). With none
            given, load_video is called with its defaults, as before.

    Returns:
        (all_data, all_labels): the list of arrays returned by load_video and
        the list of matching labels, both in the order of `paths`.
    """
    all_data = []
    all_labels = []
    total = len(paths)

    for index, path in enumerate(paths):
        print(f'{index + 1}/{total}\t{path}')
        all_data.append(load_video(path, **load_kwargs))
        all_labels.append(labels[index])

    return all_data, all_labels


In [33]:
# Dataset roots, given relative to the notebook's working directory.
train_dir = "../../downloads/fr_10s/train_fr_10s"
test_dir = "../../downloads/fr_10s/test_fr_10s"

# Each call returns three parallel lists: video paths, integer labels and
# class-folder names.
train_paths, train_labels, train_activities = load_dataset(train_dir)
test_paths, test_labels, test_activities = load_dataset(test_dir)

In [None]:
# Decode every listed video into a frame array; make_dataset prints one
# progress line per video and returns (clips, labels) as Python lists.
# NOTE(review): make_dataset goes through load_video, which runs
# datagen.random_transform on each frame, so the test clips are randomly
# augmented too — confirm this is intended.
X_train, y_train = make_dataset(train_paths, train_labels)
X_test, y_test = make_dataset(test_paths, test_labels)


In [20]:
# Stack the per-video clips into a single array. float32 matches the model's
# input dtype and needs half the memory of the default float64.
X_train = np.asarray(X_train, dtype=np.float32)

# One-hot encode the integer labels. The guard makes this cell safe to re-run:
# labels that are already one-hot (2-D) are left untouched instead of being
# encoded a second time.
if np.ndim(y_train) == 1:
    y_train = tf.keras.utils.to_categorical(y_train, num_classes)

Train model

In [31]:
# Callbacks
def _lr_for_epoch(epoch):
    """Learning rate for a given epoch index: 1e-4 scaled by 10 ** (epoch / 10)."""
    # NOTE(review): this rate grows with every epoch (tenfold per 10 epochs);
    # confirm an increasing schedule is intended for the actual training run.
    return 0.0001 * 10 ** (epoch / 10)


lr_schedule = LearningRateScheduler(_lr_for_epoch)

# Stop once validation accuracy has not improved for 3 epochs and roll the
# model back to its best weights.
early_stopping = EarlyStopping(
    restore_best_weights=True,
    patience=3,
    monitor='val_accuracy',
)

# Build model
model = built_model(print_details=False)

In [32]:
# Hold out 20% of the training clips for validation.
# Keras' `validation_split` takes the *last* fraction of the arrays without
# shuffling, and X_train is ordered class by class (load_dataset walks one
# class folder after another). That would validate on the final classes only
# and never train on them, so draw a shuffled split instead.
# NOTE: train_test_split copies the selected samples, which temporarily raises
# memory use.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, shuffle=True, random_state=42
)

history = model.fit(X_fit,
                    y_fit,
                    epochs=10,
                    validation_data=(X_val, y_val),
                    callbacks=[early_stopping, lr_schedule])

Epoch 1/10
Epoch 2/10

KeyboardInterrupt: 