In [1]:
# TensorFlow and TF-Hub modules.
from absl import logging

import tensorflow as tf
import tensorflow_hub as hub
# from tensorflow_docs.vis import embed

logging.set_verbosity(logging.ERROR)

# Some modules to help with reading the UCF101 dataset.
import random
import re
import os
import tempfile
import ssl
import cv2
import numpy as np
import h5py

# Some modules to display an animation using imageio.
import imageio
from IPython import display

from urllib import request  # requires python3

import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

import keras
from keras.src.applications.xception import Xception
from keras.applications.xception import preprocess_input

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.utils import to_categorical




In [2]:
num_classes = 12

# Augmentations
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

In [13]:
# Utilities to fetch videos from UCF101 dataset
UCF_ROOT = "https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/"
_VIDEO_LIST = None
_CACHE_DIR = tempfile.mkdtemp()
# As of July 2020, crcv.ucf.edu doesn't use a certificate accepted by the
# default Colab environment anymore.
unverified_context = ssl._create_unverified_context()


def list_ucf_videos():
  """Lists videos available in UCF101 dataset."""
  global _VIDEO_LIST
  if not _VIDEO_LIST:
    index = request.urlopen(UCF_ROOT, context=unverified_context).read().decode("utf-8")
    videos = re.findall("(v_[\w_]+\.avi)", index)
    _VIDEO_LIST = sorted(set(videos))
  return list(_VIDEO_LIST)


def fetch_ucf_video(video):
  """Fetches a video and cache into local filesystem."""
  cache_path = os.path.join(_CACHE_DIR, video)
  if not os.path.exists(cache_path):
    urlpath = request.urljoin(UCF_ROOT, video)
    print("Fetching %s => %s" % (urlpath, cache_path))
    data = request.urlopen(urlpath, context=unverified_context).read()
    open(cache_path, "wb").write(data)
  return cache_path


# Utilities to open video files using CV2
def crop_center_square(frame):
  y, x = frame.shape[0:2]
  min_dim = min(y, x)
  start_x = (x // 2) - (min_dim // 2)
  start_y = (y // 2) - (min_dim // 2)
  return frame[start_y:start_y+min_dim,start_x:start_x+min_dim]


def data_generator(paths, labels, batch_size=16):
    while True:
        for i in range(0, len(paths), batch_size):
            batch_paths = paths[i:i+batch_size]
            batch_labels = labels[i:i+batch_size]
            
            # Handle if the last batch is smaller than batch_size
            if len(batch_paths) < batch_size:
                # Optionally pad the batch or discard it
                pad_size = batch_size - len(batch_paths)
                batch_paths += batch_paths[:pad_size]
                batch_labels += batch_labels[:pad_size]
            
            batch_data = []
            for path in batch_paths:
                frames = load_video(path)
                if frames is not None:
                    batch_data.append(frames)
            
            batch_data = np.array(batch_data) 
            yield batch_data, np.array(batch_labels)


def load_video(path, max_frames=10, resize=(224, 224)):
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]

            frame = datagen.random_transform(frame)

            frames.append(frame)

            if len(frames) == max_frames:
                break

        cap.release()

        # Ensure exactly max_frames are returned
        if len(frames) < max_frames:
            frames += [np.zeros((resize[0], resize[1], 3))] * (max_frames - len(frames))
        
    except Exception as e:
        print(e)
        frames = [np.zeros((resize[0], resize[1], 3))] * max_frames

    return np.array(frames) / 255.0



def to_gif(images):
  converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)
  gif = imageio.mimsave('./animation.gif', converted_images, duration=40)
  return gif


# List files and ignore .DS_Store if on a Mac
def list_files(directory):
    visible_files = []
    for file in os.listdir(directory):
        if not file.startswith('.'):
            visible_files.append(file)

    return visible_files


def video_to_frames(video_path, img_size=(64, 64), sequence_length=30):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # frame = cv2.resize(frame, img_size)
        img = keras.utils.load_img(frame, target_size=(224, 224))
        x = keras.utils.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        frames.append(x)
        if len(frames) == sequence_length:
            break
    cap.release()

    if len(frames) < sequence_length:
        return None  # Ignore short videos

    return np.array(frames)


# Get paths and labels
def load_dataset(folder_path, print_path=False):
    # classes = os.listdir(folder_path)
    classes = list_files(folder_path)
    paths, true_labels, activities = [], [], []

    for label, activity in enumerate(classes):
        activity_folder = os.path.join(folder_path, activity)
        files = list_files(activity_folder)
        num_files = len(files)
        current = 1
        for video_file in files:
            video_path = os.path.join(activity_folder, video_file)
            paths.append(video_path)
            true_labels.append(label)
            activities.append(activity)

            if print_path:
                print(f'{current}/{num_files}\t{video_path}')

            current += 1

    return paths, true_labels, activities


def built_model(print_details=True):
    # Load i3d-kinetics-400 model
    i3d = hub.load("https://tfhub.dev/deepmind/i3d-kinetics-400/1").signatures['default']

    class I3DModelLayer(tf.keras.layers.Layer):
        def __init__(self, i3d_model):
            super(I3DModelLayer, self).__init__()
            self.i3d_model = i3d_model

        def call(self, inputs):
            output = self.i3d_model(rgb_input=inputs)['default']
            return output
        
    # Freeze layers
    i3d.trainable = False

    inputs = tf.keras.Input(shape=(10, 224, 224, 3))
    i3d_layer = I3DModelLayer(i3d)
    x = i3d_layer(inputs) 
    # print(f"Shape after I3DLayer: {x.shape}")
    x = tf.keras.layers.Dropout(0.2)(x)
    outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs, outputs)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, 
                                                     clipvalue=1.0),
                 loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    if print_details:
        model.summary()

    return model


# Get paths and labels
def load_dataset(folder_path, print_path=False):
    # classes = os.listdir(folder_path)
    classes = list_files(folder_path)
    paths, true_labels, activities = [], [], []

    for label, activity in enumerate(classes):
        activity_folder = os.path.join(folder_path, activity)
        files = list_files(activity_folder)
        num_files = len(files)
        current = 1
        for video_file in files:
            video_path = os.path.join(activity_folder, video_file)
            paths.append(video_path)
            true_labels.append(label)
            activities.append(activity)

            if print_path:
                print(f'{current}/{num_files}\t{video_path}')

            current += 1

    return paths, true_labels, activities


def make_dataset(paths, labels):
    all_data = []
    all_labels = []

    for p in enumerate(paths): 
        print(f'{p[0] + 1}/{len(paths)}\t{p[1]}')
        frames = load_video(p[1])
        all_data.append(frames)
        lab = labels[p[0]]
        all_labels.append(lab)

    return all_data, all_labels


Load dataset

In [7]:
train_dir = f"../../downloads/fr_10s/train_fr_10s"
train_paths, train_labels, train_activities = load_dataset(train_dir)

In [8]:
X_train, y_train = make_dataset(train_paths, train_labels)

1/1136	../../downloads/fr_10s/train_fr_10s\Asleep-Trying to sleep\7393437904103853201_s_1.mp4
2/1136	../../downloads/fr_10s/train_fr_10s\Asleep-Trying to sleep\7393439141054434449_s_1.mp4
3/1136	../../downloads/fr_10s/train_fr_10s\Asleep-Trying to sleep\7393443393072057489_s_1.mp4
4/1136	../../downloads/fr_10s/train_fr_10s\Asleep-Trying to sleep\7393443504741207185_s_1.mp4
5/1136	../../downloads/fr_10s/train_fr_10s\Asleep-Trying to sleep\7393443844043623569_s_1.mp4
6/1136	../../downloads/fr_10s/train_fr_10s\Asleep-Trying to sleep\7393443844043623569_s_2.mp4
7/1136	../../downloads/fr_10s/train_fr_10s\Asleep-Trying to sleep\7393444200525909137_s_1.mp4
8/1136	../../downloads/fr_10s/train_fr_10s\Asleep-Trying to sleep\7393444432454143121_s_1.mp4
9/1136	../../downloads/fr_10s/train_fr_10s\Asleep-Trying to sleep\7393444599957867665_s_1.mp4
10/1136	../../downloads/fr_10s/train_fr_10s\Asleep-Trying to sleep\7393446128966225041_s_1.mp4
11/1136	../../downloads/fr_10s/train_fr_10s\Asleep-Trying t

In [9]:
# X_train = np.array(X_train)
y_train = to_categorical(y_train, num_classes=num_classes)

# Train, test, split
train_paths, val_paths, train_labels, val_labels = train_test_split(
    train_paths, train_labels, test_size=0.2, random_state=42
)

train_gen = data_generator(train_paths, train_labels, batch_size=16)
val_gen = data_generator(val_paths, val_labels, batch_size=16)

# train_gen = data_generator(train_paths, train_labels, batch_size=16)

In [10]:
sample_batch, sample_labels = next(train_gen)
print(f"Batch shape: {sample_batch.shape}")  # Expected: (16, 10, 224, 224, 3)
print(f"Labels shape: {sample_labels.shape}")  # Expected: (16,)

Batch shape: (16, 10, 224, 224, 3)
Labels shape: (16,)


Train model

In [34]:
# Callbacks
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    # lambda epoch: 0.0001 * 10 ** (epoch / 20)
    # lambda epoch: 0.0001 * 10 ** (epoch / 5)
    lambda epoch: 0.001
)

early_stopping = EarlyStopping(
    monitor='val_sparse_categorical_accuracy', 
    patience=5,
    mode='max',
    restore_best_weights=True
)

# Build model
model = built_model(print_details=False)

In [35]:
history = model.fit(
    train_gen,
    steps_per_epoch=len(train_paths) // 16,
    validation_data=val_gen,
    validation_steps=len(val_paths) // 16,
    epochs=20,
    callbacks=[early_stopping, lr_schedule]
)

Epoch 1/20
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 4s/step - loss: 4.2490 - sparse_categorical_accuracy: 0.1545 - val_loss: 2.6283 - val_sparse_categorical_accuracy: 0.2593 - learning_rate: 0.0010
Epoch 2/20
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 3s/step - loss: 2.9869 - sparse_categorical_accuracy: 0.2392 - val_loss: 2.2665 - val_sparse_categorical_accuracy: 0.3380 - learning_rate: 0.0010
Epoch 3/20
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 3s/step - loss: 2.7508 - sparse_categorical_accuracy: 0.2816 - val_loss: 2.1206 - val_sparse_categorical_accuracy: 0.3333 - learning_rate: 0.0010
Epoch 4/20
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 3s/step - loss: 2.6817 - sparse_categorical_accuracy: 0.2735 - val_loss: 1.8791 - val_sparse_categorical_accuracy: 0.4213 - learning_rate: 0.0010
Epoch 5/20
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 3s/step - loss: 2.2497 - sparse

In [41]:
import datetime as dt

# Save model
date_time_format = '%Y-%m-%d-%H-%M-%S'
current_date_time_dt = dt.datetime.now()
current_date_time_string = dt.datetime.strftime(current_date_time_dt, date_time_format)

model_name = f'./{current_date_time_string}-i3d-transfer-model.keras'

# model.save(model_name)
model.save_weights(f'{current_date_time_string}-i3d-transfer.weights.h5')

print(f"Saved {model_name}")

Saved ./2024-11-20-23-53-45-i3d-transfer-model.keras


Test

In [None]:
# If loading weights
model.load_weights('./2024-11-20-23-53-45-i3d-transfer-model.keras.h5')

In [None]:
test_dir = f"../../downloads/fr_10s/test_fr_10s"
test_paths, test_labels, test_activities = load_dataset(test_dir)

In [None]:
sample_batch, sample_labels = next(train_gen)
print(f"Batch shape: {sample_batch.shape}")  # Expected: (16, 10, 224, 224, 3)
print(f"Labels shape: {sample_labels.shape}")  # Expected: (16,)

Batch shape: (16, 10, 224, 224, 3)
Labels shape: (16,)


In [None]:
X_test, y_test = make_dataset(test_paths, test_labels)
y_test = to_categorical(y_test, num_classes=num_classes)
test_gen = data_generator(test_paths, test_labels, batch_size=16)

In [None]:
predictions = model.predict(X_test)
predicted_classes = np.argmax(predictions, axis=1)

In [None]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test loss: {loss}, Test accuracy: {accuracy}")