In [5]:
from google.colab import drive

# Attach Google Drive to the Colab runtime at the conventional mount point.
mount_point = '/content/drive'
drive.mount(mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
!pip install loguru



In [7]:
from pathlib import Path 
import shutil
import pandas as pd 
import numpy as np 
import tensorflow as tf 
from sklearn.model_selection import train_test_split
import cv2
import random
from loguru import logger

In [8]:
from pathlib import Path

# Root of the mounted Drive, and the UCF-101 folder that holds one sub-folder per action.
drive_dir = Path('/content/drive/MyDrive')
ucf_dir = drive_dir.joinpath("UCF101/UCF-101")

In [9]:
# Build a per-class 60/20/20 train/val/test split of the .avi files.
#
# The split is seeded and the inputs are sorted, so re-running this cell always
# produces the same assignment (filesystem listing order is not guaranteed).
# NOTE(review): the split is per clip. If several clips of one class come from the
# same recording, they may land in different splits - confirm that this is acceptable.
SPLIT_SEED = 42
train_r, val_r, test_r = 0.6, 0.2, 0.2

train_dfs, test_dfs, val_dfs = [], [], []

for path in sorted(ucf_dir.iterdir()):
    if not path.is_dir():
        continue

    # the folder name is the action label
    folder_name = path.name

    # find the .avi files
    avi_paths = sorted(path.glob("*.avi"))

    # first carve off the training share, then divide the remainder into val and test
    X_train, X_holdout = train_test_split(
        avi_paths, train_size=train_r, random_state=SPLIT_SEED
    )
    X_val, X_test = train_test_split(
        X_holdout, train_size=val_r / (val_r + test_r), random_state=SPLIT_SEED
    )

    # one labelled frame per split, collected in the matching list
    for split_paths, split_dfs in ((X_train, train_dfs), (X_val, val_dfs), (X_test, test_dfs)):
        df_split = pd.DataFrame(split_paths, columns=['path'])
        df_split['action'] = folder_name
        split_dfs.append(df_split)



In [10]:
# Stack the per-class frames into one table per split. ignore_index=True gives each
# table a fresh 0..n-1 index instead of repeating every class's own 0..k-1 labels.
train_df = pd.concat(train_dfs, ignore_index=True)
val_df = pd.concat(val_dfs, ignore_index=True)
test_df = pd.concat(test_dfs, ignore_index=True)

In [7]:
# Persist each split's (path, action) table to Drive.
# NOTE(review): the files are written as CSV even though their names end in
# ".parquet"; the names are kept because the loading cell reads these exact paths.
split_outputs = {
    "UCF101/train_paths.parquet": train_df,
    "UCF101/val_paths.parquet": val_df,
    "UCF101/test_paths.parquet": test_df,
}
for relative_path, split_df in split_outputs.items():
    split_df.to_csv(drive_dir / relative_path, index=False)

NameError: name 'train_df' is not defined

In [9]:
# Reload the training split saved earlier (CSV content despite the ".parquet" suffix).
train_paths_file = drive_dir / "UCF101/train_paths.parquet"
train_df = pd.read_csv(train_paths_file)

In [10]:
# Send loguru output to a log file on Drive.
log_path = drive_dir / "UCF101/logs"
log_path.mkdir(exist_ok=True)

logger.remove()  # drop every handler registered so far
first_frame_log = log_path / "first_frame_capture.log"
logger.add(str(first_frame_log), level='DEBUG')

1

In [49]:
def format_frame(frame, output_shape):
    """Normalise a frame and letterbox it into ``output_shape``.

    Pixel values are converted to float32 and divided by 255. The frame is then
    scaled uniformly (aspect ratio preserved) to the largest size that fits
    inside the target and centred on a black canvas. This is done with
    NumPy/OpenCV rather than ``tf.image.resize_with_pad``.

    Args:
        frame: image array of shape (height, width, 3). The division by 255
            assumes 8-bit pixel values.
        output_shape: ``(target_height, target_width)`` of the returned canvas.

    Returns:
        float32 array of shape ``(target_height, target_width, 3)``.
    """
    frame = frame.astype(np.float32) / 255.0

    h, w = frame.shape[:2]
    target_h, target_w = output_shape
    # largest uniform scale at which the frame still fits inside the target
    scale = min(target_h / h, target_w / w)

    # clamp to one pixel so an extreme aspect ratio never asks cv2.resize for an empty image
    new_h = max(1, int(h * scale))
    new_w = max(1, int(w * scale))

    # cv2.resize takes the size as (width, height)
    resized = cv2.resize(frame, (new_w, new_h))

    # split the leftover space evenly so the frame sits in the centre
    pad_h = (target_h - new_h) // 2
    pad_w = (target_w - new_w) // 2

    # float32 canvas: np.zeros defaults to float64, which would silently upcast the result
    new_frame = np.zeros((target_h, target_w, 3), dtype=np.float32)
    new_frame[pad_h:pad_h + new_h, pad_w:pad_w + new_w] = resized

    return new_frame

def capture_frames(video_path, frame_step=4, n_frames=32, output_size=(224, 224), mode='train'):
    """Sample ``n_frames`` frames, ``frame_step`` apart, from a video file.

    Args:
        video_path: path of the video; converted with ``str`` before opening.
        frame_step: number of frames read between two kept frames.
        n_frames: number of frames in the returned clip.
        output_size: ``(height, width)`` handed to ``format_frame``.
        mode: with ``'train'`` the start frame is random when the video is long
            enough to hold the whole clip; any other value starts at frame 0.

    Returns:
        float32 array with one formatted frame per sampled position, channels in
        RGB order, or ``None`` when the video cannot be opened or its first
        frame cannot be read. Positions past the end of the video are filled
        with all-zero frames.
    """
    video_path = str(video_path)
    video = cv2.VideoCapture(video_path)  # open the video

    try:
        if not video.isOpened():
            logger.warning(f'Failed to open video for {video_path}')
            return None

        # cv2 reports the frame count as a float
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        required_frames = 1 + (n_frames - 1) * frame_step

        # A random start is used only for training, and only when the whole clip
        # fits; validation/inference and short videos always start at frame 0.
        if mode == 'train' and required_frames <= total_frames:
            start = random.randint(0, total_frames - required_frames)  # both ends inclusive
        else:
            start = 0

        # move the read pointer to the start frame
        video.set(cv2.CAP_PROP_POS_FRAMES, start)

        # ret tells whether a frame was read; frame is a (height, width, 3) BGR array
        ret, frame = video.read()
        if not ret:
            logger.warning(f'Failed to capture first frame from {video_path}')
            return None

        result = [format_frame(frame, output_size)]
        logger.info(f'First frame captured from {video_path}')

        for _ in range(n_frames - 1):
            # read frame_step frames and keep only the last one
            for _step in range(frame_step):
                ret, frame = video.read()
            if ret:
                result.append(format_frame(frame, output_size))
            else:
                # ran past the end of the video: pad with a black frame
                result.append(np.zeros_like(result[0]))
    finally:
        # release the capture on every path, including the early returns above
        video.release()

    # [..., [2, 1, 0]] reorders the last axis, turning BGR into RGB
    result_arr = np.asarray(result, dtype=np.float32)[..., [2, 1, 0]]

    return result_arr

In [50]:
# Clip geometry, passed to capture_frames and reused to restore static shapes.
N_FRAMES = 32
FRAME_SIZE = (224, 224)


def _read_clip(path, label):
    """Eager helper for tf.py_function: decode one video into a stack of frames."""
    video_path = path.numpy().decode('utf-8')
    frames = capture_frames(video_path, n_frames=N_FRAMES, output_size=FRAME_SIZE)
    if frames is None:
        # capture_frames signals failure with None, which cannot be converted to a tensor
        raise ValueError(f'Could not read frames from {video_path}')
    return frames, label


def _load_clip(path, label):
    """Map function: run _read_clip through tf.py_function and restore static shapes."""
    frames, label = tf.py_function(_read_clip, [path, label], [tf.float32, tf.string])
    # tf.py_function outputs carry no static shape; set it so downstream ops know the rank
    frames.set_shape([N_FRAMES, *FRAME_SIZE, 3])
    label.set_shape([])
    return frames, label


# (video path, action label) pairs -> (frames, label) batches
dataset = tf.data.Dataset.from_tensor_slices((train_df.path.values, train_df.action.values))
dataset = dataset.map(_load_clip)
batch_size = 32
dataset = dataset.batch(batch_size)
dataset = dataset.prefetch(tf.data.AUTOTUNE)

In [51]:
# Iterate over the whole dataset once, which forces every video to be decoded;
# the batches themselves are discarded.
for frames, label in dataset:
    continue

In [None]:
# Display the dataset object's repr as the cell output.
dataset