## Create Frames for each video

This notebook extracts image frames from each video; the following steps use these frames as their input.

In [None]:
# Install the ffmpeg library
! pip install ffmpeg-python;

In [None]:
# Import the libraries
import ffmpeg
import shutil, os, random
import time
from pathlib import Path

#### Create the folders that contain the frames for each video

In [None]:
# Extract frames from every video clip into its own sub-folder of `frames_path`.
# NOTE(review): both paths below are absolute (filesystem root) — confirm they
# match the environment this notebook runs in.
base_path = ''
videos_path = '/videos'

# Create a frames folder for cleaner project.
# exist_ok lets this cell be re-run after an interrupted run.
frames_path =  '/frames'
os.makedirs(frames_path, exist_ok=True)

# Folder names produced so far in this run; used to detect two clips whose
# names collapse to the same folder (their frames would overwrite each other).
seen_folder_names = set()

# sorted() makes the processing order deterministic across runs
for video in sorted(os.listdir(videos_path)):
    video_file = os.path.join(videos_path, video)

    # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints): they are not clips
    if not os.path.isfile(video_file):
        continue

    # Check how long the video is, to pick a custom frame frequency for it
    probe = ffmpeg.probe(video_file)
    video_info = next(
        (stream for stream in probe['streams'] if stream.get('codec_type') == 'video'),
        None,
    )
    if video_info is None:
        print(f'Skipping {video!r}: no video stream found')
        continue

    # Some containers do not report a per-stream duration; fall back to the
    # container-level duration from the probe's "format" section.
    duration = float(video_info.get('duration') or probe['format']['duration'])

    # Define the name of the folder for this clip's frames:
    # lower-case the file name, drop its extension, split on '-' and ' ',
    # drop the first two tokens when there are four or more, then join with '_'.
    name_tokens = os.path.splitext(video.lower())[0].replace('-', ' ').split(' ')
    if len(name_tokens) >= 4:
        name_tokens = name_tokens[2:]
    folder_name = '_'.join(name_tokens)

    if folder_name in seen_folder_names:
        raise ValueError(
            f'Video {video!r} maps to frames folder {folder_name!r}, '
            'which is already used by another video'
        )
    seen_folder_names.add(folder_name)

    # Create the directory for the frames
    frames_dir = os.path.join(frames_path, folder_name)
    os.makedirs(frames_dir, exist_ok=True)

    # Based on the duration of the video we have a different frame frequency
    # Short video duration -> High frequency
    # Long  video duration -> Low frequency
    if duration >= 300:
        fps = 'fps=0.5'
    elif duration >= 200:
        fps = 'fps=1'
    else:
        fps = 'fps=2'

    # Write the frames as frame_0.png, frame_1.png, ... at the chosen rate.
    # overwrite_output avoids ffmpeg's interactive overwrite prompt when the
    # cell is re-run and frames from a previous attempt are still present.
    input_file = ffmpeg.input(video_file)
    output_file_pattern = os.path.join(frames_dir, 'frame_%d.png')
    output = ffmpeg.output(input_file, output_file_pattern, vf=fps, start_number=0)
    ffmpeg.run(output, overwrite_output=True)

## Create the Train-Test Split of the frames created above

To load the data correctly in the later steps, we need to split the frames of
each video into a train set and a test set.

In [None]:
# Split the frames of every video folder under `frames_path` into train/test
# sub-folders, renaming each frame to "<video_folder>_<frame_number><ext>".
# NOTE(review): the split is per frame, so frames of the same clip end up in
# both train and test — confirm this is intended.
train_folder = 'train'  # Path to the train folder
test_folder = 'test'  # Path to the test folder
split_ratio = 0.9  # Train-test split ratio
split_seed = 42  # Seed for the shuffle, so the split is reproducible

# Dedicated generator: the split neither depends on nor disturbs the global
# `random` state.
split_rng = random.Random(split_seed)

# Create train and test folders if they don't exist
os.makedirs(train_folder, exist_ok=True)
os.makedirs(test_folder, exist_ok=True)

# Get the list of video folders in the frames folder.
# Sorted because os.listdir order is arbitrary and would otherwise change
# which shuffle each folder receives.
video_folders = sorted(
    folder for folder in os.listdir(frames_path)
    if os.path.isdir(os.path.join(frames_path, folder))
)

# Process each video folder
for video_folder in video_folders:
    video_path = os.path.join(frames_path, video_folder)

    # Create train and test sub-folders for the current video folder
    train_video_folder = os.path.join(train_folder, video_folder)
    test_video_folder = os.path.join(test_folder, video_folder)
    os.makedirs(train_video_folder, exist_ok=True)
    os.makedirs(test_video_folder, exist_ok=True)

    # Get the image files of the current video folder in a fixed order,
    # then shuffle them with the seeded generator
    image_files = sorted(
        file for file in os.listdir(video_path)
        if os.path.isfile(os.path.join(video_path, file))
    )
    split_rng.shuffle(image_files)

    # Calculate the split index based on the split ratio:
    # the first part goes to train, the remainder to test
    split_index = int(len(image_files) * split_ratio)

    for destination_folder, files_to_move in (
        (train_video_folder, image_files[:split_index]),
        (test_video_folder, image_files[split_index:]),
    ):
        for file in files_to_move:
            file_name, file_ext = os.path.splitext(file)
            # "frame_12" -> "12"; rsplit avoids an IndexError on a file
            # name that contains no underscore
            frame_number = file_name.rsplit('_', 1)[-1]
            new_file_name = f'{video_folder}_{frame_number}{file_ext}'
            shutil.move(
                os.path.join(video_path, file),
                os.path.join(destination_folder, new_file_name),
            )


### Remove frames folder

In [None]:
# After creating the train and test folders we can delete the frames folder.
# The existence check makes re-running this cell a no-op instead of raising
# FileNotFoundError once the folder is already gone.
if os.path.isdir(frames_path):
    shutil.rmtree(frames_path)