# GIF Loader

In [None]:
from PIL import Image, ImageSequence
import vidaug.augmentors as va
import os

INPUT_PATH = "bowmore_data"
OUTPUT_PATH = "augmentation"

def gif_loader(path, modality="RGB"):
    """Load every frame of an animated GIF as a list of PIL images.

    Args:
        path: Filesystem path of the GIF file.
        modality: PIL mode each frame is converted to (default ``"RGB"``).

    Returns:
        A list with one converted image per frame, in playback order.
    """
    # The frames are converted while the file is still open; the list is
    # built before either context manager exits.
    with open(path, 'rb') as f, Image.open(f) as video:
        return [frame.convert(modality) for frame in ImageSequence.Iterator(video)]
    
# Load the sample GIF from the "augmentation" folder; gif_loader converts
# each frame to its default modality ("RGB").
frames = gif_loader(os.path.join("augmentation", "bored-boring.gif"))

# frames[0]

In [4]:
def sometimes(aug):
    """Wrap *aug* in ``va.Sometimes`` with probability 1 so it is always applied."""
    return va.Sometimes(1, aug)


# The pipeline has a single step: a horizontal flip applied with 100% probability.
seq = va.Sequential([
    sometimes(va.HorizontalFlip()),
])

# Augment the frames.
video_aug = seq(frames)

# Save the augmented frames as an animated GIF; make sure the target folder exists first.
os.makedirs(OUTPUT_PATH, exist_ok=True)
video_aug[0].save(os.path.join(OUTPUT_PATH,"out.gif"), save_all=True, append_images=video_aug[1:], duration=100, loop=0)

# Video Loader

## Outdated

In [50]:
from PIL import Image, ImageSequence
import vidaug.augmentors as va
import os
import cv2
import matplotlib.pyplot as plt

# Build the dataset path from its components so it does not depend on the
# Windows path separator.
DATA_FOLDER = os.path.join("bowmore_data", "videos")
ACTION_SUBFOLDER = "ILoveYou"
video_filename = "1"
video_filetype = ".avi"
INPUT_PATH = os.path.join(DATA_FOLDER, ACTION_SUBFOLDER)
VIDEO_INPUT_PATH = os.path.join(INPUT_PATH, video_filename + video_filetype)

# Folder that receives the augmented video.
OUTPUT_FOLDER = "augmentation"

def video_loader(path, modality="RGB"):
    """Read all frames of a video file together with its stream properties.

    Args:
        path: Path of the video file, handed to ``cv2.VideoCapture``.
        modality: When ``"RGB"``, every frame is converted with
            ``cv2.COLOR_BGR2RGB``; any other value leaves the frames as read.

    Returns:
        A tuple ``(frames, fps, width, height, fourcc)``. When the video
        cannot be opened the tuple is ``([], 0, 0, 0, 0)``, so callers can
        always unpack five values.
    """
    frames = []
    video = cv2.VideoCapture(path)
    if not video.isOpened():
        print("Error: Could not open video.")
        # Same arity as the success path; returning a bare list here made
        # the caller's five-way unpacking fail.
        return frames, 0, 0, 0, 0
    try:
        fps = video.get(cv2.CAP_PROP_FPS)  # Get the frame rate
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))  # Get frame width
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))  # Get frame height
        fourcc = int(video.get(cv2.CAP_PROP_FOURCC))  # Get codec
        print(f"FPS: {fps}, Width: {width}, Height: {height}, FourCC: {fourcc}")
        while True:
            ret, frame = video.read()
            if not ret:
                break
            if modality == "RGB":
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
    finally:
        # Release the capture even if reading or converting a frame raises.
        video.release()
    print(f"Loaded {len(frames)} frames.")
    return frames, fps, width, height, fourcc



# LOAD the video frames and their properties
frames, fps, width, height, fourcc = video_loader(VIDEO_INPUT_PATH)

# To preview a frame: plt.imshow(frames[0]) builds the image object from the
# frame array, and plt.show() renders the current figure.

# plt.imshow(frames[0])
# plt.show()


def sometimes(aug):
    """Wrap *aug* in ``va.Sometimes`` with probability 1 so it is always applied."""
    return va.Sometimes(1, aug)


# Augmentation pipeline; only the horizontal flip is currently enabled.
seq = va.Sequential([
    sometimes(va.HorizontalFlip()), # horizontally flip the video with 100% probability
    # sometimes(va.VerticalFlip()), 
    # sometimes(va.Rotate(degrees=10)), 
    # sometimes(va.GaussianBlur(sigma=1.0))
])

if not frames:
    # Nothing to augment or write; stop here instead of producing an empty file.
    print("No frames were loaded. Please check the video file and path.")
else:
    # Augment the frames.
    video_aug = seq(frames)

    # Make sure the folder the video is written to exists. This cell defines
    # OUTPUT_FOLDER (not OUTPUT_PATH), and that is the folder used below.
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)

    # SAVE VIDEO
    augmented_video_filename = video_filename + "_aug" + video_filetype
    OUTPUT_VIDEO_PATH = os.path.join(OUTPUT_FOLDER, augmented_video_filename)
    # automatic video properties (taken from the source video)
    out = cv2.VideoWriter(OUTPUT_VIDEO_PATH, fourcc, fps, (width, height))

    # fixed video properties
    # fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # out = cv2.VideoWriter(os.path.join(OUTPUT_VIDEO_PATH), fourcc, 20.0, (frames[0].shape[1], frames[0].shape[0]))

    # Write augmented frames to video; frames were loaded as RGB, so convert
    # them back to BGR before writing.
    try:
        for frame in video_aug:
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            out.write(frame)
    finally:
        # Release the writer even if converting or writing a frame raises.
        out.release()
    print(f"Augmented video saved at: {OUTPUT_VIDEO_PATH}")

FPS: 10.0, Width: 1280, Height: 720, FourCC: 877677894
Loaded 30 frames.


## New

In [1]:
import vidaug.augmentors as va
import os
import cv2
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
import random


# Root folder that is searched recursively for input videos. Built from its
# components so it does not depend on the Windows path separator.
DATA_FOLDER = os.path.join("bowmore_data", "videos")
# ACTION_SUBFOLDER = "ILoveYou"
# INPUT_PATH = os.path.join(DATA_FOLDER, ACTION_SUBFOLDER)
# video_filename = "1"
# File extension of the videos to augment.
video_filetype = ".avi"
# VIDEO_INPUT_PATH = os.path.join(INPUT_PATH, video_filename + video_filetype)
# Root folder for the augmented videos.
OUTPUT_FOLDER = "augmentation"

def video_loader(path, modality="RGB"):
    """Decode a video file into a list of frames plus its stream properties.

    Args:
        path: Path of the video file, handed to ``cv2.VideoCapture``.
        modality: ``"RGB"`` converts every frame with ``cv2.COLOR_BGR2RGB``;
            any other value keeps the frames as read.

    Returns:
        ``(frames, fps, width, height, fourcc)``; ``([], 0, 0, 0, 0)`` when
        the video cannot be opened.
    """
    capture = cv2.VideoCapture(path)
    if not capture.isOpened():
        print(f"Error: Could not open video {path}.")
        return [], 0, 0, 0, 0

    # Query the stream properties up front, in a fixed order.
    fps = capture.get(cv2.CAP_PROP_FPS)
    width, height, fourcc = (
        int(capture.get(prop))
        for prop in (
            cv2.CAP_PROP_FRAME_WIDTH,
            cv2.CAP_PROP_FRAME_HEIGHT,
            cv2.CAP_PROP_FOURCC,
        )
    )
    print(f"FPS: {fps}, Width: {width}, Height: {height}, FourCC: {fourcc}")

    convert_to_rgb = modality == "RGB"
    frames = []
    # Keep reading until the capture reports that no frame was returned.
    ok, image = capture.read()
    while ok:
        if convert_to_rgb:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        frames.append(image)
        ok, image = capture.read()
    capture.release()

    print(f"Loaded {len(frames)} frames from {path}.")

    return frames, fps, width, height, fourcc

# def augment_video(frames):
#     sometimes = lambda aug: va.Sometimes(1, aug)
#     seq = va.Sequential([
#         sometimes(va.HorizontalFlip()),              # Horizontal flipping
#         sometimes(va.RandomRotate(degrees=45)),      # Small random rotations
#         sometimes(va.RandomTranslate(x=10, y=10)),   # Random translations
#         sometimes(va.RandomResize(scale=(0.9, 1.1))),# Random scaling
#         sometimes(va.RandomBrightness()),            # Random brightness adjustment
#         sometimes(va.RandomContrast()),              # Random contrast adjustment
#         sometimes(va.AddGaussianNoise()),            # Adding Gaussian noise
#         sometimes(va.GaussianBlur(sigma=1.0))        # Applying Gaussian blur
#     ])
#     return seq(frames)


def save_augmented_video(frames, output_path, fps, width, height, fourcc):
    """Write a sequence of RGB frames to a video file.

    Args:
        frames: Iterable of frames as numpy arrays in RGB channel order.
        output_path: Destination file; its parent folder is created if needed.
        fps: Frame rate passed to ``cv2.VideoWriter``.
        width: Frame width passed to ``cv2.VideoWriter``.
        height: Frame height passed to ``cv2.VideoWriter``.
        fourcc: Codec code passed to ``cv2.VideoWriter``.
    """
    # A bare filename has an empty dirname, and os.makedirs("") raises.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    try:
        for frame in frames:
            # cv2.cvtColor rejects CV_64F input ("Unsupported depth of input
            # image"), so float64 frames are converted to uint8 first.
            # NOTE(review): assumes float64 frames are scaled to [0, 1] - confirm
            # against the augmentors in use. Clipping prevents out-of-range
            # values from wrapping around in the uint8 cast.
            if frame.dtype == np.float64:
                frame = (np.clip(frame, 0.0, 1.0) * 255).astype(np.uint8)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            out.write(frame)
    finally:
        # Release the writer even if a frame fails to convert or write.
        out.release()

# Custom RandomResize class using cv2.resize
class CustomRandomResize(object):
    """Rescale a clip by a random factor, then resize it back to a fixed size.

    One scaling factor is drawn per call from
    ``[1 - rate, 1 + rate]`` and applied to every frame of the clip, so the
    output frames keep the target size while the content is resampled.

    Args:
        rate: Half-width of the scaling range around 1.
        original_width: Output width; when omitted (together with or instead
            of ``original_height``) the size of the clip's first frame is used.
        original_height: Output height; see ``original_width``.
    """

    def __init__(self, rate=0.2, original_width=None, original_height=None):
        self.rate = rate
        self.original_width = original_width
        self.original_height = original_height

    def __call__(self, clip):
        """Return a list with the resized version of every frame in *clip*.

        Raises:
            TypeError: If a frame is not a ``numpy.ndarray``.
        """
        if len(clip) == 0:
            return []

        # Resolve the output size per call instead of storing it on self:
        # one instance may be reused for clips of different resolutions.
        out_width, out_height = self.original_width, self.original_height
        if out_width is None or out_height is None:
            first = clip[0]
            if not isinstance(first, np.ndarray):
                raise TypeError('Expected numpy.ndarray but got list of {0}'.format(type(first)))
            out_height, out_width = first.shape[:2]

        scaling_factor = random.uniform(1 - self.rate, 1 + self.rate)
        # Guard against a zero-sized intermediate image for large rates.
        new_width = max(1, int(out_width * scaling_factor))
        new_height = max(1, int(out_height * scaling_factor))

        resized_clip = []
        for img in clip:
            if not isinstance(img, np.ndarray):
                raise TypeError('Expected numpy.ndarray but got list of {0}'.format(type(img)))
            scaled = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
            # Resize back so every output frame has the target size.
            restored = cv2.resize(scaled, (out_width, out_height), interpolation=cv2.INTER_LINEAR)
            resized_clip.append(restored)
        return resized_clip

# Define each augmentation
"""
Horizontal Flipping:
Why: It helps the model generalize by teaching it to recognize gestures from both the left and right sides.
How: Flip the video frames horizontally.

Rotation:
Why: Small rotations can make the model invariant to slight tilts of the camera.
How: Apply small random rotations (e.g., between -10 to 10 degrees).

Scaling/Resizing:
Why: To make the model robust to different distances from the camera.
How: Randomly scale the video frames up or down.

Brightness and Contrast Adjustment:
Why: To handle different lighting conditions.
How: Randomly adjust the brightness and contrast of the frames.

Gaussian Noise:
Why: To make the model robust to noise in the video feed.
How: Add small amounts of Gaussian noise to the frames.

Translation:
Why: To handle slight movements of the camera or the subject.
How: Randomly translate the frames horizontally and vertically.
"""
# Maps an augmentation name to the augmentor instance. The processing loop
# below applies each entry separately to every loaded video and appends the
# name to the output filename.
# NOTE(review): the rationale text above suggests small rotations (e.g. -10 to
# 10 degrees), but RandomRotate is configured with degrees=45 - confirm which
# is intended.
augmentations = {
    "HorizontalFlip": va.HorizontalFlip(),
    "RandomRotate": va.RandomRotate(degrees=45),
    "RandomTranslate": va.RandomTranslate(x=10, y=10),
    "RandomResize": CustomRandomResize(rate=0.2),  # Scaling factor drawn from [0.8, 1.2]
    # Disabled augmentations:
    # "RandomBrightness": va.RandomBrightness(),
    # "RandomContrast": va.RandomContrast(),
    # "AddGaussianNoise": va.AddGaussianNoise(),
    "GaussianBlur": va.GaussianBlur(sigma=1.0)
}

# Collect every video file below DATA_FOLDER exactly once.
# Path(DATA_FOLDER).rglob(pattern) searches DATA_FOLDER and all of its
# subdirectories for files matching the pattern, so a single recursive search
# is enough. Running a second rglob inside every subfolder would pick up
# videos in nested folders more than once and skip videos stored directly in
# DATA_FOLDER. sorted() gives a stable processing order.
video_files = sorted(Path(DATA_FOLDER).rglob(f"*{video_filetype}"))

for video_path in video_files:
    # The loader and os.path helpers below work on plain strings.
    video_file = str(video_path)
    frames, fps, width, height, fourcc = video_loader(video_file)

    # Skip videos that could not be loaded, i.e. frames is empty.
    if len(frames) == 0:
        continue

    # Path of the video relative to the data folder; the same sub-path is
    # reused below OUTPUT_FOLDER. It does not depend on the augmentation, so
    # it is computed once per video.
    relative_path = os.path.relpath(video_file, DATA_FOLDER)
    base, ext = os.path.splitext(relative_path)

    for aug_name, aug in augmentations.items():
        print(aug_name)
        augmented_frames = aug(frames)
        print(f"Augmented video {video_file} with {aug_name}.") 

        # Add the augmentation name to the filename.
        augmented_filename = f"{base}_{aug_name}{ext}"
        output_video_path = os.path.join(OUTPUT_FOLDER, augmented_filename)

        save_augmented_video(augmented_frames, output_video_path, fps, width, height, fourcc)

print("All videos have been augmented and saved.")

FPS: 10.0, Width: 1280, Height: 720, FourCC: 877677894
Loaded 30 frames from bowmore_data\videos\background\0.avi.
HorizontalFlip
Augmented video bowmore_data\videos\background\0.avi with HorizontalFlip.
RandomRotate
Augmented video bowmore_data\videos\background\0.avi with RandomRotate.
RandomTranslate
Augmented video bowmore_data\videos\background\0.avi with RandomTranslate.
RandomResize
Augmented video bowmore_data\videos\background\0.avi with RandomResize.
GaussianBlur
Augmented video bowmore_data\videos\background\0.avi with GaussianBlur.
FPS: 10.0, Width: 1280, Height: 720, FourCC: 877677894
Loaded 30 frames from bowmore_data\videos\background\1.avi.
HorizontalFlip
Augmented video bowmore_data\videos\background\1.avi with HorizontalFlip.
RandomRotate
Augmented video bowmore_data\videos\background\1.avi with RandomRotate.
RandomTranslate
Augmented video bowmore_data\videos\background\1.avi with RandomTranslate.
RandomResize
Augmented video bowmore_data\videos\background\1.avi with