In [None]:
!pip install opencv-python-headless
#!pip install -U "tensorflow>=2.10.0"
!pip install remotezip tqdm opencv-python
!pip install -q git+https://github.com/tensorflow/docs

Collecting remotezip
  Downloading remotezip-0.12.3-py3-none-any.whl (8.1 kB)
Installing collected packages: remotezip
Successfully installed remotezip-0.12.3
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone


In [None]:
import cv2
import os
import pandas as pd

import tqdm
import random
import pathlib
import itertools
import collections

import os
import cv2
import numpy as np
import remotezip as rz
import random
import shutil
import glob

import tensorflow as tf

# Some modules to display an animation using imageio.
import imageio
from IPython import display
from urllib import request
from tensorflow_docs.vis import embed
from tensorflow import keras
from imutils import paths
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.utils import plot_model
from IPython.display import Image
import imgaug.augmenters as iaa

In [None]:
#Mount Google Drive.
# Mount Google Drive at /content/drive so the dataset folders are reachable from the Colab VM.
from google.colab import drive
drive.mount("/content/drive")
# Note: the cells below read and write under "/content/drive/My Drive/csvs/".

Mounted at /content/drive


# Preparing the dataset:
Source of the dataset: https://ieee-dataport.org/open-access/handball-action-dataset-uniri-hbd

The dataset consists of 751 videos, each showing one handball action from one of 7 categories (passing, shooting, jump-shot, dribbling, running, crossing, defence). The videos were manually extracted from longer videos recorded in handball practice sessions. They were recorded in at least full HD (1920 × 1080) resolution at 30 or more frames per second, and in most of them one or two players perform the action of interest.<br>
The directory actions/ contains **the videos of performances of actions by single players isolated from original whole scenes** (found in the scenes/ directory). The files are arranged in subdirectories according to the performed action class. Files are named so that the beginning of the name matches the original video from which the action is extracted.
Hence, the dataset is already cleaned. Only minor details remain to be reviewed:
- imbalance of data.
- reorganisation in a suitable file structure.

## Dealing with imbalanced data:

In [None]:
# Root folder of the original dataset; it holds one sub-folder per action class.
dataset_path = '/content/drive/My Drive/csvs/handball_s/actions'

# The sub-folder names are used as the class labels.
label_types = os.listdir(dataset_path)
print(label_types)

['crossing', 'passing', 'jump-shot', 'dribbling', 'defence', 'running', 'shot']


In [None]:
# Root folder that holds one sub-folder per action class
directory = "/content/drive/My Drive/csvs/handball_s/actions"

# Maps each action sub-folder name to the number of ".avi" files inside it
video_counts = {}

for entry in os.listdir(directory):
    entry_path = os.path.join(directory, entry)
    if not os.path.isdir(entry_path):
        # Loose files next to the class folders are not counted
        continue
    avi_names = [name for name in os.listdir(entry_path) if name.endswith(".avi")]
    video_counts[entry] = len(avi_names)

# One report line per action
for action, count in video_counts.items():
    print(f"Action: {action}, Number of Videos: {count}")


Action: crossing, Number of Videos: 250
Action: passing, Number of Videos: 141
Action: jump-shot, Number of Videos: 248
Action: dribbling, Number of Videos: 26
Action: defence, Number of Videos: 35
Action: running, Number of Videos: 13
Action: shot, Number of Videos: 96


### Deleting unused categories:
To preserve the original data, the dataset is first copied to a directory called "cleaned_actions". All of the following preprocessing steps are applied to this copy. The two smallest classes, 'running' (13 videos) and 'dribbling' (26 videos), are then removed from the copy.

In [None]:
# Define the source and destination paths
source_directory = "/content/drive/My Drive/csvs/handball_s/actions"
destination_directory = "/content/drive/My Drive/csvs/handball_test/cleaned_actions"

# Always start from a fresh working copy. shutil.copytree() raises FileExistsError
# when the destination already exists, so re-running this cell would otherwise stop
# here. The destination is a derived copy that the later cells modify (augmentation,
# down-sampling), so it is discarded and rebuilt from the untouched originals.
if os.path.exists(destination_directory):
    shutil.rmtree(destination_directory)
shutil.copytree(source_directory, destination_directory)

# Action classes that are removed from the working copy
folders_to_delete = ['running', 'dribbling']

for folder in folders_to_delete:
    folder_path = os.path.join(destination_directory, folder)
    # Only existing folders are removed, so a class that is already absent is not an error
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)
        print(f"Deleted folder: {folder}")

print("Copy and deletion completed successfully.")


Deleted folder: running
Deleted folder: dribbling
Copy and deletion completed successfully.


### Augmenting data for oversampling

In [None]:
def augment_videos(input_directory, output_directory, desired_num_videos):
    """
    Create augmented copies of randomly chosen videos.

    Source videos are drawn at random (with replacement) from ``input_directory``.
    Every successful draw produces one file ``augmented_video_<i>.avi`` in
    ``output_directory``. One set of augmentation parameters is sampled per video and
    applied to all of its frames, so a clip is flipped/rotated/occluded consistently
    instead of changing from frame to frame.

    Args:
    - input_directory (str): Path to the directory containing the input videos.
    - output_directory (str): Path to the directory where augmented videos will be saved.
    - desired_num_videos (int): Total number of augmented videos desired.

    Returns:
    - None

    Raises:
    - ValueError: If augmented videos are requested but the input directory holds no files.
    - RuntimeError: If none of the input files yields a frame, or an output video
      cannot be opened for writing.
    """

    # Create the output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    if desired_num_videos <= 0:
        print("Augmentation completed.")
        return

    # Sorted so that a seeded `random` module picks the same sources on every run
    candidates = sorted(
        name for name in os.listdir(input_directory)
        if os.path.isfile(os.path.join(input_directory, name))
    )
    if not candidates:
        raise ValueError(f"No video files found in {input_directory}")

    # Define augmentation pipeline (the augmenters run in a random order)
    seq = iaa.Sequential([
        iaa.Sometimes(0.5, iaa.Sequential([
            # blank out 1-3 random rectangles, each 10-20% of the frame size
            iaa.Cutout(nb_iterations=(1, 3), size=(0.1, 0.2), squared=False),
        ])),
        iaa.Fliplr(0.5),  # horizontally flip 50% of the videos
        iaa.Rotate((-10, 10)),  # rotate videos by -10 to 10 degrees
        iaa.GaussianBlur(sigma=(0, 1.0)),  # apply Gaussian blur
        iaa.AdditiveGaussianNoise(scale=(0, 0.1 * 255))  # add Gaussian noise
    ], random_order=True)

    fourcc = cv2.VideoWriter_fourcc(*"XVID")  # codec used for the .avi output

    num_augmented_videos = 0
    while num_augmented_videos < desired_num_videos:
        if not candidates:
            # Every input file turned out to be unreadable; stop instead of looping forever
            raise RuntimeError(f"No readable video found in {input_directory}")

        # Choose a random video file
        video_file = random.choice(candidates)
        video_path = os.path.join(input_directory, video_file)
        output_video_path = os.path.join(output_directory, f"augmented_video_{num_augmented_videos}.avi")

        # Freeze one random parameter set for this video. Calling the frozen augmenter
        # once per frame re-applies the same sampled values to every frame.
        video_augmenter = seq.to_deterministic()

        video_capture = cv2.VideoCapture(video_path)
        writer = None
        try:
            # Keep the source frame rate; fall back to 30 when the container reports none (0.0)
            fps = video_capture.get(cv2.CAP_PROP_FPS) or 30

            # Frames are augmented and written one at a time, so the clip is never
            # held in memory as a whole
            while True:
                ret, frame = video_capture.read()
                if not ret:
                    break
                augmented_frame = video_augmenter.augment_image(frame)
                if writer is None:
                    # The writer is opened lazily because the frame size is only known now
                    height, width = augmented_frame.shape[:2]
                    writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
                    if not writer.isOpened():
                        raise RuntimeError(f"Could not open {output_video_path} for writing")
                writer.write(augmented_frame)
        finally:
            video_capture.release()
            if writer is not None:
                writer.release()

        if writer is None:
            # No frame could be read (not a video, or a corrupt file): never pick it again
            candidates.remove(video_file)
            continue

        num_augmented_videos += 1

    print("Augmentation completed.")

In [None]:
def move_rename_directories(old_directory, new_directory):
    """
    Merge the old directory into the new one and give the result the old name.

    Every entry of ``old_directory`` is moved into ``new_directory``, the then-empty
    ``old_directory`` is removed, and ``new_directory`` is renamed to ``old_directory``.

    Args:
    - old_directory (str): Path to the old directory.
    - new_directory (str): Path to the new directory.

    Returns:
    - None

    Raises:
    - FileNotFoundError: If either path is not an existing directory.
    - FileExistsError: If an entry with the same name exists in both directories.
      The check runs before anything is moved, so both directories stay untouched.
    """

    for path in (old_directory, new_directory):
        if not os.path.isdir(path):
            raise FileNotFoundError(f"Directory not found: {path}")

    items = os.listdir(old_directory)

    # Moving onto an existing name would silently replace that file, so refuse up front
    clashes = sorted(set(items) & set(os.listdir(new_directory)))
    if clashes:
        raise FileExistsError(
            f"Entries present in both {old_directory} and {new_directory}: {clashes}"
        )

    # Move the contents of the old directory to the new directory
    for item in items:
        source = os.path.join(old_directory, item)
        destination = os.path.join(new_directory, item)
        shutil.move(source, destination)

    # The old directory is empty now; os.rmdir (unlike shutil.rmtree) refuses to
    # delete anything that unexpectedly remained in it
    os.rmdir(old_directory)

    # Rename the new directory to match the old directory name
    os.rename(new_directory, old_directory)

    print("Directories moved, deleted, and renamed successfully.")

**DEFENCE:**<br>
Final 'defence' folder should contain 140 videos.

In [None]:
# Oversample 'defence': write 105 augmented clips into a temporary sibling folder.
input_directory = "/content/drive/My Drive/csvs/handball_test/cleaned_actions/defence"
output_directory = "/content/drive/My Drive/csvs/handball_test/cleaned_actions/defence_aug"
desired_num_videos = 105

augment_videos(
    input_directory=input_directory,
    output_directory=output_directory,
    desired_num_videos=desired_num_videos,
)


Augmentation completed.


In [None]:
# Merge the original 'defence' clips into 'defence_aug', then rename the merged
# folder back to 'defence'.
old_directory = "/content/drive/My Drive/csvs/handball_test/cleaned_actions/defence"
new_directory = "/content/drive/My Drive/csvs/handball_test/cleaned_actions/defence_aug"

move_rename_directories(old_directory=old_directory, new_directory=new_directory)


Directories moved, deleted, and renamed successfully.


**SHOT:**<br>
Final 'shot' folder should contain 141 videos (96 originals + 45 augmented).

In [None]:
# Oversample 'shot': write 45 augmented clips into a temporary sibling folder.
input_directory = "/content/drive/My Drive/csvs/handball_test/cleaned_actions/shot"
output_directory = "/content/drive/My Drive/csvs/handball_test/cleaned_actions/shot_aug"
desired_num_videos = 45

augment_videos(
    input_directory=input_directory,
    output_directory=output_directory,
    desired_num_videos=desired_num_videos,
)


Augmentation completed.


In [None]:
# Merge the original 'shot' clips into 'shot_aug', then rename the merged folder
# back to 'shot'.
old_directory = "/content/drive/My Drive/csvs/handball_test/cleaned_actions/shot"
new_directory = "/content/drive/My Drive/csvs/handball_test/cleaned_actions/shot_aug"

move_rename_directories(old_directory=old_directory, new_directory=new_directory)


Directories moved, deleted, and renamed successfully.


### Down-sampling categories:

In [None]:
def keep_random_videos(directory, num_videos_to_keep):
    """
    Down-sample a folder to a fixed number of randomly chosen ".avi" files.

    Files that do not end in ".avi" are neither counted nor deleted. When the folder
    holds no more ".avi" files than requested, nothing is deleted.

    Args:
    - directory (str): Path to the directory containing the videos.
    - num_videos_to_keep (int): Number of videos to keep.

    Returns:
    - None
    """

    # Only ".avi" files take part in the selection
    avi_names = [name for name in os.listdir(directory) if name.endswith(".avi")]

    # Nothing to remove when the folder is already small enough
    if not len(avi_names) > num_videos_to_keep:
        print("Number of videos in the directory is already less than or equal to the number to keep.")
        return

    # Draw the survivors; a set gives constant-time membership checks below
    survivors = set(random.sample(avi_names, num_videos_to_keep))

    # Remove every video that was not drawn
    for name in avi_names:
        if name in survivors:
            continue
        os.remove(os.path.join(directory, name))

    print("Random videos kept and others deleted successfully.")

**JUMP-SHOT:**<br>
Final 'jump-shot' folder should contain 150 videos.

In [None]:
# Down-sample 'jump-shot' to 150 randomly chosen clips.
directory = "/content/drive/My Drive/csvs/handball_test/cleaned_actions/jump-shot"
num_videos_to_keep = 150

keep_random_videos(directory=directory, num_videos_to_keep=num_videos_to_keep)


Random videos kept and others deleted successfully.


**CROSSING:**<br>
Final 'crossing' folder should contain 150 videos.

In [None]:
# Down-sample 'crossing' to 150 randomly chosen clips.
directory = "/content/drive/My Drive/csvs/handball_test/cleaned_actions/crossing"
num_videos_to_keep = 150

keep_random_videos(directory=directory, num_videos_to_keep=num_videos_to_keep)


Random videos kept and others deleted successfully.


### _Final dataset:_
The size of the final dataset is 1.8 GB.

In [None]:
# Working copy of the dataset after pruning, oversampling and down-sampling
directory = "/content/drive/My Drive/csvs/handball_test/cleaned_actions"

# Action sub-folder name -> number of ".avi" files it contains (loose files are skipped)
video_counts = {
    entry: sum(
        1
        for name in os.listdir(os.path.join(directory, entry))
        if name.endswith(".avi")
    )
    for entry in os.listdir(directory)
    if os.path.isdir(os.path.join(directory, entry))
}

# One report line per action
for action, count in video_counts.items():
    print(f"Action: {action}, Number of Videos: {count}")

Action: jump-shot, Number of Videos: 150
Action: passing, Number of Videos: 141
Action: crossing, Number of Videos: 150
Action: defence, Number of Videos: 140
Action: shot, Number of Videos: 141


## Dividing the dataset in training, validation and test set:

In [None]:
# Paths used by the train/validation/test split below.

# Original dataset (kept untouched, not used from here on):
#dataset_path = '/content/drive/My Drive/csvs/handball_s/actions'

# Working copy where the preprocessing steps were carried out:
dataset_path = '/content/drive/My Drive/csvs/handball_test/cleaned_actions'
label_types = os.listdir(dataset_path)
print(label_types)

# Destination roots; the split cell creates one sub-folder per action in each of them.
train_path = '/content/drive/My Drive/csvs/handball_test/train'
test_path = '/content/drive/My Drive/csvs/handball_test/test'
val_path = '/content/drive/My Drive/csvs/handball_test/val'

['crossing', 'passing', 'jump-shot', 'defence', 'shot']


In [None]:
# Action classes to split: the entries found in `dataset_path`.
# actions = ['shot', 'dribbling', 'jump-shot', 'passing', 'running', 'crossing', 'defence']
actions = label_types

# Split ratios: 70% training, 10% validation; the remainder (about 20%) is the test set
train_ratio = 0.7
val_ratio = 0.1

# Fixed seed so that every run assigns the same videos to the same subset
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

# NOTE(review): the "augmented_video_*.avi" clips were generated from original clips of
# the same class and are shuffled together with them, so an augmented clip and its
# source can end up in different subsets (train/test leakage). Splitting the originals
# first and augmenting only the training part would avoid this.

# Iterate over each action folder
for action in actions:
    action_path = os.path.join(dataset_path, action)
    if not os.path.isdir(action_path):
        # Skip loose files that may sit next to the class folders
        continue

    # os.listdir() order is arbitrary; sort before shuffling so the seed fully
    # determines the result
    videos = sorted(os.listdir(action_path))
    split_rng.shuffle(videos)

    # Determine the number of videos for training, validation, and test
    num_videos = len(videos)
    num_train = int(num_videos * train_ratio)
    num_val = int(num_videos * val_ratio)
    num_test = num_videos - num_train - num_val
    print(f"for {action} -->\n num train: {num_train}, num test: {num_test}, num val: {num_val}")

    # Consecutive slices of the shuffled list: train, then validation, then test
    train_videos = videos[:num_train]
    val_videos = videos[num_train:(num_train + num_val)]
    test_videos = videos[(num_train + num_val):]

    train_action_path = os.path.join(train_path, action)
    val_action_path = os.path.join(val_path, action)
    test_action_path = os.path.join(test_path, action)

    for subset_action_path, subset_videos in (
        (train_action_path, train_videos),
        (val_action_path, val_videos),
        (test_action_path, test_videos),
    ):
        # Remove copies left by an earlier run; otherwise files from a previous
        # split would stay behind and mix with the new one
        if os.path.isdir(subset_action_path):
            shutil.rmtree(subset_action_path)
        os.makedirs(subset_action_path)

        # Copy (not move) so the working copy in `dataset_path` stays complete
        for video in subset_videos:
            src = os.path.join(action_path, video)
            dst = os.path.join(subset_action_path, video)
            shutil.copy(src, dst)


for crossing -->
 num train: 105, num test: 30, num val: 15
for passing -->
 num train: 98, num test: 29, num val: 14
for jump-shot -->
 num train: 105, num test: 30, num val: 15
for defence -->
 num train: 98, num test: 28, num val: 14
for shot -->
 num train: 98, num test: 29, num val: 14


In [None]:
# Spot-check: list the files copied into the validation subset for 'crossing'.
os.listdir('/content/drive/My Drive/csvs/handball_test/val/crossing')

['crossing_KS_33_act2.avi',
 'crossing_KS_125_act1.avi',
 'crossing_KS_91_act1.avi',
 'crossing_KS_116_act2.avi',
 'crossing_KS_7_act1.avi',
 'crossing_KS_27_act2.avi',
 'crossing_KS_13_act2.avi',
 'crossing_KS_18_act1.avi',
 'crossing_KS_95_act2.avi',
 'crossing_KS_21_act2.avi',
 'crossing_KS_103_act1.avi',
 'crossing_KS_31_act2.avi',
 'crossing_KS_119_act1.avi',
 'crossing_KS_83_act2.avi',
 'crossing_KS_115_act1.avi']

## Preparing dataframes for each subset:
Each dataframe lists the path of every video in the subset and its corresponding action tag. The dataframes will be used for the "CNN+Sequential" model.

In [None]:
def create_dataframe_from_subset(subset_path):
    """
    Build a DataFrame listing every video of a subset together with its action label.

    ``subset_path`` is read as a folder with one sub-folder per action. Entries of
    ``subset_path`` that are not directories, and entries of an action folder that are
    not regular files, are ignored.

    Args:
    - subset_path (str): Path to the subset root (one sub-folder per action).

    Returns:
    - pandas.DataFrame: One row per video with the columns 'tag' (name of the action
      folder) and 'video_name' (full path of the video file, not just its name).
      Rows are ordered by action name, then by file name.
    """
    info_videos = []

    # Sorted listings make the row order independent of the file system's ordering
    for action in sorted(os.listdir(subset_path)):
        action_path = os.path.join(subset_path, action)
        if not os.path.isdir(action_path):
            # e.g. a CSV stored next to the action folders
            continue

        for video in sorted(os.listdir(action_path)):
            video_path = os.path.join(action_path, video)
            if os.path.isfile(video_path):
                info_videos.append((action, video_path))

    # Build DataFrame:
    df = pd.DataFrame(data=info_videos, columns=['tag', 'video_name'])
    return df

### Preparing the training data:

In [None]:
# Build the training table, then print its first rows, its shape and its last rows
train_df = create_dataframe_from_subset(train_path)
for preview in (train_df.head(), train_df.shape, train_df.tail()):
    print(preview)

        tag                                         video_name
0  crossing  /content/drive/My Drive/csvs/handball_test/tra...
1  crossing  /content/drive/My Drive/csvs/handball_test/tra...
2  crossing  /content/drive/My Drive/csvs/handball_test/tra...
3  crossing  /content/drive/My Drive/csvs/handball_test/tra...
4  crossing  /content/drive/My Drive/csvs/handball_test/tra...
(504, 2)
         tag                                         video_name
499  passing  /content/drive/My Drive/csvs/handball_test/tra...
500  passing  /content/drive/My Drive/csvs/handball_test/tra...
501  passing  /content/drive/My Drive/csvs/handball_test/tra...
502  passing  /content/drive/My Drive/csvs/handball_test/tra...
503  passing  /content/drive/My Drive/csvs/handball_test/tra...


### Preparing the validation data:

In [None]:
# Build the validation table, then print its first rows, its shape and its last rows
val_df = create_dataframe_from_subset(val_path)
for preview in (val_df.head(), val_df.shape, val_df.tail()):
    print(preview)

         tag                                         video_name
0  jump-shot  /content/drive/My Drive/csvs/handball_test/val...
1  jump-shot  /content/drive/My Drive/csvs/handball_test/val...
2  jump-shot  /content/drive/My Drive/csvs/handball_test/val...
3  jump-shot  /content/drive/My Drive/csvs/handball_test/val...
4  jump-shot  /content/drive/My Drive/csvs/handball_test/val...
(72, 2)
        tag                                         video_name
67  defence  /content/drive/My Drive/csvs/handball_test/val...
68  defence  /content/drive/My Drive/csvs/handball_test/val...
69  defence  /content/drive/My Drive/csvs/handball_test/val...
70  defence  /content/drive/My Drive/csvs/handball_test/val...
71  defence  /content/drive/My Drive/csvs/handball_test/val...


### Preparing the test data:

In [None]:
# Build the test table, then print its first rows, its shape and its last rows
test_df = create_dataframe_from_subset(test_path)
for preview in (test_df.head(), test_df.shape, test_df.tail()):
    print(preview)

    tag                                         video_name
0  shot  /content/drive/My Drive/csvs/handball_test/tes...
1  shot  /content/drive/My Drive/csvs/handball_test/tes...
2  shot  /content/drive/My Drive/csvs/handball_test/tes...
3  shot  /content/drive/My Drive/csvs/handball_test/tes...
4  shot  /content/drive/My Drive/csvs/handball_test/tes...
(146, 2)
          tag                                         video_name
141  crossing  /content/drive/My Drive/csvs/handball_test/tes...
142  crossing  /content/drive/My Drive/csvs/handball_test/tes...
143  crossing  /content/drive/My Drive/csvs/handball_test/tes...
144  crossing  /content/drive/My Drive/csvs/handball_test/tes...
145  crossing  /content/drive/My Drive/csvs/handball_test/tes...


### _Save each dataframe in a '.csv'_

In [None]:
# Write one CSV per subset into the handball_test folder (the row index is not written)
data_path = '/content/drive/My Drive/csvs/handball_test/'
for csv_name, subset_df in (('train.csv', train_df), ('test.csv', test_df), ('val.csv', val_df)):
    subset_df.to_csv(os.path.join(data_path, csv_name), index=False)

In [None]:
# Preview the first rows of the validation table
val_df.head()

Unnamed: 0,tag,video_name
0,jump-shot,/content/drive/My Drive/csvs/handball_test/val...
1,jump-shot,/content/drive/My Drive/csvs/handball_test/val...
2,jump-shot,/content/drive/My Drive/csvs/handball_test/val...
3,jump-shot,/content/drive/My Drive/csvs/handball_test/val...
4,jump-shot,/content/drive/My Drive/csvs/handball_test/val...
