# Configuration setup
All global notebook variables will be placed here for readability and maintainability.

In [None]:
from pathlib import Path
import os
import shutil
import gdown
import random
from project_paths import *
import cv2, argparse, glob, PIL, tqdm, sys

# Definition of global variables for the notebook
# Each entry maps a dataset name to a pair of Google Drive sharing links:
# (videos archive, annotations archive) -- see the unpacking into
# video_link / labels_link at the end of this cell.
raw_dataset_links = {
    "mivia": (
        "https://drive.google.com/file/d/1tEz2wVQjPp1MjVHZLa-Z3uyVBnwljgGF/view?usp=sharing",
        "https://drive.google.com/file/d/123AcAQCldRNE6iKpXuCaVtsaR3uHIOeN/view?usp=sharing"
    ),
    "custom": (
        "https://drive.google.com/file/d/1eTDG_SbHkCo0OeVwRKugQ2vDV2csDx6q/view?usp=sharing",
        "https://drive.google.com/file/d/1UjWkvzzezXNOkncas4Q-kP9X9VU2D0OE/view?usp=sharing"
    ),
    # Custom dataset without duplicate videos and with corrected numbering.
    # So far only the download itself has been tested.
    # Frame extraction will be tested shortly.
    "mivia_custom_04_07": (
        "https://drive.google.com/file/d/13VJ-MGfrS_cvOwLvmiDlqmawGV0ebZGC/view?usp=sharing",
        "https://drive.google.com/file/d/13TmHShb5tNa9doujghCNoFlB5pAJsJWz/view?usp=sharing"
    ),
    "mivia_custom_09_07": (
        "https://drive.google.com/file/d/13n9lbNyJchDk5olfHzU6Oy_WQR7ZbPFX/view?usp=drive_link",
        "https://drive.google.com/file/d/13lJY76imebgbovVc1asDm6GvuNkaF8u8/view?usp=drive_link"
   ),
    "mivia_custom_10_07": (
        "https://drive.google.com/file/d/14-z_UsyDaPyGCTdXrXS2YDkGIfDfGQNJ/view?usp=sharing",
        "https://drive.google.com/file/d/13xXyOqp2ET_ySTzd7mP-qk8hs5x3y96g/view?usp=sharing"
    ),
    # Version in which the number of seconds indicates the moment at which both anomalies (fire and smoke) are present in the video
    "mivia_custom_12_07": ( 
        "https://drive.google.com/file/d/14-z_UsyDaPyGCTdXrXS2YDkGIfDfGQNJ/view?usp=sharing", # Same as mivia_custom_10_07
        "https://drive.google.com/file/d/1FuyS0f1nsEHREvekEAJWHpe2Zte434rQ/view?usp=sharing" # GT updated
    ),
    "mivia_custom_14_07": (
        "https://drive.google.com/file/d/1d48J_kUpfUmuJjvLAlBwT4PoVFHFXeja/view?usp=sharing",
        "https://drive.google.com/file/d/1FuyS0f1nsEHREvekEAJWHpe2Zte434rQ/view?usp=sharing"
    ),
    "mivia_only_15_07": (
        "https://drive.google.com/file/d/1NXFvcJqjZ80l7YEl4k23YV0FveJs-avm/view?usp=sharing", # Dataset consisting only of the MIVIA videos
        "https://drive.google.com/file/d/1NWy-pD6zYDg5t_84H3NkVswIJlD6OUIc/view?usp=sharing"  # Annotations are already converted to absolute frames.
    )
}

# Key of raw_dataset_links that the rest of the notebook works on
SELECTED_DATASET = "mivia_only_15_07"
RELOAD_DATASET = False # If True, the dataset is reloaded from the links above, otherwise it is loaded from the local folder



# First link is the videos archive, second link is the annotations (ground truth) archive
video_link, labels_link = raw_dataset_links[SELECTED_DATASET]

In [None]:
def download_google_file(shader_url, output_name):
    """
    Download a file from a Google Drive sharing link using gdown.

    The file id is extracted from the link and turned into a direct
    ``https://drive.google.com/uc?id=<file id>`` download URL.

    :param shader_url: Google Drive sharing link, either
        ``https://drive.google.com/file/d/<file id>/view?...`` or a link
        carrying the id as an ``id=<file id>`` query parameter
    :param output_name: path the downloaded file is written to
    :raises ValueError: if no file id can be found in the link
    :raises RuntimeError: if gdown signals a failed download by returning None
    """
    import re  # local import so this cell does not depend on the notebook's import cell

    # Look for the id by its marker instead of by its position in the URL, so
    # a link with a different shape is either handled or rejected explicitly
    # rather than silently yielding the wrong path segment.
    match = re.search(r"/d/([^/?#]+)", shader_url) or re.search(r"[?&]id=([^&#]+)", shader_url)
    if match is None:
        raise ValueError(f"Cannot extract a Google Drive file id from: {shader_url!r}")

    id_url = "https://drive.google.com/uc?id=" + match.group(1)
    # Older gdown versions return None (instead of raising) when the download
    # fails; stop here rather than letting a later shutil.move fail confusingly.
    if gdown.download(id_url, output_name) is None:
        raise RuntimeError(f"Download failed for {id_url}")


def reload_data_folder_videos():
    """
    Re-download the videos archive of the selected dataset and unpack it.

    The videos folder is deleted and recreated, the archive behind
    ``video_link`` is downloaded into the working directory, moved into the
    videos folder, unpacked there and then removed.
    """
    videos_name_file = "VIDEOS.zip"
    shutil.rmtree(videos_path, ignore_errors=True)  # delete the folder
    os.makedirs(videos_path, exist_ok=True)  # create a new one with the same name
    download_google_file(video_link, videos_name_file)
    zip_videos_path = videos_path / videos_name_file
    shutil.move(videos_name_file, zip_videos_path)
    shutil.unpack_archive(zip_videos_path, videos_path)
    os.remove(zip_videos_path)
    # The archive is unpacked into videos_path, so a macOS "__MACOSX" metadata
    # folder would appear there; the data folder is still checked as before.
    for parent_folder in (videos_path, data_folder_path):
        macos_metadata_folder = parent_folder / "__MACOSX"
        if macos_metadata_folder.exists():
            shutil.rmtree(macos_metadata_folder)


def reload_data_folder_annotations():
    """
    Re-download the annotations archive of the selected dataset and unpack it.

    The archive behind ``labels_link`` is unpacked into the data folder; its
    ``GT_TRAINING_SET_CL0`` and ``GT_TRAINING_SET_CL1`` folders end up as
    ``0`` and ``1`` inside the original-annotations folder.
    """
    labels_name_file = "GT.zip"
    shutil.rmtree(train_original_annotations_path, ignore_errors=True)  # delete the folder
    # Make sure the unpack target exists so this function also works when it
    # is called on its own, before anything else created the data folder.
    os.makedirs(data_folder_path, exist_ok=True)
    download_google_file(labels_link, labels_name_file)
    zip_labels_path = data_folder_path / labels_name_file
    shutil.move(labels_name_file, zip_labels_path)
    shutil.unpack_archive(zip_labels_path, data_folder_path)
    os.remove(zip_labels_path)
    os.makedirs(train_original_annotations_path)
    # Move each class folder straight to its final name. Moving a Path into an
    # existing directory (and renaming afterwards) raises AttributeError in
    # shutil.move on Python < 3.9; a not-yet-existing target avoids that path.
    for archive_folder_name, class_label in (("GT_TRAINING_SET_CL0", "0"), ("GT_TRAINING_SET_CL1", "1")):
        shutil.move(data_folder_path / archive_folder_name, train_original_annotations_path / class_label)
    if (data_folder_path / "__MACOSX").exists():
        shutil.rmtree(data_folder_path / "__MACOSX")

def reload_data_folder():
    """Delete the whole data folder (if present) and download videos, then annotations, again."""
    data_folder_present = data_folder_path.exists()
    if data_folder_present:
        shutil.rmtree(data_folder_path)  # start from a clean slate

    # Videos first, annotations second.
    for reload_step in (reload_data_folder_videos, reload_data_folder_annotations):
        reload_step()
    
    

def check_data_folder(reload = False, size_limit=10):
    """
    Make sure the data folder is populated, re-downloading it when needed.

    The dataset is reloaded when ``reload`` is true, when the data folder does
    not exist, or when the total size of the files it contains (subfolders
    included) is below ``size_limit``.

    :param reload: if True, always reload the dataset
    :param size_limit: minimum accepted size of the data folder, in MB
    """
    must_reload = reload or not data_folder_path.exists()

    if not must_reload:
        # Sum the size of every regular file below the data folder.
        size_in_bytes = 0
        for entry in data_folder_path.glob('**/*'):
            if entry.is_file():
                size_in_bytes += entry.stat().st_size
        must_reload = size_in_bytes / (1024 * 1024) < size_limit

    if must_reload:
        reload_data_folder()


In [None]:
import platform  

def get_device():
    """
    Look up the device type to use on the current machine.

    The machine is identified by its node name (``platform.uname().node``),
    which is matched against a fixed table of known machines.

    :return: device type as a string
    :raises Exception: if the node name of this machine is not in the table
    """
    node_name = platform.uname().node

    known_devices = dict([
        ("PC-Cristian", "cuda"),
        ("Dell-G5-15-Alexios", "cuda"),
        ("MacBook-Pro-di-Cristian.local", "mps"),
        ("MacBookProDiGrazia", "cpu"),
        ("DESKTOP-RQVK8SI", "cuda"),
        ("MacBook-Pro.station", "mps"),
    ])

    if node_name in known_devices:
        return known_devices[node_name]

    raise Exception("Node name not found. Please add your node name and its corresponding device to the dictionary.")

# Resolved once when this cell runs; get_device() raises if this machine's
# node name is not listed in its device table.
device = get_device()

# Download and unzip

In [None]:
# Download the dataset if requested, missing or too small.
check_data_folder(reload=RELOAD_DATASET)

# Drop a stale copy of the example code, if one is lying around.
example_code_folder = "ONFIRE2023_Example_Code"
try:
    shutil.rmtree(example_code_folder)
except FileNotFoundError:
    pass  # nothing to clean up

# download_google_file("https://drive.google.com/file/d/1rXMCtpus2i2UDdSBD9RwWAxnT0wrrXOk/view?usp=sharing", "test_code.zip")
# shutil.unpack_archive("test_code.zip", ".")
# os.remove("test_code.zip")

# Extract frames from video files

Riconduciamo il problema da un dominio di video ad un dominio di immagini andando a selezionare i frame di ogni video e disponendoli in una sottocartella. È importante salvare i frame con qualità alta, altrimenti distorciamo l'informazione data al classificatore.

We use ffmpeg to speed up the frame extraction.

In [None]:
import subprocess  # used only by this cell, to call ffmpeg without going through a shell

os.makedirs(original_frames_path, exist_ok=True)

# Every file below the videos folder is treated as a video; its frames are
# written to a folder with the same relative path (file name included) below
# the frames folder.
file_list = [path for path in Path(videos_path).rglob("*") if path.is_file()]
for video in tqdm.tqdm(file_list):
    output_video_frames_folder = Path(original_frames_path) / video.relative_to(videos_path)
    if output_video_frames_folder.is_dir():
        # Frames of this video were extracted by a previous run.
        continue

    os.makedirs(output_video_frames_folder, exist_ok=True)
    # Passing an argument list (no shell) keeps paths containing spaces or
    # shell metacharacters intact. "-r 1/1" keeps one frame per second.
    # NOTE(review): no JPEG quality option is passed, so ffmpeg's default
    # quality is used -- confirm this is high enough for the classifier.
    extraction = subprocess.run(
        ["ffmpeg", "-i", str(video), "-r", "1/1", str(output_video_frames_folder / "%05d.jpg")]
    )
    if extraction.returncode != 0:
        print(f"ffmpeg failed (exit code {extraction.returncode}) for {video}")
        # Remove the folder only when nothing was written, so the next run
        # retries this file instead of skipping it as already extracted.
        if not any(output_video_frames_folder.iterdir()):
            output_video_frames_folder.rmdir()
        continue
    shutil.rmtree(output_video_frames_folder / "__MACOSX", ignore_errors=True)

# Split dataset

In [None]:
def _partition_by_origin(folders):
    """Split folder names into (mivia, custom) lists, keeping order; "custom" in the name marks custom."""
    mivia_entries, custom_entries = [], []
    for folder in folders:
        if "custom" in folder:
            custom_entries.append(folder)
        else:
            mivia_entries.append(folder)
    return mivia_entries, custom_entries


def _annotation_file_name(folder):
    """Return the annotation file name of a video folder: a trailing "mp4" becomes "rtf"."""
    # Only the end of the name is rewritten; str.replace would also change an
    # "mp4" occurring earlier in the name and point to a non-existent file.
    if folder.endswith("mp4"):
        return folder[:-len("mp4")] + "rtf"
    return folder


def _validation_counts(total, validation_fraction, mivia_fraction, mivia_available):
    """Return (from_mivia, from_custom): how many videos of one class go to the validation set."""
    to_validation = int(total * validation_fraction)
    # Never ask for more mivia videos than exist; the custom videos cover the rest.
    from_mivia = min(int(to_validation * mivia_fraction), mivia_available)
    return from_mivia, to_validation - from_mivia


def _copy_entries(folders, frames_source, annotations_source, frames_destination, annotations_destination):
    """Copy the frame folder and the annotation file of every listed video."""
    for folder in folders:
        shutil.copytree(frames_source / folder, frames_destination / folder)
        file_name = _annotation_file_name(folder)
        shutil.copy(annotations_source / file_name, annotations_destination / file_name)


def split_dataset(frames_source_path, annotations_source_path, frames_destination_path, annotations_destination_path, mode="classic", p=(0.8, 0.2), mivia_validation_percentage=0.8):
    """
    Copy the extracted frames and their annotations into training/validation folders.

    Video folders are read from the ``0`` (no fire) and ``1`` (fire)
    subfolders of ``frames_source_path`` and shuffled with the global
    ``random`` state.

    If mode is "classic", ``int(total * p[1])`` videos of each class are
    copied to ``VALIDATION_SET``; ``mivia_validation_percentage`` of them are
    taken from the mivia videos (capped at the number available), the rest
    from the custom ones. All remaining videos are copied to ``TRAINING_SET``.
    With any other mode every video is copied to ``TRAINING_SET`` and no
    validation folder is created.

    :param frames_source_path: folder holding the ``0`` and ``1`` frame folders
    :param annotations_source_path: folder holding the ``0`` and ``1`` annotation folders
    :param frames_destination_path: folder in which ``TRAINING_SET`` / ``VALIDATION_SET`` are created
    :param annotations_destination_path: currently unused -- annotations are
        written to the module-level ``train_splitted_annotations_path`` and
        ``val_splitted_annotations_path``
    :param mode: "classic" for a train/validation split, anything else for training only
    :param p: (train fraction, validation fraction); only ``p[1]`` is read
    :param mivia_validation_percentage: share of the validation videos taken from mivia
    """
    # source folders, one per class
    no_fire_original_frames_folder = frames_source_path / '0'
    fire_original_frames_folder = frames_source_path / '1'
    no_fire_original_annotations_folder = annotations_source_path / '0'
    fire_original_annotations_folder = annotations_source_path / '1'

    training_frames_path = frames_destination_path / "TRAINING_SET"

    # create the destination folders
    for class_label in ("0", "1"):
        os.makedirs(training_frames_path / class_label, exist_ok=True)
    for class_label in ("0", "1"):
        os.makedirs(train_splitted_annotations_path / class_label, exist_ok=True)

    no_fires_folders = [folder_name for folder_name in os.listdir(no_fire_original_frames_folder)
                        if os.path.isdir(os.path.join(no_fire_original_frames_folder, folder_name))]
    fires_folders = [folder_name for folder_name in os.listdir(fire_original_frames_folder)
                     if os.path.isdir(os.path.join(fire_original_frames_folder, folder_name))]

    # Shuffle order (no fire first, then fire) is kept so a seeded run
    # reproduces the same split as before.
    random.shuffle(no_fires_folders)
    random.shuffle(fires_folders)

    # count total no fires and fires
    total_no_fires = len(no_fires_folders)
    total_fires = len(fires_folders)
    print(f"Total no fires: {total_no_fires}", f"Total fires: {total_fires}")

    # group the videos of each class by origin
    mivia_no_fire_entries, custom_no_fire_entries = _partition_by_origin(no_fires_folders)
    mivia_fire_entries, custom_fire_entries = _partition_by_origin(fires_folders)

    if mode == "classic":
        validation_frames_path = frames_destination_path / "VALIDATION_SET"

        # create the validation set folders
        for class_label in ("0", "1"):
            os.makedirs(validation_frames_path / class_label, exist_ok=True)
        for class_label in ("0", "1"):
            os.makedirs(val_splitted_annotations_path / class_label, exist_ok=True)

        # how many videos of each class and origin go to the validation set
        mivia_no_fires_to_put_in_validation, custom_no_fires_to_put_in_validation = _validation_counts(
            total_no_fires, p[1], mivia_validation_percentage, len(mivia_no_fire_entries))
        mivia_fires_to_put_in_validation, custom_fires_to_put_in_validation = _validation_counts(
            total_fires, p[1], mivia_validation_percentage, len(mivia_fire_entries))

        # the head of each shuffled list goes to validation, the tail to training
        no_fire_entries_validation = (mivia_no_fire_entries[:mivia_no_fires_to_put_in_validation]
                                      + custom_no_fire_entries[:custom_no_fires_to_put_in_validation])
        fire_entries_validation = (mivia_fire_entries[:mivia_fires_to_put_in_validation]
                                   + custom_fire_entries[:custom_fires_to_put_in_validation])
        no_fire_entries_training = (mivia_no_fire_entries[mivia_no_fires_to_put_in_validation:]
                                    + custom_no_fire_entries[custom_no_fires_to_put_in_validation:])
        fire_entries_training = (mivia_fire_entries[mivia_fires_to_put_in_validation:]
                                 + custom_fire_entries[custom_fires_to_put_in_validation:])

        # copy the validation entries
        _copy_entries(no_fire_entries_validation, no_fire_original_frames_folder, no_fire_original_annotations_folder,
                      validation_frames_path / "0", val_splitted_annotations_path / "0")
        _copy_entries(fire_entries_validation, fire_original_frames_folder, fire_original_annotations_folder,
                      validation_frames_path / "1", val_splitted_annotations_path / "1")
    else:
        no_fire_entries_training = mivia_no_fire_entries + custom_no_fire_entries
        fire_entries_training = mivia_fire_entries + custom_fire_entries

    # copy the training entries
    _copy_entries(no_fire_entries_training, no_fire_original_frames_folder, no_fire_original_annotations_folder,
                  training_frames_path / "0", train_splitted_annotations_path / "0")
    _copy_entries(fire_entries_training, fire_original_frames_folder, fire_original_annotations_folder,
                  training_frames_path / "1", train_splitted_annotations_path / "1")

In [None]:
# NOTE(review): if dataset_management.dataset_management exports its own
# split_dataset, this star import rebinds the name and the call below runs
# that version rather than the one defined in this notebook -- confirm which
# one is intended.
from dataset_management.dataset_management import *
# Uses the defaults: mode="classic", p=[0.8, 0.2], mivia_validation_percentage=0.8
# (defaults of the split_dataset defined in this notebook).
split_dataset(train_original_frames_path, train_original_annotations_path, splitted_frames_path, splitted_annotations_path)

In [None]:
# Count the video folders in splitted_frames/{TRAINING_SET,VALIDATION_SET}/{0,1},
# broken down by origin (mivia / custom), and report videos that ended up in
# both the training and the validation set.

def _list_video_folders(split_name, class_label):
    """Return the names of the video folders of one split ("TRAINING_SET"/"VALIDATION_SET") and class ("0"/"1")."""
    class_folder = splitted_frames_path / split_name / class_label
    return [folder for folder in os.listdir(class_folder) if os.path.isdir(class_folder / folder)]


def _mivia_custom_split(folders):
    """Return (mivia, custom) lists; a folder counts as custom when its name contains "custom"."""
    mivia_folders = [folder for folder in folders if "custom" not in folder]
    custom_folders = [folder for folder in folders if "custom" in folder]
    return mivia_folders, custom_folders


train_0 = _list_video_folders("TRAINING_SET", "0")
train_1 = _list_video_folders("TRAINING_SET", "1")
val_0 = _list_video_folders("VALIDATION_SET", "0")
val_1 = _list_video_folders("VALIDATION_SET", "1")

mivia_train_0, custom_train_0 = _mivia_custom_split(train_0)
mivia_train_1, custom_train_1 = _mivia_custom_split(train_1)
mivia_val_0, custom_val_0 = _mivia_custom_split(val_0)
mivia_val_1, custom_val_1 = _mivia_custom_split(val_1)

print("mivia_train_0: ", len(mivia_train_0))
print("custom_train_0: ", len(custom_train_0))
print("mivia_train_1: ", len(mivia_train_1))
print("custom_train_1: ", len(custom_train_1))
print("mivia_val_0: ", len(mivia_val_0))
print("custom_val_0: ", len(custom_val_0))
print("mivia_val_1: ", len(mivia_val_1))
print("custom_val_1: ", len(custom_val_1))

# The mivia and custom lists of one split are disjoint by construction, so
# comparing them can never find a duplicate. What can actually go wrong is the
# same video being present in both the training and the validation set
# (e.g. after the split was run twice), so that is what is checked here.
for train_folders, val_folders in ((train_0, val_0), (train_1, val_1)):
    for folder in sorted(set(train_folders) & set(val_folders)):
        print("Duplicate folder:", folder)