# Configuration setup
All global notebook variables will be placed here for readability and maintainability.

In [None]:
from pathlib import Path
import os
import shutil
import gdown
import random
from project_paths import *
import cv2, argparse, glob, PIL, tqdm, sys

# Definition of global variables for the notebook
# Each entry maps a dataset name to a pair of Google Drive sharing links,
# unpacked below as (video_link, labels_link).
raw_dataset_links = {
    "mivia": (
        "https://drive.google.com/file/d/1tEz2wVQjPp1MjVHZLa-Z3uyVBnwljgGF/view?usp=sharing",
        "https://drive.google.com/file/d/123AcAQCldRNE6iKpXuCaVtsaR3uHIOeN/view?usp=sharing"
    ),
    "custom": (
        "https://drive.google.com/file/d/1eTDG_SbHkCo0OeVwRKugQ2vDV2csDx6q/view?usp=sharing",
        "https://drive.google.com/file/d/1UjWkvzzezXNOkncas4Q-kP9X9VU2D0OE/view?usp=sharing"
    ),

    "mivia_custom_04_07": (   # Custom dataset without duplicate videos and with corrected numbering.
                        # So far only the actual download has been tested.
                        # Frame extraction will be tested shortly.
        "https://drive.google.com/file/d/13VJ-MGfrS_cvOwLvmiDlqmawGV0ebZGC/view?usp=sharing",
        "https://drive.google.com/file/d/13TmHShb5tNa9doujghCNoFlB5pAJsJWz/view?usp=sharing"
    ),
    "mivia_custom_09_07": (
        "https://drive.google.com/file/d/13n9lbNyJchDk5olfHzU6Oy_WQR7ZbPFX/view?usp=drive_link",
        "https://drive.google.com/file/d/13lJY76imebgbovVc1asDm6GvuNkaF8u8/view?usp=drive_link"
    ),
    "mivia_custom_10_07": (
        "https://drive.google.com/file/d/14-z_UsyDaPyGCTdXrXS2YDkGIfDfGQNJ/view?usp=drive_link",
        "https://drive.google.com/file/d/13xXyOqp2ET_ySTzd7mP-qk8hs5x3y96g/view?usp=drive_link"
    ),
}

# Key of raw_dataset_links that selects which pair of links is used.
SELECTED_DATASET = "mivia_custom_10_07"
RELOAD_DATASET = False # If True, the dataset is reloaded from the links above, otherwise it is loaded from the local folder



# Links of the selected dataset: videos archive first, annotations archive second.
video_link, labels_link = raw_dataset_links[SELECTED_DATASET]

In [None]:
def download_google_file(shader_url, output_name):
    """
    Download a Google Drive file given its sharing link.

    :param shader_url: sharing link of the form
        ``https://drive.google.com/file/d/<id>/view?...``
    :param output_name: path the downloaded file is written to
    """
    # In a ".../file/d/<id>/view" link the file id is the sixth "/"-separated token.
    file_id = shader_url.split("/")[5]
    direct_download_url = "https://drive.google.com/uc?id=" + file_id
    gdown.download(direct_download_url, output_name)


def reload_data_folder_videos():
    """
    Recreate ``videos_path`` from scratch and fill it with the selected dataset's videos.

    The archive behind ``video_link`` is downloaded as ``VIDEOS.zip``, moved into
    ``videos_path``, unpacked there and then deleted. A ``__MACOSX`` folder found
    in ``data_folder_path`` is removed afterwards.
    """
    archive_name = "VIDEOS.zip"
    archive_in_videos_folder = videos_path / archive_name

    # Start from an empty videos folder.
    shutil.rmtree(videos_path, ignore_errors=True)
    os.makedirs(videos_path, exist_ok=True)

    # The archive is downloaded under a relative name, then moved next to the
    # videos, unpacked in place and discarded.
    download_google_file(video_link, archive_name)
    shutil.move(archive_name, archive_in_videos_folder)
    shutil.unpack_archive(archive_in_videos_folder, videos_path)
    os.remove(archive_in_videos_folder)

    macos_metadata_folder = data_folder_path / "__MACOSX"
    if macos_metadata_folder.exists():
        shutil.rmtree(macos_metadata_folder)


def reload_data_folder_annotations():
    """
    Download the annotations archive again and rebuild ``train_original_annotations_path``.

    The archive behind ``labels_link`` is downloaded as ``GT.zip`` and unpacked into
    ``data_folder_path``. The extracted ``GT_TRAINING_SET_CL0`` and
    ``GT_TRAINING_SET_CL1`` folders are then moved under
    ``train_original_annotations_path`` and renamed to ``0`` and ``1``.
    A ``__MACOSX`` folder found in ``data_folder_path`` is removed afterwards.
    """
    archive_name = "GT.zip"
    archive_in_data_folder = data_folder_path / archive_name
    # Extracted folder name -> class folder name used by the rest of the notebook.
    class_folder_names = (("GT_TRAINING_SET_CL0", "0"), ("GT_TRAINING_SET_CL1", "1"))

    # Drop any previous annotations before downloading the new ones.
    shutil.rmtree(train_original_annotations_path, ignore_errors=True)

    download_google_file(labels_link, archive_name)
    shutil.move(archive_name, archive_in_data_folder)
    shutil.unpack_archive(archive_in_data_folder, data_folder_path)
    os.remove(archive_in_data_folder)

    os.makedirs(train_original_annotations_path)
    # Move both extracted folders first, then rename them.
    for extracted_name, _ in class_folder_names:
        shutil.move(data_folder_path / extracted_name, train_original_annotations_path)
    for extracted_name, class_name in class_folder_names:
        os.rename(
            train_original_annotations_path / extracted_name,
            train_original_annotations_path / class_name,
        )

    macos_metadata_folder = data_folder_path / "__MACOSX"
    if macos_metadata_folder.exists():
        shutil.rmtree(macos_metadata_folder)

def reload_data_folder():
    """Delete ``data_folder_path`` if it exists, then download videos and annotations again."""
    if data_folder_path.exists():
        shutil.rmtree(data_folder_path)

    # Videos first, annotations second.
    for reload_step in (reload_data_folder_videos, reload_data_folder_annotations):
        reload_step()
    
    

def check_data_folder(reload=False, size_limit=10):
    """
    Make sure the data folder is populated, downloading it again when needed.

    The folder at ``data_folder_path`` is reloaded when ``reload`` is true, when
    the folder does not exist, or when the total size of the files it contains
    (sub-folders included) is below ``size_limit``.

    :param reload: if True, reload the data folder unconditionally
    :param size_limit: size limit in MB
    """
    needs_reload = reload or not data_folder_path.exists()

    if not needs_reload:
        # Sum the size of every regular file found anywhere below the folder.
        size_in_bytes = 0
        for entry in data_folder_path.glob('**/*'):
            if entry.is_file():
                size_in_bytes += entry.stat().st_size
        needs_reload = size_in_bytes / (1024 * 1024) < size_limit

    if needs_reload:
        reload_data_folder()


In [None]:
import platform  

def get_device():
    """
    Determine the device type based on the node name of the current machine.

    The node name reported by ``platform.uname()`` is looked up in a dictionary
    that maps known node names to device types.

    :return: device type as a string
    :raises RuntimeError: if the node name is not found in the device_map
        dictionary; the message includes the node name so it can be added
    """
    device_map = {
        "PC-Cristian": "cuda",
        "Dell-G5-15-Alexios": "cuda",
        "MacBook-Pro-di-Cristian.local": "mps",
        "MacBookProDiGrazia": "cpu",
        "DESKTOP-RQVK8SI": "cuda",
    }

    node_name = platform.uname().node
    try:
        return device_map[node_name]
    except KeyError as err:
        # Report the actual node name: it is exactly what has to be added to device_map.
        raise RuntimeError(
            f"Node name {node_name!r} not found. Please add your node name and its "
            "corresponding device to the dictionary."
        ) from err

# Resolve the device string for the current machine when this cell runs.
device = get_device()

# Download and unzip

In [None]:
# Make sure the dataset is available locally (forcing a fresh download when requested).
check_data_folder(RELOAD_DATASET)

example_code_dir = "ONFIRE2023_Example_Code"
test_code_archive = "test_code.zip"

# Remove the example code folder if it is there; a missing folder is fine.
try:
    shutil.rmtree(example_code_dir)
except FileNotFoundError:
    pass

# Download the archive, unpack it into the current directory and drop the zip file.
download_google_file(
    "https://drive.google.com/file/d/1rXMCtpus2i2UDdSBD9RwWAxnT0wrrXOk/view?usp=sharing",
    test_code_archive,
)
shutil.unpack_archive(test_code_archive, ".")
os.remove(test_code_archive)

# Extract frames from video files

Riconduciamo il problema da un dominio di video ad un dominio di immagini andando a selezionare i frame di ogni video e disponendoli in una sottocartella. È importante salvare i frame con qualità alta, altrimenti distorciamo l'informazione data al classificatore.

We use ffmpeg to speed up the frame extraction.

In [None]:
import subprocess

# Extract the frames of every file found under videos_path into
# <original_frames_path>/<path of the video relative to videos_path>/NNNNN.jpg.
# NOTE(review): no quality option (e.g. -q:v) is passed to ffmpeg, so its default
# JPEG quality is used, while the notebook text asks for high-quality frames - confirm.
os.makedirs(original_frames_path, exist_ok=True)

file_list = [path for path in Path(videos_path).rglob("*") if path.is_file()]
for video in tqdm.tqdm(file_list):
    output_video_frames_folder = Path(original_frames_path) / video.relative_to(videos_path)
    # An existing output folder is treated as "already extracted" and skipped.
    if output_video_frames_folder.is_dir():
        continue

    os.makedirs(output_video_frames_folder, exist_ok=True)
    try:
        # Arguments are passed as a list (no shell), so paths containing spaces
        # or shell metacharacters reach ffmpeg unchanged.
        completed = subprocess.run(
            ["ffmpeg", "-i", str(video), str(output_video_frames_folder / "%05d.jpg")]
        )
    except FileNotFoundError:
        # ffmpeg could not be started: remove the empty folder just created,
        # otherwise the next run would skip this video as already extracted.
        shutil.rmtree(output_video_frames_folder, ignore_errors=True)
        raise
    if completed.returncode != 0:
        # Keep whatever frames were written, but do not fail silently.
        tqdm.tqdm.write(
            "ffmpeg exited with code {} for {}; delete {} to retry the extraction".format(
                completed.returncode, video, output_video_frames_folder
            )
        )
    shutil.rmtree(output_video_frames_folder / "__MACOSX", ignore_errors=True)

# Split dataset

In [None]:
def _list_video_folders(class_folder):
    """Return the sorted names of the sub-directories found directly in *class_folder*."""
    return sorted(
        name for name in os.listdir(class_folder)
        if os.path.isdir(os.path.join(class_folder, name))
    )


def _copy_video_subset(folders, class_label, frames_source, frames_destination, annotations_destination):
    """Copy the frame folders in *folders* and their ``.rtf`` annotations for one class."""
    for folder in folders:
        shutil.copytree(frames_source / class_label / folder, frames_destination / class_label / folder)
        # The annotation file name is the folder name up to its first dot, plus ".rtf".
        rtf_file = folder.split(".")[0] + ".rtf"
        shutil.copyfile(
            train_original_annotations_path / class_label / rtf_file,
            annotations_destination / class_label / rtf_file,
        )


def split_dataset(source_path, destination_path, mode="classic", p=(0.8, 0.1, 0.1), seed=None):
    """
    Split the frame folders of both classes into dataset subsets.

    If mode is "classic", it splits the dataset into train, validation and test sets.
    Otherwise (e.g. "k-fold") it splits off only the test set; all the other
    videos remain in the training set folder.

    For each class folder ("0" and "1") of ``source_path`` the video folders are
    shuffled; the first ``p[0] + p[1]`` fraction goes to training (the first
    ``p[1]`` fraction of the total being taken from it for validation in
    "classic" mode) and the rest goes to test. Frames are copied to
    ``destination_path / <SUBSET> / <class>`` and the matching ``.rtf`` files are
    copied from ``train_original_annotations_path`` to the train/val/test
    ``*_splitted_annotations_path`` folders. Every annotation ends up only in
    the subset that holds its frames.

    :param source_path: folder containing the "0" and "1" class folders (Path)
    :param destination_path: folder receiving TRAINING_SET / VALIDATION_SET / TEST_SET (Path)
    :param mode: "classic" for train/validation/test, anything else for train/test only
    :param p: (train, validation, test) fractions; only p[0] and p[1] are read
    :param seed: optional seed for a reproducible shuffle; when None the
        module-level ``random`` state is used
    :raises FileExistsError: if a video folder already exists in the destination
    """
    shuffle = random.shuffle if seed is None else random.Random(seed).shuffle
    first_split_percentage = p[0] + p[1]
    class_labels = ("0", "1")
    is_classic = mode == "classic"

    # List and shuffle both classes before touching the destination, so a
    # missing source folder fails before anything is created.
    shuffled_folders = {}
    for class_label in class_labels:
        # Sorted first: os.listdir order is arbitrary, which would defeat the seed.
        folders = _list_video_folders(source_path / class_label)
        shuffle(folders)
        shuffled_folders[class_label] = folders

    annotation_roots = {
        "TRAINING_SET": train_splitted_annotations_path,
        "TEST_SET": test_splitted_annotations_path,
    }
    if is_classic:
        annotation_roots["VALIDATION_SET"] = val_splitted_annotations_path

    for subset_name, annotations_root in annotation_roots.items():
        for class_label in class_labels:
            os.makedirs(destination_path / subset_name / class_label, exist_ok=True)
            os.makedirs(annotations_root / class_label, exist_ok=True)

    for class_label, folders in shuffled_folders.items():
        total = len(folders)
        first_split_size = int(total * first_split_percentage)
        first_split = folders[:first_split_size]

        subsets = {"TRAINING_SET": first_split, "TEST_SET": folders[first_split_size:]}
        if is_classic:
            # Validation videos are taken from the head of the first split and
            # are excluded from training, frames and annotations alike.
            validation_size = int(total * p[1])
            subsets["VALIDATION_SET"] = first_split[:validation_size]
            subsets["TRAINING_SET"] = first_split[validation_size:]

        for subset_name, subset_folders in subsets.items():
            _copy_video_subset(
                subset_folders,
                class_label,
                source_path,
                destination_path / subset_name,
                annotation_roots[subset_name],
            )

Convertiamo il numero di frame contenuto nel GT: passiamo dal frame espresso in secondi (rapporto 1:1 con i secondi) al numero di frame reale, che tiene conto degli fps del video.

In [None]:
import re, math
def extract_text_from_rtf(file_path):
    """
    Read an annotation file and return its textual payload.

    If the content contains the ``\\rtf1`` marker, the text found between
    ``\\f0\\fs24 \\cf0 `` and the next ``}`` is returned stripped, or None when
    that pattern is not present. Any other content is returned unchanged.

    :param file_path: path of the file to read
    :return: the extracted text, the raw content for non-RTF files, or None
    """
    with open(file_path, 'r') as handle:
        content = handle.read()

    # No RTF marker: the file is handed back exactly as read.
    if "\\rtf1" not in content:
        return content

    # Capture the text between '\f0\fs24 \cf0 ' and the first following '}'.
    found = re.search(r'\\f0\\fs24 \\cf0 (.*?)\}', content)
    return found.group(1).strip() if found else None


def process_videos(video_folder: Path, gt_folder: Path):
    """
    Scale the leading number of each video's annotation by the video frame rate.

    For every entry of ``video_folder`` the frame rate is read with OpenCV and
    the matching annotation ``<name up to the first dot>.rtf`` in ``gt_folder``
    is parsed as ``<number>,<other fields...>``. The number is multiplied by the
    frame rate, rounded up, and the file is overwritten with the new plain-text
    content.

    All annotations are computed and validated before any file is written, so a
    bad video or annotation does not leave the folder half converted.

    NOTE: the conversion is not idempotent. Calling this again on the same
    folder multiplies the already-converted values by the frame rate once more.

    :param video_folder: folder containing the video files
    :param gt_folder: folder containing the ``.rtf`` annotation files
    :raises ValueError: if a video reports no positive frame rate or an
        annotation contains no extractable text
    """
    pending_writes = []
    for video in os.listdir(video_folder):
        video_path = video_folder / video

        # Get frame rate
        capture = cv2.VideoCapture(str(video_path))
        try:
            frame_rate = capture.get(cv2.CAP_PROP_FPS)
        finally:
            capture.release()
        # A non-positive (or NaN) frame rate would silently turn every annotation into 0.
        if not frame_rate > 0:
            raise ValueError("Cannot read a valid frame rate from {}".format(video_path))

        # Get video annotation
        annotation_path = os.path.join(gt_folder, video.split(".")[0] + ".rtf")
        label = extract_text_from_rtf(annotation_path)
        if label is None:
            raise ValueError("No annotation text found in {}".format(annotation_path))

        # First field is the number to scale, the rest is kept as is (e.g. ['Smoke', 'Fire'])
        first_field, *other_info = label.split(",")
        frame_number = math.ceil(int(first_field) * frame_rate)
        pending_writes.append((annotation_path, str(frame_number) + "," + ",".join(other_info)))

    # Replace the content of the rtf files only once everything has been validated.
    for annotation_path, new_content in pending_writes:
        with open(annotation_path, 'w') as file:
            file.write(new_content)


In [None]:
# from dataset_management.adjust_frame import *
# Rewrite the class "1" annotations in place: process_videos multiplies the leading
# number of each file by the frame rate of its video and overwrites the file.
# NOTE: because the files are overwritten, running this cell again multiplies the
# already-converted values by the frame rate once more.
process_videos( train_videos_path / "1", train_original_annotations_path / "1")
# Copy frames and annotations into training/validation/test sets (80/10/10 split).
split_dataset(train_original_frames_path, splitted_frames_path, mode="classic", p=[0.8,0.1,0.1])

In [None]:
# Small script that copies the videos of the test split into the folder at
# test_data_videos_path (test_data/TEST_VIDEOS according to the original note),
# so that they can be used for the final test.

src = test_splitted_frames_path
dst = test_data_videos_path
main_video_folder = videos_path / "TRAINING_SET"

# shutil.copy does not create missing parent folders.
os.makedirs(dst, exist_ok=True)

for class_folder in os.listdir(src):
    # Skip stray files (e.g. OS metadata) that are not class folders.
    if not (src / class_folder).is_dir():
        continue
    for test_video in os.listdir(src / class_folder):
        # Each entry is looked up under the same name in the videos folder;
        # the copy is prefixed with its class.
        shutil.copy(
            main_video_folder / class_folder / test_video,
            dst / "{}_{}".format(class_folder, test_video),
        )