# Launch of Deepethogram - Infer

## 1. Copy movies
This script repairs all the videos (they are corrupted for a reason I do not know) and then adds them to the project using the deepethogram function (it calculates the std and mean while adding the videos).

In [None]:
import os
import subprocess
from deepethogram import projects
import re
import shlex
import pandas as pd

def repair_video_with_ffmpeg(video_path, output_dir):
    """Re-encode a video with ffmpeg into ``output_dir`` as an .mp4 file.

    The output file name is the input base name with its extension replaced
    by ``.mp4`` and all spaces removed. If a file with that name already
    exists in ``output_dir`` it is reused and ffmpeg is not run.

    Args:
        video_path: Path of the source video.
        output_dir: Directory in which the re-encoded file is written.

    Returns:
        The path of the re-encoded file, or ``None`` when ffmpeg exits with
        a non-zero status.
    """
    filename = os.path.basename(video_path)
    safe_name = os.path.splitext(filename)[0].replace(" ", "")
    repaired_filename = f"{safe_name}.mp4"
    repaired_path = os.path.join(output_dir, repaired_filename)

    if os.path.exists(repaired_path):
        print(f"[✓] Already repaired: {repaired_filename}")
        return repaired_path

    # Pass the arguments as a list: no shell-style re-tokenising is involved,
    # so backslashes, quotes and spaces in the paths reach ffmpeg untouched.
    cmd = [
        "ffmpeg", "-y",
        "-err_detect", "ignore_err",
        "-i", str(video_path),
        "-vf", "fps=30",
        "-c:v", "libx264", "-preset", "veryfast", "-crf", "24",
        "-c:a", "aac", "-strict", "experimental",
        repaired_path,
    ]
    try:
        subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError as e:
        print(f"[✗] Error repairing {filename}: {e}")
        # A failed run can leave a partial file behind; remove it so that a
        # later call does not mistake it for an already repaired video.
        if os.path.exists(repaired_path):
            try:
                os.remove(repaired_path)
            except OSError:
                pass  # best-effort cleanup only
        return None
    return repaired_path


def ajouter_tous_videos_au_projet(project_config_path, root_video_dir, mode='copy', extensions=None, csv_filter_path=None):
    """Find videos under ``root_video_dir``, re-encode them and add them to a project.

    A file is selected when all of the following hold:

    * its directory path does not contain "aborted" or "post" (case-insensitive);
    * its name matches ``.*_(\\d{2})-(\\d{2})-(\\d{2})`` and does not start
      with "cont" or "lnb" (case-insensitive);
    * the first two captured numbers, read as hour and minute, are before
      07:29 or at/after 20:00;
    * its extension is in ``extensions``;
    * when a CSV filter is loaded, its name without extension and without
      spaces is listed in the CSV.

    Each selected video is re-encoded into the current working directory with
    ``repair_video_with_ffmpeg``, added with ``projects.add_video_to_project``
    and the temporary re-encoded file is then deleted.

    Args:
        project_config_path: Path given to ``projects.load_config``.
        root_video_dir: Directory walked recursively for videos.
        mode: Forwarded to ``projects.add_video_to_project``.
        extensions: Accepted file extensions (with leading dot); compared
            case-insensitively. Defaults to common video extensions.
        csv_filter_path: Optional CSV with a ``video_name`` column restricting
            which videos are added. If the file does not exist, a warning is
            printed and no filter is applied.
    """
    if extensions is None:
        extensions = ['.mp4', '.avi', '.mov', '.mkv', '.wmv']
    allowed_extensions = {ext.lower() for ext in extensions}

    # Load the project
    project = projects.load_config(project_config_path)
    execution_dir = os.getcwd()

    # Load the CSV of video names, if one is provided
    allowed_video_names = None
    if csv_filter_path is not None:
        if os.path.exists(csv_filter_path):
            df_filter = pd.read_csv(csv_filter_path)
            # Names are compared without their extension; empty cells are ignored.
            names = df_filter['video_name'].dropna().astype(str)
            allowed_video_names = {os.path.splitext(v)[0] for v in names}
            print(f"[🔎] Usando filtro de CSV con {len(allowed_video_names)} nombres.")
        else:
            print(f"[!] CSV filter not found: {csv_filter_path} (no name filter applied)")

    # Collect the valid paths
    videos_paths = []
    pattern = re.compile(r".*_(\d{2})-(\d{2})-(\d{2})")

    for root, _dirs, files in os.walk(root_video_dir):
        # Skip folders whose path contains "aborted" or "post"
        if any(x in root.lower() for x in ("aborted", "post")):
            continue
        for file in files:
            match = pattern.match(file)
            if not match:
                continue
            if file.lower().startswith(("cont", "lnb")):
                continue
            hour = int(match.group(1))
            minute = int(match.group(2))
            in_time_window = hour < 7 or (hour == 7 and minute < 29) or hour >= 20
            if not in_time_window:
                continue
            stem, ext = os.path.splitext(file)
            if ext.lower() not in allowed_extensions:
                continue
            # File names are compared with spaces removed; CSV names are used as given.
            name_no_ext = stem.replace(" ", "")
            if allowed_video_names is not None and name_no_ext not in allowed_video_names:
                continue
            videos_paths.append(os.path.join(root, file))

    total = len(videos_paths)
    print(f"🔍 Found {total} video(s) to check and add.")

    repaired_count = 0
    for video_path in videos_paths:
        repaired_path = repair_video_with_ffmpeg(video_path, output_dir=execution_dir)
        if not repaired_path:
            print(f"[!] Skipped: {video_path} (repair failed)")
            continue

        repaired_count += 1
        try:
            new_path = projects.add_video_to_project(project, repaired_path, mode=mode)
            print(f"[+] Added: {repaired_path} → {new_path} . {repaired_count} / {total}")
        except Exception as e:
            # Keep processing the remaining videos, but report the failure.
            print(f"[✗] Error adding {repaired_path}: {e}")
        finally:
            # The re-encoded copy is temporary; it may already be gone if the
            # project call moved it.
            if os.path.exists(repaired_path):
                os.remove(repaired_path)

# === CONFIGURATION ===
# Project configuration file passed to the import function below.
config_path = r"D:\LBN\Maternal_auto_classification_manualVal_deepethogram\project_config.yaml"
# Root folder that is walked recursively to find the source videos.
videos_dir = r"Z:\Marion\2. Tests - Manuels - Data\3. LBN\3. Rawdata\1. VEAVE_LBN-CONT"
csv_path = r"D:\LBN\selected_videos_balanced_filtered.csv"  # CSV with a 'video_name' column

# Repair the selected videos and add them to the project; mode='copy' is forwarded to deepethogram.
ajouter_tous_videos_au_projet(config_path, videos_dir, mode='copy', csv_filter_path=csv_path)


## 2. Infer videos
We then run our model on the new videos to predict the behaviours found in each frame.

In [None]:
import subprocess

# Python interpreter of the environment used to run the inference script.
python_path = r"C:\Users\TeamGranon\anaconda3\envs\deg_solo\python.exe"
# Stand-alone inference script; its contents are not part of this notebook.
# NOTE(review): this points at the "TS7" project, while the videos in step 1 are
# added to the "manualVal" project config; confirm this is intended.
script_path = r"D:\LBN\Maternal_auto_classification_TS7_deepethogram\infering.py"

# Run the script in a separate process; check=True raises CalledProcessError on a non-zero exit code.
subprocess.run([python_path, script_path], check=True)

## 3. Transform output
Finally, we convert each .h5 output file into a .csv file.

In [None]:
from deepethogram.postprocessing import get_postprocessor_from_cfg
import os
import h5py
import pandas as pd
from deepethogram import projects

def convertir_outputs_a_csv_con_postprocess(data_dir, cfg, output_dir=None, model_key='resnet18'):
    """Post-process every ``*outputs.h5`` file under ``data_dir`` and save it as CSV.

    For each matching file, the datasets ``P``, ``thresholds`` and
    ``class_names`` are read from the HDF5 group ``model_key``. The
    probabilities are passed through the postprocessor returned by
    ``get_postprocessor_from_cfg`` and the result is written to
    ``<output_dir>/<h5 base name>_labels_predictions.csv`` with one column per
    class name. A failure on one file is printed and does not stop the others.

    Args:
        data_dir: Directory walked recursively for files ending in ``outputs.h5``.
        cfg: Path of the project configuration, loaded with ``projects.load_config``.
        output_dir: Directory receiving the CSV files; created if missing.
            Defaults to the project's ``Maternal_behaviour_results`` folder.
        model_key: Name of the HDF5 group to read from each outputs file.
    """
    config = projects.load_config(cfg)
    if output_dir is None:
        output_dir = r"D:\LBN\Maternal_auto_classification_TS7_deepethogram\Maternal_behaviour_results"
    os.makedirs(output_dir, exist_ok=True)

    for root, _dirs, files in os.walk(data_dir):
        for file in files:
            if not file.endswith('outputs.h5'):
                continue
            outputs_path = os.path.join(root, file)
            try:
                with h5py.File(outputs_path, 'r') as f:
                    group = f[model_key]
                    probabilities = group['P'][:]
                    thresholds = group['thresholds'][:]
                    # Entries may come back as bytes or as str depending on how
                    # they were stored and read; accept both.
                    class_names = [
                        x.decode('utf-8') if isinstance(x, bytes) else str(x)
                        for x in group['class_names'][:]
                    ]

                postprocessor = get_postprocessor_from_cfg(config, thresholds)
                estimated_labels = postprocessor(probabilities)
                df = pd.DataFrame(estimated_labels, columns=class_names)

                # New file name: same base name, placed in the output folder
                base_name = os.path.splitext(os.path.basename(outputs_path))[0]
                prediction_fname = os.path.join(output_dir, base_name + '_labels_predictions.csv')
                df.to_csv(prediction_fname, index=False)
                print(f"[✓] Guardado postprocesado: {prediction_fname}")

            except Exception as e:
                # Best-effort batch: report the file that failed and continue.
                print(f"[✗] Error con {outputs_path}: {e}")

# Call: convert every outputs file found under the project's DATA folder.
directorio_data = r"D:\LBN\Maternal_auto_classification_TS7_deepethogram\DATA"
cfg = r"D:\LBN\Maternal_auto_classification_TS7_deepethogram\project_config.yaml"
convertir_outputs_a_csv_con_postprocess(directorio_data, cfg)
