### Générateur de phrases en 3D

In [None]:
import pandas as pd
from pathlib import Path

# All pipeline folders live under <cwd>/../../data.
_data_root = Path.cwd().parent.parent / "data"

# Inputs produced by earlier notebooks, and the output folder for this one.
lesion_info_csv_dir = _data_root / "13_lesion_info_per_label_csv"
contrast_csv_dir = _data_root / "12_lesion_contrast_results"
template_txt_dir_path = _data_root / "16_3D_template_txt"

# Both input folders are mandatory: fail early with a pointer to the notebook
# that is expected to create them.
if lesion_info_csv_dir.exists() is False:
    raise FileNotFoundError(f"Le dossier {lesion_info_csv_dir} n'existe pas. Veuillez d'abord exécuter le 2e script de intensity_extration_per_label.ipynb")

if contrast_csv_dir.exists() is False:
    raise FileNotFoundError(f"Le dossier {contrast_csv_dir} n'existe pas. Veuillez d'abord exécuter le script de contrast_calculation.ipynb")

# The output folder is created on demand (including missing parents).
if template_txt_dir_path.exists() is False:
    template_txt_dir_path.mkdir(parents=True, exist_ok=True)

In [26]:
# Execute the clinical parser notebook. Presumably this is what defines the
# `patients` collection consumed by the generation cell further down — TODO confirm.
%run "./clinical_parser.ipynb"


id = P1,
sex = F,
timepoint_number = 3,
timepoint_infos = {'T1': <patient_info.TimepointInfo object at 0x000001F0975D2890>, 'T2': <patient_info.TimepointInfo object at 0x000001F095BA2910>, 'T3': <patient_info.TimepointInfo object at 0x000001F0974EE990>},


id = P2,
sex = M,
timepoint_number = 4,
timepoint_infos = {'T1': <patient_info.TimepointInfo object at 0x000001F0974EF0D0>, 'T2': <patient_info.TimepointInfo object at 0x000001F0974EC890>, 'T3': <patient_info.TimepointInfo object at 0x000001F0974EC990>, 'T4': <patient_info.TimepointInfo object at 0x000001F0974EEF10>},


id = P3,
sex = F,
timepoint_number = 4,
timepoint_infos = {'T1': <patient_info.TimepointInfo object at 0x000001F0974EC4D0>, 'T2': <patient_info.TimepointInfo object at 0x000001F0974ED8D0>, 'T3': <patient_info.TimepointInfo object at 0x000001F0974EF350>, 'T4': <patient_info.TimepointInfo object at 0x000001F0974EE950>},


timepoint = T1,
age = 28.09,
ms_type = SMRR,
edss = 3.5,
lesion_number = 18,
lesion_volume = 20674

In [29]:
def _parse_patient_and_timepoint(stem):
    """Extract ``(patient_id, timepoint)`` from an intensity CSV file stem.

    A stem with four ``_``-separated parts yields its first two parts; a stem
    with three parts yields its first part and the default timepoint ``"T1"``.
    Any other shape returns ``None``.
    """
    parts = stem.split("_")
    if len(parts) == 4:
        return parts[0], parts[1]
    if len(parts) == 3:
        return parts[0], "T1"
    return None


def _format_contrast(contrast_val):
    """Format a contrast value with two decimals, or ``"unknown"`` if missing.

    Both ``None`` (no matching row) and NaN (empty CSV cell) count as missing,
    so the generated sentence never contains the literal text "nan".
    """
    if contrast_val is None or pd.isna(contrast_val):
        return "unknown"
    return f"{float(contrast_val):.2f}"


def _format_zones(lesion, prop_cols):
    """Return a comma-separated list of zones with a strictly positive proportion.

    Zones are ordered by decreasing proportion and rendered as
    ``"<zone name> (<percent>%)"``. Returns an empty string when no zone has a
    positive proportion (NaN proportions are ignored as well).
    """
    proportions = lesion[prop_cols].astype(float)
    proportions = proportions[proportions > 0].sort_values(ascending=False)
    labels = []
    for zone_col, value in proportions.items():
        zone_name = zone_col.replace("_prop", "").replace("_", " ")
        labels.append(f"{zone_name} ({value*100:.1f}%)")
    return ", ".join(labels)


# Generate one text description per (patient, timepoint, modality, lesion).
for file_path in lesion_info_csv_dir.iterdir():
    if not file_path.name.endswith("_intensity_info.csv"):
        continue

    # Recover patient_id and timepoint from the file name.
    parsed = _parse_patient_and_timepoint(file_path.stem)
    if parsed is None:
        print(f"[SKIP] Nom de fichier non conforme : {file_path.name}")
        continue
    patient_id, timepoint = parsed

    df = pd.read_csv(file_path)

    # The anatomical proportion columns depend only on the CSV header, so they
    # are computed once per file rather than once per modality.
    prop_cols = [c for c in df.columns if c.endswith("_prop")]

    # Loop over every modality present in the CSV.
    for modality in df["modality"].unique():

        # Keep only the rows for this patient / timepoint / modality.
        mask = (
            (df["patient"] == patient_id) &
            (df["timepoint"] == timepoint) &
            (df["modality"] == modality)
        )
        filtered_df = df[mask]
        if filtered_df.empty:
            continue

        # Look up the patient record; a missing patient is a hard error.
        current_patient = next((p for p in patients if p.id == patient_id), None)
        if current_patient is None:
            raise ValueError(f"Patient {patient_id} non trouvé.")

        sex = {"M": "male", "F": "female"}.get(current_patient.sex, "unknown")
        timepoint_info = current_patient.timepoint_infos.get(timepoint)
        age = int(timepoint_info.age) if timepoint_info is not None else "unknown"

        # Load the modality-specific contrast CSV when it exists and narrow it
        # to this patient / timepoint once, instead of once per lesion.
        contrast_csv_path = contrast_csv_dir / f"{patient_id}_{timepoint}_{modality}_lesion_contrast.csv"
        contrast_df = None
        if contrast_csv_path.exists():
            contrast_df = pd.read_csv(contrast_csv_path)
            contrast_df = contrast_df[
                (contrast_df["patient"] == patient_id) &
                (contrast_df["timepoint"] == timepoint)
            ]

        # Loop over each lesion.
        for _, lesion in filtered_df.iterrows():
            lesion_id = lesion["lesion_id"]

            # Intensity and volume.
            volume = lesion["lesion_volume_mm3"]
            mean_int = lesion["lesion_mean"]
            min_int = lesion["lesion_min"]
            max_int = lesion["lesion_max"]

            # Contrast: first matching row for this lesion, if any.
            contrast_val = None
            if contrast_df is not None:
                contrast_match = contrast_df[contrast_df["lesion_id"] == lesion_id]
                if not contrast_match.empty:
                    contrast_val = contrast_match.iloc[0]["contrast_lesion_neighborhood"]
            contrast_str = _format_contrast(contrast_val)

            # Anatomical location; avoid emitting "located in ." when no zone
            # has a positive proportion.
            zones_str = _format_zones(lesion, prop_cols)
            if zones_str:
                location_sentence = f"It is primarily located in {zones_str}."
            else:
                location_sentence = "Its anatomical location is unknown."

            # Final lesion text.
            lesion_text = (
                f"This {modality} scan corresponds to a {age}-year-old {sex} subject {patient_id} at timepoint {timepoint}. "
                f"Lesion ID {lesion_id} has a volume of {volume:.1f} mm³. "
                f"The lesion shows a mean intensity of {mean_int:.1f}, "
                f"with values ranging from {min_int:.1f} to {max_int:.1f}, "
                f"and a lesion-to-neighborhood contrast of {contrast_str}. "
                f"{location_sentence}"
            )

            # Name and write the output file.
            filename = f"{patient_id}_{timepoint}_L{lesion_id}_{modality}_3DDescription.txt"
            output_path = template_txt_dir_path / filename
            output_path.write_text(lesion_text, encoding="utf-8")