### Générateur des phrase

In [14]:
import pandas as pd
from pathlib import Path

lesion_info_csv_dir = Path.cwd().parent.parent / "data" / "13_lesion_info_per_label_csv"
if not lesion_info_csv_dir.exists():
    raise FileNotFoundError(f"Le dossier {lesion_info_csv_dir} n'existe pas. Veuillez d'abord exécuter le 2e script de intensity_extration_per_label.ipynb")

contrast_csv_dir = Path.cwd().parent.parent / "data" / "12_lesion_contrast_results"
if not contrast_csv_dir.exists():
    raise FileNotFoundError(f"Le dossier {contrast_csv_dir} n'existe pas. Veuillez d'abord exécuter le script de contrast_calculation.ipynb")

template_txt_dir_path = Path.cwd().parent.parent / "data" / "16_template_txt"
if not template_txt_dir_path.exists():
    template_txt_dir_path.mkdir(parents=True, exist_ok=True)

# 1. A CHOISIR : Paramètres de filtrage
patient_id = "P1"
timepoint = "T1"
modality = "FLAIR"

In [4]:
%run "./clinical_parser.ipynb"


id = P1,
sex = F,
timepoint_number = 3,
timepoint_infos = {'T1': <patient_info.TimepointInfo object at 0x000001F097365F90>, 'T2': <patient_info.TimepointInfo object at 0x000001F0948579D0>, 'T3': <patient_info.TimepointInfo object at 0x000001F097367250>},


id = P2,
sex = M,
timepoint_number = 4,
timepoint_infos = {'T1': <patient_info.TimepointInfo object at 0x000001F097366F50>, 'T2': <patient_info.TimepointInfo object at 0x000001F097367510>, 'T3': <patient_info.TimepointInfo object at 0x000001F097366E90>, 'T4': <patient_info.TimepointInfo object at 0x000001F0973DF810>},


id = P3,
sex = F,
timepoint_number = 4,
timepoint_infos = {'T1': <patient_info.TimepointInfo object at 0x000001F0973DF950>, 'T2': <patient_info.TimepointInfo object at 0x000001F0973DF990>, 'T3': <patient_info.TimepointInfo object at 0x000001F0973DF9D0>, 'T4': <patient_info.TimepointInfo object at 0x000001F0973DFA10>},


timepoint = T1,
age = 28.09,
ms_type = SMRR,
edss = 3.5,
lesion_number = 18,
lesion_volume = 20674

In [None]:
path_str = f"../../data/13_lesion_info_per_label_csv/{patient_id}_{timepoint}_intensity_info.csv"
contrast_csv = f"../../data/12_lesion_contrast_results/{patient_id}_{timepoint}_lesion_contrast.csv"

csv_path = Path(path_str)
contrast_path = Path(contrast_csv)
no_timepoint = False

if not csv_path.exists():
    path_str = f"../../data/13_lesion_info_per_label_csv/{patient_id}_intensity_info.csv"
    contrast_csv = f"../../data/12_lesion_contrast_results/{patient_id}_lesion_contrast.csv"
    csv_path = Path(path_str)
    contrast_path = Path(contrast_csv)
    no_timepoint = True
    timepoint = "T1"

if not csv_path.exists():
    raise FileNotFoundError(
        f"Le fichier CSV pour le patient {patient_id} au timepoint {timepoint} n'a pas été trouvé."
    )

df = pd.read_csv(csv_path)

contrast_df = None
if contrast_path.exists():
    contrast_df = pd.read_csv(contrast_path)

# =========================================================
# 2. Filtrage des données
# =========================================================

if not no_timepoint:
    mask = (
        (df["patient"] == patient_id) &
        (df["timepoint"] == timepoint) &
        (df["modality"] == modality)
    )
else:
    mask = (
        (df["patient"] == patient_id) &
        (df["modality"] == modality)
    )

filtered_df = df[mask].copy()

if filtered_df.empty:
    print("Aucune donnée trouvée.")
    raise SystemExit

# =========================================================
# 3. Infos patient
# =========================================================

current_patient = None
for patient in patients:
    if patient.id == patient_id:
        current_patient = patient
        break

if current_patient is None:
    raise ValueError(f"Patient {patient_id} non trouvé.")

sex = "male" if current_patient.sex == "M" else "female" if current_patient.sex == "F" else "unknown"
timepoint_info = current_patient.timepoint_infos.get(timepoint)
age = int(timepoint_info.age) if timepoint_info else "unknown"

prop_cols = [c for c in df.columns if c.endswith("_prop")]

for _, lesion in filtered_df.iterrows():

    lesion_id = lesion["lesion_id"]

    # ---- Intensité & volume
    volume = lesion["lesion_volume_mm3"]
    mean_int = lesion["lesion_mean"]
    min_int = lesion["lesion_min"]
    max_int = lesion["lesion_max"]

    # ---- Contraste
    contrast_val = "unknown"
    if contrast_df is not None:
        contrast_match = contrast_df[
            (contrast_df["patient"] == patient_id) &
            (contrast_df["timepoint"] == timepoint) &
            (contrast_df["lesion_id"] == lesion_id)
        ]
        if not contrast_match.empty:
            contrast_val = contrast_match.iloc[0]["contrast_lesion_neighborhood"]

    # ---- Localisation anatomique
    prop_values = lesion[prop_cols].astype(float)
    prop_values = prop_values[prop_values > 0].sort_values(ascending=False)

    zones_text = []
    for zone_col, val in prop_values.items():
        zone_name = zone_col.replace("_prop", "").replace("_", " ")
        zones_text.append(f"{zone_name} ({val*100:.1f}%)")

    zones_str = ", ".join(zones_text)

    # ---- Texte du template (UNE lésion)
    lesion_text = (
        f"This {modality} scan corresponds to a {age}-year-old {sex} subject "
        f"{patient_id} at timepoint {timepoint}.\n"
        f"Lesion ID {lesion_id} has a volume of {volume:.1f} mm³.\n"
        f"The lesion shows a mean intensity of {mean_int:.1f}, "
        f"with values ranging from {min_int:.1f} to {max_int:.1f}, "
        f"and a lesion-to-neighborhood contrast of {contrast_val:.2f}.\n"
        f"It is primarily located in {zones_str}."
    )

    # ---- Nom et sauvegarde du fichier
    filename = f"{patient_id}_{timepoint}_L{lesion_id}_{modality}_3DDescription.txt"
    output_path = template_txt_dir_path / filename

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(lesion_text)

Processing P10 - T1


ValueError: Unknown format code 'f' for object of type 'str'