### Générateur de phrases

In [None]:
import pandas as pd
from pathlib import Path

# Folder expected to hold the per-label lesion CSV files. It is resolved
# relative to the current working directory: <cwd>/../../data/13_lesion_info_per_label_csv
two_levels_up = Path.cwd().parent.parent
lesion_info_csv_dir = two_levels_up.joinpath("data", "13_lesion_info_per_label_csv")

# Stop immediately when the folder is missing; the message points the user to
# the notebook that has to be executed first.
folder_is_missing = not lesion_info_csv_dir.exists()
if folder_is_missing:
    raise FileNotFoundError(f"Le dossier {lesion_info_csv_dir} n'existe pas. Veuillez d'abord exécuter le 2e script de intensity_extration_per_label.ipynb")

# 1. TO SET BY THE USER: filtering parameters read by the later cells.
patient_id = "P1"
timepoint = "T1"
modality = "FLAIR"

In [46]:
# Run the clinical parser notebook. The sentence-generation cell further down
# iterates over `patients`, which (per its own comment) is defined by that notebook.
%run "./clinical_parser.ipynb"


id = P1,
sex = F,
timepoint_number = 3,
timepoint_infos = {'T1': <patient_info.TimepointInfo object at 0x000001A1A4CE5990>, 'T2': <patient_info.TimepointInfo object at 0x000001A1A4CE57D0>, 'T3': <patient_info.TimepointInfo object at 0x000001A1A4CE53D0>},


id = P2,
sex = M,
timepoint_number = 4,
timepoint_infos = {'T1': <patient_info.TimepointInfo object at 0x000001A1A369D1D0>, 'T2': <patient_info.TimepointInfo object at 0x000001A1A4CB2690>, 'T3': <patient_info.TimepointInfo object at 0x000001A1A4CB0FD0>, 'T4': <patient_info.TimepointInfo object at 0x000001A1A4CB2150>},


id = P3,
sex = F,
timepoint_number = 4,
timepoint_infos = {'T1': <patient_info.TimepointInfo object at 0x000001A1A4CB3D90>, 'T2': <patient_info.TimepointInfo object at 0x000001A1A4CB2D50>, 'T3': <patient_info.TimepointInfo object at 0x000001A1A4CB23D0>, 'T4': <patient_info.TimepointInfo object at 0x000001A1A4CB1290>},


timepoint = T1,
age = 28.09,
ms_type = SMRR,
edss = 3.5,
lesion_number = 18,
lesion_volume = 20674

In [60]:
# Build and print an English sentence describing the largest lesion of one
# patient / timepoint / modality.
#
# Reads from the notebook namespace:
#   patient_id, timepoint, modality -- filtering parameters set in the first cell
#   patients                        -- defined by clinical_parser.ipynb
# Raises:
#   FileNotFoundError -- neither the per-timepoint nor the per-patient CSV exists
#   ValueError        -- fewer than 3 '_prop' columns, unknown patient, or no
#                        clinical information for the selected timepoint

# 2. Load the data
# Prefer the per-timepoint CSV; fall back to the per-patient CSV when it is absent.
path_str = f"../../data/13_lesion_info_per_label_csv/{patient_id}_{timepoint}_intensity_info.csv"
csv_path = Path(path_str)
no_timepoint = not csv_path.exists()

if no_timepoint:
    path_str = f"../../data/13_lesion_info_per_label_csv/{patient_id}_intensity_info.csv"
    csv_path = Path(path_str)
    if not csv_path.exists():
        # Raise BEFORE overwriting `timepoint`: the message then reports the
        # timepoint that was actually requested, and the notebook-level
        # parameter is left untouched when the cell fails.
        raise FileNotFoundError(f"Le fichier CSV pour le patient {patient_id} au timepoint {timepoint} n'a pas été trouvé.")
    timepoint = "T1"  # Default value when the timepoint is not part of the file name

df = pd.read_csv(csv_path)

# 3. Filter the data
# The timepoint criterion is only applied when the per-timepoint file was found.
mask = (df['patient'] == patient_id) & (df['modality'] == modality)
if not no_timepoint:
    mask &= df['timepoint'] == timepoint
filtered_df = df[mask].copy()

if not filtered_df.empty:
    # Main lesion = row with the largest volume
    main_lesion = filtered_df.loc[filtered_df['lesion_volume_mm3'].idxmax()]

    # All localisation columns (names ending with '_prop')
    prop_cols = [c for c in df.columns if c.endswith('_prop')]

    # Cast to float so the values can be ranked numerically
    prop_values = main_lesion[prop_cols].astype(float)

    # The 3 columns with the largest values
    top3 = prop_values.nlargest(3)
    if len(top3) < 3:
        # Explicit error instead of an opaque IndexError on top3.index[...]
        raise ValueError(f"Au moins 3 colonnes '_prop' sont nécessaires, {len(top3)} trouvée(s) dans {csv_path}.")

    # Column names and values, by rank
    top_zone_col, top_zone_val = top3.index[0], top3.iloc[0]
    second_zone_col, second_zone_val = top3.index[1], top3.iloc[1]
    third_zone_col, third_zone_val = top3.index[2], top3.iloc[2]

    # Clean the zone name for display (e.g. 'Zone_77_prop' -> 'Zone 77')
    top_zone_display = top_zone_col.replace('_prop', '').replace('_', ' ')
    second_zone_display = second_zone_col.replace('_prop', '').replace('_', ' ')
    third_zone_display = third_zone_col.replace('_prop', '').replace('_', ' ')

    # Display the raw proportions
    print(f"1ère zone : {top_zone_display} ({top_zone_val})")
    print(f"2ème zone : {second_zone_display} ({second_zone_val})")
    print(f"3ème zone : {third_zone_display} ({third_zone_val})")
    print("\n")

    # Look up the patient's clinical record (`patients` comes from clinical_parser.ipynb).
    # Failing loudly here prevents a stale `current_patient` left over from a
    # previous run of this cell from being silently reused.
    current_patient = next((p for p in patients if p.id == patient_id), None)
    if current_patient is None:
        raise ValueError(f"Le patient {patient_id} est introuvable dans les données cliniques.")

    # Spell out the sex code; any other value is passed through unchanged
    sex = {"M": "male", "F": "female"}.get(current_patient.sex, current_patient.sex)

    # Same reasoning for the age: never fall back on a value from an earlier run.
    timepoint_info = current_patient.timepoint_infos.get(timepoint)
    if timepoint_info is None:
        raise ValueError(f"Aucune information clinique pour le patient {patient_id} au timepoint {timepoint}.")
    age = int(timepoint_info.age)

    # 4. Build the sentence
    sentence = (
        f"This scan is a {modality} scan of a {age}-year-old {sex} subject {patient_id} at timepoint {timepoint}.\n"
        f"It reveals a total of {len(filtered_df)} lesions.\n"
        f"The most significant lesion has a volume of {main_lesion['lesion_volume_mm3']:.1f} mm³ and is primarily located in:\n"
        f"  1. {top_zone_display} ({top_zone_val*100:.1f}%)\n"
        f"  2. {second_zone_display} ({second_zone_val*100:.1f}%)\n"
        f"  3. {third_zone_display} ({third_zone_val*100:.1f}%)."
    )

    print(sentence)
else:
    print("Aucune donnée trouvée.")

1ère zone : Zone 77 (0.557)
2ème zone : Left-Cerebral-White-Matter (0.331)
3ème zone : Left-Cerebral-Cortex (0.04)


This scan is a FLAIR scan of a 28-year-old female subject P1 at timepoint T1.
It reveals a total of 18 lesions.
The most significant lesion has a volume of 15632.0 mm³ and is primarily located in:
  1. Zone 77 (55.7%)
  2. Left-Cerebral-White-Matter (33.1%)
  3. Left-Cerebral-Cortex (4.0%).
