In [1]:
import numpy as np
import SimpleITK as sitk
from pathlib import Path
import pandas as pd
import logging

# Root logger setup: emit INFO and above, each record rendered as "LEVEL: message".
logging.basicConfig(
    format='%(levelname)s: %(message)s',
    level=logging.INFO,
)

class LesionAnalyzer:
    """Per-lesion feature extractor for MRI datasets with an anatomical atlas.

    The dataset folder is expected to contain one sub-folder per patient, each
    holding one sub-folder per timepoint.  For every timepoint that has a
    ``<patient>_<timepoint>_MASK.nii.gz`` file, the lesion mask is split into
    connected components and, for each component, features are measured on
    the slice (first array axis) where the component covers the largest area.

    Results are accumulated per patient in ``self.patient_data`` and written
    as CSV files into ``<output_root>/<patient>/``.
    """

    LABELS_MAP = {
        0: "Background", 2: "Left-Cerebral-White-Matter", 3: "Left-Cerebral-Cortex",
        4: "Left-Lateral-Ventricle", 5: "Left-Inf-Lat-Vent", 7: "Left-Cerebellum-White-Matter",
        8: "Left-Cerebellum-Cortex", 10: "Left-Thalamus-Proper", 11: "Left-Caudate",
        12: "Left-Putamen", 13: "Left-Pallidum", 14: "3rd-Ventricle", 15: "4th-Ventricle",
        16: "Brain-Stem", 17: "Left-Hippocampus", 18: "Left-Amygdala", 24: "CSF",
        26: "Left-Accumbens-area", 28: "Left-VentralDC", 30: "Left-vessel", 31: "Left-choroid-plexus",
        41: "Right-Cerebral-White-Matter", 42: "Right-Cerebral-Cortex", 43: "Right-Lateral-Ventricle",
        44: "Right-Inf-Lat-Vent", 46: "Right-Cerebellum-White-Matter", 47: "Right-Cerebellum-Cortex",
        49: "Right-Thalamus-Proper", 50: "Right-Caudate", 51: "Right-Putamen", 52: "Right-Pallidum",
        53: "Right-Hippocampus", 54: "Right-Amygdala", 58: "Right-Accumbens-area",
        60: "Right-VentralDC", 62: "Right-vessel", 63: "Right-choroid-plexus",
        72: "5th-Ventricle", 77: "WM-hypointensities", 80: "non-WM-hypointensities", 85: "Optic-Chiasm",
    }

    # Column layouts of the first three CSV files.  Passing them explicitly to
    # the DataFrame constructor keeps the header row even when a table is empty.
    _INFO_COLUMNS = ["patient", "timepoint", "lesion_id", "z_axial"]
    _MORPHO_COLUMNS = ["patient", "timepoint", "lesion_id", "major_axis_mm", "minor_axis_mm"]
    _CONTRAST_COLUMNS = [
        "patient", "timepoint", "modality", "lesion_id",
        "lesion_mean", "neighborhood_mean", "contrast",
    ]

    def __init__(self, dataset_path, aseg_dir, output_root):
        """Store the input/output locations and prepare the per-patient buffers.

        Args:
            dataset_path: Folder containing one sub-folder per patient.
            aseg_dir: Folder containing ``<patient>_<timepoint>_aseg.nii`` files.
            output_root: Folder under which one result folder per patient is created.
        """
        self.dataset_path = Path(dataset_path)
        self.aseg_dir = Path(aseg_dir)
        self.output_root = Path(output_root)
        self.modalities = ["T1", "T2", "FLAIR"]

        # Anatomy columns of the wide table: every known label except Background.
        self.region_columns = sorted(
            f"{name}_prop" for idx, name in self.LABELS_MAP.items() if idx != 0
        )

        # Rows collected for the patient currently being processed.
        self.patient_data = {
            "info": [],
            "morpho": [],
            "contrast": [],
            "anatomy": [],
        }

    def process_dataset(self):
        """Process every patient folder of the dataset and export its CSV files."""
        patient_dirs = sorted(d for d in self.dataset_path.iterdir() if d.is_dir())

        for p_dir in patient_dirs:
            patient_id = p_dir.name
            self._reset_patient_data()

            # Each sub-folder of a patient is treated as one timepoint.
            for t_dir in sorted(d for d in p_dir.iterdir() if d.is_dir()):
                self._process_session(patient_id, t_dir.name, t_dir)

            if self.patient_data["info"]:
                self._save_patient_results(patient_id)
            else:
                logging.warning("Aucune donnée valide trouvée pour le patient %s", patient_id)

    def _reset_patient_data(self):
        """Empty all per-patient buffers before starting a new patient."""
        for key in self.patient_data:
            self.patient_data[key] = []

    @staticmethod
    def _load_matching_volume(path, expected_shape):
        """Read an image as an array, or return None if its shape is unexpected.

        A volume whose array shape differs from the mask cannot be indexed with
        the mask, so it is skipped with a warning instead of aborting the run.
        """
        arr = sitk.GetArrayFromImage(sitk.ReadImage(str(path)))
        if arr.shape != expected_shape:
            logging.warning(
                "Volume ignoré (dimensions %s incompatibles avec le masque %s): %s",
                arr.shape, expected_shape, path,
            )
            return None
        return arr

    def _process_session(self, patient_id, tp_id, session_path):
        """Load one timepoint and extract the features of each of its lesions.

        Args:
            patient_id: Patient folder name, used as file-name prefix.
            tp_id: Timepoint folder name, used as file-name prefix.
            session_path: Path of the timepoint folder.

        The session is skipped when its mask file does not exist.  The atlas
        and each modality are optional; missing or shape-mismatched volumes
        are simply left out.
        """
        prefix = f"{patient_id}_{tp_id}"
        mask_file = session_path / f"{prefix}_MASK.nii.gz"
        aseg_file = self.aseg_dir / f"{prefix}_aseg.nii"

        if not mask_file.exists():
            logging.debug("Masque introuvable, session ignorée: %s", mask_file)
            return

        # Binarise the mask and label its connected components; RelabelComponent
        # renumbers them consecutively starting at 1.
        mask_sitk = sitk.ReadImage(str(mask_file))
        labeled_mask = sitk.RelabelComponent(sitk.ConnectedComponent(mask_sitk > 0))
        mask_arr = sitk.GetArrayFromImage(labeled_mask)

        aseg_arr = None
        if aseg_file.exists():
            aseg_arr = self._load_matching_volume(aseg_file, mask_arr.shape)

        # Load whichever modalities are present for this session.
        vols = {}
        for mod in self.modalities:
            mod_file = session_path / f"{prefix}_{mod}.nii.gz"
            if not mod_file.exists():
                continue
            vol = self._load_matching_volume(mod_file, mask_arr.shape)
            if vol is not None:
                vols[mod] = vol

        n_lesions = int(mask_arr.max())
        logging.info("Traitement %s %s: %d lésions identifiées", patient_id, tp_id, n_lesions)

        # Image spacing is ordered (x, y, z); it is needed to express the 2D
        # shape measurements in physical units.
        spacing = mask_sitk.GetSpacing()
        for l_id in range(1, n_lesions + 1):
            self._extract_features(patient_id, tp_id, l_id, mask_arr, vols, aseg_arr, spacing=spacing)

    def _extract_features(self, p_id, tp_id, l_id, mask_arr, vols, aseg_arr, spacing=None):
        """Measure one lesion on its largest slice and append the result rows.

        Args:
            p_id: Patient identifier.
            tp_id: Timepoint identifier.
            l_id: Integer label of the lesion inside ``mask_arr``.
            mask_arr: 3D labelled lesion array.
            vols: Mapping modality name -> 3D intensity array, same shape as the mask.
            aseg_arr: 3D atlas label array, or None when unavailable.
            spacing: Optional (x, y, ...) voxel spacing of the mask image.  When
                omitted the slice image keeps unit spacing, so the reported
                axes are in pixels.
        """
        lesion_mask_3d = mask_arr == l_id
        if not lesion_mask_3d.any():
            # Label absent from the mask: nothing to measure.
            return

        # Slice (first array axis) where the lesion covers the largest area.
        best_z = int(np.argmax(lesion_mask_3d.sum(axis=(1, 2))))
        l_label = f"L{l_id}"
        lesion_slice = lesion_mask_3d[best_z]
        background_slice = mask_arr[best_z] == 0

        # 1. General information
        self.patient_data["info"].append({
            "patient": p_id, "timepoint": tp_id, "lesion_id": l_label, "z_axial": best_z
        })

        # 2. 2D morphology
        self._append_morphology(p_id, tp_id, l_label, lesion_slice, spacing)

        # 3. Signal and contrast, once per available modality
        for mod, vol in vols.items():
            self._append_contrast(p_id, tp_id, l_label, mod, vol[best_z], lesion_slice, background_slice)

        # 4. Anatomy (proportions)
        if aseg_arr is not None:
            self._append_anatomy(p_id, tp_id, l_label, lesion_slice, aseg_arr[best_z])

    def _append_morphology(self, p_id, tp_id, l_label, lesion_slice, spacing=None):
        """Append the equivalent-ellipse axes of the lesion on its best slice."""
        slice_img = sitk.GetImageFromArray(lesion_slice.astype(np.uint8))
        if spacing is not None:
            # An image built from an array has unit spacing; restore the in-plane
            # spacing so the diameters come out in physical units.
            slice_img.SetSpacing([float(s) for s in spacing[:2]])

        shape_filter = sitk.LabelShapeStatisticsImageFilter()
        shape_filter.Execute(slice_img)
        if not shape_filter.HasLabel(1):
            return

        dims = shape_filter.GetEquivalentEllipsoidDiameter(1)
        self.patient_data["morpho"].append({
            "patient": p_id, "timepoint": tp_id, "lesion_id": l_label,
            "major_axis_mm": round(max(dims), 2), "minor_axis_mm": round(min(dims), 2)
        })

    def _append_contrast(self, p_id, tp_id, l_label, mod, slice_vol, lesion_slice, background_slice):
        """Append mean lesion intensity, mean non-lesion intensity and their ratio.

        ``neighborhood_mean`` is the mean over every voxel of the slice that
        belongs to no lesion (mask value 0), not over a local neighbourhood.
        The ratio falls back to 1.0 when that mean is not strictly positive.
        """
        l_mean = np.mean(slice_vol[lesion_slice])
        bg_mean = np.mean(slice_vol[background_slice])
        self.patient_data["contrast"].append({
            "patient": p_id, "timepoint": tp_id, "modality": mod, "lesion_id": l_label,
            "lesion_mean": round(l_mean, 2), "neighborhood_mean": round(bg_mean, 2),
            "contrast": round(l_mean / bg_mean, 4) if bg_mean > 0 else 1.0
        })

    def _append_anatomy(self, p_id, tp_id, l_label, lesion_slice, aseg_slice):
        """Append, per atlas label, the fraction of lesion pixels carrying it.

        Label 0 (Background) is recorded as well: it guarantees that a lesion
        lying entirely outside the labelled regions still gets a row in the
        wide anatomy table.  The ``Background_prop`` column itself is dropped
        when the table is reindexed on ``region_columns``.
        """
        l_aseg = aseg_slice[lesion_slice]
        total = l_aseg.size
        ids, counts = np.unique(l_aseg, return_counts=True)
        for uid, count in zip(ids, counts):
            # Labels missing from LABELS_MAP get a generic "ID-<n>" name.
            name = self.LABELS_MAP.get(int(uid), f"ID-{int(uid)}")
            self.patient_data["anatomy"].append({
                "patient": p_id, "timepoint": tp_id, "lesion_id": l_label,
                "region": f"{name}_prop", "value": round(count / total, 4)
            })

    def _save_patient_results(self, p_id):
        """Create the patient's output folder and write its CSV files.

        ``01_general_info.csv``, ``02_morphology.csv`` and
        ``03_signal_contrast.csv`` are always written (with their header even
        when empty).  ``04_anatomy.csv`` is written only when anatomy rows
        were collected; it holds one row per lesion and one column per entry
        of ``region_columns``, so labels outside that list are not exported.
        """
        output_path = self.output_root / p_id
        output_path.mkdir(parents=True, exist_ok=True)

        tables = (
            ("info", "01_general_info.csv", self._INFO_COLUMNS),
            ("morpho", "02_morphology.csv", self._MORPHO_COLUMNS),
            ("contrast", "03_signal_contrast.csv", self._CONTRAST_COLUMNS),
        )
        for key, filename, columns in tables:
            frame = pd.DataFrame(self.patient_data[key], columns=columns)
            frame.to_csv(output_path / filename, index=False)

        if self.patient_data["anatomy"]:
            df_anat = pd.DataFrame(self.patient_data["anatomy"])
            # Long -> wide: one row per lesion, one column per region.
            df_wide = df_anat.pivot(index=['patient', 'timepoint', 'lesion_id'],
                                    columns='region', values='value').fillna(0)
            df_wide = df_wide.reindex(columns=self.region_columns, fill_value=0).reset_index()
            df_wide.to_csv(output_path / "04_anatomy.csv", index=False)

        logging.info("Résultats exportés pour %s dans %s", p_id, output_path)

if __name__ == "__main__":
    # Locations to adapt to the local setup: input dataset, registered
    # atlas segmentations, and the folder receiving the per-patient CSVs.
    DATASET_PATH = "../../data/01_MSLesSeg_Dataset/train"
    ASEG_DIR = "../../data/07_registered_aseg_nii_gz_dir"
    OUTPUT_DIR = "../../data/19_2D_best_slice_contrast_intensity_proportion_csv_dir"

    # Build the analyzer, then walk the whole dataset patient by patient.
    analyzer = LesionAnalyzer(
        dataset_path=DATASET_PATH,
        aseg_dir=ASEG_DIR,
        output_root=OUTPUT_DIR,
    )
    analyzer.process_dataset()

INFO: Traitement P1 T1: 18 lésions identifiées
INFO: Traitement P1 T2: 18 lésions identifiées
INFO: Traitement P1 T3: 14 lésions identifiées
INFO: Résultats exportés pour P1 dans ..\..\data\19_2D_best_slice_contrast_intensity_proportion_csv_dir\P1
INFO: Traitement P10 T1: 35 lésions identifiées
INFO: Traitement P10 T2: 32 lésions identifiées
INFO: Résultats exportés pour P10 dans ..\..\data\19_2D_best_slice_contrast_intensity_proportion_csv_dir\P10
INFO: Traitement P11 T1: 15 lésions identifiées
INFO: Traitement P11 T2: 10 lésions identifiées
INFO: Résultats exportés pour P11 dans ..\..\data\19_2D_best_slice_contrast_intensity_proportion_csv_dir\P11
INFO: Traitement P12 T1: 36 lésions identifiées
INFO: Traitement P12 T2: 42 lésions identifiées
INFO: Traitement P12 T3: 44 lésions identifiées
INFO: Traitement P12 T4: 43 lésions identifiées
INFO: Résultats exportés pour P12 dans ..\..\data\19_2D_best_slice_contrast_intensity_proportion_csv_dir\P12
INFO: Traitement P13 T1: 39 lésions ident