In [16]:
import nibabel as nib
import numpy as np
import glob,os
from scipy.stats import ttest_ind
import plotly.express as px

In [123]:
STUDY_PATH=os.path.join('/NAS','coolio','protocoles','CINOCIS')
BIDS_DIR=os.path.join(STUDY_PATH,'BIDS')
DERIVATIVES_DIR=os.path.join(BIDS_DIR,'derivatives_v2.0')

Comprends pas l'interet des deux cellules suivantes

In [4]:
# Liste des fichiers NIfTI à fusionner
file_list = glob.glob(r'/NAS/tupac/protocoles/PULSE/DTI/FA/*_onMNI.nii.gz')

# Initialiser une variable pour accumuler la somme des images
sum_image = None

# Lire chaque fichier et accumuler la somme
for file in file_list:
    img = nib.load(file)
    img_data = img.get_fdata()

    if sum_image is None:
        sum_image = np.zeros_like(img_data, dtype=np.float32)

    sum_image += img_data

# Calculer la moyenne
mean_image_data = sum_image / len(file_list)

# Créer une nouvelle image NIfTI pour la moyenne
mean_img = nib.Nifti1Image(mean_image_data, img.affine, img.header)

# Sauvegarder l'image moyenne
nib.save(mean_img, r'/NAS/tupac/protocoles/PULSE/DTI/FA/mean_image.nii.gz')


In [None]:
# Liste des fichiers NIfTI à fusionner
file_list = glob.glob(r'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/template_PAM50/sub-*/ses-*/sub-*_ses-M*_dwi_fa_on_PAM50.nii.gz')

# Initialiser une variable pour accumuler la somme des images
sum_image = None

# Lire chaque fichier et accumuler la somme
for file in file_list:
    img = nib.load(file)
    img_data = img.get_fdata()

    if sum_image is None:
        sum_image = np.zeros_like(img_data, dtype=np.float32)

    sum_image += img_data

# Calculer la moyenne
mean_image_data = sum_image / len(file_list)

# Créer une nouvelle image NIfTI pour la moyenne
mean_img = nib.Nifti1Image(mean_image_data, img.affine, img.header)

# Sauvegarder l'image moyenne
nib.save(mean_img, r'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/mean_image_FA_onPAM50.nii.gz')

In [102]:
import os
import glob
import numpy as np
import nibabel as nib
import pandas as pd

tsv_path = BIDS_DIR + 'participants.tsv'
df = pd.read_csv(tsv_path, sep='\t')

times = ['M3', 'M6', 'M12', 'M24', 'M60']
labels_to_report = [3, 4, 5, 6]

# construire les colonnes dynamiquement
cols = ['Subject']
for t in times:
    cols += [
        f'mean_fa_lesion_{t}', f'mean_fa_nawm_{t}',
        f'mean_adc_lesion_{t}', f'mean_adc_nawm_{t}',
        f'vol_lesion_{t}', f'vol_nawm_{t}',
    ]
    for lab in labels_to_report:
        cols += [f'mean_fa_nawm_label{lab}_{t}', f'mean_adc_nawm_label{lab}_{t}',f'vol_lesion_label{lab}_{t}']

# colonnes prediction existantes
cols += ['vol_lesion_M6_denovo', 'mean_adcM3_lesion_M6_denovo', 'mean_faM3_lesion_M6_denovo']

results = []

def compute_session_metrics(SUBJ, ses):
    lesion_path = f"{DERIVATIVES_DIR}segmentation7.1/{SUBJ}/ses-{ses}/lesion_ses-{ses}.lps.nii.gz"
    NAWM_path   = f"{DERIVATIVES_DIR}segmentation7.1/{SUBJ}/ses-{ses}/NAWM_ses-{ses}.nii.gz"
    sc_path     = f"{DERIVATIVES_DIR}segmentation7.1/{SUBJ}/ses-{ses}/mask_cord_normalised.nii.gz"
    label_path  = f"{DERIVATIVES_DIR}segmentation7.1/{SUBJ}/ses-{ses}/T2_ses-{ses}.lps_pred_seg_labeled.nii.gz"
    reg_pattern = f"{DERIVATIVES_DIR}registration/{SUBJ}/ses-{ses}/{SUBJ}_ses-{ses}_acq-*dwi_fa.rec.nii.gz"
    fa_files = glob.glob(reg_pattern)
    adc_files = glob.glob(reg_pattern.replace('fa','adc'))

    # valeurs par défaut
    out = {
        'mean_fa_lesion': np.nan,
        'mean_fa_nawm': np.nan,
        'mean_adc_lesion': np.nan,
        'mean_adc_nawm': np.nan,
        'vol_lesion':np.nan,
        'vol_nawm':np.nan,
    }
    for lab in labels_to_report:
        out[f'mean_fa_nawm_label{lab}'] = np.nan
        out[f'mean_adc_nawm_label{lab}'] = np.nan
        out[f'vol_lesion_label{lab}'] = np.nan
    
    if fa_files and adc_files and os.path.exists(lesion_path) and os.path.exists(NAWM_path) and os.path.exists(sc_path) and os.path.exists(label_path):
        fa = nib.load(fa_files[0]).get_fdata()
        adc = nib.load(adc_files[0]).get_fdata()
        lesion = nib.load(lesion_path).get_fdata()
        nawm = nib.load(NAWM_path).get_fdata()
        sc = nib.load(sc_path).get_fdata()
        label = nib.load(label_path).get_fdata()

        vol_lesion = np.sum((lesion == 1) & (sc == 1))
        vol_nawm = np.sum((nawm == 1) & (sc == 1))

        out['mean_fa_lesion'] = np.mean(fa[(lesion == 1) & (fa != 0) & (sc == 1)]) if vol_lesion > 0 else np.nan
        out['mean_fa_nawm']   = np.mean(fa[(nawm == 1)   & (fa != 0) & (sc == 1)]) if vol_nawm > 0 else np.nan
        out['mean_adc_lesion']= np.mean(adc[(lesion == 1) & (adc != 0) & (sc == 1)]) if vol_lesion > 0 else np.nan
        out['mean_adc_nawm']  = np.mean(adc[(nawm == 1)   & (adc != 0) & (sc == 1)]) if vol_nawm > 0 else np.nan
        out['vol_lesion'] = vol_lesion
        out['vol_nawm'] = vol_nawm

        # calculs par label (sur NAWM & label==X & sc==1)
        for lab in labels_to_report:
            mask_lab = (nawm == 1) & (label == lab) & (sc == 1)
            n_mask = np.sum(mask_lab)
            if n_mask > 0:
                out[f'mean_fa_nawm_label{lab}'] = np.mean(fa[mask_lab & (fa != 0)]) if np.any(fa[mask_lab] != 0) else np.nan
                out[f'mean_adc_nawm_label{lab}'] = np.mean(adc[mask_lab & (adc != 0)]) if np.any(adc[mask_lab] != 0) else np.nan
            else:
                out[f'mean_fa_nawm_label{lab}'] = np.nan
                out[f'mean_adc_nawm_label{lab}'] = np.nan
            out[f'vol_lesion_label{lab}']  = np.sum((lesion == 1) & (label == lab))
    return out

for SUBJ in df['participant_id']:
    print(SUBJ)

    # compute for each session
    m3 = compute_session_metrics(SUBJ, 'M3')
    m6 = compute_session_metrics(SUBJ, 'M6')
    m12 = compute_session_metrics(SUBJ, 'M12')
    m24 = compute_session_metrics(SUBJ, 'M24')
    m60 = compute_session_metrics(SUBJ, 'M60')

    # Prediction M3->M6 (leave existing logic)
    adc_M3_M6_path=f"{DERIVATIVES_DIR}prediction_M3_M6/{SUBJ}/adc_M3_warpedByFlirt.nii.gz"
    fa_M3_M6_path=f"{DERIVATIVES_DIR}prediction_M3_M6/{SUBJ}/fa_M3_warpedByFlirt.nii.gz"
    lesion_M3onM6_path=f"{DERIVATIVES_DIR}prediction_M3_M6/{SUBJ}/lesion_dil_M3_warpedByFlirt.nii.gz"

    if os.path.exists(adc_M3_M6_path) and os.path.exists(fa_M3_M6_path) and os.path.exists(lesion_M3onM6_path):
        try:
            adc_M3_M6_img = nib.load(adc_M3_M6_path).get_fdata()
            fa_M3_M6_img = nib.load(fa_M3_M6_path).get_fdata()
            lesion_M3onM6_mask = nib.load(lesion_M3onM6_path).get_fdata()
            lesion_mask_M6 = nib.load(f"{DERIVATIVES_DIR}segmentation/{SUBJ}/ses-M6/lesion_ses-M6.lps.nii.gz").get_fdata()
            sc_M6 = nib.load(f"{DERIVATIVES_DIR}segmentation/{SUBJ}/ses-M6/mask_cord_normalised.nii.gz").get_fdata()
            if lesion_mask_M6.shape == lesion_M3onM6_mask.shape:
                mask = ((lesion_mask_M6 == 1) & (lesion_M3onM6_mask == 0) & (sc_M6 == 1))
                vol_lesion_M6_denovo = np.sum(mask)
                mean_adcM3_lesion_M6_denovo = np.mean(adc_M3_M6_img[mask & (adc_M3_M6_img != 0)]) if np.sum(mask) > 0 else np.nan
                mean_faM3_lesion_M6_denovo = np.mean(fa_M3_M6_img[mask & (fa_M3_M6_img != 0)]) if np.sum(mask) > 0 else np.nan
            else:
                print(f"Shape mismatch for subject {SUBJ}")
                vol_lesion_M6_denovo = 0
                mean_adcM3_lesion_M6_denovo = np.nan
                mean_faM3_lesion_M6_denovo = np.nan
        except Exception as e:
            print("Error loading prediction files for", SUBJ, e)
            vol_lesion_M6_denovo = 0
            mean_adcM3_lesion_M6_denovo = np.nan
            mean_faM3_lesion_M6_denovo = np.nan
    else:
        vol_lesion_M6_denovo = 0
        mean_adcM3_lesion_M6_denovo = np.nan
        mean_faM3_lesion_M6_denovo = np.nan

    # construire la ligne de résultat dynamiquement
    row = {'Subject': SUBJ}
    for t, metrics in zip(times, [m3, m6, m12, m24, m60]):
        for k, v in metrics.items():
            row[f"{k}_{t}"] = v

    # ajouter colonnes prediction
    row['vol_lesion_M6_denovo'] = vol_lesion_M6_denovo
    row['mean_adcM3_lesion_M6_denovo'] = mean_adcM3_lesion_M6_denovo
    row['mean_faM3_lesion_M6_denovo'] = mean_faM3_lesion_M6_denovo

    results.append(row)

df_M_long = pd.DataFrame(results, columns=cols)


sub-001
sub-002
sub-003
sub-004
sub-005
sub-006
sub-007
sub-008
sub-009
sub-010
sub-011
sub-012
sub-013
sub-014
sub-015
sub-017
sub-018
sub-019
sub-020
sub-021
sub-022
sub-023
sub-024
sub-025
sub-026
sub-027
sub-028
sub-029
sub-030
sub-031
sub-032
sub-033
sub-034
sub-035
sub-036
sub-037
sub-038
sub-039
sub-040
sub-041
sub-042
sub-043
sub-044
sub-045
sub-046
sub-047
sub-048
sub-049
sub-050
sub-051
sub-053
sub-054
sub-056
sub-057
sub-058
sub-059
sub-060
sub-063
sub-064
sub-065
sub-066
sub-067
sub-068
sub-069
sub-070
sub-071
sub-072
sub-074
sub-075
sub-076
sub-077
sub-078
sub-079
sub-081
sub-082
sub-083
sub-084
sub-085
sub-086
sub-087
sub-088
sub-089
sub-090
sub-091
sub-092
sub-094
sub-095
sub-096
sub-097
sub-098
sub-099
sub-100
sub-102
sub-103
sub-104
sub-105
sub-106
sub-107
sub-108
sub-109
sub-110
sub-111
sub-112
sub-113
sub-114
sub-115
sub-117
sub-118
sub-119
sub-120
sub-122
sub-123
sub-124
sub-125
sub-126
sub-127
sub-128
sub-129
sub-130
sub-131
sub-132
sub-133


In [66]:
import warnings

# Bootstrap comparison (voxel-wise) des FA dans NAWM M3/M6, lésions M3/M6 et lésions "de novo"
# Prend les fichiers déjà présents dans le notebook (DERIVATIVES_DIR, patients_with_denovo / df_M3M6, etc.)
# Résultat : distribution bootstrap des moyennes et intervalles de confiance + tests empiriques (p-values)

warnings.filterwarnings("ignore", category=RuntimeWarning)

n_boot = 10000

# groupes : pools de voxels (valeurs FA)
pools = {
    'NAWM_M3': [],
    'Lesion_M3': [],
    'NAWM_M6': [],
    'Lesion_M6': [],
    'Denovo_M6': []
}

subjects = patients_with_denovo['Subject'].tolist()

for SUBJ in subjects:
    # chemins attendus (mêmes conventions que dans les cellules précédentes)
    lesion_M3_path = f"{DERIVATIVES_DIR}segmentation/{SUBJ}/ses-M3/lesion_ses-M3.lps.nii.gz"
    NAWM_M3_path   = f"{DERIVATIVES_DIR}segmentation/{SUBJ}/ses-M3/NAWM_ses-M3.nii.gz"
    sc_M3_path     = f"{DERIVATIVES_DIR}segmentation/{SUBJ}/ses-M3/mask_cord_normalised.nii.gz"
    reg_pattern_M3 = f"{DERIVATIVES_DIR}registration/{SUBJ}/ses-M3/{SUBJ}_ses-M3_acq-*dwi_fa.rec.nii.gz"
    fa_M3_files    = glob.glob(reg_pattern_M3)

    lesion_M6_path = f"{DERIVATIVES_DIR}segmentation/{SUBJ}/ses-M6/lesion_ses-M6.lps.nii.gz"
    NAWM_M6_path   = f"{DERIVATIVES_DIR}segmentation/{SUBJ}/ses-M6/NAWM_ses-M6.nii.gz"
    sc_M6_path     = f"{DERIVATIVES_DIR}segmentation/{SUBJ}/ses-M6/mask_cord_normalised.nii.gz"
    reg_pattern_M6 = f"{DERIVATIVES_DIR}registration/{SUBJ}/ses-M6/{SUBJ}_ses-M6_acq-*dwi_fa.rec.nii.gz"
    fa_M6_files    = glob.glob(reg_pattern_M6)

    # mask de la lésion M3 projetée sur M6 (optionnel pour dénovo)
    lesion_M3onM6_path = f"{DERIVATIVES_DIR}prediction_M3_M6/{SUBJ}/lesion_dil_M3_warpedByFlirt.nii.gz"

    # charger si disponibles et collecter voxels valides (mask==1, fa != 0)
    try:
        if fa_M3_files and os.path.exists(NAWM_M3_path) and os.path.exists(lesion_M3_path) and os.path.exists(sc_M3_path):
            fa_M3 = nib.load(fa_M3_files[0]).get_fdata()
            nawm_m3 = nib.load(NAWM_M3_path).get_fdata()
            lesion_m3 = nib.load(lesion_M3_path).get_fdata()
            sc_m3 = nib.load(sc_M3_path).get_fdata()

            # extraire voxels
            v = fa_M3[(nawm_m3 == 1) & (fa_M3 != 0) & (sc_m3 == 1)]
            if v.size > 0:
                pools['NAWM_M3'].append(v.ravel())

            v = fa_M3[(lesion_m3 == 1) & (fa_M3 != 0) & (sc_m3 == 1)]
            if v.size > 0:
                pools['Lesion_M3'].append(v.ravel())
    except Exception as e:
        print(f"[M3] skip {SUBJ} ({e})")

    try:
        if fa_M6_files and os.path.exists(NAWM_M6_path) and os.path.exists(lesion_M6_path) and os.path.exists(sc_M6_path):
            fa_M6 = nib.load(fa_M6_files[0]).get_fdata()
            nawm_m6 = nib.load(NAWM_M6_path).get_fdata()
            lesion_m6 = nib.load(lesion_M6_path).get_fdata()
            sc_m6 = nib.load(sc_M6_path).get_fdata()

            v = fa_M6[(nawm_m6 == 1) & (fa_M6 != 0) & (sc_m6 == 1)]
            if v.size > 0:
                pools['NAWM_M6'].append(v.ravel())

            v = fa_M6[(lesion_m6 == 1) & (fa_M6 != 0) & (sc_m6 == 1)]
            if v.size > 0:
                pools['Lesion_M6'].append(v.ravel())

            # denovo : voxels dans lesion M6 mais pas dans lesion M3 projetée sur M6
            if os.path.exists(lesion_M3onM6_path):
                lesion_m3onm6 = nib.load(lesion_M3onM6_path).get_fdata()
                mask_denovo = (lesion_m6 == 1) & (lesion_m3onm6 == 0) & (sc_m6 == 1)
                v = fa_M6[mask_denovo & (fa_M6 != 0)]
                if v.size > 0:
                    pools['Denovo_M6'].append(v.ravel())
    except Exception as e:
        print(f"[M6] skip {SUBJ} ({e})")

# concaténation des pools par groupe
for k in list(pools.keys()):
    if len(pools[k]) == 0:
        pools[k] = np.array([], dtype=float)
    else:
        pools[k] = np.concatenate(pools[k])
    # enlever NaN
    pools[k] = pools[k][~np.isnan(pools[k])]

# retirer groupes vides
print("Voxels par groupe (count):")
for k,v in pools.items():
    print(f"  {k}: {v.size}")

# fonction bootstrap (retourne distribution des moyennes)
def bootstrap_means(values, n_boot=n_boot):
    if values.size == 0:
        return np.array([])
    N = values.size
    print(f"Bootstrap: {N} voxels, {n_boot} itérations")
    # échantillonnage par index pour mémoire
    idx = np.random.randint(0, N, size=(n_boot, N))
    print(f"Index shape: {idx.shape}")
    b_means = values[idx].mean(axis=1)
    return b_means

# calculer distributions bootstrap pour chaque groupe
b_dists = {}
obs_means = {}
for k, vals in pools.items():
    if vals.size > 0:
        b = bootstrap_means(vals, n_boot=n_boot)
        b_dists[k] = b
        obs_means[k] = np.mean(vals)
    else:
        b_dists[k] = np.array([])
        obs_means[k] = np.nan

# comparaisons souhaitées
comparisons = [
    ('Lesion_M3', 'NAWM_M3'),
    ('Lesion_M6', 'NAWM_M6'),
    ('Denovo_M6', 'NAWM_M6'),
    ('Denovo_M6', 'Lesion_M6')
]

def bootstrap_diff_stats(b1, b2):
    # b1, b2 distributions bootstrap des moyennes (même nombre d'itérations)
    if b1.size == 0 or b2.size == 0:
        return {'diff_mean': np.nan, 'ci': (np.nan, np.nan), 'p_empirical': np.nan}
    diffs = b1 - b2
    ci_low, ci_high = np.percentile(diffs, [2.5, 97.5])
    diff_mean = diffs.mean()
    # p-value empirique two-sided pour H0 : diff = 0
    prop_pos = np.mean(diffs > 0)
    prop_neg = np.mean(diffs < 0)
    p_emp = 2.0 * min(prop_pos, prop_neg)
    p_emp = min(p_emp, 1.0)
    return {'diff_mean': diff_mean, 'ci': (ci_low, ci_high), 'p_empirical': p_emp}

# calcul des stats pour chaque comparaison
results = []
for a,b in comparisons:
    # si b_dists[a] ou b_dists[b] vide, tenter d'aligner par rééchantillonnage direct depuis pools
    if b_dists[a].size == 0 and pools[a].size>0:
        b_dists[a] = bootstrap_means(pools[a], n_boot=n_boot)
    if b_dists[b].size == 0 and pools[b].size>0:
        b_dists[b] = bootstrap_means(pools[b], n_boot=n_boot)

    stats = bootstrap_diff_stats(b_dists[a], b_dists[b])
    results.append({
        'comp': f"{a} vs {b}",
        'mean_a_obs': obs_means[a],
        'mean_b_obs': obs_means[b],
        'diff_mean_boot': stats['diff_mean'],
        'ci_95_low': stats['ci'][0],
        'ci_95_high': stats['ci'][1],
        'p_empirical': stats['p_empirical'],
        'n_vox_a': pools[a].size,
        'n_vox_b': pools[b].size
    })

df_boot_results = pd.DataFrame(results)
print("\nRésultats bootstrap (voxel-wise) :")
print(df_boot_results.round(6).to_string(index=False))

# Optionnel : afficher histogrammes des différences bootstrap pour chaque comparaison
import plotly.express as px
for a,b in comparisons:
    if b_dists[a].size>0 and b_dists[b].size>0:
        diffs = b_dists[a] - b_dists[b]
        fig = px.histogram(diffs, nbins=50, title=f"Bootstrap diffs: {a} - {b}", labels={'value':'diff FA'})
        fig.update_layout(xaxis_title='Difference of bootstrap means', yaxis_title='Count')
        fig.show()

Voxels par groupe (count):
  NAWM_M3: 207041
  Lesion_M3: 10258
  NAWM_M6: 202265
  Lesion_M6: 9045
  Denovo_M6: 3823
Bootstrap: 207041 voxels, 10000 itérations
Index shape: (10000, 207041)
Bootstrap: 10258 voxels, 10000 itérations
Index shape: (10000, 10258)
Bootstrap: 202265 voxels, 10000 itérations
Index shape: (10000, 202265)
Bootstrap: 9045 voxels, 10000 itérations
Index shape: (10000, 9045)
Bootstrap: 3823 voxels, 10000 itérations
Index shape: (10000, 3823)

Résultats bootstrap (voxel-wise) :
                  comp  mean_a_obs  mean_b_obs  diff_mean_boot  ci_95_low  ci_95_high  p_empirical  n_vox_a  n_vox_b
  Lesion_M3 vs NAWM_M3    0.488862    0.536265       -0.047402  -0.050026   -0.044681          0.0    10258   207041
  Lesion_M6 vs NAWM_M6    0.512762    0.547692       -0.034911  -0.037754   -0.032124          0.0     9045   202265
  Denovo_M6 vs NAWM_M6    0.530933    0.547692       -0.016755  -0.021357   -0.012130          0.0     3823   202265
Denovo_M6 vs Lesion_M6    0.

In [69]:
from scipy.stats import ttest_ind

# Boxplot + scatter (jittered) for FA groups with significance annotation (star + p-value)
# Utilise les variables déjà présentes dans le notebook (df_M3M6_filtered, etc.)

import plotly.express as px

# Construire le DataFrame concaténé FA (si df_fa n'existe pas)
try:
    df_fa  # use if already defined
except NameError:
    fa_lesion = pd.concat([df_M3M6_filtered["mean_fa_lesion_M3"].dropna(), df_M3M6_filtered["mean_fa_lesion_M6"].dropna()])
    fa_nawm = pd.concat([df_M3M6_filtered["mean_fa_nawm_M3"].dropna(), df_M3M6_filtered["mean_fa_nawm_M6"].dropna()])
    fa_denovo = df_M3M6_filtered["mean_faM3_lesion_M6_denovo"].dropna()

    subjects_lesion = pd.concat([
        df_M3M6_filtered.loc[df_M3M6_filtered["mean_fa_lesion_M3"].notna(), "Subject"],
        df_M3M6_filtered.loc[df_M3M6_filtered["mean_fa_lesion_M6"].notna(), "Subject"]
    ]).reset_index(drop=True)
    subjects_nawm = pd.concat([
        df_M3M6_filtered.loc[df_M3M6_filtered["mean_fa_nawm_M3"].notna(), "Subject"],
        df_M3M6_filtered.loc[df_M3M6_filtered["mean_fa_nawm_M6"].notna(), "Subject"]
    ]).reset_index(drop=True)
    subjects_denovo = df_M3M6_filtered.loc[df_M3M6_filtered["mean_faM3_lesion_M6_denovo"].notna(), "Subject"].reset_index(drop=True)

    df_fa = pd.DataFrame({
        "FA": pd.concat([fa_nawm, fa_denovo, fa_lesion], ignore_index=True),
        "Groupe": (["NAWM"] * len(fa_nawm)) + (["Lesion de-novo"] * len(fa_denovo)) + (["Lesion"] * len(fa_lesion)),
        "Subject": pd.concat([subjects_nawm, subjects_denovo, subjects_lesion], ignore_index=True)
    })

# Ordre des groupes désiré
group_order = ["NAWM", "Lesion de-novo", "Lesion"]

# Figure: box + jittered points
fig = px.box(
    df_fa, x="Groupe", y="FA", color="Groupe",
    category_orders={"Groupe": group_order},
    # points="all",  # montre les points (scatter) sur le boxplot
    title="Distribution FA (bootstrap 10 000 tirages avec remise): Lesion (M3+M6) vs NAWM (M3+M6) vs Lesion de-novo "
)
# fig.update_traces(jitter=0.4, marker=dict(opacity=0.7, size=6))
fig.add_scatter(
    x=df_fa["Groupe"],
    y=df_fa["FA"],
    mode='markers',
    marker=dict(size=6, opacity=0.6),
    text=df_fa["Subject"],
    hovertemplate='Subject=%{text}<br>FA=%{y}<extra></extra>',
    showlegend=False
)

# Tests statistiques (t-tests indépendants, deux côtés)
comparisons = [("Lesion", "NAWM"), ("Lesion de-novo", "NAWM"), ("Lesion de-novo", "Lesion")]
y_max = df_fa["FA"].max()
y_min = df_fa["FA"].min()
y_range = y_max - y_min if (y_max - y_min) != 0 else 1e-3
print(f"FA range: min={y_min}, max={y_max}, range={y_range}")
start_height = y_max + 0.05 * y_range
step = 0.2 * y_range

annot_height = start_height
for a, b in comparisons:
    vals_a = df_fa.loc[df_fa["Groupe"] == a, "FA"].dropna()
    vals_b = df_fa.loc[df_fa["Groupe"] == b, "FA"].dropna()
    if len(vals_a) == 0 or len(vals_b) == 0:
        annot_height += step
        continue
    t_stat, p_val = ttest_ind(vals_a, vals_b, equal_var=False)
    # n.s. ignored per user request: annotate only if significant (p < 0.05)
    if p_val < 0.05:
        # significance stars
        if p_val < 0.001:
            star = "                                                                                                                                           ***"
        elif p_val < 0.01:
            star = "                                                **"
        else:
            star = "                                                                *"
        # horizontal line between groups
        fig.add_shape(
            dict(type="line",
                 xref="x", yref="y",
                 x0=a, x1=b,
                 y0=annot_height, y1=annot_height,
                 line=dict(color="black", width=1))
        )
        # small vertical ticks
        fig.add_shape(dict(type="line", xref="x", yref="y", x0=a, x1=a, y0=annot_height, y1=annot_height - (0.01*y_range),
                           line=dict(color="black", width=1)))
        fig.add_shape(dict(type="line", xref="x", yref="y", x0=b, x1=b, y0=annot_height, y1=annot_height - (0.01*y_range),
                           line=dict(color="black", width=1)))
        # annotation with star and p-value
        fig.add_annotation(
            x=(a if group_order.index(a) <= group_order.index(b) else b),
            y=annot_height + 0.05*y_range,
            xref="x", yref="y",
            text=f"{star} p={p_val:.3g}",
            showarrow=False,
            font=dict(size=12, color="black")
        )
        annot_height += step
    else:
        annot_height += step

fig.update_layout(showlegend=False, yaxis_title="FA")
fig.show()

FA range: min=0.12223102897405624, max=0.9343571700466542, range=0.812126141072598


In [7]:
    df_M3M6_filtered_filtered.loc[len(df_M3M6)] = [
        SUBJ,
        mean_fa_lesion_M3, mean_fa_nawm_M3, mean_adc_lesion_M3, mean_adc_nawm_M3,
        mean_fa_lesion_M6, mean_fa_nawm_M6, mean_adc_lesion_M6, mean_adc_nawm_M6,
        vol_lesion_M6_denovo, mean_adcM3_lesion_M6_denovo, mean_faM3_lesion_M6_denovo
    ]

NameError: name 'df_M3M6_filtered_filtered' is not defined

In [34]:
for idx, row in df_M3M6.iterrows():
    if row['vol_lesion_M6_denovo'] > 200:
        SUBJ = row['Subject']
        print(f"Subject with significant new lesion volume: {SUBJ}, Volume: {row['vol_lesion_M6_denovo']}")
        print(row)
        DERIVATIVES_DIR_SUBJ = f"{DERIVATIVES_DIR}segmentation/{SUBJ}/ses-M6/"
        adc_M6_path = glob.glob(f"{DERIVATIVES_DIR}registration/{SUBJ}/ses-M6/{SUBJ}_ses-M6_acq-*dwi_adc.rec.nii.gz")
        adc_M3_M6_path = f"{DERIVATIVES_DIR}prediction_M3_M6/{SUBJ}/adc_M3_warpedByFlirt.nii.gz"
        lesion_M3onM6_path = f"{DERIVATIVES_DIR}prediction_M3_M6/{SUBJ}/lesion_dil_M3_warpedByFlirt.nii.gz"
        NAWM_M6_path = f"{DERIVATIVES_DIR}segmentation/{SUBJ}/ses-M6/NAWM_ses-M6.nii.gz"
        lesion_M6_path = f"{DERIVATIVES_DIR}segmentation/{SUBJ}/ses-M6/lesion_ses-M6.lps.nii.gz"
        if len(adc_M6_path) > 0:
            if os.path.exists(adc_M6_path[0]) and os.path.exists(adc_M3_M6_path) and os.path.exists(lesion_M3onM6_path) and os.path.exists(NAWM_M6_path):
                print(f"freeview {adc_M6_path[0]}:grayscale=0.0,0.004 {adc_M3_M6_path}:grayscale=0.0,0.004 {lesion_M3onM6_path}:colormap=binary:binary_color=blue {lesion_M6_path}:colormap=binary:binary_color=red {NAWM_M6_path}:colormap=binary:binary_color=green")

Subject with significant new lesion volume: sub-013, Volume: 283
Subject                         sub-013
mean_fa_lesion_M3                   NaN
mean_fa_nawm_M3                0.366439
mean_adc_lesion_M3                  NaN
mean_adc_nawm_M3               0.001766
mean_fa_lesion_M6              0.557465
mean_fa_nawm_M6                0.578881
mean_adc_lesion_M6             0.001083
mean_adc_nawm_M6               0.001188
vol_lesion_M6_denovo                283
mean_adcM3_lesion_M6_denovo    0.002222
mean_faM3_lesion_M6_denovo     0.307546
Name: 12, dtype: object
freeview /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/registration/sub-013/ses-M6/sub-013_ses-M6_acq-901DTICOROSPINE2D_dwi_adc.rec.nii.gz:grayscale=0.0,0.004 /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/prediction_M3_M6/sub-013/adc_M3_warpedByFlirt.nii.gz:grayscale=0.0,0.004 /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/prediction_M3_M6/sub-013/lesion_dil_M3_warpedByFlirt.nii.gz:colormap=binary:binary_col

In [None]:
#df_M3M6.rename(columns={"mean_fa_M3_M6_lesion": "mean_faM3_lesion_M6_denovo"}, inplace=True)
#d_M3M6_filtered = df_M3M6[~df_M3M6['Subject'].isin(df_M3M6[df_M3M6['vol_lesion_M6_denovo'] > 200]['Subject'])]

In [26]:
#df_M3M6_filtered_nonan = df_M3M6_filtered[df_M3M6_filtered['mean_faM3_lesion_M6_denovo'].notna()]

Affiche pour copier-coller xls ou csv

In [27]:
print(df_M3M6[["Subject"] + [col for col in df_M3M6.columns if 'fa' in col]].to_csv(index=False, sep='\t'))

Subject	mean_fa_lesion_M3	mean_fa_nawm_M3	mean_fa_lesion_M6	mean_fa_nawm_M6	mean_faM3_lesion_M6_denovo
sub-001			0.5124371936166991	0.5587590878928441	
sub-002			0.5025770525316533	0.5561604029408278	
sub-003			0.610818983322411	0.5019412783621234	0.3192068255609936
sub-004	0.3324959887006019	0.6068280825026113	0.43206244490994433	0.6893504716320644	0.21915262937545776
sub-005	0.5157765981178133	0.5889824426770409	0.13619562749072534	0.22556658401975274	0.47761957912609493
sub-006	0.5513981817248568	0.5667682623731359	0.41565388110899193	0.47283522745753825	0.7245949109395345
sub-007	0.6075503377114938	0.6050826869803758	0.6236360080420913	0.6042752433948213	0.2995800648574476
sub-008	0.34371040402504477	0.5311505351863895	0.379317496355776	0.5317743665146782	0.41485069365832056
sub-009	0.32638506122598115	0.47591743463339575		0.6302451790145043	
sub-010	0.2504252480299995	0.5235732315346212	0.3548870520993394	0.6944652029597623	0.32079367472657133
sub-011	0.6203025003065472	0.60753042

In [29]:
patients_with_denovo = df_M3M6 # df_M3M6_filtered[df_M3M6_filtered['vol_lesion_M6_denovo'] > 0]
count_patients = len(patients_with_denovo)
mean_vol_denovo = patients_with_denovo['vol_lesion_M6_denovo'].mean()
print(f"Nombre de patients avec vol_lesion_M6_denovo > 0 : {count_patients}")
print(f"Moyenne de vol_lesion_M6_denovo chez ces patients : {mean_vol_denovo *(3.3*0.4878*0.4878)} mm3")

import plotly.express as px

fig = px.histogram(
    patients_with_denovo,
    x="vol_lesion_M6_denovo",
    nbins=30,
    title="Distribution des volumes de vol_lesion_M6_denovo",
    labels={"vol_lesion_M6_denovo": "Volume lésion M6 dé novo"}
)
fig.show()


Nombre de patients avec vol_lesion_M6_denovo > 0 : 122
Moyenne de vol_lesion_M6_denovo chez ces patients : 24.026786598983605 mm3


In [101]:
df_M_long

Unnamed: 0,Subject,mean_fa_lesion_M3,mean_fa_nawm_M3,mean_adc_lesion_M3,mean_adc_nawm_M3,mean_fa_nawm_label3_M3,mean_adc_nawm_label3_M3,mean_fa_nawm_label4_M3,mean_adc_nawm_label4_M3,mean_fa_nawm_label5_M3,...,mean_adc_nawm_label3_M60,mean_fa_nawm_label4_M60,mean_adc_nawm_label4_M60,mean_fa_nawm_label5_M60,mean_adc_nawm_label5_M60,mean_fa_nawm_label6_M60,mean_adc_nawm_label6_M60,vol_lesion_M6_denovo,mean_adcM3_lesion_M6_denovo,mean_faM3_lesion_M6_denovo
0,sub-001,,,,,,,,,,...,0.001468,0.575117,0.001327,0.599002,0.001259,0.591927,0.001271,0,,
1,sub-002,,,,,,,,,,...,0.001313,0.628242,0.001147,0.605699,0.001252,0.509450,0.001686,0,,
2,sub-003,,0.573649,,0.001124,0.714090,0.000891,0.579624,0.001130,0.510109,...,0.001294,0.641344,0.001228,0.631106,0.001160,0.558253,0.001305,9,0.002203,0.319207
3,sub-004,,0.621102,,0.001223,0.745918,0.000969,0.670170,0.001216,0.564588,...,,,,,,,,3,0.002264,0.219153
4,sub-005,,0.573490,,0.001102,0.610504,0.001073,0.549177,0.001159,0.582509,...,0.001538,0.494951,0.001657,0.635351,0.001388,0.486866,0.001848,29,0.001294,0.477620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,sub-129,,0.644573,,0.001463,0.678810,0.001198,0.639996,0.001410,0.681488,...,,,,,,,,15,0.002135,0.542487
118,sub-130,,0.451487,,0.002018,0.430405,0.002237,0.400831,0.002279,0.439582,...,0.001479,0.544089,0.001346,0.672714,0.001140,0.750740,0.001102,17,0.001474,0.637330
119,sub-131,,0.542328,,0.001454,0.576494,0.001493,0.570780,0.001434,0.529819,...,0.001413,0.511721,0.001400,0.475662,0.001423,0.448105,0.001675,0,,
120,sub-132,,0.443698,,0.002023,0.445907,0.001745,0.445927,0.002144,0.430987,...,,,,,,,,12,0.002291,0.395675


In [None]:
outfile = os.path.join(STUDY_PATH, 'DWI_alongSessions.xlsx')
if 'df_M_long' in globals():
    try:
        df_M_long.to_excel(outfile, index=False)
        print(f"Saved df_M_long to {outfile}")
    except Exception as e:
        print("Error saving df_M_long:", e)
else:
    print("df_M_long not found in the current notebook namespace. Nothing saved.")

Saved df_M_long to /NAS/coolio/protocoles/CINOCIS/df_M_long.xlsx


In [118]:
# ...existing code...
# Ajouter prise en charge des temps supplémentaires (M12, M24, M60)
times = ['M3', 'M6', 'M12', 'M24', 'M60']

# choisir le DataFrame contenant les colonnes (df_M_long ou df_M3M6_filtered suivant ce qui est disponible)
df_used = df_M_long if 'df_M_long' in globals() else df_M3M6_filtered

# construire listes de variables FA / ADC
fa_lesion_vars = [f"mean_fa_lesion_{t}" for t in times]
fa_nawm_vars   = [f"mean_fa_nawm_{t}"   for t in times]
adc_lesion_vars = [f"mean_adc_lesion_{t}" for t in times]
adc_nawm_vars   = [f"mean_adc_nawm_{t}"   for t in times]

# Boxplot FA (tous temps)
import plotly.express as px
fa_vars = fa_lesion_vars + fa_nawm_vars
existing_fa_vars = [v for v in fa_vars if v in df_used.columns]
fig_fa = px.box(
    df_used.melt(id_vars=["Subject"], value_vars=existing_fa_vars),
    x="variable",
    y="value",
    color="variable",
    hover_data=["Subject"],
    title="Boxplots interactifs de FA (M3,M6,M12,M24,M60)"
)
# ajouter points
for variable in existing_fa_vars:
    fig_fa.add_scatter(
        x=[variable]*len(df_used),
        y=df_used.get(variable, pd.Series(dtype=float)),
        mode='markers',
        marker=dict(size=6, opacity=0.6),
        name=f'{variable} points',
        text=df_used['Subject'],
        hovertemplate='Subject=%{text}<br>FA=%{y}<extra></extra>',
        showlegend=False
    )
fig_fa.show()
save_path = BIDS_DIR=os.path.join(STUDY_PATH,'screenshots','boxplots_FA_overSession.png')
os.makedirs(os.path.dirname(save_path), exist_ok=True)
try:
    fig_fa.write_image(save_path, scale=2)
    print(f"Saved FA boxplot to {save_path}")
except Exception as e:
    print("Failed to save image. Ensure 'kaleido' is installed (`pip install -U kaleido`). Error:", e)

# Boxplot ADC (tous temps)
adc_vars = adc_lesion_vars + adc_nawm_vars
existing_adc_vars = [v for v in adc_vars if v in df_used.columns]
fig_adc = px.box(
    df_used.melt(id_vars=["Subject"], value_vars=existing_adc_vars),
    x="variable",
    y="value",
    color="variable",
    hover_data=["Subject"],
    title="Boxplots interactifs de ADC (M3,M6,M12,M24,M60)"
)
for variable in existing_adc_vars:
    fig_adc.add_scatter(
        x=[variable]*len(df_used),
        y=df_used.get(variable, pd.Series(dtype=float)),
        mode='markers',
        marker=dict(size=6, opacity=0.6),
        name=f'{variable} points',
        text=df_used['Subject'],
        hovertemplate='Subject=%{text}<br>ADC=%{y}<extra></extra>',
        showlegend=False
    )
fig_adc.show()
save_path = BIDS_DIR=os.path.join(STUDY_PATH,'screenshots','boxplots_ADC_overSession.png')
fig_adc.write_image(save_path, scale=2)

# Descripteurs statistiques FA/ADC (tous temps disponibles)
fa_stats = df_used[[v for v in fa_vars if v in df_used.columns]].agg(['min','max','mean','median','std','count']).T
print("Descripteurs statistiques FA :")
print(fa_stats.round(4).to_csv(sep='\t', index=True))

adc_stats = df_used[[v for v in adc_vars if v in df_used.columns]].agg(['min','max','mean','median','std','count']).T
print("\nDescripteurs statistiques ADC :")
print(adc_stats.round(4).to_csv(sep='\t', index=True))

# Construire df_fa pour comparaisons groupe (concaténé sur tous les temps)
fa_lesion_all = pd.concat([df_used[v].dropna() for v in fa_lesion_vars if v in df_used.columns], ignore_index=True)
fa_nawm_all   = pd.concat([df_used[v].dropna() for v in fa_nawm_vars   if v in df_used.columns], ignore_index=True)

df_fa_all = pd.DataFrame({
    "FA": pd.concat([fa_lesion_all, fa_nawm_all], ignore_index=True),
    "Groupe": (["Lesion"] * len(fa_lesion_all)) + (["NAWM"] * len(fa_nawm_all))
})

fig = px.box(df_fa_all, x="Groupe", y="FA", color="Groupe", title="FA Lesion vs NAWM (all timepoints)")
fig.add_scatter(x=df_fa_all["Groupe"], y=df_fa_all["FA"], mode='markers', marker=dict(size=6, opacity=0.6), showlegend=False)
fig.show()

# t-tests par temps et t-test global (concaténé)
from scipy.stats import ttest_ind
print("\nT-tests par temps (Lesion vs NAWM):")
for t in times:
    a = f"mean_fa_lesion_{t}"
    b = f"mean_fa_nawm_{t}"
    if a in df_used.columns and b in df_used.columns:
        vals_a = df_used[a].dropna()
        vals_b = df_used[b].dropna()
        if len(vals_a)>0 and len(vals_b)>0:
            t_stat, p_val = ttest_ind(vals_a, vals_b, equal_var=False)
            print(f" {t}: n_lesion={len(vals_a)}, n_nawm={len(vals_b)} -> t={t_stat:.3f}, p={p_val:.4g}")
        else:
            print(f" {t}: insufficient data")
    else:
        print(f" {t}: columns missing")

# t-test global concaténé
if len(fa_lesion_all)>0 and len(fa_nawm_all)>0:
    t_stat, p_val = ttest_ind(fa_lesion_all, fa_nawm_all, equal_var=False)
    print(f"\nGlobal (all times concatenated): t={t_stat:.3f}, p={p_val:.4g}")
else:
    print("\nGlobal test: insufficient data")
# ...existing code...

Saved FA boxplot to /NAS/coolio/protocoles/CINOCIS/screenshots/boxplots_FA_overSession.png


Descripteurs statistiques FA :
	min	max	mean	median	std	count
mean_fa_lesion_M3	0.191	0.648	0.4791	0.4908	0.1082	53.0
mean_fa_lesion_M6	0.1143	0.9123	0.4944	0.5082	0.1368	49.0
mean_fa_lesion_M12	0.2944	0.7785	0.4919	0.4931	0.1046	41.0
mean_fa_lesion_M24	0.2353	0.6952	0.4884	0.4909	0.1061	37.0
mean_fa_lesion_M60	0.2717	0.7452	0.4996	0.4763	0.1268	27.0
mean_fa_nawm_M3	0.2472	0.6865	0.5447	0.56	0.0815	115.0
mean_fa_nawm_M6	0.2179	0.7698	0.5553	0.5697	0.0918	115.0
mean_fa_nawm_M12	0.2647	0.6514	0.5191	0.5395	0.0832	97.0
mean_fa_nawm_M24	0.2483	0.6889	0.5279	0.5443	0.0874	102.0
mean_fa_nawm_M60	0.2279	0.6941	0.5226	0.5295	0.0889	93.0


Descripteurs statistiques ADC :
	min	max	mean	median	std	count
mean_adc_lesion_M3	0.0009	0.0042	0.0015	0.0013	0.0006	53.0
mean_adc_lesion_M6	0.0002	0.0034	0.0015	0.0014	0.0006	49.0
mean_adc_lesion_M12	0.0008	0.0032	0.0015	0.0013	0.0005	41.0
mean_adc_lesion_M24	0.0	0.0028	0.0014	0.0013	0.0005	37.0
mean_adc_lesion_M60	0.001	0.0027	0.0016	0.0014	0.0005	27.0
mean


T-tests par temps (Lesion vs NAWM):
 M3: n_lesion=53, n_nawm=115 -> t=-3.930, p=0.0001792
 M6: n_lesion=49, n_nawm=115 -> t=-2.853, p=0.005745
 M12: n_lesion=41, n_nawm=97 -> t=-1.479, p=0.1441
 M24: n_lesion=37, n_nawm=102 -> t=-2.032, p=0.04699
 M60: n_lesion=27, n_nawm=93 -> t=-0.881, p=0.3846

Global (all times concatenated): t=-5.090, p=6.304e-07


In [119]:
# Ligne plot : FA moyenne dans NAWM par label au fil des temps
# Utilise les variables déjà présentes dans le notebook (df_M_long ou df_M3M6_filtered, times, labels_to_report)

# choisir le DataFrame source
df_src = df_M_long if 'df_M_long' in globals() else (df_M3M6_filtered if 'df_M3M6_filtered' in globals() else None)
if df_src is None:
    raise RuntimeError("Aucun DataFrame disponible (df_M_long ou df_M3M6_filtered).")

# récupérer times et labels (fallback si non présents)
times_list = globals().get('times', ['M3','M6','M12','M24','M60'])
labels = globals().get('labels_to_report', [3,4,5,6])

# construire dataframe de synthèse : moyenne (et sem) par label x temps
rows = []
for lab in labels:
    for t in times_list:
        col = f"mean_fa_nawm_label{lab}_{t}"
        if col in df_src.columns:
            vals = df_src[col].dropna()
            mean_val = vals.mean() if len(vals)>0 else np.nan
            sem_val = vals.std(ddof=1)/np.sqrt(len(vals)) if len(vals)>1 else np.nan
            n = len(vals)
        else:
            mean_val = np.nan
            sem_val = np.nan
            n = 0
        rows.append({'label': f'label{lab}', 'time': t, 'mean_fa': mean_val, 'sem': sem_val, 'n': n})

df_plot = pd.DataFrame(rows)

# vérifier qu'il y a des données utilisables
if df_plot['n'].sum() == 0:
    raise RuntimeError("Aucune valeur disponible pour les colonnes mean_fa_nawm_label{lab}_{time} dans le DataFrame source.")

# ordonner les temps selon times_list
df_plot['time'] = pd.Categorical(df_plot['time'], categories=times_list, ordered=True)
df_plot = df_plot.sort_values(['label','time'])

# plotly : ligne par label avec barres d'erreur (sem)
fig = px.line(
    df_plot,
    x='time',
    y='mean_fa',
    color='label',
    #markers=True,
    error_y='sem',
    category_orders={'time': times_list},
    title='FA moyenne dans NAWM par label au cours du temps'
)
fig.update_layout(yaxis_title='Mean FA (NAWM)', xaxis_title='Temps', legend_title='Label')
fig.show()

In [None]:
import os
import glob
import nibabel as nib
import numpy as np
import pandas as pd

# sessions à traiter
sessions = ['M3', 'M6', 'M12', 'M24', 'M60']

# utiliser la variable DERIVATIVES_DIR si elle existe dans le notebook, sinon fallback
DERIV = globals().get('DERIVATIVES_DIR', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/')

results = []
for ses in sessions:
    pattern = os.path.join(DERIVATIVES_DIR, 'registration', 'sub-*', f'ses-{ses}', f'sub-*_{ses}*_dwi_fa.rec.nii.gz')
    print(f"{pattern}")
    fa_files = sorted(glob.glob(pattern))
    for fa_path in fa_files:
        print(f"{fa_path}")
        # récupérer subject (on remonte deux répertoires)
        SUBJ = os.path.basename(os.path.dirname(os.path.dirname(fa_path)))
        label_path = os.path.join(DERIV, 'segmentation7.1', SUBJ, f'ses-{ses}', f'T2_ses-{ses}.lps_pred_seg_labeled.nii.gz')

        if not os.path.exists(label_path):
            print(f"[SKIP] label file missing for {SUBJ} {ses}: {label_path}")
            continue

        try:
            fa = nib.load(fa_path).get_fdata()
            labels = nib.load(label_path).get_fdata()
        except Exception as e:
            print(f"[ERROR] loading for {SUBJ} {ses}: {e}")
            continue

        # moyenne globale (dans tout label non-nul)
        mask_all = (labels != 0) & (fa != 0)
        if np.any(mask_all):
            results.append({
                'Subject': SUBJ,
                'session': ses,
                'label': 'ALL',
                'n_vox': int(np.sum(mask_all)),
                'mean_fa': float(np.nanmean(fa[mask_all]))
            })
        else:
            results.append({
                'Subject': SUBJ,
                'session': ses,
                'label': 'ALL',
                'n_vox': 0,
                'mean_fa': np.nan
            })

        # moyennes par label (conserver uniquement 3,4,5,6)
        uniq = np.unique(labels)
        for lbl in uniq:
            if int(lbl) not in (3, 4, 5, 6):
                continue
            mask = (labels == lbl) & (fa != 0)
            if np.any(mask):
                mean_val = float(np.nanmean(fa[mask]))
                nvox = int(np.sum(mask))
            else:
                mean_val = np.nan
                nvox = 0
            results.append({
                'Subject': SUBJ,
                'session': ses,
                'label': int(lbl),
                'n_vox': nvox,
                'mean_fa': mean_val
            })

        # moyenne combinée pour les labels 3-6
        mask_3_6 = np.isin(labels, [3,4,5,6]) & (fa != 0)
        if np.any(mask_3_6):
            results.append({
                'Subject': SUBJ,
                'session': ses,
                'label': '3-6',
                'n_vox': int(np.sum(mask_3_6)),
                'mean_fa': float(np.nanmean(fa[mask_3_6]))
            })
        else:
            results.append({
                'Subject': SUBJ,
                'session': ses,
                'label': '3-6',
                'n_vox': 0,
                'mean_fa': np.nan
            })

# DataFrame et sauvegarde
df_out = pd.DataFrame(results)
base_dir = globals().get('STUDY_PATH', '/NAS/coolio/protocoles/CINOCIS')
out_file = os.path.join(base_dir, 'fa_means_per_label_by_session.tsv')
os.makedirs(os.path.dirname(out_file), exist_ok=True)
df_out.to_csv(out_file, sep='\t', index=False)
print(f"Saved full results to {out_file}")


Saved full results to /NAS/coolio/protocoles/CINOCIS/fa_means_per_label_by_session.tsv


In [None]:

# Pivot : créer colonnes par session x label (ex: mean_fa_label3_M3, mean_fa_labels3-6_M6, ...)
# Normaliser le nom de label en string
df_out['label_str'] = df_out['label'].apply(lambda x: f"label{int(x)}" if isinstance(x, (int, np.integer)) else (f"labels_{x}" if isinstance(x, str) else str(x)))
# construire nom de colonne
df_out['col'] = df_out.apply(lambda r: f"mean_fa_{r['label_str']}_{r['session']}", axis=1)

# pivot (sujet en ligne, colonnes = col, valeurs = mean_fa)
df_pivot = df_out.pivot_table(index='Subject', columns='col', values='mean_fa', aggfunc='first').reset_index()

# sauvegarde pivot
pivot_out = os.path.join(base_dir, 'fa_means_labels3-6_by_subject_and_session.tsv')
df_pivot.to_csv(pivot_out, sep='\t', index=False)
print(f"Saved pivot (subjects x session x labels) to {pivot_out}")
print(df_pivot.head(20).to_string(index=False))

In [131]:
# Moyenne des colonnes numériques de df_pivot
df_pivot_means = df_pivot.select_dtypes(include=[np.number]).mean()
print(df_pivot_means.round(6))

label3    0.510385
label4    0.485295
label5    0.464709
label6    0.446683
dtype: float64


In [33]:
# Graphique FA
#df_M3M6_filtered=df_M3M6_filtered_nonan
df_M3M6_filtered = df_M3M6 

fig_fa = px.box(
    df_M3M6_filtered.melt(id_vars=["Subject"], value_vars=["mean_fa_lesion_M3", "mean_fa_nawm_M3", "mean_fa_lesion_M6", "mean_fa_nawm_M6", "mean_faM3_lesion_M6_denovo"]),
    x="variable",
    y="value",
    color="variable",
    hover_data=["Subject"],
    title="Boxplots interactifs de FA_NAWM et FA_lesion"
)
for variable in ["mean_fa_lesion_M3", "mean_fa_nawm_M3", "mean_fa_lesion_M6", "mean_fa_nawm_M6", "mean_faM3_lesion_M6_denovo"]:
    fig_fa.add_scatter(
        x=[variable]*len(df_M3M6_filtered),
        y=df_M3M6_filtered[variable],
        mode='markers',
        marker=dict(size=6, opacity=0.6),
        name=f'{variable} points',
        text=df_M3M6_filtered['Subject'],
        hovertemplate='Subject=%{text}<br>FA=%{y}<extra></extra>',
        showlegend=False
    )
fig_fa.show()

# Graphique ADC
fig_adc = px.box(
    df_M3M6_filtered.melt(id_vars=["Subject"], value_vars=["mean_adc_lesion_M3", "mean_adc_nawm_M3", "mean_adc_lesion_M6", "mean_adc_nawm_M6", "mean_adcM3_lesion_M6_denovo"]),
    x="variable",
    y="value",
    color="variable",
    hover_data=["Subject"],
    title="Boxplots interactifs de ADC_NAWM et ADC_lesion"
)
for variable in ["mean_adc_lesion_M3", "mean_adc_nawm_M3", "mean_adc_lesion_M6", "mean_adc_nawm_M6", "mean_adcM3_lesion_M6_denovo"]:
    fig_adc.add_scatter(
        x=[variable]*len(df_M3M6_filtered),
        y=df_M3M6_filtered[variable],
        mode='markers',
        marker=dict(size=6, opacity=0.6),
        name=f'{variable} points',
        text=df_M3M6_filtered['Subject'],
        hovertemplate='Subject=%{text}<br>ADC=%{y}<extra></extra>',
        showlegend=False
    )
fig_adc.show()

# Ajouter les descripteurs statistiques pour chaque variable FA
fa_stats = df_M3M6_filtered[["mean_fa_lesion_M3", "mean_fa_nawm_M3", "mean_fa_lesion_M6", "mean_fa_nawm_M6", "mean_faM3_lesion_M6_denovo"]].agg(['min', 'max', 'mean', 'median', 'std','count']).T
print("Descripteurs statistiques FA :")
print(fa_stats.round(4).to_csv(sep='\t', index=True))

# Ajouter les descripteurs statistiques pour chaque variable ADC
adc_stats = df_M3M6_filtered[["mean_adc_lesion_M3", "mean_adc_nawm_M3", "mean_adc_lesion_M6", "mean_adc_nawm_M6", "mean_adcM3_lesion_M6_denovo"]].agg(['min', 'max', 'mean', 'median', 'std','count']).T
print("\nDescripteurs statistiques ADC :")
print(adc_stats.round(4).to_csv(sep='\t', index=True))


Descripteurs statistiques FA :
	min	max	mean	median	std	count
mean_fa_lesion_M3	0.1963	0.7748	0.4861	0.4904	0.1178	67.0
mean_fa_nawm_M3	0.2591	0.6796	0.5388	0.5551	0.0817	113.0
mean_fa_lesion_M6	0.1362	0.9344	0.5031	0.5104	0.1333	57.0
mean_fa_nawm_M6	0.2228	0.7444	0.5485	0.5598	0.091	114.0
mean_faM3_lesion_M6_denovo	0.1222	0.7573	0.494	0.5108	0.1412	47.0


Descripteurs statistiques ADC :
	min	max	mean	median	std	count
mean_adc_lesion_M3	0.0005	0.0036	0.0015	0.0013	0.0006	67.0
mean_adc_nawm_M3	0.0009	0.0033	0.0014	0.0013	0.0004	113.0
mean_adc_lesion_M6	0.0002	0.0035	0.0015	0.0014	0.0006	57.0
mean_adc_nawm_M6	0.0007	0.0037	0.0014	0.0013	0.0004	114.0
mean_adcM3_lesion_M6_denovo	0.0003	0.0023	0.0014	0.0013	0.0004	48.0



In [None]:
import pandas as pd
import plotly.express as px

# Concaténer les valeurs FA pour les lésions et NAWM à M3 et M6
fa_lesion = pd.concat([df_M3M6_filtered["mean_fa_lesion_M3"].dropna(), df_M3M6_filtered["mean_fa_lesion_M6"].dropna()])
fa_nawm = pd.concat([df_M3M6_filtered["mean_fa_nawm_M3"].dropna(), df_M3M6_filtered["mean_fa_nawm_M6"].dropna()])
fa_denovo = df_M3M6_filtered["mean_faM3_lesion_M6_denovo"].dropna()

subjects_lesion = pd.concat([
    df_M3M6_filtered.loc[df_M3M6_filtered["mean_fa_lesion_M3"].notna(), "Subject"],
    df_M3M6_filtered.loc[df_M3M6_filtered["mean_fa_lesion_M6"].notna(), "Subject"],
    
])
subjects_nawm = pd.concat([
    df_M3M6_filtered.loc[df_M3M6_filtered["mean_fa_nawm_M3"].notna(), "Subject"],
    df_M3M6_filtered.loc[df_M3M6_filtered["mean_fa_nawm_M6"].notna(), "Subject"]
])
subjects_denovo = df_M3M6_filtered.loc[df_M3M6_filtered["mean_faM3_lesion_M6_denovo"].notna(), "Subject"]

# Créer un DataFrame pour le graphique
df_fa = pd.DataFrame({
    "FA": pd.concat([fa_lesion, fa_nawm, fa_denovo], ignore_index=True),
    "Groupe": (["Lésion"] * len(fa_lesion)) + (["NAWM"] * len(fa_nawm)) + (["Lésion dé novo"] * len(fa_denovo)),
    "Subject": pd.concat([subjects_lesion, subjects_nawm, subjects_denovo], ignore_index=True)
})

# Afficher le boxplot interactif
fig = px.box(df_fa, x="Groupe", y="FA", color="Groupe", title="FA concaténée M3+M6 : Lésion vs NAWM vs Lésion dé novo")
fig.add_scatter(
    x=df_fa["Groupe"],
    y=df_fa["FA"],
    mode='markers',
    marker=dict(size=6, opacity=0.6),
    text=df_fa["Subject"],
    hovertemplate='Subject=%{text}<br>FA=%{y}<extra></extra>',
    showlegend=False
)
fig.show()

# Afficher les estimateurs pour chaque groupe
stats = df_fa.groupby("Groupe")["FA"].agg(['min', 'max', 'median', 'std', 'mean', 'count'])
print(stats.round(2))


                 min   max  median   std  mean  count
Groupe                                               
Lésion          0.14  0.93    0.50  0.12  0.49    124
Lésion dé novo  0.12  0.76    0.51  0.14  0.49     47
NAWM            0.22  0.74    0.56  0.09  0.54    227


In [32]:
t_stat, p_value = ttest_ind(df_M3M6_filtered["mean_fa_lesion_M3"].dropna(), df_M3M6_filtered["mean_fa_lesion_M6"].dropna(), equal_var=False)
print(f"difference M3 M6 fa lesion : ({round(t_stat,3)}, {round(p_value,3)})")

t_stat, p_value = ttest_ind(df_M3M6_filtered["mean_adc_lesion_M3"].dropna(), df_M3M6_filtered["mean_adc_lesion_M6"].dropna(), equal_var=False)
print(f"difference M3 M6 adc lesion : ({round(t_stat,3)}, {round(p_value,3)})")

t_stat, p_value = ttest_ind(df_M3M6_filtered["mean_fa_nawm_M3"].dropna(), df_M3M6_filtered["mean_fa_nawm_M6"].dropna(), equal_var=False)
print(f"difference M3 M6 fa nawm : ({round(t_stat,3)}, {round(p_value,3)})")

t_stat, p_value = ttest_ind(df_M3M6_filtered["mean_adc_nawm_M3"].dropna(), df_M3M6_filtered["mean_adc_nawm_M6"].dropna(), equal_var=False)
print(f"difference M3 M6 adc nawm : ({round(t_stat,3)}, {round(p_value,3)})")

t_stat, p_value = ttest_ind(
    df_M3M6_filtered["mean_fa_lesion_M3"].dropna(),
    df_M3M6_filtered["mean_fa_nawm_M3"].dropna(),
    equal_var=False
)
print(f"difference de fa entre lesion Vs NAWM à M3: ({round(t_stat,3)}, {round(p_value,3)})")

t_stat, p_value = ttest_ind(
    df_M3M6_filtered["mean_fa_lesion_M6"].dropna(),
    df_M3M6_filtered["mean_fa_nawm_M6"].dropna(),
    equal_var=False
)
print(f"difference de fa entre lesion Vs NAWM à M6: ({round(t_stat,3)}, {round(p_value,3)})")

t_stat, p_value = ttest_ind(
    pd.concat([df_M3M6_filtered["mean_fa_lesion_M3"].dropna(), df_M3M6_filtered["mean_fa_lesion_M6"].dropna()]),
    pd.concat([df_M3M6_filtered["mean_fa_nawm_M3"].dropna(), df_M3M6_filtered["mean_fa_nawm_M6"].dropna()]),
    equal_var=False
)
print(f"difference de fa entre lesion Vs NAWM : ({round(t_stat,3)}, {round(p_value,3)})")

t_stat, p_value = ttest_ind(
    pd.concat([df_M3M6_filtered["mean_adc_lesion_M3"].dropna(), df_M3M6_filtered["mean_adc_lesion_M6"].dropna()]),
    pd.concat([df_M3M6_filtered["mean_adc_nawm_M3"].dropna(), df_M3M6_filtered["mean_adc_nawm_M6"].dropna()]),
    equal_var=False
)
print(f"difference de adc entre lesion Vs NAWM : ({round(t_stat,3)}, {round(p_value,3)})")

t_stat, p_value = ttest_ind(
    pd.concat([df_M3M6_filtered["mean_fa_nawm_M3"].dropna(), df_M3M6_filtered["mean_fa_nawm_M6"].dropna()]),
    df_M3M6_filtered["mean_faM3_lesion_M6_denovo"].dropna(),
    equal_var=False
)
print(f"difference de fa pour prediction : ({round(t_stat,3)}, {round(p_value,3)})")

t_stat, p_value = ttest_ind(
    pd.concat([df_M3M6_filtered["mean_adc_nawm_M3"].dropna(), df_M3M6_filtered["mean_adc_nawm_M6"].dropna()]),
    df_M3M6_filtered["mean_adcM3_lesion_M6_denovo"].dropna(),
    equal_var=False
)
print(f"difference de adc pour prediction : ({round(t_stat,3)}, {round(p_value,3)})")


difference M3 M6 fa lesion : (-0.747, 0.457)
difference M3 M6 adc lesion : (-0.373, 0.71)
difference M3 M6 fa nawm : (-0.851, 0.395)
difference M3 M6 adc nawm : (0.753, 0.452)
difference de fa entre lesion Vs NAWM à M3: (-3.228, 0.002)
difference de fa entre lesion Vs NAWM à M6: (-2.316, 0.023)
difference de fa entre lesion Vs NAWM : (-3.948, 0.0)
difference de adc entre lesion Vs NAWM : (1.364, 0.174)
difference de fa pour prediction : (2.325, 0.024)
difference de adc pour prediction : (0.295, 0.769)


In [21]:
# Effectuer un test t pour comparer les deux groupesv
t_stat, p_value = ttest_ind([df_M3M6["mean_fa_lesion_M3"].dropna(),df_M3M6["mean_fa_lesion_M6"].dropna()], fa_lesion_values.dropna(), equal_var=False)

# Afficher les résultats du test t
print(f"T-statistic: {t_stat}, P-value: {p_value}")

NameError: name 'fa_lesion_values' is not defined

In [None]:
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
import os
%matplotlib inline
import matplotlib.pyplot as plt

# # Lire les fichiers avec nibabel
# #images = [ for file_path in file_paths]
# # Soustraire l'image '801T2W_SPAIR2D_T2w_masklesioncervicalcord_dilated' de 'seg_cerv_rpi_labeled_eroded'
# dilated_les_M6 = nib.load('/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/sct_4.3_fused2/sub-012/ses-M6/sub-012_ses-M6_acq-801T2W_SPAIR2D_T2w_masklesioncervicalcord_dilated.nii.gz').get_fdata()
# eroded_les_M6 = nib.load('/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/sct_4.3_fused2/sub-012/ses-M6/sub-012_ses-M6_acq-801T2W_SPAIR2D_T2w_masklesioncervicalcord_eroded.nii.gz').get_fdata()
# segmented_vert_M6 = nib.load('/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/sct_4.3_fused2/sub-012/ses-M6/seg_cerv_rpi_labeled_eroded.nii.gz').get_fdata()


# # Ne garder que les labels 3 à 6 dans dilated_img
# filtered_vert_data = np.where((segmented_vert_M6 >= 3) & (segmented_vert_M6 <= 6), 1, 0)
# result_vertminusles_data = filtered_vert_data - dilated_les_M6 - dilated_les_M3

# Chemin des fichiers à récupérer
#file_pattern = r'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/*/fa_NAWM_mean.txt'
file_pattern = r'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/*/fa_M3onM6.nii.gz'

# Récupérer les fichiers avec une taille supérieure à 0 octet
valid_files = [file for file in glob.glob(file_pattern) if os.path.getsize(file) > 0]

# Afficher les fichiers récupérés
print("Fichiers valides :", valid_files)
# Créer un dictionnaire pour stocker les données
data = {}


# Afficher des informations sur chaque fichier chargé
for i, img in enumerate(valid_files[:]):    
    SUBJ = img.split('/')[8]
    # print(f"Traitement du fichier ${SUBJ} {i+1}/{len(valid_files[:])}: {img}")
    if os.path.exists(img.replace('fa_M3onM6.nii.gz','fa_M3onM6.rotated.nii.gz')) :
        DWI_quant = nib.load(img.replace('fa_M3onM6.nii.gz','fa_M3onM6.rotated.nii.gz')).get_fdata()
        print(f"Vérification des fichiers : {img.replace('fa_M3onM6.nii.gz','fa_M3onM6.rotated.nii.gz')}")
    else :
        DWI_quant = nib.load(img).get_fdata()

    # Calculer la moyenne de DWI_quant pour les voxels dans filtered_vert_data
    nawm_name = f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{SUBJ}/NAWM.nii.gz'
    lesion_name = f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{SUBJ}/mask_lesion_M6_eroded_3_6.nii.gz'
    
    if os.path.exists(nawm_name) and os.path.exists(lesion_name):
        nawm_data = nib.load(nawm_name).get_fdata()
        mean_nawm = np.mean(DWI_quant[(nawm_data == 1) & (DWI_quant != 0)])
        
        filtered_vert_data = nib.load(lesion_name).get_fdata()    
        # Calculer la moyenne de DWI_quant pour les voxels dans result_vertminusles_data
        print(np.sum(filtered_vert_data))
        if np.sum(filtered_vert_data) == 0:
            mean_lesion = 0
        else:
            mean_lesion = np.mean(DWI_quant[(filtered_vert_data == 1) & (DWI_quant != 0)])

        # Ajouter les résultats au dictionnaire
        data[SUBJ] = {'mean_nawm': mean_nawm, 'mean_lesion': mean_lesion}

# Créer un DataFrame à partir du dictionnaire
df_results = pd.DataFrame.from_dict(data, orient='index')

# Afficher le DataFrame
print(df_results)

# # Afficher l'histogramme
# plt.hist(DWI_quant[filtered_vert_data == 1], bins=50, color='blue', alpha=0.7)
# plt.title("Histogramme des valeurs de DWI_quant pour NAWM")
# plt.xlabel("Valeurs de DWI_quant")
# plt.ylabel("Fréquence")
# plt.show()    

Fichiers valides : ['/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-078/fa_M3onM6.nii.gz', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-126/fa_M3onM6.nii.gz', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-107/fa_M3onM6.nii.gz', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-048/fa_M3onM6.nii.gz', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-010/fa_M3onM6.nii.gz', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-040/fa_M3onM6.nii.gz', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-013/fa_M3onM6.nii.gz', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-090/fa_M3onM6.nii.gz', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-008/fa_M3onM6.nii.gz', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-098/fa_M3onM6.nii.gz', '/NAS/coolio/protocoles/CINOCIS/BIDS/der


Mean of empty slice.


invalid value encountered in scalar divide



0.0
Vérification des fichiers : /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-126/fa_M3onM6.rotated.nii.gz
0.0
0.0
Vérification des fichiers : /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-048/fa_M3onM6.rotated.nii.gz
Vérification des fichiers : /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-010/fa_M3onM6.rotated.nii.gz
251.0
Vérification des fichiers : /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-040/fa_M3onM6.rotated.nii.gz
140.0
Vérification des fichiers : /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-013/fa_M3onM6.rotated.nii.gz
153.0
Vérification des fichiers : /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-090/fa_M3onM6.rotated.nii.gz
0.0
Vérification des fichiers : /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-008/fa_M3onM6.rotated.nii.gz
70.0
Vérification des fichiers : /NAS/coolio/protocoles/CINOCIS/BIDS/derivati

In [51]:
from scipy.stats import ttest_ind

import plotly.express as px

# Créer un DataFrame avec les données triées
# df_results = df_results.sort_values(by="FA_NAWM")

# Créer un graphique interactif avec Plotly
fig = px.box(
    df_results.melt(id_vars=["Subject"], value_vars=["mean_nawm", "mean_lesion"]),
    x="variable",
    y="value",
    color="variable",
    hover_data=["Subject"],
    labels={"value": "FA Values", "variable": "Group"},
    title="Boxplots interactifs de FA_NAWM et FA_lesion"
)

# Afficher le graphique
fig.show()

# # Effectuer un test t pour comparer les deux groupes
# t_stat, p_value = ttest_ind(fa_nawm_values.dropna(), fa_lesion_values.dropna(), equal_var=False)

# # Afficher les résultats du test t
# print(f"T-statistic: {t_stat}, P-value: {p_value}")

NameError: name 'df_results' is not defined

In [40]:
import glob
import os
import pandas as pd

# Chemin des fichiers à récupérer
file_pattern = r'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/*/fa_NAWM_mean.txt'

# Récupérer les fichiers avec une taille supérieure à 0 octet
valid_files = [file for file in glob.glob(file_pattern) if os.path.getsize(file) > 0]

# Afficher les fichiers récupérés
print("Fichiers valides :", valid_files)
# Créer un dictionnaire pour stocker les données
data = {}

# Parcourir les fichiers valides
for file in valid_files:
    # Extraire la clé "sub-..." du chemin
    key = os.path.basename(os.path.dirname(file))
    
    # Lire la valeur FA_NAWM contenue dans le fichier
    value = float(open(file, 'r').read().strip())
    
    # Ajouter la clé et la valeur au dictionnaire
    #dta[key] = value

    # Chemin du fichier fa_lesion correspondant
    lesion_file = os.path.join(os.path.dirname(file), 'fa_lesion_M6_mean.txt')
    
    # Vérifier si le fichier fa_lesion existe et a une taille > 0
    if os.path.exists(lesion_file) and os.path.getsize(lesion_file) > 0:
        lesion_value = float(open(lesion_file, 'r').read().strip())
    else:
        lesion_value = 0  # Valeur par défaut si le fichier n'existe pas ou est vide
    
    # Ajouter la valeur fa_lesion au dictionnaire
    if value > 0 and lesion_value > 0:
        data[key] = {'FA_NAWM': value, 'FA_lesion': lesion_value}


# Créer un DataFrame à partir du dictionnaire
df_FA_NAWM = pd.DataFrame.from_dict(data, orient='index').reset_index()
df_FA_NAWM.rename(columns={'index': 'Subject'}, inplace=True)

# Afficher le DataFrame
print(df_FA_NAWM)

Fichiers valides : ['/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-078/fa_NAWM_mean.txt', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-126/fa_NAWM_mean.txt', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-107/fa_NAWM_mean.txt', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-010/fa_NAWM_mean.txt', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-040/fa_NAWM_mean.txt', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-013/fa_NAWM_mean.txt', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-090/fa_NAWM_mean.txt', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-008/fa_NAWM_mean.txt', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-098/fa_NAWM_mean.txt', '/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-111/fa_NAWM_mean.txt', '/NAS/coolio/protocoles/CINOCIS/BIDS/der

In [56]:
import nibabel as nib
import os
import pandas as pd
import glob
import numpy as np

# Initialiser une liste pour stocker les résultats
results = []
fa_files = glob.glob('/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-*/fa_M3onM6.nii.gz')
fa_files = sorted(fa_files)

# Parcourir tous les sujets possibles
for fa_path in fa_files:
    subj = os.path.basename(os.path.dirname(fa_path))
    # Pour chaque type de fichier (fa, adc)
    for modality in ['fa', 'adc']:
        modality_path = f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/{modality}_M3onM6.lps.nii.gz'
        if os.path.exists(modality_path):
            nawm_path = f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/NAWM_lps.nii.gz'
            mask_lesion_path = f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/mask_lesion_M6_eroded_3_6_lps.nii.gz'
            print(f'patient {subj} - {modality}')
            if os.path.exists(nawm_path) and os.path.exists(mask_lesion_path):
                nawm_img = nib.load(nawm_path)
                mask_lesion_img = nib.load(mask_lesion_path)
                modality_img = nib.load(modality_path)

                nawm_data = nawm_img.get_fdata()
                mask_lesion_data = mask_lesion_img.get_fdata()
                modality_data = modality_img.get_fdata()

                mean_nawm = np.nanmean(modality_data[(nawm_data == 1) & (modality_data != 0)])
                mean_lesion = np.nanmean(modality_data[(mask_lesion_data == 1) & (modality_data != 0)])
                print(f'Mean {modality.upper()}_NAWM for {subj}: {mean_nawm}')
                print(f'Mean {modality.upper()}_lesion for {subj}: {mean_lesion}')
                results.append({'Subject': subj, f'{modality.upper()}_NAWM': mean_nawm, f'{modality.upper()}_lesion': mean_lesion})
                
# Créer un DataFrame à partir des résultats
df_results = pd.DataFrame(results)

# Afficher le DataFrame
print(df_results)


patient sub-003 - fa
patient sub-003 - adc
patient sub-004 - fa
Mean FA_NAWM for sub-004: 0.5697818013995174
Mean FA_lesion for sub-004: 0.6115499159746003
patient sub-004 - adc
Mean ADC_NAWM for sub-004: 0.0012156045936207204
Mean ADC_lesion for sub-004: 0.0009780332401166099
patient sub-005 - fa
Mean FA_NAWM for sub-005: 0.37307082658323565
Mean FA_lesion for sub-005: 0.4622087193501962
patient sub-005 - adc
Mean ADC_NAWM for sub-005: 0.0017567409724749713
Mean ADC_lesion for sub-005: 0.001218340901746037
patient sub-006 - fa
Mean FA_NAWM for sub-006: 0.43876212437468676
Mean FA_lesion for sub-006: 0.45259168381413456
patient sub-006 - adc
Mean ADC_NAWM for sub-006: 0.0015529629847512018
Mean ADC_lesion for sub-006: 0.0010216941003116074
patient sub-007 - fa
patient sub-007 - adc
patient sub-008 - fa
Mean FA_NAWM for sub-008: 0.398540123981588
Mean FA_lesion for sub-008: 0.3992381704705102
patient sub-008 - adc
Mean ADC_NAWM for sub-008: 0.0014434679080937605
Mean ADC_lesion for sub-


Mean of empty slice



Mean FA_NAWM for sub-009: 0.4305846143468445
Mean FA_lesion for sub-009: nan
patient sub-009 - adc
Mean ADC_NAWM for sub-009: 0.0015198876478733465
Mean ADC_lesion for sub-009: nan
patient sub-010 - fa
Mean FA_NAWM for sub-010: 0.35724487081988826
Mean FA_lesion for sub-010: 0.33202400566097273
patient sub-010 - adc
Mean ADC_NAWM for sub-010: 0.0019372648449516242
Mean ADC_lesion for sub-010: 0.0015668623755589514
patient sub-011 - fa
Mean FA_NAWM for sub-011: nan
Mean FA_lesion for sub-011: nan
patient sub-011 - adc



Mean of empty slice



Mean ADC_NAWM for sub-011: 0.0016095333330173262
Mean ADC_lesion for sub-011: 0.0014555944196347678
patient sub-012 - fa
Mean FA_NAWM for sub-012: 0.6224320192032108
Mean FA_lesion for sub-012: 0.6981053757217696
patient sub-012 - adc
Mean ADC_NAWM for sub-012: 0.001021016579501792
Mean ADC_lesion for sub-012: 0.0007946891051207511
patient sub-013 - fa
Mean FA_NAWM for sub-013: 0.26110341762741135
Mean FA_lesion for sub-013: 0.1508745139623
patient sub-013 - adc
Mean ADC_NAWM for sub-013: 0.0024151773299898563
Mean ADC_lesion for sub-013: 0.0028946413278117095
patient sub-014 - fa
Mean FA_NAWM for sub-014: 0.48170071081327187
Mean FA_lesion for sub-014: 0.6078253674010435
patient sub-014 - adc
Mean ADC_NAWM for sub-014: 0.0011225376418328645
Mean ADC_lesion for sub-014: 0.0010667241270615098
patient sub-015 - fa
Mean FA_NAWM for sub-015: 0.4269647807489128
Mean FA_lesion for sub-015: nan
patient sub-015 - adc
Mean ADC_NAWM for sub-015: 0.0015806987090841939
Mean ADC_lesion for sub-015:

In [54]:
fa_files = glob.glob('/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-*/fa_M3onM6.nii.gz')
fa_files = sorted(fa_files)

# Parcourir tous les sujets possibles
for fa_path in fa_files:
    subj = os.path.basename(os.path.dirname(fa_path))
    # Pour chaque type de fichier (fa, adc)
    for modality in ['fa', 'adc']:
        if os.path.exists(f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/{modality}_M3onM6.rotated.nii.gz'):
            modality_path = f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/{modality}_M3onM6.rotated.nii.gz'
            print(f'mri_convert {modality_path} --out_orientation LPS {modality_path.replace(".rotated.nii.gz", ".lps.nii.gz")}  ')
        elif os.path.exists(f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/{modality}_M3onM6.nii.gz'):
            modality_path = f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/{modality}_M3onM6.nii.gz'
            print(f'mri_convert {modality_path} --out_orientation LPS {modality_path.replace(".nii.gz", ".lps.nii.gz")}  ')
        else:
            print(f'Fichier introuvable pour {subj} et {modality}')


mri_convert /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-003/fa_M3onM6.rotated.nii.gz --out_orientation LPS /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-003/fa_M3onM6.lps.nii.gz  
mri_convert /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-003/adc_M3onM6.rotated.nii.gz --out_orientation LPS /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-003/adc_M3onM6.lps.nii.gz  
mri_convert /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-004/fa_M3onM6.nii.gz --out_orientation LPS /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-004/fa_M3onM6.lps.nii.gz  
mri_convert /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-004/adc_M3onM6.nii.gz --out_orientation LPS /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-004/adc_M3onM6.lps.nii.gz  
mri_convert /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-005/fa_M3o

In [61]:
fa_files = glob.glob('/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-*/fa_M3onM6.nii.gz')
fa_files = sorted(fa_files)

# Parcourir tous les sujets possibles
for fa_path in fa_files:
    subj = os.path.basename(os.path.dirname(fa_path))
    if os.path.exists(f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/fa_M3onM6.lps.nii.gz') and os.path.exists(f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/adc_M3onM6.lps.nii.gz') and os.path.exists(f'/NAS/coolio/protocoles/CINOCIS/BIDS/{subj}/ses-M3/anat/recM3onM6Warped.nii.gz') and os.path.exists(f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/NAWM_lps.nii.gz') and os.path.exists(f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/mask_lesion_M6_eroded_3_6_lps.nii.gz'):
        print(f'freeview -v /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/fa_M3onM6.lps.nii.gz:grayscale=0.2,0.7 -v /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/adc_M3onM6.lps.nii.gz:grayscale=0.001,0.005 -v /NAS/coolio/protocoles/CINOCIS/BIDS/{subj}/ses-M3/anat/recM3onM6Warped.nii.gz -v /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/NAWM_lps.nii.gz:colormap=lut   /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/mask_lesion_M6_eroded_3_6_lps.nii.gz:colormap=lut:lut=ReducedLabels4')
    

freeview -v /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-004/fa_M3onM6.lps.nii.gz:grayscale=0.2,0.7 -v /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-004/adc_M3onM6.lps.nii.gz:grayscale=0.001,0.005 -v /NAS/coolio/protocoles/CINOCIS/BIDS/sub-004/ses-M3/anat/recM3onM6Warped.nii.gz -v /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-004/NAWM_lps.nii.gz:colormap=lut   /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-004/mask_lesion_M6_eroded_3_6_lps.nii.gz:colormap=lut:lut=ReducedLabels4
freeview -v /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-005/fa_M3onM6.lps.nii.gz:grayscale=0.2,0.7 -v /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-005/adc_M3onM6.lps.nii.gz:grayscale=0.001,0.005 -v /NAS/coolio/protocoles/CINOCIS/BIDS/sub-005/ses-M3/anat/recM3onM6Warped.nii.gz -v /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-005/NAWM_lps.nii.gz:

In [80]:
fa_files = glob.glob('/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/*/fa_M3onM6.nii.gz')
fa_files = sorted(fa_files)

# Parcourir tous les sujets possibles
for fa_path in fa_files:
    subj = os.path.basename(os.path.dirname(fa_path))
    
    files_dir1 = glob.glob(f'/NAS/coolio/protocoles/CINOCIS/BIDS/{subj}/ses-M6/anat/*SPAIR2D_T2w*.nii.gz')
    fa_files = glob.glob(f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/fa*.nii.gz')
    adc_file = glob.glob(f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{subj}/adc*.nii.gz')
    print(f"freeview {files_dir1[0]} {fa_files[0]} {adc_file[0]}")

freeview /NAS/coolio/protocoles/CINOCIS/BIDS/sub-003/ses-M6/anat/sub-003_ses-M6_acq-1901T2W_SPAIR2D_T2w.nii.gz /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-003/fa_M3onM6.nii.gz /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-003/adc_M3onM6.nii.gz
freeview /NAS/coolio/protocoles/CINOCIS/BIDS/sub-004/ses-M6/anat/sub-004_ses-M6_acq-801T2W_SPAIR2D_T2w.nii.gz /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-004/fa_M3onM6.nii.gz /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-004/adc_M3onM6.nii.gz
freeview /NAS/coolio/protocoles/CINOCIS/BIDS/sub-005/ses-M6/anat/sub-005_ses-M6_acq-801T2W_SPAIR2D_T2w.nii.gz /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-005/fa_M3onM6.nii.gz /NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-005/adc_M3onM6.nii.gz
freeview /NAS/coolio/protocoles/CINOCIS/BIDS/sub-006/ses-M6/anat/sub-006_ses-M6_acq-1801T2W_SPAIR2D_T2w.nii.gz /NAS/cool

In [None]:
import glob

SUBJECT = 'sub-091'
print(df_results[df_results['Subject'] == SUBJECT])


# Afficher les fichiers du premier répertoire
files_dir1 = glob.glob(f'/NAS/coolio/protocoles/CINOCIS/BIDS/{SUBJECT}/ses-M6/anat/*SPAIR2D_T2w*.nii.gz')
for file in files_dir1:
    print(file)


# Afficher les fichiers du premier répertoire
files_dir1 = glob.glob(f'/NAS/coolio/protocoles/CINOCIS/BIDS/{SUBJECT}/ses-M3/anat/*SPAIR2D_T2w*.nii.gz')
for file in files_dir1:
    print(file)

# Afficher les fichiers du deuxième répertoire
files_dir2 = glob.glob(f'/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/{SUBJECT}/*.nii.gz')
print("Fichiers dans le deuxième répertoire:")
for file in files_dir2:
    print(file)


    Subject  FA_NAWM  FA_lesion
64  sub-091  0.40518    0.67741
Fichiers dans le premier répertoire:
/NAS/coolio/protocoles/CINOCIS/BIDS/sub-091/ses-M6/anat/sub-091_ses-M6_acq-1501T2W_SPAIR2D_T2w.nii.gz
Fichiers dans le premier répertoire:
/NAS/coolio/protocoles/CINOCIS/BIDS/sub-091/ses-M3/anat/sub-091_ses-M3_acq-1501T2W_SPAIR2D_T2w_res_RPI_seg.nii.gz
/NAS/coolio/protocoles/CINOCIS/BIDS/sub-091/ses-M3/anat/sub-091_ses-M3_acq-1501T2W_SPAIR2D_T2w.nii.gz
/NAS/coolio/protocoles/CINOCIS/BIDS/sub-091/ses-M3/anat/sub-091_ses-M3_acq-1501T2W_SPAIR2D_T2w_lesionseg_recM3onM6_warped.nii.gz
/NAS/coolio/protocoles/CINOCIS/BIDS/sub-091/ses-M3/anat/sub-091_ses-M3_acq-1501T2W_SPAIR2D_T2w_RPI_seg.nii.gz
Fichiers dans le deuxième répertoire:
/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-091/fa_M3onM6.nii.gz
/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-091/lesion_M3_dilated.nii.gz
/NAS/coolio/protocoles/CINOCIS/BIDS/derivatives_v2.0/predictiveFA/sub-091/ad_

In [36]:
df_results.columns

Index(['Subject', 'FA_NAWM', 'FA_lesion', 'AD_NAWM', 'AD_lesion', 'ADC_NAWM',
       'ADC_lesion'],
      dtype='object')

In [57]:
from scipy.stats import ttest_ind

import plotly.express as px

# Créer un DataFrame avec les données triées
# df_FA_NAWM_sorted = df_results.sort_values(by="FA_NAWM")

# Créer un graphique interactif avec Plotly
fig = px.box(
    df_results.melt(id_vars=["Subject"], value_vars=["FA_NAWM", "FA_lesion"]),
    x="variable",
    y="value",
    color="variable",
    hover_data=["Subject"],
    labels={"value": "FA Values", "variable": "Group"},
    title="Boxplots interactifs de FA_NAWM et FA_lesion"
)

# Afficher le graphique
fig.show()

# Effectuer un test t pour comparer les deux groupes
t_stat, p_value = ttest_ind(df_results["FA_NAWM"].dropna(), df_results["FA_lesion"].dropna(), equal_var=False)

# Afficher les résultats du test t
print(f"T-statistic: {t_stat}, P-value: {p_value}")

T-statistic: -1.805705575835405, P-value: 0.07563556727257681


In [42]:
df_FA_NAWM_sorted_nolesionna = df_FA_NAWM_sorted.dropna(subset=['FA_lesion'])
lowest_fa_lesion_subjects = df_FA_NAWM_sorted_nolesionna.nsmallest(5, 'FA_lesion')
print(lowest_fa_lesion_subjects[['Subject', 'FA_lesion']])

    Subject  FA_lesion
64  sub-091   0.021713
70  sub-099   0.149393
8   sub-013   0.174345
3   sub-008   0.220028
5   sub-010   0.277457
