In [1]:
from glob import glob
import pandas as pd
import os

# 📂 Paths base
train_path_dir = '/data/cristian/projects/med_data/rise-miccai/task-2/'
test_path_dir  = '/data/cristian/projects/med_data/rise-miccai/task-2-val/'

# 📥 Archivos
train_list_paths       = glob(os.path.join(train_path_dir, '**/*_ciso.nii.gz'), recursive=True)
test_list_paths        = glob(os.path.join(test_path_dir, '**/*_ciso.nii.gz'), recursive=True)
train_y_list_paths_2a  = glob(os.path.join(train_path_dir, '**/*HF_hipp.nii.gz'), recursive=True)
train_y_list_paths_2b  = glob(os.path.join(train_path_dir, '**/*HF_baga.nii.gz'), recursive=True)

extra_paths_hipp       = glob(os.path.join(train_path_dir, '**/*LF_hipp.nii.gz'), recursive=True)
extra_paths_baga       = glob(os.path.join(train_path_dir, '**/*LF_baga.nii.gz'), recursive=True)
extra_paths_vent       = glob(os.path.join(train_path_dir, '**/*_vent.nii.gz'), recursive=True)

# ✅ Función auxiliar
def build_df(paths, categoria):
    df = pd.DataFrame(paths, columns=['filepath'])
    df['filename'] = df['filepath'].apply(lambda x: os.path.basename(x))
    df['ID'] = df['filename'].apply(lambda x: "_".join(x.split("_")[:2])) #lambda x: x.split('/')[-1].split(".nii.gz")[0])
    df['categoria'] = categoria
    return df

# 📊 Construcción de DataFrames
df_train          = build_df(train_list_paths, categoria='ciso')
df_test           = build_df(test_list_paths, categoria='ciso')
df_train_target2a = build_df(train_y_list_paths_2a, categoria='HF_hipp')
df_train_target2b = build_df(train_y_list_paths_2b, categoria='HF_baga')
df_extra_data     = pd.concat([
    build_df(extra_paths_hipp, categoria='LF_hipp'),
    build_df(extra_paths_baga, categoria='LF_baga'),
    build_df(extra_paths_vent, categoria='ventricle')
], ignore_index=True)

# 📁 Guardar
results_dir = '../results/preprocessed_data/task2/'
os.makedirs(results_dir, exist_ok=True)

df_train.to_csv(os.path.join(results_dir, 'df_train.csv'), index=False)
df_test.to_csv(os.path.join(results_dir, 'df_test.csv'), index=False)
df_train_target2a.to_csv(os.path.join(results_dir, 'df_train_target2a.csv'), index=False)
df_train_target2b.to_csv(os.path.join(results_dir, 'df_train_target2b.csv'), index=False)
df_extra_data.to_csv(os.path.join(results_dir, 'df_extra_data.csv'), index=False)

# ✅ Mostrar formas
print("✅ Shapes:")
print("df_train         :", df_train.shape)
print("df_test          :", df_test.shape)
print("df_target2a (hipp):", df_train_target2a.shape)
print("df_target2b (baga):", df_train_target2b.shape)
print("df_extra_data    :", df_extra_data.shape)


✅ Shapes:
df_train         : (79, 4)
df_test          : (12, 4)
df_target2a (hipp): (79, 4)
df_target2b (baga): (79, 4)
df_extra_data    : (237, 4)


In [2]:
df_train.head()

Unnamed: 0,filepath,filename,ID,categoria
0,/data/cristian/projects/med_data/rise-miccai/t...,LISA_0008_ciso.nii.gz,LISA_0008,ciso
1,/data/cristian/projects/med_data/rise-miccai/t...,LISA_0015_ciso.nii.gz,LISA_0015,ciso
2,/data/cristian/projects/med_data/rise-miccai/t...,LISA_0016_ciso.nii.gz,LISA_0016,ciso
3,/data/cristian/projects/med_data/rise-miccai/t...,LISA_0024_ciso.nii.gz,LISA_0024,ciso
4,/data/cristian/projects/med_data/rise-miccai/t...,LISA_0012_ciso.nii.gz,LISA_0012,ciso


In [3]:
df_extra_data.head()

Unnamed: 0,filepath,filename,ID,categoria
0,/data/cristian/projects/med_data/rise-miccai/t...,LISA_1016_LF_hipp.nii.gz,LISA_1016,LF_hipp
1,/data/cristian/projects/med_data/rise-miccai/t...,LISA_0055_LF_hipp.nii.gz,LISA_0055,LF_hipp
2,/data/cristian/projects/med_data/rise-miccai/t...,LISA_0046_LF_hipp.nii.gz,LISA_0046,LF_hipp
3,/data/cristian/projects/med_data/rise-miccai/t...,LISA_0058_LF_hipp.nii.gz,LISA_0058,LF_hipp
4,/data/cristian/projects/med_data/rise-miccai/t...,LISA_0001_LF_hipp.nii.gz,LISA_0001,LF_hipp


In [4]:
df_train.describe()

Unnamed: 0,filepath,filename,ID,categoria
count,79,79,79,79
unique,79,79,79,1
top,/data/cristian/projects/med_data/rise-miccai/t...,LISA_0008_ciso.nii.gz,LISA_0008,ciso
freq,1,1,1,79


In [5]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79 entries, 0 to 78
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   filepath   79 non-null     object
 1   filename   79 non-null     object
 2   ID         79 non-null     object
 3   categoria  79 non-null     object
dtypes: object(4)
memory usage: 2.6+ KB
