- data_path
    - ISBI
        - train
            - patients
                - flair
                - mprage
                - t2
                - pd
                - mask1
                - mask2
            - train_csv
    - ISBI_orig

In [25]:
import numpy as np
import os, sys
import nibabel as nib
import shutil
import glob
from pathlib import Path
from config import *
import pandas as pd

from sklearn.model_selection import StratifiedKFold

In [26]:
import sklearn
sklearn.__version__

'0.24.2'

In [27]:
path=options['data_path']
new_path=options['train_folder']+"{}_{}/"

patient_data=path+"{}/preprocessed/{}_{}_{}_pp.nii"
patient_mask=path+"{}/masks/{}_{}_{}.nii"

modalities=options['modalities']
masks=options['masks']

train_csv_path=options["train_csv_path"]

In [5]:
_, dirs,_=next(os.walk(path))
for dir_ in dirs:
    flair_files = glob.glob(patient_data.format(dir_,dir_,'*',modalities[0]))
    for file in flair_files:
        study=file.split('_')[1]

        newpath = new_path.format(dir_,study)
        Path(newpath).mkdir(parents=True, exist_ok=True)
        # copy data and masks in new folders
        for m in modalities:
            data=patient_data.format(dir_,dir_,study,m)
            shutil.copy(data, newpath+f"{m}.nii") 
        for msk in masks:
            mask=patient_mask.format(dir_,dir_,study,msk)
            shutil.copy(mask, newpath+f"{msk}.nii")        

In [42]:
train_data=pd.DataFrame(columns=['root_path','patient_id','study',*masks,*modalities])
train_data = train_data.astype({"study": str})
root,dirs,_=next(os.walk(options['train_folder']))
for dir_ in dirs:
    patient_id=dir_.split('_')[0]
    study="_"+dir_.split('_')[1]
    root_path=root+dir_+"/"
    masks_names=[]
    for mask in masks:
        masks_names.append(mask+".nii")
    modalities_names=[]
    for modality in modalities:
        modalities_names.append(modality+".nii")
    df = pd.DataFrame([[root_path, patient_id,study,*masks_names,*modalities_names ]], columns=['root_path','patient_id','study',*masks,*modalities])
    train_data=train_data.append(df)
                                

In [43]:
k_fold=4
seed=300
train_data.reset_index(inplace=True)
train_data.drop(columns=['index'],inplace=True)

skf = StratifiedKFold(
  n_splits=k_fold, random_state=seed, shuffle=True
)
print(train_data.shape)
for i, (train_index, val_index) in enumerate(
      skf.split(train_data, train_data["patient_id"])
      ):
      train_data.loc[val_index, "fold"] = i

train_df = train_data.loc[train_data['fold'] != 0].reset_index(drop=True)
val_df = train_data.loc[train_data['fold'] == 0].reset_index(drop=True)

print("train_df ->", train_df.shape, "val_df ->", val_df.shape)
train_data.to_csv(train_csv_path, index=False)

(21, 9)
train_df -> (15, 10) val_df -> (6, 10)


In [45]:
print(train_df.dtypes)
train_data

root_path      object
patient_id     object
study          object
mask1          object
mask2          object
flair          object
t2             object
pd             object
mprage         object
fold          float64
dtype: object


Unnamed: 0,root_path,patient_id,study,mask1,mask2,flair,t2,pd,mprage,fold
0,/media/marwa/F2F25460F2542ADD/MedicalAnalysis/...,training01,_01,mask1.nii,mask2.nii,flair.nii,t2.nii,pd.nii,mprage.nii,0.0
1,/media/marwa/F2F25460F2542ADD/MedicalAnalysis/...,training01,_02,mask1.nii,mask2.nii,flair.nii,t2.nii,pd.nii,mprage.nii,2.0
2,/media/marwa/F2F25460F2542ADD/MedicalAnalysis/...,training01,_03,mask1.nii,mask2.nii,flair.nii,t2.nii,pd.nii,mprage.nii,3.0
3,/media/marwa/F2F25460F2542ADD/MedicalAnalysis/...,training01,_04,mask1.nii,mask2.nii,flair.nii,t2.nii,pd.nii,mprage.nii,1.0
4,/media/marwa/F2F25460F2542ADD/MedicalAnalysis/...,training02,_01,mask1.nii,mask2.nii,flair.nii,t2.nii,pd.nii,mprage.nii,0.0
5,/media/marwa/F2F25460F2542ADD/MedicalAnalysis/...,training02,_02,mask1.nii,mask2.nii,flair.nii,t2.nii,pd.nii,mprage.nii,1.0
6,/media/marwa/F2F25460F2542ADD/MedicalAnalysis/...,training02,_03,mask1.nii,mask2.nii,flair.nii,t2.nii,pd.nii,mprage.nii,3.0
7,/media/marwa/F2F25460F2542ADD/MedicalAnalysis/...,training02,_04,mask1.nii,mask2.nii,flair.nii,t2.nii,pd.nii,mprage.nii,2.0
8,/media/marwa/F2F25460F2542ADD/MedicalAnalysis/...,training03,_01,mask1.nii,mask2.nii,flair.nii,t2.nii,pd.nii,mprage.nii,0.0
9,/media/marwa/F2F25460F2542ADD/MedicalAnalysis/...,training03,_02,mask1.nii,mask2.nii,flair.nii,t2.nii,pd.nii,mprage.nii,0.0
