## Setup imports 

In [None]:
import sys
import monai
import os 
import numpy as np
import glob
import nibabel as nib
import scipy
import random
import matplotlib.pyplot as plt
import shutil
import pandas as pd
import seaborn as sns

from torch.utils.data import Dataset, DataLoader
from monai.data import create_test_image_3d, list_data_collate, decollate_batch, partition_dataset, DatasetSummary
from monai.transforms import (Activationsd,AdjustContrastd,AsDiscrete,Compose,LoadImaged,LabelFilter,Invertd,EnsureTyped,AddChanneld,SaveImaged)
from itertools import cycle

sys.path.append('..')
from utilities import get_split_deterministic

## Define data directory and filenames and split dataset

In [None]:
# Glob pattern matching every exam folder of the in-house dataset.
root_dir = '/path/data/*'
source_exams = glob.glob(root_dir)

# The patient id is the part of the exam folder name before the first '_'.
patients = {os.path.basename(exam_path).split('_')[0] for exam_path in source_exams}

# Train/validation patient ids for the selected fold (5 splits, fixed seed);
# the split itself is delegated to utilities.get_split_deterministic.
fold = 0
train, val = get_split_deterministic(
    patients,
    fold=fold,
    num_splits=5,
    random_state=12345,
)

In [None]:
# File names looked up inside each source exam folder.
file_name_seg1 = 'segmentation_1.nii.gz'  # MS lesion segmentation baseline
file_name_flair = 'FLAIR_1.nii.gz'  # FLAIR baseline
file_name_t1 = 'T1_1.nii.gz'  # T1 baseline
file_name_brain_seg = 'brain_1_seg.nii.gz'  # brain segmentation

# Endless iterator over the source exams. glob returns its matches in
# arbitrary order, so sort first: the pool then hands out exams in the same
# order on every run and on every machine.
source_pool = cycle(sorted(source_exams))

## Cycle through the source exams and copy the files of training patients into the OASIS exam folders

In [None]:
# Give every OASIS exam folder one complete set of baseline files (T1, FLAIR,
# lesion segmentation, brain segmentation) copied from a source exam.
# Source exams of validation patients are never used, and a single source
# patient is copied at most `max_uses_per_patient` times.
dest_dir = '/path/oasis/data/'
max_uses_per_patient = 2
required_files = (file_name_t1, file_name_flair, file_name_seg1, file_name_brain_seg)

# Every non-validation patient drawn from the pool, in draw order (this also
# includes draws that were rejected because the patient was already used up).
source_patients = []
# Number of completed copies per source patient.
use_counts = {}

# Sorted so that the exam -> source pairing does not depend on listing order.
for exam in sorted(os.listdir(dest_dir)):
    dest_path = os.path.join(dest_dir, exam)
    rejected_in_a_row = 0

    # The FLAIR file doubles as the "this folder has been filled" marker.
    while file_name_flair not in os.listdir(dest_path):
        # A full lap around the pool without one usable exam means no later
        # draw can succeed either (validation membership is fixed and use
        # counts only grow), so stop instead of spinning forever.
        if rejected_in_a_row >= len(source_exams):
            raise RuntimeError(
                f'No usable source exam left for {dest_path}: all '
                f'{len(source_exams)} source exams belong to validation patients '
                f'or to patients already used {max_uses_per_patient} times.'
            )

        path = next(source_pool)
        source_patient = os.path.split(path)[-1].split('_')[0]  # extract patient id

        if source_patient in val:
            print(source_patient, ' Scan not copied to destintaion, patient belongs to validation group.')
            rejected_in_a_row += 1
            continue

        print(dest_path, source_patient)
        source_patients.append(source_patient)

        if use_counts.get(source_patient, 0) >= max_uses_per_patient:
            print(source_patient, 'Enough samples')
            rejected_in_a_row += 1
            continue

        # Validate the whole file set before copying anything, so a broken
        # source exam cannot leave the destination folder half-populated.
        source_files = [os.path.join(path, name) for name in required_files]
        missing_files = [src for src in source_files if not os.path.exists(src)]
        if missing_files:
            raise FileNotFoundError(f'Source exam {path} is missing: {missing_files}')

        for src in source_files:
            shutil.copy(src, dest_path)
        use_counts[source_patient] = use_counts.get(source_patient, 0) + 1


## Analysis of baseline and follow-up lesions


In [None]:

# Count the lesions (connected components) in the baseline and follow-up
# segmentation of every exam in the in-house dataset.
root_dir = '/path/data'
file_name_seg1 = 'segmentation_1.nii.gz'  # baseline MS lesion segmentation
file_name_seg2 = 'segmentation_2.nii.gz'  # follow-up MS lesion segmentation


def _count_lesions(seg_path):
    """Return the number of connected non-zero components in the NIfTI file at *seg_path*.

    Components are found with ``scipy.ndimage.label`` and its default
    structuring element; the label volume itself is discarded.
    """
    seg_volume = nib.load(seg_path).get_fdata()
    _, num_features = scipy.ndimage.label(seg_volume)
    return num_features


# One dict per exam: exam name plus baseline and follow-up lesion counts.
patient_lesion_details = []

# Sorted so the rows come out in a stable order.
for exam in sorted(os.listdir(root_dir)):
    exam_path = os.path.join(root_dir, exam)
    patient_lesion_details.append({
        'exam': exam,
        'lesion_count_baseline': _count_lesions(os.path.join(exam_path, file_name_seg1)),
        'lesion_count_follow_up': _count_lesions(os.path.join(exam_path, file_name_seg2)),
    })


In [None]:
# One row per exam; lesion_comparison = baseline count minus follow-up count
# (positive when the follow-up has fewer lesions than the baseline).
lesion_df = pd.DataFrame(patient_lesion_details).assign(
    lesion_comparison=lambda df: df['lesion_count_baseline'] - df['lesion_count_follow_up']
)


In [None]:
# Bivariate distribution of baseline vs. follow-up lesion counts, with a colour bar.
sns.displot(
    data=lesion_df,
    x="lesion_count_baseline",
    y="lesion_count_follow_up",
    cbar=True,
)

## Manipulate lesion segmentation maps to create balanced dataset
 

In [None]:

# Build the OASIS lesion maps: every second exam (in sorted order) gets a
# thinned-out copy of its lesion segmentation in which only a random ~80 % of
# the lesions survive; the other exams get the segmentation unchanged.
# NOTE(review): the thinned map is saved under a "_bl" (baseline) file name
# while its lesion count is recorded as 'lesion_count_follow_up' - confirm
# that this column naming is intended.
dest_dir = '/path/oasis/data/'
file_name_seg1 = 'segmentation_1.nii.gz'
output_file_name = 'lesion_seg_oasis_bl.nii.gz'
keep_fraction = 0.8  # share of lesions retained in the thinned-out maps

# One dict per exam: exam name, lesion counts and the change flag.
oasis_details = []

for index, exam in enumerate(sorted(os.listdir(dest_dir)), start=1):
    dest_path = os.path.join(dest_dir, exam)
    output_path = os.path.join(dest_path, output_file_name)

    seg1 = nib.load(os.path.join(dest_path, file_name_seg1))  # lesion segmentation_1
    # Read through dataobj so the values keep their stored dtype (they become
    # floating point only if the header defines scaling factors).
    seg1_vol = np.asanyarray(seg1.dataobj)
    # Connected components are numbered 1..num_features; 0 is background.
    label_array, num_features = scipy.ndimage.label(seg1_vol)

    if index % 2 == 0:
        num_kept = int(keep_fraction * num_features)
        # Sample from 1..num_features inclusive so that the highest label can
        # be selected too.
        kept_labels = random.sample(range(1, num_features + 1), num_kept)
        # Keep the original voxel values of the selected lesions and zero
        # everything else, so the result uses the same value coding as the
        # untouched segmentations rather than component ids.
        keep_mask = np.isin(label_array, kept_labels)
        filtered_array = np.where(keep_mask, seg1_vol, seg1_vol.dtype.type(0))
        filtered_nifti = nib.Nifti1Image(filtered_array, seg1.affine, header=seg1.header.copy())
        nib.save(filtered_nifti, output_path)
        exam_details = {
            'exam': exam,
            'lesion_count_baseline': num_features,
            'lesion_count_follow_up': num_kept,
            # An exam without lesions cannot lose any, so it is not a change.
            'change': int(num_kept < num_features),
        }
    else:
        nib.save(seg1, output_path)
        exam_details = {
            'exam': exam,
            'lesion_count_baseline': num_features,
            'lesion_count_follow_up': num_features,
            'change': 0,
        }
    oasis_details.append(exam_details)


## Save lesion count data for exam pairs as CSV

In [None]:
# Tabulate the per-exam lesion counts and write them to disk as CSV
# (to_csv defaults apply, so the row index is written as the first column).
oasis_df = pd.DataFrame(data=oasis_details)
oasis_df.to_csv(path_or_buf='/path/oasis/oasis_lesion_labels_fold.csv')