In [None]:
#!pip install dipy
#!pip install pylibjpeg pylibjpeg-libjpeg pydicom 
#!pip install GDCM 
#!pip install gdcm
#!pip install python-gdcm

#### Import required libraries

In [None]:
import os
import re
import shutil
import glob
import pydicom
import numpy as np
import pandas as pd
import pickle as pkl
from tqdm import tqdm
import matplotlib.image as mpimg
from google.colab import files
from google.colab import drive
tqdm.pandas()
drive.mount('/content/gdrive')

##### Extracting and storing data for binary (abnormal/normal) classification

In [None]:
import zipfile

# Unpack the binary-classification archive stored on Drive into a folder next to it.
zip_source = '/content/gdrive/MyDrive/FYP/binary_classification_data.zip'
zip_target = '/content/gdrive/MyDrive/FYP/binary_classification_data'
with zipfile.ZipFile(zip_source, 'r') as zip_ref:
    zip_ref.extractall(zip_target)

#### Identify the subfolders to be considered for experimentation
* The below code extracts the subfolders that have 5mm cross sections for each patient


In [None]:
# Select one non-contrast series per subject, preferring a 5 mm slice thickness.
data_dir = '/content/gdrive/MyDrive/qc500'
stored_folders = []  # "<subject>/<id> <id>/Unknown Study/<series>" for every chosen series


def _choose_series(series_infos):
    """Pick the series to keep for one subject.

    series_infos is a non-empty list of
    [name, pixel_spacing_0, pixel_spacing_1, slice_thickness, n_slices]
    records whose numeric fields are real floats/ints, so comparisons are
    numeric rather than lexicographic.

    A series whose slice thickness is exactly 5.0 is preferred; when there is
    none, the series with the smallest slice thickness is returned.
    """
    five_mm = [info for info in series_infos if info[3] == 5.0]
    if five_mm:
        # NOTE(review): with several 5 mm series the one with the MOST slices is
        # kept. An older comment in this cell said "fewest" -- confirm the
        # intended direction.
        return max(five_mm, key=lambda info: info[4])
    return min(series_infos, key=lambda info: info[3])


for dir_entry in tqdm(os.scandir(path=data_dir)):
    if not dir_entry.is_dir():
        continue

    combined_name = ''.join(dir_entry.name.split('-'))
    study_rel = f'{dir_entry.name}/{combined_name} {combined_name}/Unknown Study'
    study_dir = os.path.join(data_dir, study_rel)
    if not os.path.isdir(study_dir):
        print("Warning: {subject} found no usable subdirectories".format(subject=dir_entry.name))
        continue

    candidates = []
    for sub_dir in os.scandir(study_dir):
        # Skip post-contrast, contrast (unless "PRE CONTRAST") and bone-kernel series.
        if not sub_dir.is_dir() or re.search("(POST|(?<!PRE )CONTRAST|BONE)", sub_dir.name):
            continue
        # glob.escape keeps brackets etc. in folder names from being read as patterns.
        dcm_files = glob.glob(os.path.join(glob.escape(sub_dir.path), "*.dcm"))
        if not dcm_files:
            continue  # nothing to read the header from
        # Only header fields are needed, so the pixel data is not loaded.
        dicom_data = pydicom.dcmread(dcm_files[0], stop_before_pixels=True)
        candidates.append([
            sub_dir.name,
            float(dicom_data.PixelSpacing[0]),
            float(dicom_data.PixelSpacing[1]),
            float(dicom_data.SliceThickness),
            len(dcm_files),
        ])

    if not candidates:
        print("Warning: {subject} found no usable subdirectories".format(subject=dir_entry.name))
        continue

    chosen = _choose_series(candidates)
    if chosen[4] < 32:
        print("Warning: Fewer than 32 slices for {}".format(dir_entry.path + "/" + chosen[0]))
    stored_folders.append(f'{study_rel}/{chosen[0]}')

##### Store the list of subfolders in a text file


In [None]:
# Persist the selected series paths, one per line. The context manager closes
# the handle even if a write fails.
with open('/content/gdrive/MyDrive/FYP/important_folders.txt', 'w') as file:
    for items in stored_folders:
        file.write(items + '\n')

#### Distribution of subfolder (series) names among the selected folders
* E.g. the number of selected subfolders named CT PLAIN THIN, CT 5mm PRECONTRAST THIN, etc.

In [None]:
# Count how many selected folders end in each series (subfolder) name.
splits_ = {}
for folder_path in stored_folders:
  series_name = folder_path.rsplit('/', 1)[-1]
  splits_[series_name] = splits_.get(series_name, 0) + 1

#### Extracting the metadata from all slices present in the dataset
* The extracted dataset is used to merge with the slices where annotations are available

In [None]:
# Header fields collected for every DICOM slice under qc_path.
meta_cols = [
             'Rows',
             'Columns',
             'InstanceNumber',
             'SOPInstanceUID',
             'PatientID',
             'SeriesInstanceUID',
             'StudyInstanceUID',
             'ScanOptions'
             ] # required metadata information
information = []
qc_path = '/content/gdrive/MyDrive/qc500/'
# The walk variable is called `filenames` so it does not shadow the
# `google.colab.files` module imported at the top of the notebook.
for path, subdirs, filenames in tqdm(os.walk(qc_path)):
    for name in filenames:
        if not name.lower().endswith('.dcm'):
            continue
        # Only header tags are read; skipping the pixel data keeps the crawl fast.
        dicom_object = pydicom.dcmread(os.path.join(path, name), stop_before_pixels=True)
        # A tag missing from a file is recorded as "" instead of aborting the crawl.
        current_dict = {col: str(getattr(dicom_object, col, "")) for col in meta_cols}
        current_dict['path'] = path
        current_dict['subfolder'] = path.split('/')[-1]
        information.append(current_dict)

# Explicit columns keep the schema stable even when no DICOM file was found.
df = pd.DataFrame(information, columns=meta_cols + ['path', 'subfolder'])
df.to_csv('/content/gdrive/MyDrive/FYP/data.csv')

##### Analysing the slices for which annotations are available. The annotations come in 3 variants and can be downloaded from https://physionet.org/content/bhx-brain-bounding-box/1.1/

In [None]:
# Annotation variant 1 (per its file name: the initial manual labelling).
og_bounding = pd.read_csv('/content/gdrive/MyDrive/FYP/1_Initial_Manual_Labeling.csv')
# Annotation variant 3 (per its file name: labels extrapolated to the selected series).
selected_bounding = pd.read_csv('/content/gdrive/MyDrive/FYP/3_Extrapolation_to_Selected_Series.csv')

In [None]:
# Reload the slice metadata and keep only the slices that have an entry in the
# extrapolated annotation table (inner join on the three UID columns).
df = pd.read_csv('/content/gdrive/MyDrive/FYP/data.csv')
uid_keys = ['SOPInstanceUID', 'StudyInstanceUID', 'SeriesInstanceUID']
data_with_bounding_selected = df.merge(selected_bounding, how='inner', on=uid_keys)

# Unique patient IDs that own at least one annotated slice.
abnormal_haemo = list(set(data_with_bounding_selected['PatientID']))

In [None]:
# Persist the hemorrhage patient IDs; a later cell reloads this pickle before copying scans.
with open('/content/gdrive/MyDrive/FYP/abnormal_hemorrhage.pkl', 'wb') as f:
  pkl.dump(abnormal_haemo, f)

In [None]:
# abnormal_scans_list.pkl holds the patients flagged with one or more abnormalities
# in the radiologists' reads (it is written by a later cell of this notebook).
with open('/content/gdrive/MyDrive/FYP/abnormal_scans_list.pkl', 'rb') as f:
  all_abnormal_scans = pkl.load(f)

# Abnormal patients that are not in the hemorrhage list.
other_abnormalities = list(set(all_abnormal_scans).difference(set(abnormal_haemo)))
with open('/content/gdrive/MyDrive/FYP/abnormal_others.pkl', 'wb') as f:
  pkl.dump(other_abnormalities, f)

##### Merging the metadata parsed from the dataset with the annotation tables (where bounding-box coordinates are available).
* The datasets are merged on SOPInstanceUID (slice), StudyInstanceUID (patient) and SeriesInstanceUID (subfolder in our case)

In [None]:
# Slices that appear in the original (manual) annotation table.
data_with_bounding_original = pd.merge(df, og_bounding, on=['SOPInstanceUID', 'StudyInstanceUID', 'SeriesInstanceUID'], how='inner')
patient_ids = set(data_with_bounding_original['PatientID'])
for patient in patient_ids:
  sample = data_with_bounding_original[data_with_bounding_original['PatientID'] == patient]
  print("\n\nPatient ", patient, "\n", sample['subfolder'].value_counts())
# Report the real number of patients: the last enumerate index would be one
# short, and undefined when there are no patients at all.
print("Total number of patients", len(patient_ids))

In [None]:
# Slices that appear in the extrapolated annotation table, summarised per patient.
uid_keys = ['SOPInstanceUID', 'StudyInstanceUID', 'SeriesInstanceUID']
data_with_bounding_selected = df.merge(selected_bounding, how='inner', on=uid_keys)
for count, patient in enumerate(set(data_with_bounding_selected['PatientID'])):
  patient_mask = data_with_bounding_selected['PatientID'] == patient
  sub = data_with_bounding_selected[patient_mask]
  subfolder_counts = sub['subfolder'].value_counts()
  print("\n\nPatient ", patient, "\n", subfolder_counts)

#### Testing the distribution / number of subfolders for a patient

In [None]:
# Spot-check one patient: annotated slices per subfolder in each annotation variant.
test_patient = 'CQ500-CT-284'
sample_sel_test = data_with_bounding_selected.loc[data_with_bounding_selected['PatientID'] == test_patient]
sample_og_test = data_with_bounding_original.loc[data_with_bounding_original['PatientID'] == test_patient]
original_counts = sample_og_test['subfolder'].value_counts()
extrapolated_counts = sample_sel_test['subfolder'].value_counts()
print("Original : ", original_counts)
print("Extrapolated : ", extrapolated_counts)

In [None]:
##### Checking dimensions of the slices
# Displays a tuple of two Series: how often each Rows value and each Columns value occurs in df.
df['Rows'].value_counts(), df['Columns'].value_counts()

#### Extracting the required subfolders for each patient where localized annotations are available

In [None]:
# Map each annotated patient to the subfolder(s) whose slices should be used.
required_subs = {}
patients = set(data_with_bounding_original['PatientID'])

for patient in tqdm(patients):
  selected_box = data_with_bounding_selected[data_with_bounding_selected['PatientID'] == patient]
  original_box = data_with_bounding_original[data_with_bounding_original['PatientID'] == patient]
  # Distinct subfolder names in which this patient has annotated slices.
  selected_box_subs = set(selected_box['subfolder'].dropna())
  original_box_subs = set(original_box['subfolder'].dropna())
  if (len(original_box_subs) > 1) or (len(selected_box_subs) > 1):
    # Several candidate subfolders: keep those that occur only in the
    # extrapolated annotations (this can be empty).
    required_folder = selected_box_subs - original_box_subs
    required_subs[patient] = list(required_folder)
  else:
    required_subs[patient] = list(selected_box_subs)

In [None]:
# Resolve each patient's first required subfolder to the directory path recorded in the metadata.
list_of_subfolders = []
for patient, folder in tqdm(required_subs.items()):
  if not folder:
    continue  # no subfolder was retained for this patient
  is_patient = data_with_bounding_selected['PatientID'] == patient
  is_folder = data_with_bounding_selected['subfolder'] == folder[0]
  sample = data_with_bounding_selected[is_patient & is_folder]
  list_of_subfolders.append(sample['path'].values[0])

In [None]:
# Store the list of subfolders that have localized annotations for slices in a text file.
# The context manager closes the handle even if a write fails.
with open('/content/gdrive/MyDrive/FYP/hemorrhage_subfolders.txt', 'w') as file:
    for items in list_of_subfolders:
        file.write(items + '\n')

In [None]:
# Read back the annotated subfolder paths; component 5 of each absolute path is
# the subject folder name (e.g. /content/gdrive/MyDrive/qc500/<subject>/...).
with open('/content/gdrive/MyDrive/FYP/hemorrhage_subfolders.txt', 'r') as f:
  lines = f.readlines()
codes = [l.split('/')[5] for l in lines]
annotated_subjects = set(codes)  # set lookup instead of scanning the list for every folder

missing = []
path = '/content/gdrive/MyDrive/qc500'
for fi in os.listdir(path):
  if os.path.isdir(os.path.join(path, fi)):
    if str(fi) not in annotated_subjects:
      missing.append(fi)
print("# Missing : ", len(missing))

# Store the list of subfolders that do not have localized annotations for slices in a text file
with open('/content/gdrive/MyDrive/FYP/non_hemorrhage.txt', 'w') as file:
    for items in missing:
        file.write(items + '\n')

In [None]:
# Contains the ground truth values provided by each of the radiologists
# NOTE(review): other cells read reads.csv from the FYP folder and from './' -- confirm these are the same file.
reads = pd.read_csv('/content/gdrive/MyDrive/qc500/reads.csv')

In [None]:
# Reload the selected series paths (one per line, relative to the qc500 folder).
# The context manager makes sure the handle is closed.
with open('/content/gdrive/MyDrive/FYP/important_folders.txt', 'r') as file:
    subfolders = [x.strip() for x in file.readlines()]

##### Storing subfolders information for normal scans

In [None]:
# Copy the selected series of every normal scan into Dataset/normal_scans/<scan>/<series>.
with open('/content/gdrive/MyDrive/FYP/normal_scans_list.pkl', 'rb') as f:
  normal_scans = pkl.load(f)
for scan in tqdm(normal_scans):
  for series in subfolders:
    scan_no = series.split('/')[0]
    if scan != scan_no:
      continue
    source = os.path.join('/content/gdrive/MyDrive/qc500', series)
    cur = series.split('/')[-1]
    destination = os.path.join('/content/gdrive/MyDrive/FYP/Dataset/normal_scans', scan, cur)
    if os.path.isdir(destination):
      # copytree raises FileExistsError on an existing target; skipping lets the
      # cell be re-run. Delete a half-copied folder by hand to force a re-copy.
      continue
    shutil.copytree(source, destination)

##### Storing subfolders information for abnormal haemorrhage scans


In [None]:
# Copy the annotated series of every hemorrhage patient into
# Dataset/abnormal_hemorrhage/<scan>/<series>.
with open('/content/gdrive/MyDrive/FYP/abnormal_haemo_paths.pkl', 'rb') as f:
    selected_paths = pkl.load(f)
with open('/content/gdrive/MyDrive/FYP/abnormal_hemorrhage.pkl', 'rb') as f:
  abnormal_haemo = pkl.load(f)
for scan in abnormal_haemo:
  for series in selected_paths:
    # Component 5 is read as the subject id, so these paths are presumably
    # absolute (/content/gdrive/MyDrive/qc500/<subject>/...) -- TODO confirm.
    scan_no = series.split('/')[5]
    if scan != scan_no:
      continue
    # With an absolute `series`, os.path.join returns `series` unchanged.
    source = os.path.join('/content/gdrive/MyDrive/qc500', series)
    cur = series.split('/')[-1]
    destination = os.path.join('/content/gdrive/MyDrive/FYP/Dataset/abnormal_hemorrhage', scan, cur)
    if os.path.isdir(destination):
      # copytree raises FileExistsError on an existing target; skipping lets the
      # cell be re-run. Delete a half-copied folder by hand to force a re-copy.
      continue
    shutil.copytree(source, destination)

##### Storing subfolder information for other abnormal (non-hemorrhage) scans


In [None]:
# Copy the selected series of every non-hemorrhage abnormal scan into
# Dataset/abnormal_others/<scan>/<series>.
with open('/content/gdrive/MyDrive/FYP/abnormal_others.pkl', 'rb') as f:
  other_abnormalities = pkl.load(f)
for scan in tqdm(other_abnormalities):
  for series in subfolders:
    scan_no = series.split('/')[0]
    if scan != scan_no:
      continue
    source = os.path.join('/content/gdrive/MyDrive/qc500', series)
    cur = series.split('/')[-1]
    destination = os.path.join('/content/gdrive/MyDrive/FYP/Dataset/abnormal_others', scan, cur)
    if os.path.isdir(destination):
      # copytree raises FileExistsError on an existing target; skipping lets the
      # cell be re-run. Delete a half-copied folder by hand to force a re-copy.
      continue
    shutil.copytree(source, destination)

##### Obtain the ground truth values for patients who have an abnormality (reported by at least two radiologists)

In [None]:
# Majority vote over the three readers' columns (R1:, R2:, R3:) for every finding.
ground_truth = pd.read_csv('/content/gdrive/MyDrive/FYP/reads.csv')
verdict = [
    'ICH', 'IPH', 'IVH', 'SDH', 'EDH', 'SAH',
    'BleedLocation-Left', 'BleedLocation-Right',
    'ChronicBleed',
    'Fracture', 'CalvarialFracture', 'OtherFracture',
    'MassEffect', 'MidlineShift',
]
for col in verdict:
  votes = ground_truth[f'R1:{col}'] + ground_truth[f'R2:{col}'] + ground_truth[f'R3:{col}']
  # 1 when the three columns sum to 2 or more, else 0.
  ground_truth[col] = votes.ge(2).astype(int)
ground_truth.to_csv('/content/gdrive/MyDrive/FYP/ground_truth.csv')

##### Storing the meta-information of the slices

In [None]:
# Identifier tags collected for every DICOM slice that was copied into the Dataset folder.
meta_cols = [
             'SOPInstanceUID',
             'PatientID',
             'SeriesInstanceUID',
             'StudyInstanceUID',
             ] # required metadata information
information = []
data_path = '/content/gdrive/MyDrive/FYP/Dataset'
# The walk variable is called `filenames` so it does not shadow the
# `google.colab.files` module imported at the top of the notebook.
for path, subdirs, filenames in os.walk(data_path):
  for name in tqdm(filenames):
    if not name.lower().endswith('.dcm'):
      continue
    # Only header tags are read; the pixel data is skipped.
    dicom_object = pydicom.dcmread(os.path.join(path, name), stop_before_pixels=True)
    # A tag missing from a file is recorded as "" instead of aborting the crawl.
    current_dict = {col: str(getattr(dicom_object, col, "")) for col in meta_cols}
    current_dict['subfolder'] = path.split('/')[-1]
    information.append(current_dict)

# Explicit columns keep the schema stable even when no DICOM file was found.
df = pd.DataFrame(information, columns=meta_cols + ['subfolder'])
df.to_csv('/content/gdrive/MyDrive/FYP/required_dicom_data.csv')

##### Convert each DICOM slice to a brain-windowed image and save it as a PNG

In [None]:
def transform_to_hu(medical_image, image):
    """Apply the dataset's linear rescale to a pixel array.

    medical_image must expose RescaleSlope and RescaleIntercept; the result is
    image * RescaleSlope + RescaleIntercept.
    """
    return image * medical_image.RescaleSlope + medical_image.RescaleIntercept

def window_image(image, window_center, window_width):
    """Return a copy of image clamped to the given intensity window.

    The window spans window_center -/+ (window_width // 2); values outside it
    are set to the nearest bound. The input array is left untouched.
    """
    half_width = window_width // 2
    lower = window_center - half_width
    upper = window_center + half_width

    clamped = image.copy()
    clamped[clamped < lower] = lower
    clamped[clamped > upper] = upper
    return clamped

# Convert every DICOM slice under data_path to a brain-windowed PNG named after its SOPInstanceUID.
meta_cols = ['SOPInstanceUID',] # required metadata information
data_path = '/content/gdrive/MyDrive/FYP/Dataset'
png_dir = '/content/gdrive/MyDrive/FYP/required_dicoms'
os.makedirs(png_dir, exist_ok=True)  # imsave does not create missing folders
for path, subdirs, filenames in os.walk(data_path):
  for name in tqdm(filenames):
    if not name.lower().endswith('.dcm'):
      continue
    medical_image = pydicom.dcmread(os.path.join(path, name))
    # Take the UID from the slice just read. Reading it from a leftover object
    # of an earlier cell gives every PNG the same name, so each one overwrites
    # the previous.
    current_dicom = str(medical_image.SOPInstanceUID)
    image = medical_image.pixel_array
    hu_image = transform_to_hu(medical_image, image)
    brain_image = window_image(hu_image, 40, 80)  # window centre 40, width 80
    destination = f'{png_dir}/{current_dicom}.png'
    # NOTE(review): imsave colour-maps 2-D arrays with matplotlib's default
    # colormap; pass cmap='gray' if grayscale PNGs are intended.
    mpimg.imsave((destination), brain_image)

In [None]:
# Identifier table written by the Dataset metadata crawl (one row per copied slice).
reqd = pd.read_csv('/content/gdrive/MyDrive/FYP/required_dicom_data.csv')
# Annotation variant 3 (per its file name: labels extrapolated to the selected series).
abnormal = pd.read_csv('/content/gdrive/MyDrive/FYP/3_Extrapolation_to_Selected_Series.csv')

In [None]:
# Right join: keep every copied slice and attach its annotation rows when it has any.
merge_keys = ['SOPInstanceUID', 'SeriesInstanceUID', 'StudyInstanceUID']
data = abnormal.merge(reqd, how='right', on=merge_keys)
# Drop patients whose abnormality is not a hemorrhage.
keep_rows = ~data['PatientID'].isin(other_abnormalities)
data = data[keep_rows]
# A slice is abnormal exactly when it carries a string label (unlabelled rows hold NaN).
data['abnormal'] = data['labelName'].map(lambda label: int(type(label) == str))
data.to_csv('/content/gdrive/MyDrive/FYP/master_key_hemo_and_normal.csv')

##### Extracting the patient lists for normal / hemorrhage / other abnormalities

In [None]:
data = pd.read_csv('./reads.csv')

# Findings scored by each reader; the per-reader columns are named "R<k>:<finding>".
findings = ['ICH', 'IPH', 'IVH', 'SDH', 'EDH',
            'SAH', 'BleedLocation-Left', 'BleedLocation-Right',
            'ChronicBleed', 'Fracture', 'CalvarialFracture',
            'OtherFracture', 'MassEffect', 'MidlineShift']

# All 42 reader columns, ordered R1 (all findings), then R2, then R3.
cols = [f'{reader}:{finding}' for reader in ('R1', 'R2', 'R3') for finding in findings]

# A scan is normal only when every reader marked every finding as 0.
extr = data[(data[cols] == 0).all(axis=1)]

In [None]:
import pickle as pkl

# Names of the scans on which all readers reported no finding.
normal_scans = extr['name'].tolist()
with open('./normal_scans_list.pkl', 'wb') as f:
    pkl.dump(normal_scans, f)

In [None]:
# Every scan in reads.csv that is not in the normal list.
all_scan_names = set(data['name'])
abnormal_scans = list(all_scan_names.difference(set(normal_scans)))
with open('./abnormal_scans_list.pkl', 'wb') as f:
    pkl.dump(abnormal_scans, f)

##### Split the dataset into train and test set and store the files

* Binary Classification Data

In [None]:
# Slice UIDs per binary class, taken from the master key.
df = pd.read_csv('/content/gdrive/MyDrive/FYP/master_key_hemo_and_normal.csv')
normal = list(df[df['abnormal'] == 0]['SOPInstanceUID'].values)
abnormal = list(df[df['abnormal'] == 1]['SOPInstanceUID'].values)

# NOTE(review): root_dir and src are relative to the current working directory,
# while the PNGs are written under /content/gdrive/MyDrive/FYP -- confirm the cwd.
root_dir = './binary_classification_data/' # data root path
classes_dir = ['normal', 'abnormal'] #total labels

val_ratio = 0.20
test_ratio = 0.15

# exist_ok lets the cell be re-run without failing on folders that already exist.
for cls in classes_dir:
    for split in ('train', 'val', 'test'):
        os.makedirs(os.path.join(root_dir, split, cls), exist_ok=True)

src = './required_dicoms'
# Sorted PNG base names: a stable order to shuffle from, and the extension is
# removed with splitext instead of cutting a fixed four characters.
allFileNames = [os.path.splitext(entry)[0]
                for entry in sorted(os.listdir(src))
                if entry.lower().endswith('.png')]

In [None]:
# Seeded shuffle of a sorted list: the split is reproducible and re-running the
# cell gives the same partition.
# NOTE(review): the split is per slice, so slices of one patient can land in
# different subsets -- confirm this is acceptable for evaluation.
SPLIT_SEED = 42
allFileNames = sorted(allFileNames)
np.random.default_rng(SPLIT_SEED).shuffle(allFileNames)
n_files = len(allFileNames)
train_FileNames, val_FileNames, test_FileNames = np.split(
    np.array(allFileNames),
    [int(n_files * (1 - (val_ratio + test_ratio))), int(n_files * (1 - test_ratio))])


def _members(split_names, class_uids):
    """Return the names of split_names (order kept) that are contained in class_uids."""
    lookup = set(class_uids)  # constant-time membership instead of scanning a list per name
    return [name for name in split_names.tolist() if name in lookup]


train_normal = _members(train_FileNames, normal)
train_abnormal = _members(train_FileNames, abnormal)

validation_normal = _members(val_FileNames, normal)
validation_abnormal = _members(val_FileNames, abnormal)

test_normal = _members(test_FileNames, normal)
test_abnormal = _members(test_FileNames, abnormal)

In [None]:
# Names that ended up in no class list (images without a row in the master key).
finals = train_normal + train_abnormal + validation_normal + validation_abnormal + test_normal + test_abnormal
assigned = set(finals)  # set lookup avoids rescanning the whole list for every name
all_missing = [name for name in allFileNames if name not in assigned]

In [None]:
def _in_src(names):
    """Prefix every base name with the source image folder."""
    return [src + '/' + name for name in names]


# NOTE: this cell rewrites the lists in place, so running it twice prefixes the paths twice.
train_normal, train_abnormal = _in_src(train_normal), _in_src(train_abnormal)
validation_normal, validation_abnormal = _in_src(validation_normal), _in_src(validation_abnormal)
test_normal, test_abnormal = _in_src(test_normal), _in_src(test_abnormal)

print('Total images: ', len(allFileNames))

binary_counts = (
    ('normal', train_normal, validation_normal, test_normal),
    ('abnormal', train_abnormal, validation_abnormal, test_abnormal),
)
for label, train_items, val_items, test_items in binary_counts:
    print(f'Training ({label}): ', len(train_items))
    print(f'Validation ({label}): ', len(val_items))
    print(f'Testing ({label}): ', len(test_items))


In [None]:
# Copy every PNG into <root_dir>/<split>/<class>, class by class and split by split.
binary_copy_plan = (
    ('normal', train_normal, validation_normal, test_normal),
    ('abnormal', train_abnormal, validation_abnormal, test_abnormal),
)
for label, train_items, val_items, test_items in binary_copy_plan:
    for split_dir, items in (('train/', train_items), ('val/', val_items), ('test/', test_items)):
        for name in tqdm(items):
            shutil.copy(name + '.png', root_dir + split_dir + label)

* Multi-Class Classification Data

In [None]:
# Start from the binary master key.
df = pd.read_csv('/content/gdrive/MyDrive/FYP/master_key_hemo_and_normal.csv')
# Rows without a labelName (no annotation) are given the class 'Normal'.
df['what_abnormality'] = df['labelName'].fillna('Normal')
df.to_csv('/content/gdrive/MyDrive/FYP/master_key_multiClass.csv')

In [None]:
root_dir = './multi_classification_data/' # data root path
# One class folder per distinct value of what_abnormality (set order is arbitrary).
classes_dir = list(set(df['what_abnormality']))

In [None]:
 #total labels
val_ratio = 0.20
test_ratio = 0.15

# exist_ok lets the cell be re-run without failing on folders that already exist.
for cls in classes_dir:
    for split in ('train', 'val', 'test'):
        os.makedirs(os.path.join(root_dir, split, cls), exist_ok=True)

In [None]:
src = './required_dicoms'
# Base names of the exported images (the last four characters, e.g. ".png", are cut off).
allFileNames = [entry[:-4] for entry in os.listdir(src)]


def _uids_labelled(label):
    """Return the SOPInstanceUIDs of the rows of df whose what_abnormality equals label."""
    return df[df['what_abnormality'] == label]['SOPInstanceUID'].tolist()


Intraventricular = _uids_labelled('Intraventricular')
Normal = _uids_labelled('Normal')
Subarachnoid = _uids_labelled('Subarachnoid')
Chronic = _uids_labelled('Chronic')
Intraparenchymal = _uids_labelled('Intraparenchymal')
Epidural = _uids_labelled('Epidural')
Subdural = _uids_labelled('Subdural')

In [None]:
# Seeded shuffle of a sorted list: the split is reproducible and re-running the
# cell gives the same partition.
# NOTE(review): the split is per slice, so slices of one patient can land in
# different subsets -- confirm this is acceptable for evaluation.
SPLIT_SEED = 42
allFileNames = sorted(allFileNames)
np.random.default_rng(SPLIT_SEED).shuffle(allFileNames)
n_files = len(allFileNames)
train_FileNames, val_FileNames, test_FileNames = np.split(
    np.array(allFileNames),
    [int(n_files * (1 - (val_ratio + test_ratio))), int(n_files * (1 - test_ratio))])


def _members(split_names, class_uids):
    """Return the names of split_names (order kept) that are contained in class_uids."""
    lookup = set(class_uids)  # constant-time membership instead of scanning a list per name
    return [name for name in split_names.tolist() if name in lookup]


train_Intraventricular = _members(train_FileNames, Intraventricular)
train_Normal = _members(train_FileNames, Normal)
train_Subarachnoid = _members(train_FileNames, Subarachnoid)
train_Chronic = _members(train_FileNames, Chronic)
train_Intraparenchymal = _members(train_FileNames, Intraparenchymal)
train_Epidural = _members(train_FileNames, Epidural)
train_Subdural = _members(train_FileNames, Subdural)

validation_Intraventricular = _members(val_FileNames, Intraventricular)
validation_Normal = _members(val_FileNames, Normal)
validation_Subarachnoid = _members(val_FileNames, Subarachnoid)
validation_Chronic = _members(val_FileNames, Chronic)
validation_Intraparenchymal = _members(val_FileNames, Intraparenchymal)
validation_Epidural = _members(val_FileNames, Epidural)
validation_Subdural = _members(val_FileNames, Subdural)

test_Intraventricular = _members(test_FileNames, Intraventricular)
test_Normal = _members(test_FileNames, Normal)
test_Subarachnoid = _members(test_FileNames, Subarachnoid)
test_Chronic = _members(test_FileNames, Chronic)
test_Intraparenchymal = _members(test_FileNames, Intraparenchymal)
test_Epidural = _members(test_FileNames, Epidural)
test_Subdural = _members(test_FileNames, Subdural)

In [None]:
def _in_src(names):
    """Prefix every base name with the source image folder."""
    return [src + '/' + name for name in names]


# NOTE: this cell rewrites the lists in place, so running it twice prefixes the paths twice.
train_Intraventricular = _in_src(train_Intraventricular)
train_Normal = _in_src(train_Normal)
train_Subarachnoid = _in_src(train_Subarachnoid)
train_Chronic = _in_src(train_Chronic)
train_Intraparenchymal = _in_src(train_Intraparenchymal)
train_Epidural = _in_src(train_Epidural)
train_Subdural = _in_src(train_Subdural)

validation_Intraventricular = _in_src(validation_Intraventricular)
validation_Normal = _in_src(validation_Normal)
validation_Subarachnoid = _in_src(validation_Subarachnoid)
validation_Chronic = _in_src(validation_Chronic)
validation_Intraparenchymal = _in_src(validation_Intraparenchymal)
validation_Epidural = _in_src(validation_Epidural)
validation_Subdural = _in_src(validation_Subdural)

test_Intraventricular = _in_src(test_Intraventricular)
test_Normal = _in_src(test_Normal)
test_Subarachnoid = _in_src(test_Subarachnoid)
test_Chronic = _in_src(test_Chronic)
test_Intraparenchymal = _in_src(test_Intraparenchymal)
test_Epidural = _in_src(test_Epidural)
test_Subdural = _in_src(test_Subdural)

In [None]:
print('Total images: ', len(allFileNames))

# Per-class sizes of the three subsets, reported class by class.
class_counts = (
    ('Intraventricular', train_Intraventricular, validation_Intraventricular, test_Intraventricular),
    ('Normal', train_Normal, validation_Normal, test_Normal),
    ('Subarachnoid', train_Subarachnoid, validation_Subarachnoid, test_Subarachnoid),
    ('Chronic', train_Chronic, validation_Chronic, test_Chronic),
    ('Intraparenchymal', train_Intraparenchymal, validation_Intraparenchymal, test_Intraparenchymal),
    ('Epidural', train_Epidural, validation_Epidural, test_Epidural),
    ('Subdural', train_Subdural, validation_Subdural, test_Subdural),
)
for label, train_items, val_items, test_items in class_counts:
    print(f'Training ({label}): ', len(train_items))
    print(f'Validation ({label}): ', len(val_items))
    print(f'Testing ({label}): ', len(test_items))

In [None]:
# Copy every PNG into <root_dir>/<split>/<class>, class by class and split by split.
class_copy_plan = (
    ('Intraventricular', train_Intraventricular, validation_Intraventricular, test_Intraventricular),
    ('Normal', train_Normal, validation_Normal, test_Normal),
    ('Subarachnoid', train_Subarachnoid, validation_Subarachnoid, test_Subarachnoid),
    ('Chronic', train_Chronic, validation_Chronic, test_Chronic),
    ('Intraparenchymal', train_Intraparenchymal, validation_Intraparenchymal, test_Intraparenchymal),
    ('Epidural', train_Epidural, validation_Epidural, test_Epidural),
    ('Subdural', train_Subdural, validation_Subdural, test_Subdural),
)
for label, train_items, val_items, test_items in class_copy_plan:
    for split_dir, items in (('train/', train_items), ('val/', val_items), ('test/', test_items)):
        for name in tqdm(items):
            shutil.copy(name + '.png', root_dir + split_dir + label)

