# Generate Metadata for Segmentation Images

Zitian Tang

11/10/2021

In [5]:
import numpy as np
import pandas as pd

## Filesystem ##
import os
from os import listdir
from os.path import isfile, join

## PyTorch and TorchVision ##
import torch
import torchvision

## Nibabel ##
import nibabel as nib

## Scikit Learn ##
from sklearn.model_selection import train_test_split

In [59]:
## Image Directory ##
# Root folder that holds the three raw datasets; every path below is built from it.
image_directory = '/datadrive/COVID_CT_Images'

# Dataset 1 (combine train and val images)
# Each Dataset 1 path points at a single .nii file (image, lung mask, infection mask).
D1_im_dir = os.path.join(image_directory, 'Dataset_1/tr_im.nii')
D1_lung_mask_dir = os.path.join(image_directory, 'Dataset_1/tr_lungmasks_updated.nii')
D1_covid_mask_dir = os.path.join(image_directory, 'Dataset_1/tr_mask.nii')

# Dataset 2
# Each Dataset 2 path is a directory; the slicing cell reads '<n>.nii' files from it.
D2_im_dir = os.path.join(image_directory, 'Dataset_2/rp_im')
D2_lung_mask_dir = os.path.join(image_directory, 'Dataset_2/rp_lung_msk')
D2_covid_mask_dir = os.path.join(image_directory, 'Dataset_2/rp_msk')

# Dataset 3
# Each Dataset 3 path is a directory; the slicing cell reads '<n>.nii' files from it.
D3_im_dir = os.path.join(image_directory, 'Dataset_3/COVID-19-CT-Seg_20cases')
D3_lung_mask_dir = os.path.join(image_directory, 'Dataset_3/Lung_Mask')
D3_covid_mask_dir = os.path.join(image_directory, 'Dataset_3/Infection_Mask')

## Slices Directory ##
# Output folder: the slicing cells write one .npy file per 2D slice here.
slices_path = '/datadrive/COVID_CT_Images/CT_slices'

## Metadata Directory ##
# Full path of the CSV that the last cell writes.
metadata_path = '/home/zitiantang/code/Segmentation/metadata.csv'

## Dataset Sizes ##
# NOTE: the three sizes are not the same kind of count.
#   data1_size     -> length of the third axis of the Dataset 1 image volume
#   data2/3_size   -> number of directory entries, not slices
# get_fdata() reads the whole Dataset 1 volume into memory here.
data1 = nib.load(D1_im_dir).get_fdata()
data1_size = data1.shape[2]
data2_size = len(listdir(D2_im_dir))
# NOTE(review): the "-1" presumably skips one non-volume entry in the folder
# (e.g. a readme or checkpoint directory) -- TODO confirm against the directory listing.
data3_size = len(listdir(D3_im_dir))-1

# Save Each Slice to a new Folder

In [15]:
## Dataset 1 ##
# Dataset 1 is stored as three single NIfTI files (image, lung mask, infection
# mask). Each slice along the third axis is written to its own .npy file.

# Load every volume exactly once. These calls do not depend on the loop index,
# so keeping them inside the loop re-read all three files for every slice.
image_data = nib.load(D1_im_dir).get_fdata()
lung_mask_data = nib.load(D1_lung_mask_dir).get_fdata()
covid_mask_data = nib.load(D1_covid_mask_dir).get_fdata()

# np.save does not create missing parent folders; make sure the target exists
# so the cell also works on a fresh machine.
os.makedirs(slices_path, exist_ok=True)

for i in range(data1_size):
    # get slices
    image_slice = image_data[:,:,i]
    lung_mask_slice = lung_mask_data[:,:,i]
    covid_mask_slice = covid_mask_data[:,:,i]
    # where to save
    # (the file-name pattern is parsed again when the metadata table is built,
    #  so the resulting names are kept exactly as before)
    image_path = f'{slices_path}/CT_lung_Dataset_20-03-24_image_{i}.npy'
    lung_mask_path = f'{slices_path}/CT_lung_Dataset_20-03-24_lung_mask_{i}.npy'
    covid_mask_path = f'{slices_path}/CT_lung_Dataset_20-03-24_covid_mask_{i}.npy'
    np.save(image_path, image_slice)
    np.save(lung_mask_path, lung_mask_slice)
    np.save(covid_mask_path, covid_mask_slice)

In [26]:
## Dataset 2 ##
# Reads 1.nii .. 9.nii from each of the three Dataset 2 folders and writes every
# slice along the third axis to its own .npy file. `counter` numbers the output
# files continuously across all volumes.
d2_prefix = slices_path+'/CT_lung_Dataset_20-04-13'
counter = 0
for vol_id in range(1,10):
    # the same file name is used in the image, lung-mask and infection-mask folders
    nii_name = '/%s.nii'%f'{vol_id}'
    image_data = nib.load(D2_im_dir+nii_name).get_fdata()
    lung_mask_data = nib.load(D2_lung_mask_dir+nii_name).get_fdata()
    covid_mask_data = nib.load(D2_covid_mask_dir+nii_name).get_fdata()

    n_slices = image_data.shape[2]
    for k in range(n_slices):
        # cut the k-th slice out of all three volumes before anything is written
        image_slice, lung_mask_slice, covid_mask_slice = (
            vol[:,:,k] for vol in (image_data, lung_mask_data, covid_mask_data)
        )
        # file names share the running slice number
        tag = f'{counter}'
        np.save(d2_prefix+'_image_%s.npy'%tag, image_slice)
        np.save(d2_prefix+'_lung_mask_%s.npy'%tag, lung_mask_slice)
        np.save(d2_prefix+'_covid_mask_%s.npy'%tag, covid_mask_slice)
        counter += 1

In [45]:
## Dataset 3 ##
# Reads 1.nii .. 20.nii from each of the three Dataset 3 folders and writes every
# slice along the third axis to its own .npy file. `counter` numbers the output
# files continuously across all volumes.
d3_prefix = slices_path+'/CT_lung_Dataset_20-04-20'
counter = 0
for vol_id in range(1,21):
    # the same file name is used in the image, lung-mask and infection-mask folders
    nii_name = '/%s.nii'%f'{vol_id}'
    image_data = nib.load(D3_im_dir+nii_name).get_fdata()
    lung_mask_data = nib.load(D3_lung_mask_dir+nii_name).get_fdata()
    covid_mask_data = nib.load(D3_covid_mask_dir+nii_name).get_fdata()

    n_slices = image_data.shape[2]
    for k in range(n_slices):
        # cut the k-th slice out of all three volumes before anything is written
        image_slice, lung_mask_slice, covid_mask_slice = (
            vol[:,:,k] for vol in (image_data, lung_mask_data, covid_mask_data)
        )
        # file names share the running slice number
        tag = f'{counter}'
        np.save(d3_prefix+'_image_%s.npy'%tag, image_slice)
        np.save(d3_prefix+'_lung_mask_%s.npy'%tag, lung_mask_slice)
        np.save(d3_prefix+'_covid_mask_%s.npy'%tag, covid_mask_slice)
        counter += 1

# Generate Dataframe

In [65]:
## Build the metadata table, or reuse the CSV if it already exists ##
# One row per image slice: path of the image, paths of its two masks, and a
# label naming the dataset the slice came from.
metadata_columns = ['CT_image_path', 'lung_mask_path', 'covid_infection_mask_path', 'Dataset_Label']

# Test the configured path itself rather than a second hard-coded copy of its
# folder and file name, so the check cannot drift away from `metadata_path`.
if os.path.isfile(metadata_path):
    # Load the cached table so `metadata_df` is defined on this branch as well.
    # index_col=0: to_csv below writes the row index as the first column.
    metadata_df = pd.read_csv(metadata_path, index_col=0)
else:
    records = []
    # listdir returns entries in arbitrary order; sorting makes the row order
    # (and therefore the CSV) reproducible between runs.
    for file_name in sorted(listdir(slices_path)):
        # mask files are named '..._lung_mask_N.npy' / '..._covid_mask_N.npy'
        # and do not contain 'image', so this keeps only the image slices
        if 'image' not in file_name:
            continue
        # '<front>image<end>' -> the matching masks swap 'image' for the mask tag
        front, end = file_name.split('image', 1)
        records.append((
            os.path.join(slices_path, file_name),                # image path
            os.path.join(slices_path, front+'lung_mask'+end),    # lung mask
            os.path.join(slices_path, front+'covid_mask'+end),   # infection mask
            'CT_'+file_name.split('_')[3],                       # which dataset it belongs to
        ))

    ## create dataframe ##
    # passing `columns` keeps the four headers even when no slices were found
    metadata_df = pd.DataFrame(records, columns=metadata_columns)
    metadata_df.to_csv(metadata_path)

# last expression of the cell, so the preview is actually displayed
metadata_df.head()