In [None]:
# Install the third-party packages this notebook depends on (run once per fresh runtime).
!pip install boto3
!pip install torchxrayvision
!pip install SimpleITK
!pip install radiomics
!pip install pyradiomics
!pip install torchvision

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Check for CUDA-enabled hardware: show the GPU status via nvidia-smi, then ask
# PyTorch whether it can use CUDA (the last expression is displayed as the cell output).
!nvidia-smi
import torch
torch.cuda.is_available()

Fri May  5 03:09:18 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

True

In [None]:
# importing libraries
from time import time
import numpy as np
import pandas as pd
from glob import glob
import os
import skimage
import SimpleITK as sitk
import skimage, torchvision
import torchxrayvision as xrv
import torchvision
import torchxrayvision as xrv
import matplotlib.pyplot as plt
from tqdm import tqdm
# importing written classes and methods
import sys
from radiomics import featureextractor
from radiomics import firstorder, glcm, glrlm, glszm, shape, shape2D, ngtdm, gldm
import warnings
import logging

#------------------------------------------------------------------------------
# Suppress DeprecationWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Only let ERROR-level records through: first for the whole "radiomics" logger
# hierarchy, then explicitly for the "radiomics.glcm" logger.
# After the loop, `logger` is bound to the "radiomics.glcm" logger.
for _radiomics_logger_name in ("radiomics", "radiomics.glcm"):
    logger = logging.getLogger(_radiomics_logger_name)
    logger.setLevel(logging.ERROR)

In [None]:
# Root folder of the MIMIC working data; every output folder below lives directly under it.
directory_ = '/content/sample_data/MIMIC/'

# One output folder each for processed images, binary filters, raw segments and cleaned targets.
(
    directory_processed_images,
    directory_filterss,
    directory_segments,
    directory_cleaned_targets,
) = (
    directory_ + sub_folder
    for sub_folder in ('processed_images', 'filters', 'segments', 'cleaned_targets')
)

# The folders above already exist, so the directory-creation code below is not needed.
# try:
#   os.makedirs(directory_processed_images)
# except:
#   pass

# try:
#   os.makedirs(directory_filterss)
# except:
#   pass

# try:
#   os.makedirs(directory_segments)
# except:
#   pass

# try:
#   os.makedirs(directory_cleaned_targets)
# except:
#   pass

In [None]:
# the main code used for image segmentation

class ChestXRaySegmentation:
    """Segment chest X-ray images with torchxrayvision's PSPNet model.

    The pipeline loads an image from disk, normalises its intensities,
    centre-crops and resizes it, runs the segmentation network, and turns the
    network output into probabilities (sigmoid) and binary masks (threshold)
    for the channels listed in ``segment_index``.
    """

    def __init__(self, image_size: int = 512, threshold: float = 0.5):
        """
        :param image_size: side length handed to ``XRayResizer`` (512 by default)
        :param threshold: probability cut-off; pixels strictly above it are set
            to 1 in the filters, all others to 0
        """
        self.image_size = image_size
        self.threshold = threshold
        # Single-step Compose pipelines for resizing and centre-cropping.
        self.resizer = torchvision.transforms.Compose([xrv.datasets.XRayResizer(self.image_size)])
        self.centerer = torchvision.transforms.Compose([xrv.datasets.XRayCenterCrop()])

        # Prefer the first GPU when CUDA is available, otherwise stay on the CPU.
        cuda_available = torch.cuda.is_available()
        print(f'device is {cuda_available}')
        self.device = torch.device("cuda:0" if cuda_available else "cpu")
        self.model_segment = xrv.baseline_models.chestx_det.PSPNet().to(self.device)
        # Kept as a callable attribute so code that uses `device_selector` keeps working.
        self.device_selector = self._to_device

        self.normalizer = xrv.datasets.normalize        # intensity normalisation
        self.loader = skimage.io.imread                 # image reader
        self.composer = torchvision.transforms.Compose  # exposed for building further pipelines
        self.segment_names = self.model_segment.targets
        self.num_segments = len(self.segment_names)     # number of segmentation channels of the model
        self.image = np.zeros(shape=(1, self.image_size, self.image_size))  # placeholder until prep() runs
        # Channels kept from the model output: left lung, right lung, heart and spine.
        self.segment_index = [4, 5, 8, 13]
        #self.segment_index = list(range(14))

    def _to_device(self, image: np.ndarray) -> torch.Tensor:
        """Wrap a NumPy array as a tensor and move it to the device chosen in ``__init__``."""
        return torch.from_numpy(image).to(self.device)

    def _binarize(self, probs: torch.Tensor) -> np.ndarray:
        """Return a NumPy array with 1.0 where ``probs`` exceeds the threshold and 0.0 elsewhere."""
        return torch.where(probs > self.threshold, 1.0, 0.0).cpu().numpy()

    def prep(self, name: str) -> torch.Tensor:
        """Load and preprocess the image at ``name``.

        The preprocessed array is also stored in ``self.image``.

        :param name: path of the image file
        :return: the preprocessed image as a tensor on the selected device
        """
        raw = self.loader(name)
        # Multi-channel images: keep the first channel only. Single-channel (2-D)
        # images are used as they are instead of failing on the channel index.
        if raw.ndim > 2:
            raw = raw[:, :, 0]
        normalized = self.normalizer(raw, 255)[np.newaxis, :, :]
        self.image = self.resizer(self.centerer(normalized))
        return self.device_selector(self.image)

    def prep_segment(self, name: str) -> torch.Tensor:
        """Run the segmentation model on the image at ``name`` without tracking gradients.

        :return: the raw model output restricted to the channels in ``segment_index``
        """
        with torch.no_grad():
            return self.model_segment(self.prep(name=name))[:, self.segment_index, :, :]

    def get_filters(self, name: str) -> np.ndarray:
        """Return the binary (0/1) filter of every selected segment for the image at ``name``."""
        probs = torch.sigmoid(self.prep_segment(name=name))  # per-pixel probabilities
        return self._binarize(probs)

    def get_filter_image(self, name: str):
        """Return the binary filters together with the processed image and the probabilities.

        :return: tuple ``(filters, image, probs)`` where ``filters`` is the
            thresholded 0/1 array, ``image`` is the preprocessed image stored by
            ``prep`` and ``probs`` holds the sigmoid probabilities as a NumPy array
        """
        probs = torch.sigmoid(self.prep_segment(name=name))  # per-pixel probabilities
        return self._binarize(probs), self.image, probs.cpu().numpy()

In [None]:
# Load the metadata/label table and keep only the frontal (PA or AP) images.
meta_data_target = pd.read_csv('/content/sample_data/MIMIC/targets/metadata.csv')
N_all_samples = len(meta_data_target)
print(f'total samples are: {N_all_samples}')

# NOTE: despite its name, this mask is True for the frontal views (PA / AP) that are kept.
mask_lateral_samples = meta_data_target['ViewPosition'].isin(['PA', 'AP'])
n_images = mask_lateral_samples.sum()
print(f'front image samples are: {n_images}')

# Renumber the surviving rows from 0; the previous row labels are kept in an 'index' column.
truncated_target_data = meta_data_target[mask_lateral_samples].reset_index()
truncated_target_data.head(2)

total samples are: 377095
front image samples are: 243324


Unnamed: 0,index,path,subject_id,study_id,dicom_id,PerformedProcedureStepDescription,ViewPosition,ProcedureCodeSequence_CodeMeaning,ViewCodeSequence_CodeMeaning,Race,...,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
0,0,files/p10/p10000032/s50414267/02aa804e-bde0afd...,10000032,50414267,02aa804e-bde0afdd-112c0b34-7bc16630-4e384014,CHEST (PA AND LAT),PA,CHEST (PA AND LAT),postero-anterior,WHITE,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,files/p10/p10000032/s53189527/2a2277a9-b0ded15...,10000032,53189527,2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab,CHEST (PA AND LAT),PA,CHEST (PA AND LAT),postero-anterior,WHITE,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Every distinct view position that occurs in the full (unfiltered) metadata table
meta_data_target['ViewPosition'].unique()

array(['PA', 'LATERAL', 'AP', 'LL', nan, 'LAO', 'RAO', 'AP AXIAL',
       'SWIMMERS', 'PA LLD', 'AP LLD', 'XTABLE LATERAL', 'AP RLD',
       'PA RLD', 'LPO'], dtype=object)

In [None]:
# Names of all columns (metadata fields and target labels) of the frontal-view table
all_columns = truncated_target_data.columns.tolist()
all_columns

['index',
 'path',
 'subject_id',
 'study_id',
 'dicom_id',
 'PerformedProcedureStepDescription',
 'ViewPosition',
 'ProcedureCodeSequence_CodeMeaning',
 'ViewCodeSequence_CodeMeaning',
 'Race',
 'Ethnicity',
 'Sex',
 'Age',
 'No Finding',
 'Atelectasis',
 'Cardiomegaly',
 'Consolidation',
 'Edema',
 'Enlarged Cardiomediastinum',
 'Fracture',
 'Lung Lesion',
 'Lung Opacity',
 'Pleural Effusion',
 'Pleural Other',
 'Pneumonia',
 'Pneumothorax',
 'Support Devices']

In [None]:
# Names of all segments; the trailing number is each entry's position in the list
segment_names = [
    'Left Clavicle',          # 0
    'Right Clavicle',         # 1
    'Left Scapula',           # 2
    'Right Scapula',          # 3
    'Left Lung',              # 4
    'Right Lung',             # 5
    'Left Hilus Pulmonis',    # 6
    'Right Hilus Pulmonis',   # 7
    'Heart',                  # 8
    'Aorta',                  # 9
    'Facies Diaphragmatica',  # 10
    'Mediastinum',            # 11
    'Weasand',                # 12
    'Spine',                  # 13
]

In [None]:
# Save every column of the frontal-view (PA & AP) table, keyed by column name

targets = {}
for col in all_columns:
  targets[col] = truncated_target_data[col].values

# np.save stores the dict as a pickled object array, so reading it back
# needs np.load(..., allow_pickle=True).
np.save(directory_cleaned_targets + '/new_targets.npy', targets)

In [None]:
# Segment every image taken in a PA or AP view position and save the results.
segmenter = ChestXRaySegmentation(image_size=512, threshold=0.5)  # PSPNet model + all image preprocessing
base_path = '/content/sample_data/MIMIC/images/'  # base folder under which all images are stored
n_error = 0
failed_images = []  # (image path, error) for every image that could not be processed
for n in tqdm(range(n_images)):
  location = None
  try:
    location = truncated_target_data['path'][n]  # image path relative to base_path
    picture_name = location.split('/')[-1].split('.')[0]  # file name without its extension
    filters, image, segments = segmenter.get_filter_image(base_path + location)
    np.save(directory_filterss + '/' + picture_name + '__filters.npy', filters)
    np.save(directory_processed_images + '/' + picture_name + '__image.npy', image)
    # Uncomment to also save the per-pixel probabilities of each segment:
    #  np.save(directory_segments + '/' + picture_name + '__segments.npy', segments)
  except Exception as err:
    # Best effort: one unreadable image must not abort the whole run. Catching
    # Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
    n_error += 1
    failed_images.append((location, repr(err)))

# Report failures instead of dropping them silently.
print(f'{n_error} of {n_images} images could not be processed')



device is True


100%|██████████| 243324/243324 [16:31:44<00:00,  4.09it/s]
