In [None]:
#!pip install pydicom
#!pip install pylibjpeg 
#!pip install pylibjpeg pylibjpeg-libjpeg pydicom
#!pip install GDCM 
#!pip install gdcm

In [None]:
import os
import glob
import shutil
import pydicom
import numpy as np
from tqdm import tqdm
from scipy import ndimage
from zipfile import ZipFile
from pydicom import dcmread
from google.colab import drive
from skimage import morphology
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from collections import Counter

# Register tqdm's pandas integration (adds the `progress_apply` family of methods).
tqdm.pandas()
# Mount Google Drive inside the Colab runtime; every path used below lives under this mount point.
drive.mount('/content/gdrive')

In [None]:
# Drive folder that is scanned for scan archives; archives are also extracted into it.
parent_folder = '/content/gdrive/MyDrive/qc500/'
# Root under which create_directories() builds the per-scan output tree.
parent_extracted = '/content/gdrive/MyDrive/qc500_extracted/'

In [None]:
def transform_to_hu(medical_image, image):
    """Apply the linear rescale stored on a dataset to raw pixel values.

    Args:
        medical_image: Object exposing ``RescaleIntercept`` and
            ``RescaleSlope`` attributes.
        image: Raw pixel values (anything supporting ``* slope + intercept``).

    Returns:
        ``image * RescaleSlope + RescaleIntercept``.
    """
    rescale_intercept = medical_image.RescaleIntercept
    rescale_slope = medical_image.RescaleSlope
    return image * rescale_slope + rescale_intercept

def window_image(image, window_center, window_width):
    """Return a copy of ``image`` clamped to an intensity window.

    The window spans ``window_center - window_width // 2`` to
    ``window_center + window_width // 2``; values outside are replaced by
    the nearest bound. The input array is not modified.
    """
    half_width = window_width // 2
    lower_bound = window_center - half_width
    upper_bound = window_center + half_width

    clamped = image.copy()
    clamped[clamped < lower_bound] = lower_bound
    clamped[clamped > upper_bound] = upper_bound
    return clamped

def load_and_plot_image(ct_slice, save_path, file_path, save=False):
    """Load one DICOM slice and optionally save four PNG renderings of it.

    Despite its name the function does not draw anything; it only reads the
    file and, when ``save`` is true, writes images to disk.

    Args:
        ct_slice: File name of the slice; the part before the first ``.``
            becomes the PNG base name.
        save_path: Directory that holds (or will hold) the ``original``,
            ``hu_image``, ``brain_image`` and ``bone_image`` sub-folders.
        file_path: Path of the DICOM file to read.
        save: When true, write the four PNGs; otherwise nothing is written.

    Returns:
        None.
    """
    # `pydicom.read_file` is deprecated (removed in pydicom 3.0); `dcmread`
    # is the supported spelling.
    medical_image = pydicom.dcmread(file_path)
    image = medical_image.pixel_array
    hu_image = transform_to_hu(medical_image, image)
    brain_image = window_image(hu_image, 40, 80)
    bone_image = window_image(hu_image, 400, 1000)
    if not save:
        return

    fname = ct_slice.split('.')[0]
    renderings = {
        'original': image,
        'hu_image': hu_image,
        'brain_image': brain_image,
        'bone_image': bone_image,
    }
    for subfolder, pixels in renderings.items():
        target_dir = os.path.join(save_path, subfolder)
        # imsave does not create missing directories, so make sure the
        # destination exists before writing.
        os.makedirs(target_dir, exist_ok=True)
        mpimg.imsave(os.path.join(target_dir, f'{fname}.png'), pixels)

In [None]:
def create_directories(current_scan):
    """Create the output folder tree for one scan under ``parent_extracted``.

    For each of the ``CT 5mm`` and ``CT 0.625mm`` series the sub-folders
    ``original``, ``hu_image``, ``brain_image`` and ``bone_image`` are
    created. Folders that already exist are left alone, so a partially
    created tree is completed instead of being abandoned at the first
    existing folder.

    Args:
        current_scan: Name of the scan; used as the top-level folder name.
    """
    scan_directory = os.path.join(parent_extracted, current_scan)
    series_names = ('CT 5mm', 'CT 0.625mm')
    variant_names = ('original', 'hu_image', 'brain_image', 'bone_image')

    created_any = False
    for series in series_names:
        for variant in variant_names:
            target = os.path.join(scan_directory, series, variant)
            if not os.path.isdir(target):
                os.makedirs(target, exist_ok=True)
                created_any = True

    if created_any:
        print(f'{current_scan} directories created')
    else:
        print(f'{current_scan} already exists')

In [None]:
def unzip_extract():
    """Extract every ``.zip`` archive in ``parent_folder`` into a sibling folder.

    Each archive ``<name>.zip`` is unpacked into ``parent_folder/<name>``
    (``<name>`` being the text before the first ``.``). Archives whose
    target folder already exists are skipped. Extraction is best-effort: a
    failing archive is reported and the loop continues.
    """
    for scans in tqdm(os.listdir(parent_folder)):
        # Only real archives are candidates; a substring test would also
        # match unrelated entries that merely contain "zip".
        if not scans.endswith('.zip'):
            continue

        scan_name = scans.split(".")[0]
        # The existence check must be made inside parent_folder, not
        # relative to the current working directory.
        target_dir = os.path.join(parent_folder, scan_name)
        if os.path.isdir(target_dir):
            print(f'{scan_name} has been extracted already!.')
            continue

        print(f'Current scans {scans}')
        try:
            with ZipFile(os.path.join(parent_folder, scans), 'r') as zip_ref:
                zip_ref.extractall(target_dir)
            print("extracted")
        except Exception as exc:
            # Keep going, but report why: a bare `except:` would also swallow
            # KeyboardInterrupt and hide the reason for the failure.
            # NOTE(review): a failed extraction may leave a partial
            # target_dir behind, which the check above then treats as done.
            print(f'{scans} error: {exc}')
unzip_extract()

In [None]:
# Move every entry of the working directory whose name starts with "CQ"
# into the Drive folder.
for entry_name in tqdm(os.listdir('.')):
  if not entry_name.startswith("CQ"):
    continue
  shutil.move(f'./{entry_name}', '/content/gdrive/MyDrive/qc500')

In [None]:
# Flatten `subfolders` (an iterable of iterables) into one list and report
# how often each entry occurs.
# NOTE(review): `subfolders` is not defined in any cell visible here —
# confirm it is created earlier, otherwise this cell fails on a fresh kernel.
final = [entry for group in subfolders for entry in group]
col_count = Counter(final)
for entry, occurrences in col_count.items():
  print(entry, occurrences)
print(f'No of subfolders {len(list(subfolders))}')

In [None]:
# Unique series names; also read by extract_data(), so it must not be
# consumed by extract_samples().
required_subfolders = set(final)
def extract_samples():
    """Pick one representative DICOM path for each required series name.

    Scans under ``parent_folder`` are visited in ``os.listdir`` order. For
    every name in ``required_subfolders`` that has not been sampled yet, the
    ``.dcm`` files of the current scan whose path contains that name are
    sorted and the middle one is taken.

    The global ``required_subfolders`` set is left untouched; progress is
    tracked in a local copy.

    Returns:
        list[str]: One file path per series name found (at most
        ``len(required_subfolders)`` entries).
    """
    samples = []
    remaining = set(required_subfolders)
    for scans in tqdm(os.listdir(parent_folder)):
        if not remaining:
            # Every series already has a sample; skip the remaining Drive I/O.
            break
        current_scan = scans.split(".")[0]
        combined_name = ''.join(current_scan.split('-'))
        study_dir = os.path.join(
            parent_folder,
            current_scan,
            f'{combined_name} {combined_name}',
            'Unknown Study',
        )
        files = glob.glob(os.path.join(study_dir, '**', '*.dcm'), recursive=True)
        if not files:
            continue
        # Iterate over a sorted snapshot so `remaining` can shrink in the loop.
        for req in sorted(remaining):
            # Keep only the files of this series; sorting makes the "middle"
            # pick deterministic regardless of glob ordering.
            matching = sorted(f for f in files if req in f)
            if matching:
                samples.append(matching[len(matching) // 2])
                remaining.discard(req)
    return samples
samples = extract_samples()
for sam in samples:
  print(sam)

In [None]:
def load_and_plot_image_test(file_path, save=False):
    """Read one DICOM file and show four renderings of it side by side.

    The panels are, in order: the raw pixel array, the rescaled image from
    ``transform_to_hu``, and two windowed versions (center 40 / width 80 and
    center 400 / width 1000). They fill the first four slots of a 1x5
    subplot grid.

    Args:
        file_path: Path of the DICOM file to read.
        save: Accepted but not used by this function.
    """
    dataset = dcmread(file_path)
    # Override the photometric interpretation before the pixel data is decoded.
    # NOTE(review): presumably a workaround for a decoding problem — confirm.
    dataset.PhotometricInterpretation = 'YBR_FULL'
    raw_pixels = dataset.pixel_array

    hu_pixels = transform_to_hu(dataset, raw_pixels)
    panels = [
        ('Original', raw_pixels),
        ('Hu image', hu_pixels),
        ('brain image', window_image(hu_pixels, 40, 80)),
        ('bone image', window_image(hu_pixels, 400, 1000)),
    ]

    plt.figure(figsize=(20, 10))
    plt.style.use('grayscale')

    for slot, (panel_title, pixels) in enumerate(panels, start=1):
        plt.subplot(1, 5, slot)
        plt.imshow(pixels)
        plt.title(panel_title)
        plt.axis('off')

In [None]:
# Render a single hard-coded slice as a visual check of the windowing helpers.
path = '/content/gdrive/MyDrive/qc500/CQ500-CT-187/CQ500CT187 CQ500CT187/Unknown Study/CT Thin Plain/CT000140.dcm'
load_and_plot_image_test(path)

In [None]:
def remove_noise(file_path, display=False):
    """Mask a CT slice down to its largest connected region.

    The slice is rescaled with ``transform_to_hu`` and windowed
    (center 40 / width 80). The windowed image is dilated and labelled, the
    largest non-background component is kept, and that mask is dilated,
    hole-filled and dilated again before being multiplied with the windowed
    image.

    Args:
        file_path: Path of the DICOM file to read.
        display: Accepted but not used by this function.

    Returns:
        numpy.ndarray: The windowed image with everything outside the mask
        set to zero.
    """
    # `pydicom.read_file` is deprecated (removed in pydicom 3.0).
    medical_image = pydicom.dcmread(file_path)
    image = medical_image.pixel_array
    hu_image = transform_to_hu(medical_image, image)
    brain_image = window_image(hu_image, 40, 80)

    segmentation = morphology.dilation(brain_image, np.ones((5, 5)))
    labels, label_nb = ndimage.label(segmentation)
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    label_count = np.bincount(labels.ravel().astype(int))
    # Label 0 is the background; zero its count so argmax picks a real component.
    label_count[0] = 0
    mask = labels == label_count.argmax()

    mask = morphology.dilation(mask, np.ones((5, 5)))
    # `scipy.ndimage.morphology` is a deprecated namespace; the function is
    # available directly on `scipy.ndimage`.
    mask = ndimage.binary_fill_holes(mask)
    mask = morphology.dilation(mask, np.ones((3, 3)))

    masked_image = mask * brain_image
    return masked_image

In [None]:
def extract_data():
    """Copy required-series DICOM files and save a noise-masked PNG for each.

    For every scan folder in ``parent_folder`` the ``.dcm`` files below
    ``<scan>/<name> <name>/Unknown Study`` are collected. Each file whose
    path contains a name from ``required_subfolders`` is copied to
    ``parent_folder/extracted/<scan>/<series>/`` and its ``remove_noise``
    result is written as a PNG to
    ``parent_folder/extracted/<scan>/masked_<series>/``.
    """
    extracted_root = os.path.join(parent_folder, 'extracted')
    for scans in tqdm(os.listdir(parent_folder)):
        # Only extracted scan folders hold DICOM files; skipping other entries
        # also stops a scan being processed twice (once for its .zip, once
        # for its folder).
        if not os.path.isdir(os.path.join(parent_folder, scans)):
            continue
        current_scan = scans
        combined_name = ''.join(current_scan.split('-'))
        study_dir = os.path.join(
            parent_folder,
            current_scan,
            f'{combined_name} {combined_name}',
            'Unknown Study',
        )
        files = glob.glob(os.path.join(study_dir, '**', '*.dcm'), recursive=True)
        for paths in files:
            matching_series = [req for req in required_subfolders if req in paths]
            if not matching_series:
                continue
            # Compute the mask once per file, not once per matching series.
            masked_image = remove_noise(paths)
            # imsave picks the output format from the extension and cannot
            # write ".dcm", so the mask is stored as a PNG.
            stem = os.path.splitext(os.path.basename(paths))[0]
            for req in matching_series:
                current_directory_path = os.path.join(extracted_root, current_scan, req)
                masked_current_directory_path = os.path.join(
                    extracted_root, current_scan, f'masked_{req}'
                )
                # Create both folders independently so the masked folder is
                # never missing just because the copy folder already exists.
                os.makedirs(current_directory_path, exist_ok=True)
                os.makedirs(masked_current_directory_path, exist_ok=True)
                shutil.copy(paths, current_directory_path)
                mpimg.imsave(
                    os.path.join(masked_current_directory_path, f'{stem}.png'),
                    masked_image,
                )
