In [1]:
import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nibabel as nib
from scipy import ndimage
import zipfile
from keras.utils import get_file

In [2]:
# Release assets for the CT-0 and CT-23 archives.
url_1 = "https://github.com/hasibzunair/3D-image-classification-tutorial/releases/download/v0.2/CT-0.zip"
url_2 = "https://github.com/hasibzunair/3D-image-classification-tutorial/releases/download/v0.2/CT-23.zip"

# The current working directory is handed to Keras as the download cache root.
cache_dir = os.getcwd()

# Fetch both archives; each call yields the local path that is opened below.
filename_1 = get_file("CT-0.zip", url_1, cache_dir=cache_dir)
filename_2 = get_file("CT-23.zip", url_2, cache_dir=cache_dir)

# Create the extraction target; exist_ok=True keeps a re-run of this cell from failing.
os.makedirs("MosMedData", exist_ok=True)

# Unpack the archives one after the other into the same target directory.
for archive_path in (filename_1, filename_2):
    with zipfile.ZipFile(archive_path, "r") as z_fp:
        z_fp.extractall("./MosMedData/")

Downloading data from https://github.com/hasibzunair/3D-image-classification-tutorial/releases/download/v0.2/CT-0.zip
1065471431/1065471431 ━━━━━━━━━━━━━━━━━━━━ 96s 0us/step
Downloading data from https://github.com/hasibzunair/3D-image-classification-tutorial/releases/download/v0.2/CT-23.zip
1045162547/1045162547 ━━━━━━━━━━━━━━━━━━━━ 63s 0us/step


In [3]:
# Walk the extracted tree and report, per folder, how many sub-directories
# and files it contains.
for dirpath, dirnames, filenames in os.walk("MosMedData"):
    summary = f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'."
    print(summary)

There are 2 directories and 0 images in 'MosMedData'.
There are 0 directories and 100 images in 'MosMedData/CT-23'.
There are 0 directories and 100 images in 'MosMedData/CT-0'.


In [4]:
def read_nifti_file(filepath):
    """Load the scan stored at `filepath` and return its voxel data.

    The file is opened with nibabel and the array produced by calling
    ``get_fdata()`` on the loaded image is returned.
    """
    nifti_image = nib.load(filepath)
    return nifti_image.get_fdata()


def normalize(volume, min_value=-1000, max_value=400):
    """Clip a volume to ``[min_value, max_value]`` and rescale it to ``[0, 1]``.

    The input is never modified: clipping is done on a copy, so the caller's
    array keeps its original intensities.

    Args:
        volume: Array of voxel intensities (any shape).
        min_value: Lower clipping bound; values below it map to 0.
            (The default of -1000 is presumably in Hounsfield units -- confirm.)
        max_value: Upper clipping bound; values above it map to 1.

    Returns:
        A new ``float32`` array with the same shape as `volume` and values
        in ``[0, 1]``.

    Raises:
        ValueError: If `max_value` is not strictly greater than `min_value`.
    """
    if max_value <= min_value:
        raise ValueError("max_value must be greater than min_value")
    # np.clip returns a fresh array, unlike in-place boolean-mask assignment,
    # so the caller's data is left untouched.
    clipped = np.clip(volume, min_value, max_value)
    scaled = (clipped - min_value) / (max_value - min_value)
    return scaled.astype("float32")


def resize_volume(img, desired_depth=64, desired_width=128, desired_height=128):
    """Rotate a 3D volume by 90 degrees and resample all three axes to a target size.

    Args:
        img: 3D array indexed as (width, height, depth).
        desired_depth: Target size of the last axis.
        desired_width: Target size of the first axis.
        desired_height: Target size of the second axis.

    Returns:
        The rotated volume resampled to
        ``(desired_width, desired_height, desired_depth)``.
    """
    current_width, current_height, current_depth = img.shape[0], img.shape[1], img.shape[-1]
    # Per-axis scale factor = target size / current size.
    zoom_factors = (
        desired_width / current_width,
        desired_height / current_height,
        desired_depth / current_depth,
    )
    # With no `axes` argument scipy rotates in the plane of the first two axes.
    # reshape=False keeps the array shape unchanged, so the factors computed
    # from the original shape above still apply after the rotation.
    img = ndimage.rotate(img, 90, reshape=False)
    # order=1 selects linear interpolation for the resampling.
    img = ndimage.zoom(img, zoom_factors, order=1)
    return img


def process_scan(path):
    """Load the scan at `path`, normalize its intensities, then resize it.

    The three steps run in that order: ``read_nifti_file`` -> ``normalize``
    -> ``resize_volume``; the resized volume is returned.
    """
    return resize_volume(normalize(read_nifti_file(path)))

In [5]:
def _list_scan_paths(class_dir):
    """Return absolute paths of every entry in `class_dir`, sorted by name.

    os.listdir returns entries in arbitrary order, so the listing is sorted
    to keep the scan order identical across runs and machines.
    """
    base_dir = os.path.join(os.getcwd(), class_dir)
    return [os.path.join(base_dir, entry) for entry in sorted(os.listdir(class_dir))]


normal_scan_paths = _list_scan_paths("MosMedData/CT-0")

# Folder "CT-23" consists of CT scans having several ground-glass opacifications,
# involvement of lung parenchyma.
abnormal_scan_paths = _list_scan_paths("MosMedData/CT-23")

# Show the first two paths of each list as a quick sanity check.
print(normal_scan_paths[:2])
print(abnormal_scan_paths[:2])


['/content/MosMedData/CT-0/study_0004.nii.gz', '/content/MosMedData/CT-0/study_0085.nii.gz']
['/content/MosMedData/CT-23/study_0941.nii.gz', '/content/MosMedData/CT-23/study_0942.nii.gz']


In [10]:
def split_ct_scan(volume, num_slices=8):
    """
    Splits a 3D CT scan into consecutive chunks of `num_slices` slices along the depth axis.

    Args:
        volume (numpy array): A single 3D CT scan of shape (H, W, D).
        num_slices (int): Number of slices per chunk. Must be positive and
            divide D evenly.

    Returns:
        numpy array: Shape (D // num_slices, H, W, num_slices). With the
        default of 8 slices and a depth of 64 this is (8, H, W, 8).

    Raises:
        ValueError: If `volume` is not 3D, if `num_slices` is not positive,
            or if D is not divisible by `num_slices`.
    """
    # Explicit exceptions instead of `assert`: assertions are removed when
    # Python runs with -O, which would let bad input through silently.
    if volume.ndim != 3:
        raise ValueError(f"Expected a 3D volume of shape (H, W, D), got shape {volume.shape}")
    if num_slices <= 0:
        raise ValueError(f"num_slices must be a positive integer, got {num_slices}")

    depth = volume.shape[-1]
    if depth % num_slices != 0:
        raise ValueError("Depth must be divisible by the number of slices per batch")

    # Cut the depth axis into equal, non-overlapping windows and stack them
    # on a new leading axis.
    chunks = [
        volume[:, :, start:start + num_slices]
        for start in range(0, depth, num_slices)
    ]
    return np.stack(chunks, axis=0)  # Shape: (D // num_slices, H, W, num_slices)


In [None]:
# Load every scan, preprocess it and cut it into depth-wise chunks.
# Each array has shape (num_scans, chunks_per_scan, H, W, slices_per_chunk).
abnormal_scans = np.array(
    [split_ct_scan(process_scan(scan_path)) for scan_path in abnormal_scan_paths]
)
normal_scans = np.array(
    [split_ct_scan(process_scan(scan_path)) for scan_path in normal_scan_paths]
)

# Merge the scan and chunk axes so each chunk becomes its own sample:
# (num_scans * chunks_per_scan, H, W, slices_per_chunk).
# NOTE(review): chunks of one scan become separate samples here; if a later
# train/validation split is done per sample, chunks of the same scan could
# end up on both sides -- confirm the split is done per scan.
abnormal_scans = abnormal_scans.reshape((-1,) + abnormal_scans.shape[2:])
normal_scans = normal_scans.reshape((-1,) + normal_scans.shape[2:])

# Samples from the abnormal list are labelled 1, samples from the normal list 0.
abnormal_labels = np.array([1] * len(abnormal_scans))
normal_labels = np.array([0] * len(normal_scans))

print(
    normal_scans.shape,
    normal_labels.shape,
    "---------------------",
    abnormal_scans.shape,
    abnormal_labels.shape,
    sep="\n",
)

