In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import SimpleITK as sitk
import cv2

# Base directory of the image files; load_all_images() joins the file names
# found in the dataset spreadsheet onto this path.
# Alternative location when running from Google Drive (Colab):
# image_dir="/content/drive/MyDrive/Biomedicine/AI Project 2024-2025/Train"
image_dir = "/kaggle/input/ds-lung-tumor/Train-20250104T155611Z-001/Train"

In [None]:
def load_nrrd(file_path):
    """Read an image file with SimpleITK and return its contents as a NumPy array.

    This is a best-effort loader: any exception raised while reading or
    converting the file is reported on stdout and ``None`` is returned
    instead of letting the error propagate.
    """
    try:
        pixel_data = sitk.GetArrayFromImage(sitk.ReadImage(file_path))
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None
    return pixel_data

def load_all_images(metadata_path="/kaggle/input/ds-lung-tumor/dataset_lung.xlsx", images_root=None):
    """Load every full slice, nodule crop and tumor label listed in the dataset workbook.

    Parameters
    ----------
    metadata_path : str, optional
        Excel workbook read with ``pd.read_excel``. It must contain the
        columns 'Nodule', 'Full_slice' and 'TumorClass'. Defaults to the
        Kaggle location used by this notebook.
    images_root : str, optional
        Directory the file names in the workbook are relative to. When
        omitted, the notebook-level ``image_dir`` is used.

    Returns
    -------
    tuple
        ``(full_images, nodule_images, labels)``. The first two are lists in
        workbook row order holding whatever ``load_nrrd`` returned for each
        file (``None`` for a file that could not be read); ``labels`` is a
        NumPy array of the 'TumorClass' values in the same order.
    """
    if images_root is None:
        images_root = image_dir  # notebook-level default

    df_images = pd.read_excel(metadata_path)  # Read the Excel workbook
    display(df_images)

    nodule_images = []
    full_images   = []

    # Only two columns are needed per row, so iterate them directly instead of
    # materialising a Series for every row with iterrows().
    for nodule_filename, fullslice_filename in zip(df_images['Nodule'], df_images['Full_slice']):
        nodule_images.append(load_nrrd(os.path.join(images_root, nodule_filename)))
        full_images.append(load_nrrd(os.path.join(images_root, fullslice_filename)))

    # tolist() yields plain Python scalars, so np.array infers the same dtype
    # as it would from a hand-built list of labels.
    labels = np.array(df_images['TumorClass'].tolist())

    # load_nrrd() swallows read errors; surface how many entries ended up as
    # None so a later failure on those entries is not a surprise.
    n_failed = sum(img is None for img in nodule_images + full_images)
    if n_failed:
        print(f"Warning: {n_failed} image file(s) could not be loaded and are stored as None.")

    return full_images, nodule_images, labels

def gamma_transform(image, gamma, lower_bound=None, upper_bound=None):
    """Apply a gamma curve to an image while keeping its own value range.

    The image is min-max normalised to [0, 1], raised to the power ``gamma``
    and then mapped back onto its original [min, max] interval.

    Parameters
    ----------
    image : array-like
        Pixel values to transform.
    gamma : float
        Exponent applied to the normalised values.
    lower_bound, upper_bound : float, optional
        When at least one is given, the image is clipped to these limits
        before it is normalised.

    Returns
    -------
    numpy.ndarray
        Floating-point array with the same shape as ``image``.
    """
    if lower_bound is not None or upper_bound is not None:
        image = np.clip(image, lower_bound, upper_bound)

    hu_min = np.min(image)
    hu_max = np.max(image)
    hu_range = hu_max - hu_min

    if hu_range == 0:
        # A constant image has nothing to reshape; normalising it would divide
        # by zero and fill the result with NaN. Return it as floats instead
        # (multiplying by 1.0 promotes integer input to float).
        return np.asarray(image) * 1.0

    normalized_image = (image - hu_min) / hu_range  # Normalize to [0, 1]
    gamma_corrected  = np.power(normalized_image, gamma)  # Apply gamma transformation
    gamma_corrected  = gamma_corrected * hu_range + hu_min  # Scale back to the original range
    return gamma_corrected

def plot_temp_images(images, labels_ids):
    """Show up to seven images in one row, each titled with its label.

    ``labels_ids[k]`` is the position in the notebook-level ``labels`` array
    of the label belonging to ``images[k]``. When fewer than seven images (or
    label ids) are supplied, the remaining panels are left blank.
    """
    fig, axes = plt.subplots(1, 7, figsize=(10, 3))
    n_available = min(len(images), len(labels_ids))
    for idx, ax in enumerate(axes.flatten()):
        if idx < n_available:
            ax.imshow(images[idx], cmap='gray')  # Display image
            ax.set_title(str(labels[labels_ids[idx]]))
            ax.title.set_size(14)
        ax.axis('off')  # Turn off axis (also hides unused panels)
    plt.tight_layout()
    plt.show()
    
def plot_chunck_images(images, labels_ids):
    """Show up to 21 images on a 3 x 7 grid, each titled with its label.

    ``labels_ids[k]`` is the position in the notebook-level ``labels`` array
    of the label belonging to ``images[k]``. When fewer than 21 images (or
    label ids) are supplied, the remaining panels are left blank instead of
    raising an IndexError.
    """
    fig, axes = plt.subplots(3, 7, figsize=(12, 6))
    n_available = min(len(images), len(labels_ids))
    for idx, ax in enumerate(axes.flatten()):
        if idx < n_available:
            ax.imshow(images[idx], cmap='gray')  # Display image
            ax.set_title(str(labels[labels_ids[idx]]))
            ax.title.set_size(14)
        ax.axis('off')  # Turn off axis (also hides unused panels)
    plt.tight_layout()
    plt.show()

def plot_all_images(images, offset):
    """Plot one page of images starting at ``offset``, titled '<index> ==> <label>'.

    Labels are read from the notebook-level ``labels`` array using the same
    index as the image.

    Layout: offsets below 2300 use a 10 x 10 grid (100 panels); from 2300 on
    a 7 x 9 grid (63 panels) is used.
    NOTE(review): these numbers look sized for a dataset of 2363 images - confirm.

    Panels whose index lies beyond the end of ``images`` or ``labels`` are
    left blank instead of raising an IndexError.
    """
    if offset < 2300:
        n_rows, n_cols, fig_size = 10, 10, (20, 20)
    else:
        n_rows, n_cols, fig_size = 7, 9, (18, 18)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=fig_size)

    n_available = min(len(images), len(labels))
    for idx, ax in enumerate(axes.flatten()):
        i = offset + idx
        if i < n_available:
            ax.imshow(images[i], cmap='gray')  # Display image
            ax.set_title(str(i) + ' ==> ' + str(labels[i]))
        ax.axis('off')  # Turn off axis (also hides unused panels)
    plt.tight_layout()
    plt.show()

In [None]:
full_images, nodule_images, labels = load_all_images()

# The metadata workbook (Excel) is read again here to count samples per class.
df_images = pd.read_excel("/kaggle/input/ds-lung-tumor/dataset_lung.xlsx")
# value_counts() returns a Series: index = distinct classes, values = frequencies.
class_counts = df_images['TumorClass'].value_counts()
print("\nClass Distribution:")
print(class_counts)

# Bar chart of the class frequencies, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(7, 4))
sns.barplot(x=class_counts.index, y=class_counts.values, palette="viridis", ax=ax)
ax.set_title("Tumor Class Distribution")
ax.set_xlabel("Tumor Class")
ax.set_ylabel("Count")
plt.show()

In [None]:
# Intensity window applied to every slice before enhancement.
min_hu = -1000
max_hu = +500

# The CLAHE settings are the same for every slice, so build the operator once
# instead of once per loop iteration.
clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8,8))

full_all_modified_slices = []
for img in full_images:

    # Clip to [min_hu, max_hu], stretch to 0..255, then apply the gamma curve
    temp_slice = np.clip(img, min_hu, max_hu)
    temp_slice = cv2.normalize(temp_slice, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
    temp_slice = gamma_transform(temp_slice, gamma=1.35)

    # Convert to 8-bit and apply Adaptive Contrast Equalization (CLAHE)
    temp_slice = np.uint8(temp_slice)
    temp_slice = clahe.apply(temp_slice)

    full_all_modified_slices.append(temp_slice)

# Page through the results 100 at a time; the page starts follow the number of
# processed slices rather than a fixed upper limit.
offsets = list(range(0, len(full_all_modified_slices), 100))
for offset in offsets:
    plot_all_images(full_all_modified_slices, offset)

In [None]:
# Intensity window applied to every nodule crop before enhancement.
min_hu = -1000
max_hu = +500

# The CLAHE settings are the same for every crop, so build the operator once
# instead of once per loop iteration.
clahe = cv2.createCLAHE(clipLimit=2, tileGridSize=(8,8))

nodule_all_modified_slices = []
for img in nodule_images:

    # Clip to [min_hu, max_hu], stretch to 0..255, then apply the gamma curve
    temp_slice = np.clip(img, min_hu, max_hu)
    temp_slice = cv2.normalize(temp_slice, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
    temp_slice = gamma_transform(temp_slice, gamma=1.35)

    # Convert to 8-bit and apply Adaptive Contrast Equalization (CLAHE)
    temp_slice = np.uint8(temp_slice)
    temp_slice = clahe.apply(temp_slice)

    # Bring every crop to a common 52 x 52 size
    temp_slice = cv2.resize(temp_slice, (52, 52))

    nodule_all_modified_slices.append(temp_slice)

# Page through the results 100 at a time; the page starts follow the number of
# processed crops rather than a fixed upper limit.
offsets = list(range(0, len(nodule_all_modified_slices), 100))
for offset in offsets:
    plot_all_images(nodule_all_modified_slices, offset)