# Region of interest

In [None]:
def crop_between_keypoints(img, x, y):
    """Return the part of ``img`` inside an image-sized window centred on (x, y).

    The window has the same height and width as the image itself (half of
    each on either side of the point), so the result is the overlap between
    that window and the image.

    Args:
        img: 2-D array indexed as ``img[row, col]``.
        x: Column coordinate of the centre; truncated with ``int``.
        y: Row coordinate of the centre; truncated with ``int``.

    Returns:
        The sliced region of ``img``.
    """
    n_rows, n_cols = img.shape
    col, row = int(x), int(y)
    half_rows, half_cols = n_rows // 2, n_cols // 2

    # Clip the window to the image on every side.
    row_window = slice(max(0, row - half_rows), min(n_rows, row + half_rows))
    col_window = slice(max(0, col - half_cols), min(n_cols, col + half_cols))
    return img[row_window, col_window]


def crop_around_keypoint(img, keypoint, width, height):
    """Crop a ``width`` x ``height`` window centred on ``keypoint``.

    The window is clipped to the image, so crops near an edge are smaller
    than requested. A keypoint whose window lies completely outside the
    image yields an empty crop.

    Args:
        img: Array indexed as ``img[row, col]``. Any further axes (for
            example colour channels) are kept untouched.
        keypoint: Sequence whose first two items are the x (column) and
            y (row) coordinates; both are truncated with ``int``.
        width: Requested crop width in pixels.
        height: Requested crop height in pixels.

    Returns:
        The sliced region of ``img``.
    """
    h, w = img.shape[:2]
    x, y = int(keypoint[0]), int(keypoint[1])
    half_w, half_h = width // 2, height // 2

    # Clamp both ends into [0, size] and keep stop >= start. Without this a
    # keypoint far left of / above the image produces a negative stop index,
    # which Python would interpret as "count from the end".
    left = min(max(0, x - half_w), w)
    right = max(left, min(w, x + half_w))
    top = min(max(0, y - half_h), h)
    bottom = max(top, min(h, y + half_h))

    return img[top:bottom, left:right]

def graph_plot(study_id, series_id):
    """Show every labelled slice of one series, plain and with its label points.

    Reads the module-level ``train_label`` table and ``INPUT_DIR``. For each
    distinct ``instance_number`` recorded for the given study and series, the
    matching DICOM file is loaded and a two-panel figure is shown: the image
    alone on the left and the same image with the label points on the right.
    Nothing is drawn when the table has no rows for the study/series pair.

    Args:
        study_id: Value matched against the ``study_id`` column.
        series_id: Value matched against the ``series_id`` column.
    """
    # Distinct labelled slices of this series, ascending. Filtering the table
    # directly avoids growing a frame one row at a time and yields an empty
    # result (rather than an error) when nothing matches.
    instance_number_list = (
        train_label
        .filter((pl.col("study_id") == study_id) & (pl.col("series_id") == series_id))
        .get_column("instance_number")
        .unique()
        .sort()
    )

    for instance_number in instance_number_list:
        print(f"=====study_id:{study_id}, series_id:{series_id}, instance_number:{instance_number}=====")
        # dcmread is the supported reader; read_file is a deprecated alias.
        ds = pydicom.dcmread(f'{INPUT_DIR}/train_images/{study_id}/{series_id}/{instance_number}.dcm')
        pixels = ds.pixel_array

        # Label rows that belong to this slice.
        df_plt = train_label.filter(
            (pl.col('study_id') == study_id)
            & (pl.col('series_id') == series_id)
            & (pl.col('instance_number') == instance_number)
        )

        # Left panel: image only.
        plt.subplot(1, 2, 1)
        plt.imshow(pixels, cmap='bone')

        # Right panel: image plus label points.
        plt.subplot(1, 2, 2)
        plt.imshow(pixels, cmap='bone')
        # The last two columns of each row are used as x and y -- assumes
        # train_label ends with the coordinate columns; TODO confirm.
        for row in df_plt.iter_rows():
            plt.scatter(row[-2], row[-1], color='red')
        plt.show()

In [None]:
import cv2
import os
import matplotlib.pyplot as plt
import pydicom as dicom

# Example slice used by the rest of this cell: study, series and instance
# number together select one DICOM file.
study_id, series_id = 4290709089, 3274612423
instance_number = 5
# Despite its name, `directory` is the path of that single .dcm file.
directory =f'{INPUT_DIR}/train_images/{study_id}/{series_id}/{instance_number}.dcm'

def paddingOrCroppingImage(directory):
    """Read a DICOM file and return its pixels fitted to 512 x 512.

    Each axis is handled on its own: an axis longer than 512 is centre-cropped
    and an axis shorter than 512 is padded on both sides by repeating the
    border pixels. An image that is too tall but too narrow (or the reverse)
    is therefore cropped on one axis and padded on the other.

    Args:
        directory: Path of the ``.dcm`` file to read (a file path, despite
            the parameter name).

    Returns:
        2-D pixel array of shape (512, 512).
    """
    target_height, target_width = 512, 512

    dcm_data = dicom.dcmread(directory).pixel_array
    current_height, current_width = dcm_data.shape
    print(current_height, current_width)

    # Crop only the axes that are too long. max(0, ...) keeps the start index
    # from going negative on a short axis, which would otherwise be read as an
    # index counted from the end and select the wrong pixels.
    crop_top = max(0, (current_height - target_height) // 2)
    crop_left = max(0, (current_width - target_width) // 2)
    dcm_data = dcm_data[crop_top:crop_top + target_height,
                        crop_left:crop_left + target_width]

    # Whatever is still missing after cropping is made up by padding.
    pad_height = target_height - dcm_data.shape[0]
    pad_width = target_width - dcm_data.shape[1]
    if pad_height == 0 and pad_width == 0:
        return dcm_data

    # Split the padding evenly; the odd pixel, if any, goes bottom/right.
    pad_top = pad_height // 2
    pad_bottom = pad_height - pad_top
    pad_left = pad_width // 2
    pad_right = pad_width - pad_left

    padded_image = cv2.copyMakeBorder(
        dcm_data,
        pad_top,
        pad_bottom,
        pad_left,
        pad_right,
        cv2.BORDER_REPLICATE
    )
    print(pad_top, pad_bottom, pad_left, pad_right)
    return padded_image
    

def reversed_images_cropping(image, target_height=320, target_width=320):
    """Return the centre ``target_height`` x ``target_width`` window of ``image``.

    An axis that is already shorter than its target is returned whole.

    Args:
        image: 2-D array indexed as ``image[row, col]``.
        target_height: Number of rows to keep (default 320).
        target_width: Number of columns to keep (default 320).

    Returns:
        The sliced region of ``image``.
    """
    current_height, current_width = image.shape

    # max(0, ...) prevents a negative start index when the image is smaller
    # than the target; a negative start would select rows/columns counted
    # from the end instead of the centre.
    crop_top = max(0, (current_height - target_height) // 2)
    crop_left = max(0, (current_width - target_width) // 2)

    return image[crop_top:crop_top + target_height,
                 crop_left:crop_left + target_width]
    

# Label rows for the example slice selected above.
dfs = train_label.filter(
            (pl.col('study_id')==study_id)
            &(pl.col('series_id')==series_id)
            &(pl.col('instance_number')==instance_number)
        )
# NOTE(review): these points are drawn before plt.figure() is called below, so
# they go onto whatever figure is current at this moment, not onto the
# two-panel figure created afterwards. They also use the last two columns of
# each row unshifted, i.e. not adjusted for the crop/pad applied to the image.
# Confirm whether this loop is still wanted.
for row in dfs.iter_rows():
        plt.scatter(row[-2], row[-1], color='red')


# Fit the slice to the fixed working size, then take its centre window.
updated_image = paddingOrCroppingImage(directory)
reversed_image = reversed_images_cropping(updated_image)

plt.figure(figsize=(10, 5))  # Adjust figure size if needed

# Plot the updated (cropped/padded) image on the left
plt.subplot(1, 2, 1)
plt.imshow(updated_image, cmap='gray')  # Use 'gray' colormap for grayscale images
plt.title('Updated Image')
plt.axis('off')  # Turn off axis labels

# Plot the reversed image on the right
plt.subplot(1, 2, 2)
plt.imshow(reversed_image, cmap='gray')
plt.title('Reversed Image')
plt.axis('off')  # Turn off axis labels

# Show the plot with both images
plt.show()

# Creating the dataset

In [None]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import pydicom as dicom
from sklearn.impute import SimpleImputer

def structure_for_train_csv(condition):
    """Turn a condition/level label into the matching train.csv column name.

    The text is lower-cased and every space and slash becomes an underscore.
    """
    # One translation pass maps both separators to "_".
    return condition.lower().translate(str.maketrans(' /', '__'))
class SpinalDataset(Dataset):
    """Labelled DICOM slices, each fitted to a fixed 512 x 512 frame.

    One item corresponds to one row of the coordinates CSV. The slice named
    by that row is loaded, scaled, centre-cropped and/or edge-padded to the
    target size, and returned with its one-hot severity label, a sample
    weight and the row's (x, y) point shifted into the fitted image.
    """

    # Size every image is fitted to (rows, columns).
    TARGET_HEIGHT = 512
    TARGET_WIDTH = 512

    def __init__(self, root_dir, coordinates_file, train, train_data, transform=None):
        """Load both CSV files and prepare the label encoders.

        Args:
            root_dir: Base directory of the data set.
            coordinates_file: CSV with one row per labelled point; the columns
                read are study_id, series_id, instance_number, condition,
                level, x and y.
            train: Name of the image sub-directory below ``root_dir``.
            train_data: CSV with one severity column per condition/level,
                looked up by ``study_id``.
            transform: Optional callable applied to the fitted image.
        """
        self.root_dir = root_dir
        self.coordinates = pd.read_csv(coordinates_file)
        self.train_data = pd.read_csv(train_data)
        self.train = train
        self.transform = transform

        # Fill missing severity strings so every lookup yields a known class.
        self.train_data = self.impute_missing_values(self.train_data)

        self.label_encoder = LabelEncoder()
        self.onehot_encoder = OneHotEncoder(sparse_output=False)

        # Fit both encoders on the full set of severity classes.
        conditions = ["Normal/Mild", "Moderate", "Severe"]
        self.label_encoder.fit(conditions)
        integer_encoded = self.label_encoder.transform(conditions).reshape(-1, 1)
        self.onehot_encoder.fit(integer_encoded)

        # Per-class sample weights.
        self.weights = {"Normal/Mild": 1, "Moderate": 2, "Severe": 4}

    def __len__(self):
        """Return the number of rows in the coordinates table."""
        return len(self.coordinates)

    def __getitem__(self, idx):
        """Return ``(image, label, weight, adjusted_x, adjusted_y)`` for row ``idx``."""
        row = self.coordinates.iloc[idx]
        study_id = row['study_id']
        series_id = row['series_id']
        instance = row['instance_number']
        condition = row['condition']
        level = row['level']
        x = row['x']
        y = row['y']

        # Construct the path to the DICOM
        dicom_file_path = os.path.join(self.root_dir, self.train, str(study_id), str(series_id), f"{instance}.dcm")

        # Load and process the DICOM image
        images, crop_top, crop_left = self.load_and_process_dicom_image(dicom_file_path)

        if self.transform:
            images = self.transform(images)

        # The offsets are signed per axis (positive = cropped, negative =
        # padded), so one subtraction handles cropping, padding and the mixed
        # case where one axis was cropped and the other padded.
        adjusted_x, adjusted_y = self.adjust_coordinates_after_cropping(x, y, crop_top, crop_left)

        # Severity column for this condition and level, e.g. "<condition>_<level>"
        condition_column = structure_for_train_csv(f'{condition}_{level}')
        label_str = self.train_data.loc[self.train_data['study_id'] == study_id, condition_column].values[0]

        # Encode the label
        label_encoded = self.label_encoder.transform([label_str])
        label_onehot = self.onehot_encoder.transform(label_encoded.reshape(-1, 1))
        label = torch.tensor(label_onehot, dtype=torch.float32).squeeze()

        # Calculate weight for the sample
        weight = self.weights.get(label_str, 1)  # Default to 1 if condition not found

        return images, label, weight, adjusted_x, adjusted_y

    def load_and_process_dicom_image(self, file_path):
        """Read a DICOM file, scale it by its maximum and fit it to the target size.

        Returns:
            ``(image, offset_top, offset_left)``. An offset is the number of
            pixels removed before the first kept row/column (positive) or the
            number of padding pixels added before it (negative, or zero).
        """
        # `dicom` is the module alias imported by this cell, so the local
        # result must not reuse that name.
        ds = dicom.dcmread(file_path)
        image = ds.pixel_array.astype(np.float32)

        # Divide by the maximum; skip all-zero images to avoid 0/0 -> NaN.
        peak = image.max()
        if peak > 0:
            image = image / peak

        # Crop the axes that are too long, then pad the ones that are too
        # short. On any one axis at most one of the two steps does something.
        image, crop_top, crop_left = self.crop_image(image)
        image, pad_offset_top, pad_offset_left = self.pad_image(image)

        return image, crop_top + pad_offset_top, crop_left + pad_offset_left

    def crop_image(self, image):
        """Centre-crop every axis that exceeds the target size.

        Returns:
            ``(cropped_image, crop_top, crop_left)`` with non-negative offsets.
        """
        target_height, target_width = self.TARGET_HEIGHT, self.TARGET_WIDTH
        current_height, current_width = image.shape

        # max(0, ...) leaves a short axis untouched instead of producing a
        # negative start index that would wrap around to the far edge.
        crop_top = max(0, (current_height - target_height) // 2)
        crop_left = max(0, (current_width - target_width) // 2)

        cropped_image = image[crop_top:crop_top + target_height,
                              crop_left:crop_left + target_width]
        return cropped_image, crop_top, crop_left

    def pad_image(self, image):
        """Pad every axis that is shorter than the target size.

        The border pixels are repeated outwards. Axes already at or above the
        target size are left unchanged.

        Returns:
            ``(padded_image, -pad_top, -pad_left)``; the offsets are negated
            so they follow the same sign convention as ``crop_image``.
        """
        target_height, target_width = self.TARGET_HEIGHT, self.TARGET_WIDTH
        current_height, current_width = image.shape

        pad_height = max(0, target_height - current_height)
        pad_width = max(0, target_width - current_width)

        # Split evenly; the odd pixel, if any, goes to the bottom/right.
        pad_top = pad_height // 2
        pad_bottom = pad_height - pad_top
        pad_left = pad_width // 2
        pad_right = pad_width - pad_left

        # mode="edge" repeats the outermost pixels, like cv2.BORDER_REPLICATE.
        padded_image = np.pad(image, ((pad_top, pad_bottom), (pad_left, pad_right)), mode='edge')

        return padded_image, -pad_top, -pad_left

    def adjust_coordinates_after_cropping(self, x, y, crop_top, crop_left):
        """
        Adjusts x and y coordinates after cropping.
        """
        adjusted_x = x - crop_left
        adjusted_y = y - crop_top
        return adjusted_x, adjusted_y

    def adjust_coordinates_after_padding(self, x, y, pad_top, pad_left):
        """
        Adjusts x and y coordinates after padding.
        """
        adjusted_x = x + pad_left
        adjusted_y = y + pad_top
        return adjusted_x, adjusted_y

    def impute_missing_values(self, df):
        """Fill missing values in the string columns of ``df`` with each column's most frequent value.

        ``df`` is modified in place and also returned.
        """
        # Select categorical columns for imputation
        categorical_columns = df.select_dtypes(include=['object']).columns

        # Initialize the SimpleImputer for categorical columns
        categorical_imputer = SimpleImputer(strategy='most_frequent')

        # Fit and transform the categorical columns
        df[categorical_columns] = categorical_imputer.fit_transform(df[categorical_columns])

        return df



from torch.utils.data import DataLoader

# Example usage
# Build the dataset from the competition files under INPUT_DIR; the
# positional arguments are root_dir, coordinates_file, train and train_data.
dataset = SpinalDataset(
    INPUT_DIR,
    f'{INPUT_DIR}/train_label_coordinates.csv',
    'train_images',
    f'{INPUT_DIR}/train.csv',
)

# Shuffled mini-batches of five samples.
dataloader = DataLoader(dataset, shuffle=True, batch_size=5)

# Walk through every batch and print its contents.
for images, labels, weights, x, y in dataloader:
    print("Images shape:", images.shape)
    print("Labels:", labels)
    print("Weights:", weights)
    print(f"x: {x}")
    print(f"y: {y}")

# Checking and adding padding and cropping

In [None]:
import cv2
import os
import matplotlib.pyplot as plt
import pydicom as dicom
import polars as pl

def paddingOrCroppingImage(dcm_data, target_height=512, target_width=512):
    """Fit a 2-D image to ``target_height`` x ``target_width``.

    Each axis is handled on its own: an axis that is too long is
    centre-cropped and an axis that is too short is padded on both sides by
    repeating the border pixels. An image that is too tall but too narrow
    (or the reverse) is cropped on one axis and padded on the other.

    Args:
        dcm_data: 2-D pixel array indexed as ``[row, col]``.
        target_height: Number of rows in the result.
        target_width: Number of columns in the result.

    Returns:
        ``(image, offset_top, offset_left)``. An offset is positive when that
        many pixels were cropped before the first kept row/column and negative
        when that many padding pixels were added, so a point in the input
        maps to ``(x - offset_left, y - offset_top)`` in the result.
    """
    # Imported here because this cell does not import numpy itself.
    import numpy as np

    current_height, current_width = dcm_data.shape

    # Crop only the axes that are too long. max(0, ...) keeps the start index
    # from going negative on a short axis, which would otherwise be read as an
    # index counted from the end and select the wrong pixels.
    crop_top = max(0, (current_height - target_height) // 2)
    crop_left = max(0, (current_width - target_width) // 2)
    fitted = dcm_data[crop_top:crop_top + target_height,
                      crop_left:crop_left + target_width]

    # Whatever is still missing after cropping is made up by padding; the odd
    # pixel, if any, goes to the bottom/right.
    pad_height = target_height - fitted.shape[0]
    pad_width = target_width - fitted.shape[1]
    pad_top = pad_height // 2
    pad_left = pad_width // 2
    if pad_height or pad_width:
        # mode="edge" repeats the outermost pixels, like cv2.BORDER_REPLICATE.
        fitted = np.pad(
            fitted,
            ((pad_top, pad_height - pad_top), (pad_left, pad_width - pad_left)),
            mode="edge",
        )

    # On each axis at most one of crop/pad is non-zero, so the difference is
    # the signed offset for that axis.
    return fitted, crop_top - pad_top, crop_left - pad_left

def reversed_images_cropping(image, target_height=320, target_width=320):
    """Return the centre ``target_height`` x ``target_width`` window of ``image``.

    An axis that is already shorter than its target is returned whole.

    Args:
        image: 2-D array indexed as ``image[row, col]``.
        target_height: Number of rows to keep.
        target_width: Number of columns to keep.

    Returns:
        The sliced region of ``image``.
    """
    current_height, current_width = image.shape

    # max(0, ...) prevents a negative start index when the image is smaller
    # than the target; a negative start would select rows/columns counted
    # from the end instead of the centre.
    crop_top = max(0, (current_height - target_height) // 2)
    crop_left = max(0, (current_width - target_width) // 2)

    return image[crop_top:crop_top + target_height,
                 crop_left:crop_left + target_width]

def graph_plot(study_id, series_id):
    """Show each labelled slice of a series at three processing stages.

    Reads the module-level ``train_label`` table and ``INPUT_DIR``. For every
    distinct ``instance_number`` of the given study and series, one figure
    with three panels is shown: the original image, the image after
    ``paddingOrCroppingImage`` and the image after
    ``reversed_images_cropping``. The label points are drawn on every panel,
    shifted by the offsets each step introduced, and the final coordinates
    are printed.

    Args:
        study_id: Value matched against the ``study_id`` column.
        series_id: Value matched against the ``series_id`` column.
    """
    instance_number_list = train_label.filter(
        (pl.col("study_id") == study_id) &
        (pl.col("series_id") == series_id)
    ).select("instance_number").unique().sort("instance_number").to_series().to_list()

    for instance_number in instance_number_list:
        ds = dicom.dcmread(f'{INPUT_DIR}/train_images/{study_id}/{series_id}/{instance_number}.dcm')
        original_image = ds.pixel_array

        # crop_top / crop_left are signed: positive when pixels were cropped,
        # negative when padding was added, so subtracting them is right in
        # both cases.
        updated_image, crop_top, crop_left = paddingOrCroppingImage(original_image)
        reversed_image = reversed_images_cropping(updated_image)

        df_plt = train_label.filter(
            (pl.col('study_id') == study_id) &
            (pl.col('series_id') == series_id) &
            (pl.col('instance_number') == instance_number)
        )
        # The last two columns of each row are used as x and y -- assumes
        # train_label ends with the coordinate columns; TODO confirm.
        points = [(row[-2], row[-1]) for row in df_plt.iter_rows()]

        plt.figure(figsize=(15, 5))

        # Original Image
        plt.subplot(1, 3, 1)
        plt.imshow(original_image, cmap='gray')
        plt.title('Original Image')
        plt.axis('off')
        for x, y in points:
            plt.scatter(x, y, color='red')  # Coordinates for original image

        # Updated Image (Cropped/Padded)
        plt.subplot(1, 3, 2)
        plt.imshow(updated_image, cmap='gray')
        plt.title('Updated Image')
        plt.axis('off')
        for x, y in points:
            plt.scatter(x - crop_left, y - crop_top, color='red')  # Adjusted coordinates for padding/cropping

        # Reversed Image (Final Cropping)
        plt.subplot(1, 3, 3)
        plt.imshow(reversed_image, cmap='gray')
        plt.title('Reversed Image')
        plt.axis('off')

        # Offsets introduced by the final centre crop, derived from the
        # difference between the two image sizes.
        reverse_crop_top = (updated_image.shape[0] - reversed_image.shape[0]) // 2
        reverse_crop_left = (updated_image.shape[1] - reversed_image.shape[1]) // 2

        for x, y in points:
            adjusted_x = x - crop_left - reverse_crop_left
            adjusted_y = y - crop_top - reverse_crop_top
            print(adjusted_x, adjusted_y)
            plt.scatter(adjusted_x, adjusted_y, color='red')  # Adjusted for final cropping

        plt.show()

# Example usage:
# Plot every labelled slice of one example series.
study_id, series_id = 4290709089, 3274612423
graph_plot(study_id, series_id)



# Counting the number of images and their average and maximum height and width

In [None]:
# Path to the image directory that findingShape() scans; its sub-folders are
# treated as studies, each containing series folders.
# NOTE(review): this is a relative path, so it resolves against the current
# working directory -- confirm it points at the same data as INPUT_DIR.
image_dir = "rsna-2024-lumbar-spine-degenerative-classification/train_images/"

def findingShape():
    """Scan every DICOM file under ``image_dir`` and print image-size statistics.

    The directory is expected to contain one folder per study, each holding
    one folder per series, each holding the image files. Every file is
    decoded; its height and width are added to running totals and compared
    with the running maxima. Progress is printed per file, files that cannot
    be processed are reported and skipped, and the integer averages plus the
    maxima are printed at the end.
    """
    count = 0
    sum_height, sum_width = 0, 0
    maxheight, maxwidth = 0, 0

    for study_name in os.listdir(image_dir):
        study_path = os.path.join(image_dir, study_name)
        if os.path.isdir(study_path):
            for series_name in os.listdir(study_path):
                series_path = os.path.join(study_path, series_name)
                if os.path.isdir(series_path):
                    for file_name in os.listdir(series_path):
                        dicom_path = os.path.join(series_path, file_name)

                        try:
                            pixels = pydicom.dcmread(dicom_path).pixel_array
                            rank = len(pixels.shape)

                            # Only 2-D and 3-D pixel arrays are accepted.
                            if rank not in (2, 3):
                                raise ValueError(f"Unexpected image shape: {pixels.shape}")

                            height, width = pixels.shape[0], pixels.shape[1]
                            if rank == 2:
                                print(f"Height: {pixels.shape[0]}, width: {pixels.shape[1]}, Image count")
                            else:
                                # 3-D arrays are reported by path only.
                                print(dicom_path)

                            # Update maxima and running totals.
                            maxheight = max(maxheight, height)
                            maxwidth = max(maxwidth, width)
                            sum_width += width
                            sum_height += height
                            count += 1
                            print(count)
                        except Exception as e:
                            print(f"Failed to process {dicom_path}: {e}")

    # Nothing to average when no file could be read.
    if count <= 0:
        print("No images")
        return

    avg_width = sum_width // count
    avg_height = sum_height // count
    print(f"Average Width: {avg_width}, Average Height: {avg_height}, maxheight: {maxheight}, maxwidth: {maxwidth}")

# Run the scan; it decodes every file under image_dir and prints as it goes.
findingShape()

# Testing one-hot encoding

In [None]:
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# The severity classes used as labels.
conditions = ["Normal/Mild", "Moderate", "Severe"]

# fit() returns the estimator itself, so creation and fitting can be chained.
label_encoder = LabelEncoder().fit(conditions)

# OneHotEncoder works on a 2-D column, hence the reshape of the integer codes.
integer_encoded = label_encoder.transform(conditions).reshape(-1, 1)
onehot_encoder = OneHotEncoder(sparse_output=False).fit(integer_encoded)

# Run a batch of example labels through both encoders.
labels = ["Normal/Mild", "Moderate", "Severe"]
integer_encoded = label_encoder.transform(labels).reshape(-1, 1)
onehot_encoded = onehot_encoder.transform(integer_encoded)

# Heading and array on separate lines, as two print calls would produce.
print("Integer Encoded:", integer_encoded, sep="\n")
print("One-Hot Encoded:", onehot_encoded, sep="\n")

# Creating the first dataset

In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
import pydicom as dicom
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import cv2

def structure_for_train_csv(condition):
    """Turn a condition/level label into the matching train.csv column name.

    The text is lower-cased and every space and slash becomes an underscore.
    """
    # One translation pass maps both separators to "_".
    return condition.lower().translate(str.maketrans(' /', '__'))

class SpinalDataset(Dataset):
    """Labelled DICOM slices, each fitted to a fixed 512 x 512 frame.

    One item corresponds to one row of the coordinates CSV: the slice named
    by that row is loaded, scaled, centre-cropped and/or edge-padded to the
    target size, and returned with its one-hot severity label and a sample
    weight.
    """

    def __init__(self, root_dir, coordinates_file, train, train_data, transform=None):
        """Load both CSV files and prepare the label encoders.

        Args:
            root_dir: Base directory of the data set.
            coordinates_file: CSV with one row per labelled point; the columns
                read are study_id, series_id, instance_number, condition,
                level, x and y.
            train: Name of the image sub-directory below ``root_dir``.
            train_data: CSV with one severity column per condition/level,
                looked up by ``study_id``.
            transform: Optional callable applied to the fitted image.
        """
        self.root_dir = root_dir
        self.coordinates = pd.read_csv(coordinates_file)
        self.train_data = pd.read_csv(train_data)
        self.train = train
        self.transform = transform

        # Define label encoder and one hot encoder
        self.label_encoder = LabelEncoder()
        self.onehot_encoder = OneHotEncoder(sparse_output=False)

        # Fit both encoders on the full set of severity classes.
        conditions = ["Normal/Mild", "Moderate", "Severe"]
        self.label_encoder.fit(conditions)
        integer_encoded = self.label_encoder.transform(conditions).reshape(-1, 1)
        self.onehot_encoder.fit(integer_encoded)

        # Per-class sample weights.
        self.weights = {"Normal/Mild": 1, "Moderate": 2, "Severe": 4}

    def __len__(self):
        """Return the number of rows in the coordinates table."""
        return len(self.coordinates)

    def __getitem__(self, idx):
        """Return ``(image, label, weight)`` for row ``idx`` of the coordinates table."""
        row = self.coordinates.iloc[idx]
        study_id = row['study_id']
        series_id = row['series_id']
        instance = row['instance_number']
        condition = row['condition']
        level = row['level']

        # Construct the path to the DICOM
        dicom_file_path = os.path.join(self.root_dir, self.train, str(study_id), str(series_id), f"{instance}.dcm")

        # The loader also reports the crop/pad offsets; only the image is
        # part of this item, so the offsets are discarded here.
        images, _offset_top, _offset_left = self.load_and_process_dicom_image(dicom_file_path)

        if self.transform:
            images = self.transform(images)

        # Severity column for this condition and level.
        condition_column = structure_for_train_csv(f'{condition}_{level}')
        label_str = self.train_data.loc[self.train_data['study_id'] == study_id, condition_column].values[0]

        # Encode the label
        label_encoded = self.label_encoder.transform([label_str])
        label_onehot = self.onehot_encoder.transform(label_encoded.reshape(-1, 1))
        label = torch.tensor(label_onehot, dtype=torch.float32).squeeze()

        # Calculate weight for the sample
        weight = self.weights.get(label_str, 1)  # Default to 1 if condition not found

        return images, label, weight

    def load_and_process_dicom_image(self, file_path):
        """Read a DICOM file, scale it by its maximum and fit it to 512 x 512.

        Returns:
            ``(image, offset_top, offset_left)``. An offset is positive when
            that many pixels were cropped before the first kept row/column and
            negative when that many padding pixels were added.
        """
        # `dicom` is the module alias imported by this cell, so the local
        # result must not reuse that name.
        ds = dicom.dcmread(file_path)
        image = ds.pixel_array.astype(np.float32)

        # Divide by the maximum; skip all-zero images to avoid 0/0 -> NaN.
        peak = image.max()
        if peak > 0:
            image = image / peak

        return self._fit_to_target(image)

    def paddingOrCroppingImage(self, dcm_data):
        """Return ``dcm_data`` centre-cropped and/or edge-padded to 512 x 512."""
        fitted, _offset_top, _offset_left = self._fit_to_target(dcm_data)
        return fitted

    def _fit_to_target(self, image, target_height=512, target_width=512):
        """Crop the axes that are too long and pad the ones that are too short.

        Returns ``(image, offset_top, offset_left)`` with signed offsets
        (positive = cropped, negative = padded).
        """
        current_height, current_width = image.shape

        # max(0, ...) leaves a short axis untouched instead of producing a
        # negative start index that would wrap around to the far edge.
        crop_top = max(0, (current_height - target_height) // 2)
        crop_left = max(0, (current_width - target_width) // 2)
        fitted = image[crop_top:crop_top + target_height,
                       crop_left:crop_left + target_width]

        # Whatever is still missing is padded; the odd pixel, if any, goes to
        # the bottom/right.
        pad_height = target_height - fitted.shape[0]
        pad_width = target_width - fitted.shape[1]
        pad_top = pad_height // 2
        pad_left = pad_width // 2
        if pad_height or pad_width:
            # mode='edge' repeats the outermost pixels, like cv2.BORDER_REPLICATE.
            fitted = np.pad(
                fitted,
                ((pad_top, pad_height - pad_top), (pad_left, pad_width - pad_left)),
                mode='edge',
            )

        # On each axis at most one of crop/pad is non-zero.
        return fitted, crop_top - pad_top, crop_left - pad_left

    def adjust_coordinates(self, x, y, crop_top=0, crop_left=0):
        """
        Adjusts x and y coordinates after cropping or padding.

        ``crop_top`` and ``crop_left`` are the signed offsets returned by
        ``load_and_process_dicom_image``; with the default of zero the
        coordinates are returned unchanged.
        """
        return x - crop_left, y - crop_top

# Example usage
# Build the dataset from the competition files under INPUT_DIR; the
# positional arguments are root_dir, coordinates_file, train and train_data.
dataset = SpinalDataset(
    INPUT_DIR,
    f'{INPUT_DIR}/train_label_coordinates.csv',
    'train_images',
    f'{INPUT_DIR}/train.csv',
)

# Shuffled mini-batches of five samples.
dataloader = DataLoader(dataset, shuffle=True, batch_size=5)

# Inspect the first batch only.
for images, labels, weights in dataloader:
    print("Images shape:", images.shape)
    print("Labels:", labels)
    print("Weights:", weights)
    break

# 