In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import pandas as pd 
import pydicom


In [3]:
# the path to the dataset

train_path = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/'

test_description = pd.read_csv(train_path + 'test_series_descriptions.csv')


In [4]:
test_description.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   study_id            3 non-null      int64 
 1   series_id           3 non-null      int64 
 2   series_description  3 non-null      object
dtypes: int64(2), object(1)
memory usage: 200.0+ bytes


In [5]:
import os
import cv2
import matplotlib.pyplot as plt

# Function to generate image paths based on directory structure
def generate_image_paths(df, data_dir):
    image_paths = []
    for study_id, series_id in zip(df['study_id'], df['series_id']):
        study_dir = os.path.join(data_dir, str(study_id))
        series_dir = os.path.join(study_dir, str(series_id))
        images = os.listdir(series_dir)
        image_paths.extend([os.path.join(series_dir, img) for img in images])
    return image_paths


test_image_paths = generate_image_paths(test_description, f'{train_path}/test_images')

# Function to visualize images using OpenCV
def visualize_image(image_path):
    # Read the image using OpenCV
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    
    # OpenCV reads images in BGR format, so no need to convert for grayscale
    if img is None:
        print(f"Image not found at path: {image_path}")
        return
    
    # Display the image using Matplotlib for better color support
    plt.imshow(img, cmap='rgb')
    plt.title(f"Image: {os.path.basename(image_path)}")
    plt.axis('off')  # Hide axis for better visualization
    plt.show()

## mapping conditions

In [6]:
condition_mapping = {
    'Sagittal T1': {'left': 'left_neural_foraminal_narrowing', 'right': 'right_neural_foraminal_narrowing'},
    'Axial T2': {'left': 'left_subarticular_stenosis', 'right': 'right_subarticular_stenosis'},
    'Sagittal T2/STIR': 'spinal_canal_stenosis'
}


In [7]:
base_path = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images/'


In [8]:
def get_image_paths(row):
    series_path = os.path.join(base_path, str(row['study_id']), str(row['series_id']))
    if os.path.exists(series_path):
        return [os.path.join(series_path, f) for f in os.listdir(series_path) if os.path.isfile(os.path.join(series_path, f))]
    return []


In [9]:
expanded_rows = []
for index, row in test_description.iterrows():
    image_paths = get_image_paths(row)
    conditions = condition_mapping.get(row['series_description'], {})
    if isinstance(conditions, str):  # Single condition
        conditions = {'left': conditions, 'right': conditions}
    for side, condition in conditions.items():
        for image_path in image_paths:
            expanded_rows.append({
                'study_id': row['study_id'],
                'series_id': row['series_id'],
                'series_description': row['series_description'],
                'image_path': image_path,
                'condition': condition,
                'row_id': f"{row['study_id']}_{condition}"
            })


In [10]:
test_df = pd.DataFrame(expanded_rows)

In [11]:
# Levels for row_id
levels = ['l1_l2', 'l2_l3', 'l3_l4', 'l4_l5', 'l5_s1']

# update row_id with levels
def update_row_id(row, levels):
    level = levels[row.name % len(levels)]  
    return f"{row['study_id']}_{row['condition']}_{level}"

# Update row_id in expanded_test_desc to include levels
test_df['row_id'] = test_df.apply(lambda row: update_row_id(row, levels), axis=1)

test_df.head()

Unnamed: 0,study_id,series_id,series_description,image_path,condition,row_id
0,44036939,2828203845,Sagittal T1,/kaggle/input/rsna-2024-lumbar-spine-degenerat...,left_neural_foraminal_narrowing,44036939_left_neural_foraminal_narrowing_l1_l2
1,44036939,2828203845,Sagittal T1,/kaggle/input/rsna-2024-lumbar-spine-degenerat...,left_neural_foraminal_narrowing,44036939_left_neural_foraminal_narrowing_l2_l3
2,44036939,2828203845,Sagittal T1,/kaggle/input/rsna-2024-lumbar-spine-degenerat...,left_neural_foraminal_narrowing,44036939_left_neural_foraminal_narrowing_l3_l4
3,44036939,2828203845,Sagittal T1,/kaggle/input/rsna-2024-lumbar-spine-degenerat...,left_neural_foraminal_narrowing,44036939_left_neural_foraminal_narrowing_l4_l5
4,44036939,2828203845,Sagittal T1,/kaggle/input/rsna-2024-lumbar-spine-degenerat...,left_neural_foraminal_narrowing,44036939_left_neural_foraminal_narrowing_l5_s1


In [12]:
class TestDataset:
    def __init__(self, dataframe, batch_size=16, image_size=(256, 256), normalize=False):
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.image_size = image_size
        self.normalize = normalize

    def load_image(self, image_path):
        if image_path.lower().endswith('.dcm'):
            dicom = pydicom.dcmread(image_path, force=True)
            image = dicom.pixel_array
        else:
            image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
            if image is None:
                raise FileNotFoundError(f"Could not load image from {image_path}")

        # Convert image to uint8 if necessary
        if image.dtype != np.uint8:
            image = image.astype(np.uint8)

        # If the image is grayscale, stack it to make 3 channels
        if len(image.shape) == 2:
            image = np.stack([image] * 3, axis=-1)

        # Normalize the image if the flag is set
        if self.normalize:
            image = image / 255.0  # Normalization to [0, 1]

        return image

    def __getitem__(self, index):
        # Get the starting index for this batch
        start_index = index * self.batch_size
        end_index = min((index + 1) * self.batch_size, len(self.dataframe))

        images = []
        row_ids = []

        for i in range(start_index, end_index):
            row = self.dataframe.iloc[i]
            image_path = row['image_path']
            row_id = row['row_id']

            # Load and resize image
            image = self.load_image(image_path)
            image = cv2.resize(image, self.image_size)

            images.append(image)
            row_ids.append(row_id)

        # Convert the list of images and row_ids to numpy arrays
        images = np.array(images)
        row_ids = np.array(row_ids)

        return images, row_ids

    def __len__(self):
        # Number of batches per epoch
        return int(np.ceil(len(self.dataframe) / self.batch_size))


In [13]:
test_dataset = TestDataset(test_df,batch_size = 16, image_size=(256, 256), normalize=True)


In [14]:
# Load the saved model (make sure this is the correct path to your model)
from tensorflow import keras
model = keras.models.load_model("/kaggle/input/vgg_model_trained/keras/default/1/vgg_UF_model_20epoch.keras")


In [15]:
from tqdm import tqdm
import numpy as np
import pandas as pd

# Initialize results storage
results = {
    'row_id': [],
    'normal_mild': [],
    'moderate': [],
    'severe': []
}

batch_size = 16  

# Use tqdm to create a progress bar for the entire dataset
with tqdm(total=len(test_dataset), desc="Processing images") as pbar:
    # Iterate over batches of data
    for idx in range(len(test_dataset)):
        try:
            # Get a batch of images and corresponding row IDs
            images, row_ids = test_dataset[idx]

            # Ensure the images have the shape (batch_size, 256, 256, 3)
            if images.shape[1:] != (256, 256, 3):
                raise ValueError(f"Image batch shape is {images.shape}, expected (?, 256, 256, 3)")

            # Make predictions on the batch with verbose=0 to suppress output
            predictions = model.predict(images, verbose=0)  # Shape: (batch_size, num_classes)

            # Append results for each image in the batch
            for i in range(len(row_ids)):
                probs = predictions[i]

                results['row_id'].append(row_ids[i])
                results['normal_mild'].append(probs[0])  # Class 0: Normal/Mild
                results['moderate'].append(probs[1])     # Class 1: Moderate
                results['severe'].append(probs[2])       # Class 2: Severe

            # Update the progress bar by the batch size
            pbar.update(1)

        except Exception as e:
            print(f"Error processing index {idx}: {e}")

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Normalize probabilities to ensure they sum to 1
results_df[['normal_mild', 'moderate', 'severe']] = results_df[['normal_mild', 'moderate', 'severe']].div(
    results_df[['normal_mild', 'moderate', 'severe']].sum(axis=1), axis=0
)

# Save the results to a CSV file
results_df.to_csv('test_predictions.csv', index=False)


I0000 00:00:1727670608.005851      94 service.cc:145] XLA service 0x7b278000b980 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1727670608.005949      94 service.cc:153]   StreamExecutor device (0): Host, Default Version
I0000 00:00:1727670608.878027      94 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
Processing images: 100%|██████████| 13/13 [01:33<00:00,  7.21s/it]


In [16]:
results_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 194 entries, 0 to 193
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   row_id       194 non-null    object 
 1   normal_mild  194 non-null    float32
 2   moderate     194 non-null    float32
 3   severe       194 non-null    float32
dtypes: float32(3), object(1)
memory usage: 3.9+ KB


In [17]:
# Average results per row_id
averaged_results_df = results_df.groupby('row_id', as_index=False).mean()

# Normalize probabilities to ensure they sum to 1
sum_probs = averaged_results_df[['normal_mild', 'moderate', 'severe']].sum(axis=1)
averaged_results_df['normal_mild'] = averaged_results_df['normal_mild'] / sum_probs
averaged_results_df['moderate'] = averaged_results_df['moderate'] / sum_probs
averaged_results_df['severe'] = averaged_results_df['severe'] / sum_probs

# Check for any invalid values
if (averaged_results_df[['normal_mild', 'moderate', 'severe']] < 0).any().any():
    raise ValueError("Found negative probabilities in submission.")

In [18]:
submission_df = averaged_results_df[['row_id', 'normal_mild', 'moderate', 'severe']]
submission_df

Unnamed: 0,row_id,normal_mild,moderate,severe
0,44036939_left_neural_foraminal_narrowing_l1_l2,0.161456,0.815732,0.022812
1,44036939_left_neural_foraminal_narrowing_l2_l3,0.18221,0.790531,0.027259
2,44036939_left_neural_foraminal_narrowing_l3_l4,0.190244,0.775897,0.033859
3,44036939_left_neural_foraminal_narrowing_l4_l5,0.206779,0.75591,0.037311
4,44036939_left_neural_foraminal_narrowing_l5_s1,0.19424,0.766061,0.039699
5,44036939_left_subarticular_stenosis_l1_l2,0.321329,0.595876,0.082795
6,44036939_left_subarticular_stenosis_l2_l3,0.322015,0.586604,0.091381
7,44036939_left_subarticular_stenosis_l3_l4,0.295149,0.612055,0.092796
8,44036939_left_subarticular_stenosis_l4_l5,0.28578,0.614149,0.100071
9,44036939_left_subarticular_stenosis_l5_s1,0.327212,0.5667,0.106088


In [19]:
submission_df.to_csv('submission.csv', index=False)
print("Submission file saved as 'submission.csv'.")

Submission file saved as 'submission.csv'.


In [20]:
# Save the submission file
submission_df.to_csv('/kaggle/working/submission.csv', index=False)