In [1]:
# packages

# standard
import numpy as np
import pandas as pd
import os
import time

# plots
import plotly.express as px
import seaborn as sns

# dicom
import pydicom as dicom
from sklearn.model_selection import train_test_split

In [2]:
# read data
df_train_main = pd.read_csv('../input/rsna-2024-lumbar-spine-degenerative-classification/train.csv')
df_train_label = pd.read_csv('../input/rsna-2024-lumbar-spine-degenerative-classification/train_label_coordinates.csv')
df_train_desc = pd.read_csv('../input/rsna-2024-lumbar-spine-degenerative-classification/train_series_descriptions.csv')
df_test_desc = pd.read_csv('../input/rsna-2024-lumbar-spine-degenerative-classification/test_series_descriptions.csv')
df_sub = pd.read_csv('../input/rsna-2024-lumbar-spine-degenerative-classification/sample_submission.csv')

In [None]:
import os
import pandas as pd
import numpy as np
import pydicom
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense, Input
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm

# Function to load and preprocess test images
def load_and_preprocess_images(image_dir, unique_id, target_size=(32, 32), images_per_batch=5):
    images = []
    subfolders = [f.path for f in os.scandir(os.path.join(image_dir, unique_id)) if f.is_dir()]

    for folder in subfolders:
        files = [f for f in os.listdir(folder) if f.endswith('.dcm')]
        for file in files:
            image_path = os.path.join(folder, file)
            try:
                # Load DICOM image
                dicom = pydicom.dcmread(image_path)
                image = dicom.pixel_array

                # Convert image to uint8 if necessary
                if image.dtype != np.uint8:
                    image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
                image = cv2.resize(image, target_size)
                image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)  # Convert to RGB

                images.append(image)
            except Exception as e:
                print(f"Error loading image {image_path}: {e}")

    # If fewer than required images, use padding
    if len(images) < images_per_batch:
        padding = [np.zeros((target_size[0], target_size[1], 3), dtype=np.float32)] * (images_per_batch - len(images))
        images.extend(padding)
    else:
        images = sorted(images, key=lambda x: np.random.random())[:images_per_batch]

    images = np.array(images).astype('float32') / 255.0
    return images

# Load all data and preprocess
def load_data(df, image_dir, target_size=(32, 32), images_per_batch=5):
    x_data = []
    y_data = []
    label_mapping = {
        "Normal/Mild": [0.5, 0.25, 0.25],
        "Moderate": [0.25, 0.5, 0.25],
        "Severe": [0.25, 0.25, 0.5]
    }

    for _, row in tqdm(df.iterrows(), total=len(df)):
        study_id = row['study_id']
        images = load_and_preprocess_images(image_dir, str(study_id), target_size, images_per_batch)
        x_data.append(images)

        labels = {}
        for condition in df.columns[1:]:
            original_label = row[condition]
            if not pd.isna(original_label):
                labels[condition] = label_mapping.get(original_label, None)

        y_data.append(labels)

    return np.array(x_data), y_data

# Define the function to train the model for a specific condition
def train_condition_model(condition_name, x_train, y_train, image_size=(32, 32), epochs=2, batch_size=10):
    # Filter the labels based on the condition and ensure both x_train and y_filtered are of the same length
    filtered_data = [(x, labels[condition_name]) for x, labels in zip(x_train, y_train) if condition_name in labels]
    x_filtered, y_filtered = zip(*filtered_data)

    # Convert to numpy arrays
    x_filtered = np.array(x_filtered)
    y_filtered = np.array(y_filtered)

    # Define the CNN-LSTM model with smaller filter sizes and LSTM units
    def create_model(input_shape):
        model = Sequential()
        model.add(Input(shape=input_shape))
        model.add(TimeDistributed(Conv2D(4, (3, 3), activation='relu')))
        model.add(TimeDistributed(Conv2D(4, (3, 3), activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2))))
        model.add(TimeDistributed(MaxPooling2D((2, 2))))
        model.add(TimeDistributed(Conv2D(8, (3, 3), activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2))))
        model.add(TimeDistributed(Flatten()))
        model.add(LSTM(50))
        model.add(Dense(3, activation='softmax'))

        model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    # Prepare the training and validation data
    split_index = int(0.9 * len(x_filtered))
    x_train_split, x_val_split = x_filtered[:split_index], x_filtered[split_index:]
    y_train_split, y_val_split = y_filtered[:split_index], y_filtered[split_index:]

    # Create the model
    input_shape = (5, image_size[0], image_size[1], 3)
    model = create_model(input_shape)

    # Train the model
    model.fit(x_train_split, y_train_split, validation_data=(x_val_split, y_val_split), epochs=epochs, batch_size=batch_size)

    return model

# Load the DataFrame
df_train_main = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train.csv')
image_dir = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images'

# Load the entire dataset
x_train, y_train = load_data(df_train_main, image_dir, images_per_batch=5)
print("Data Loaded")

# Example usage
condition_name = 'spinal_canal_stenosis_l1_l2'
model = train_condition_model(condition_name, x_train, y_train, image_size=(32, 32), epochs=1, batch_size=10)


In [7]:

# List of conditions
conditions = ['left_neural_foraminal_narrowing_l1_l2',
              'left_neural_foraminal_narrowing_l2_l3',
              'left_neural_foraminal_narrowing_l3_l4',
              'left_neural_foraminal_narrowing_l4_l5',
              'left_neural_foraminal_narrowing_l5_s1',
              'left_subarticular_stenosis_l1_l2',
              'left_subarticular_stenosis_l2_l3',
              'left_subarticular_stenosis_l3_l4',
              'left_subarticular_stenosis_l4_l5',
              'left_subarticular_stenosis_l5_s1',
              'right_neural_foraminal_narrowing_l1_l2',
              'right_neural_foraminal_narrowing_l2_l3',
              'right_neural_foraminal_narrowing_l3_l4',
              'right_neural_foraminal_narrowing_l4_l5',
              'right_neural_foraminal_narrowing_l5_s1',
              'right_subarticular_stenosis_l1_l2',
              'right_subarticular_stenosis_l2_l3',
              'right_subarticular_stenosis_l3_l4',
              'right_subarticular_stenosis_l4_l5',
              'right_subarticular_stenosis_l5_s1',
              'spinal_canal_stenosis_l1_l2',
              'spinal_canal_stenosis_l2_l3',
              'spinal_canal_stenosis_l3_l4',
              'spinal_canal_stenosis_l4_l5',
              'spinal_canal_stenosis_l5_s1']

# Train one model for each condition
trained_models = {}

for condition in conditions:
    print(f"Training model for condition: {condition}")
    model = train_condition_model(condition, x_train, y_train, image_size=(32, 32), epochs=1, batch_size=10)
    trained_models[condition] = model

print("Training complete for all conditions.")

Training model for condition: left_neural_foraminal_narrowing_l1_l2
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - accuracy: 0.9569 - loss: 1.0522 - val_accuracy: 0.9848 - val_loss: 1.0425
Training model for condition: left_neural_foraminal_narrowing_l2_l3
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - accuracy: 0.7667 - loss: 1.0685 - val_accuracy: 0.9343 - val_loss: 1.0513
Training model for condition: left_neural_foraminal_narrowing_l3_l4
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 35ms/step - accuracy: 0.7503 - loss: 1.0741 - val_accuracy: 0.7879 - val_loss: 1.0693
Training model for condition: left_neural_foraminal_narrowing_l4_l5
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 31ms/step - accuracy: 0.5732 - loss: 1.0869 - val_accuracy: 0.6566 - val_loss: 1.0809
Training model for condition: left_neural_foraminal_narrowing_l5_s1
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

In [8]:
import pandas as pd
import numpy as np
import os
import pydicom
import cv2

# Define the path to your test images directory
test_images_dir = "/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images"

# Function to load and preprocess test images
def load_and_preprocess_images(image_dir, unique_id, target_size=(32, 32), images_per_batch=5):
    images = []
    subfolders = [f.path for f in os.scandir(os.path.join(image_dir, unique_id)) if f.is_dir()]

    for folder in subfolders:
        files = [f for f in os.listdir(folder) if f.endswith('.dcm')]
        for file in files:
            image_path = os.path.join(folder, file)
            try:
                # Load DICOM image
                dicom = pydicom.dcmread(image_path)
                image = dicom.pixel_array

                # Convert image to uint8 if necessary
                if image.dtype != np.uint8:
                    image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
                image = cv2.resize(image, target_size)
                image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)  # Convert to RGB

                images.append(image)
            except Exception as e:
                print(f"Error loading image {image_path}: {e}")

    # If fewer than required images, use padding
    if len(images) < images_per_batch:
        padding = [np.zeros((target_size[0], target_size[1], 3), dtype=np.float32)] * (images_per_batch - len(images))
        images.extend(padding)
    else:
        images = sorted(images, key=lambda x: np.random.random())[:images_per_batch]

    images = np.array(images).astype('float32') / 255.0
    return images

# List of conditions
conditions = ['left_neural_foraminal_narrowing_l1_l2',
              'left_neural_foraminal_narrowing_l2_l3',
              'left_neural_foraminal_narrowing_l3_l4',
              'left_neural_foraminal_narrowing_l4_l5',
              'left_neural_foraminal_narrowing_l5_s1',
              'left_subarticular_stenosis_l1_l2',
              'left_subarticular_stenosis_l2_l3',
              'left_subarticular_stenosis_l3_l4',
              'left_subarticular_stenosis_l4_l5',
              'left_subarticular_stenosis_l5_s1',
              'right_neural_foraminal_narrowing_l1_l2',
              'right_neural_foraminal_narrowing_l2_l3',
              'right_neural_foraminal_narrowing_l3_l4',
              'right_neural_foraminal_narrowing_l4_l5',
              'right_neural_foraminal_narrowing_l5_s1',
              'right_subarticular_stenosis_l1_l2',
              'right_subarticular_stenosis_l2_l3',
              'right_subarticular_stenosis_l3_l4',
              'right_subarticular_stenosis_l4_l5',
              'right_subarticular_stenosis_l5_s1',
              'spinal_canal_stenosis_l1_l2',
              'spinal_canal_stenosis_l2_l3',
              'spinal_canal_stenosis_l3_l4',
              'spinal_canal_stenosis_l4_l5',
              'spinal_canal_stenosis_l5_s1']

# Get all unique IDs from the test images directory
unique_ids = [folder for folder in os.listdir(test_images_dir) if os.path.isdir(os.path.join(test_images_dir, folder))]

# Generate the row_ids needed for submission by repeating each unique_id for each condition
row_ids = [f"{id}_{condition}" for id in unique_ids for condition in conditions]

# Create the submission DataFrame
df_submission = pd.DataFrame(row_ids, columns=['row_id'])
df_submission['normal_mild'] = 0.333333
df_submission['moderate'] = 0.333333
df_submission['severe'] = 0.333333

# Iterate over each unique ID and each condition to make predictions
for unique_id in unique_ids:
    for condition in conditions:
        row_id = f"{unique_id}_{condition}"
        try:
            # Load and preprocess test images
            images = load_and_preprocess_images(test_images_dir, unique_id, images_per_batch=5)

            # Get the corresponding trained model
            model = trained_models[condition]

            # Make prediction
            prediction = model.predict(np.expand_dims(images, axis=0))

            # Update the submission DataFrame
            df_submission.loc[df_submission['row_id'] == row_id, ['normal_mild', 'moderate', 'severe']] = prediction[0]
        except Exception as e:
            print(f"Error processing {row_id}: {e}")

print(df_submission.head())


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 537ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 491ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 474ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 500ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 489ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 478ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 488ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 483ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 496ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 482ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 494ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 481ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 492ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [9]:
df_submission.to_csv('submission.csv', index=False)