In [2]:
# packages

# standard
import numpy as np
import pandas as pd
import os
import time

# plots
import plotly.express as px
import seaborn as sns

# dicom
import pydicom as dicom
from sklearn.model_selection import train_test_split

# read data
df_train_main = pd.read_csv('../input/rsna-2024-lumbar-spine-degenerative-classification/train.csv')
df_train_label = pd.read_csv('../input/rsna-2024-lumbar-spine-degenerative-classification/train_label_coordinates.csv')
df_train_desc = pd.read_csv('../input/rsna-2024-lumbar-spine-degenerative-classification/train_series_descriptions.csv')
df_test_desc = pd.read_csv('../input/rsna-2024-lumbar-spine-degenerative-classification/test_series_descriptions.csv')
df_sub = pd.read_csv('../input/rsna-2024-lumbar-spine-degenerative-classification/sample_submission.csv')

In [13]:
import os
import pandas as pd
import numpy as np
import pydicom
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense, Input, Dropout
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm

# Function to load and preprocess test images
def load_and_preprocess_images(image_dir, unique_id, series_descriptions_df, target_size=(48, 48), images_per_type=5):
    images = []
    descriptions = ["Sagittal T1", "Sagittal T2/STIR", "Axial T2"]
    study_data = series_descriptions_df[series_descriptions_df['study_id'] == int(unique_id)]
    
    for description in descriptions:
        series = study_data[study_data['series_description'] == description]
        if not series.empty:
            series_id = series.iloc[0]['series_id']
            folder_path = os.path.join(image_dir, unique_id, str(series_id))
            if os.path.exists(folder_path):
                files = [f for f in os.listdir(folder_path) if f.endswith('.dcm')]
                if len(files) >= images_per_type:
                    selected_files = np.random.choice(files, images_per_type, replace=False)
                else:
                    selected_files = files + [None] * (images_per_type - len(files))
                
                for file in selected_files:
                    if file:
                        image_path = os.path.join(folder_path, file)
                        try:
                            # Load DICOM image
                            dicom = pydicom.dcmread(image_path)
                            image = dicom.pixel_array

                            # Convert image to uint8 if necessary
                            if image.dtype != np.uint8:
                                image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
                            image = cv2.resize(image, target_size)
                            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)  # Convert to RGB

                            images.append(image)
                        except Exception as e:
                            print(f"Error loading image {image_path}: {e}")
                            images.append(np.zeros((target_size[0], target_size[1], 3), dtype=np.float32))
                    else:
                        images.append(np.zeros((target_size[0], target_size[1], 3), dtype=np.float32))
            else:
                images.extend([np.zeros((target_size[0], target_size[1], 3), dtype=np.float32)] * images_per_type)
        else:
            images.extend([np.zeros((target_size[0], target_size[1], 3), dtype=np.float32)] * images_per_type)
    
    images = np.array(images).astype('float32') / 255.0
    return images

# Load all data and preprocess
def load_data(df, series_descriptions_df, image_dir, target_size=(48, 48), images_per_type=5):
    x_data = []
    y_data = []
    label_mapping = {
        "Normal/Mild": [0.6, 0.2, 0.2],
        "Moderate": [0.2, 0.6, 0.2],
        "Severe": [0.2, 0.2, 0.6]
    }
    x = 0
    for _, row in tqdm(df.iterrows(), total=len(df)):
        study_id = row['study_id']
        try:
            images = load_and_preprocess_images(image_dir, str(study_id), series_descriptions_df, target_size, images_per_type)
            x_data.append(images)

            labels = {}
            for condition in df.columns[1:]:
                original_label = row[condition]
                if not pd.isna(original_label):
                    labels[condition] = label_mapping.get(original_label, None)

            y_data.append(labels)
        except Exception as e:
            print(f"Error processing study_id {study_id}: {e}")
            continue

    return np.array(x_data), y_data

# Load the DataFrame
df_train_main = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train.csv')
df_train_desc = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_series_descriptions.csv')
image_dir = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images'

# Load the entire dataset
x_train, y_train = load_data(df_train_main, df_train_desc, image_dir, images_per_type=5)
print("Data Loaded")


  5%|▌         | 100/1975 [00:23<07:21,  4.24it/s]

Data Loaded





In [14]:

# Define the function to train the model for a specific condition
def train_condition_model(condition_name, x_train, y_train, image_size=(48, 48), epochs=2, batch_size=10, label_mapping=None, default='yes'):
    # Default label mapping
    default_label_mapping = {
        "Normal/Mild": [0.6, 0.2, 0.2],
        "Moderate": [0.2, 0.6, 0.2],
        "Severe": [0.2, 0.2, 0.6]
    }

    # Use default label mapping if not provided or if default is set to 'yes'
    if default == 'yes' or label_mapping is None:
        label_mapping = default_label_mapping

    # Convert all labels in y_train based on the provided mapping
    y_converted = []
    for labels in y_train:
        if condition_name in labels:
            original_label = tuple(labels[condition_name])  # Convert list to tuple
            mapped_label = label_mapping.get(original_label, default_label_mapping.get(original_label))
            if mapped_label is not None:
                y_converted.append(mapped_label)
            else:
                y_converted.append(labels[condition_name])
        else:
            y_converted.append([0, 0, 0])  # Fallback if condition_name not in labels

    x_filtered = np.array(x_train)
    y_filtered = np.array(y_converted)

    # Define the CNN-LSTM model with smaller filter sizes and LSTM units
    def create_model(input_shape):
        model = Sequential()
        model.add(Input(shape=input_shape))
        model.add(TimeDistributed(Conv2D(4, (3, 3), activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2))))
        model.add(TimeDistributed(Conv2D(4, (3, 3), activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2))))
        model.add(TimeDistributed(Conv2D(8, (3, 3), activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2))))
        model.add(TimeDistributed(Flatten()))
        model.add(Dropout(0.5))
        model.add(LSTM(100))
        model.add(Dense(3, activation='softmax'))

        model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    # Prepare the training and validation data
    split_index = int(0.9 * len(x_filtered))
    x_train_split, x_val_split = x_filtered[:split_index], x_filtered[split_index:]
    y_train_split, y_val_split = y_filtered[:split_index], y_filtered[split_index:]

    # Create the model
    input_shape = (15, image_size[0], image_size[1], 3)
    model = create_model(input_shape)

    # Train the model
    model.fit(x_train_split, y_train_split, validation_data=(x_val_split, y_val_split), epochs=epochs, batch_size=batch_size)

    return model

# Example usage with default label mapping
condition_name = 'spinal_canal_stenosis_l1_l2'
model = train_condition_model(condition_name, x_train, y_train, image_size=(48, 48), epochs=1, batch_size=10)

# Example usage with custom label mapping
custom_label_mapping = {
    "Normal/Mild": [0.4, 0.3, 0.3],
    "Moderate": [0.3, 0.4, 0.3],
    "Severe": [0.3, 0.3, 0.4]
}
model = train_condition_model('left_neural_foraminal_narrowing_l4_l5', x_train, y_train, image_size=(48, 48), epochs=1, batch_size=10, label_mapping=custom_label_mapping, default='no')


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 143ms/step - accuracy: 0.7799 - loss: 1.0205 - val_accuracy: 1.0000 - val_loss: 0.9591
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 141ms/step - accuracy: 0.4993 - loss: 1.0870 - val_accuracy: 0.6000 - val_loss: 1.0798


In [None]:
# List of conditions
conditions = [
    'left_neural_foraminal_narrowing_l1_l2',
    'left_neural_foraminal_narrowing_l2_l3',
    'left_neural_foraminal_narrowing_l3_l4',
    'left_neural_foraminal_narrowing_l5_s1',
    'left_subarticular_stenosis_l1_l2',
    'left_subarticular_stenosis_l2_l3',
    'right_neural_foraminal_narrowing_l1_l2',
    'right_neural_foraminal_narrowing_l2_l3',
    'right_neural_foraminal_narrowing_l3_l4',
    'right_neural_foraminal_narrowing_l5_s1',
    'right_subarticular_stenosis_l1_l2',
    'right_subarticular_stenosis_l2_l3',
    'right_subarticular_stenosis_l3_l4',
    'right_subarticular_stenosis_l5_s1',
    'spinal_canal_stenosis_l1_l2',
    'spinal_canal_stenosis_l2_l3',
    'spinal_canal_stenosis_l3_l4',
    'spinal_canal_stenosis_l4_l5',
    'spinal_canal_stenosis_l5_s1'
]

# Custom label mapping for specific conditions
custom_label_mapping = {
    "Normal/Mild": [0.4, 0.3, 0.3],
    "Moderate": [0.3, 0.4, 0.3],
    "Severe": [0.3, 0.3, 0.4]
}

# Conditions to use the custom label mapping
custom_conditions = [
    'left_neural_foraminal_narrowing_l4_l5',
    'left_subarticular_stenosis_l3_l4',
    'left_subarticular_stenosis_l4_l5',
    'left_subarticular_stenosis_l5_s1',
    'right_neural_foraminal_narrowing_l4_l5',
    'right_subarticular_stenosis_l4_l5'
]

# Train one model for each condition
trained_models = {}

for condition in conditions:
    print(f"Training model for condition: {condition}")
    if condition in custom_conditions:
        model = train_condition_model(condition, x_train, y_train, ids, df_train_label, image_size=(48, 48), epochs=5, batch_size=10, label_mapping=custom_label_mapping, default='no')
    else:
        model = train_condition_model(condition, x_train, y_train, ids, df_train_label, image_size=(48, 48), epochs=5, batch_size=10)
    trained_models[condition] = model

print("Training complete for all conditions.")


In [None]:
import gc
# Assuming x_train and y_train are already defined
del x_train
del y_train
# Explicitly run garbage collection to free up memory
gc.collect()
print("x_train and y_train deleted, and memory freed using gc.collect()")

In [None]:
import os
import pandas as pd
import numpy as np
import pydicom
import cv2

# Define the path to your test images directory
test_images_dir = "/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images"

# Load the test series descriptions
df_test_desc = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_series_descriptions.csv')

# Function to load and preprocess test images
def load_and_preprocess_images(image_dir, unique_id, series_descriptions_df, target_size=(48,48), images_per_type=5):
    images = []
    descriptions = ["Sagittal T1", "Sagittal T2/STIR", "Axial T2"]
    study_data = series_descriptions_df[series_descriptions_df['study_id'] == int(unique_id)]
    
    for description in descriptions:
        series = study_data[study_data['series_description'] == description]
        if not series.empty:
            series_id = series.iloc[0]['series_id']
            folder_path = os.path.join(image_dir, unique_id, str(series_id))
            if os.path.exists(folder_path):
                files = [f for f in os.listdir(folder_path) if f.endswith('.dcm')]
                if len(files) >= images_per_type:
                    selected_files = np.random.choice(files, images_per_type, replace=False)
                else:
                    selected_files = files + [None] * (images_per_type - len(files))
                
                for file in selected_files:
                    if file:
                        image_path = os.path.join(folder_path, file)
                        try:
                            # Load DICOM image
                            dicom = pydicom.dcmread(image_path)
                            image = dicom.pixel_array

                            # Convert image to uint8 if necessary
                            if image.dtype != np.uint8:
                                image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
                            image = cv2.resize(image, target_size)
                            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)  # Convert to RGB

                            images.append(image)
                        except Exception as e:
                            print(f"Error loading image {image_path}: {e}")
                            images.append(np.zeros((target_size[0], target_size[1], 3), dtype=np.float32))
                    else:
                        images.append(np.zeros((target_size[0], target_size[1], 3), dtype=np.float32))
            else:
                images.extend([np.zeros((target_size[0], target_size[1], 3), dtype=np.float32)] * images_per_type)
        else:
            images.extend([np.zeros((target_size[0], target_size[1], 3), dtype=np.float32)] * images_per_type)
    
    images = np.array(images).astype('float32') / 255.0
    return images

# List of conditions
conditions = ['left_neural_foraminal_narrowing_l1_l2',
              'left_neural_foraminal_narrowing_l2_l3',
              'left_neural_foraminal_narrowing_l3_l4',
              'left_neural_foraminal_narrowing_l4_l5',
              'left_neural_foraminal_narrowing_l5_s1',
              'left_subarticular_stenosis_l1_l2',
              'left_subarticular_stenosis_l2_l3',
              'left_subarticular_stenosis_l3_l4',
              'left_subarticular_stenosis_l4_l5',
              'left_subarticular_stenosis_l5_s1',
              'right_neural_foraminal_narrowing_l1_l2',
              'right_neural_foraminal_narrowing_l2_l3',
              'right_neural_foraminal_narrowing_l3_l4',
              'right_neural_foraminal_narrowing_l4_l5',
              'right_neural_foraminal_narrowing_l5_s1',
              'right_subarticular_stenosis_l1_l2',
              'right_subarticular_stenosis_l2_l3',
              'right_subarticular_stenosis_l3_l4',
              'right_subarticular_stenosis_l4_l5',
              'right_subarticular_stenosis_l5_s1',
              'spinal_canal_stenosis_l1_l2',
              'spinal_canal_stenosis_l2_l3',
              'spinal_canal_stenosis_l3_l4',
              'spinal_canal_stenosis_l4_l5',
              'spinal_canal_stenosis_l5_s1']

# Get all unique IDs from the test images directory
unique_ids = [folder for folder in os.listdir(test_images_dir) if os.path.isdir(os.path.join(test_images_dir, folder))]

# Generate the row_ids needed for submission by repeating each unique_id for each condition
row_ids = [f"{id}_{condition}" for id in unique_ids for condition in conditions]

# Create the submission DataFrame
df_submission = pd.DataFrame(row_ids, columns=['row_id'])
df_submission['normal_mild'] = 0.333333
df_submission['moderate'] = 0.333333
df_submission['severe'] = 0.333333

# Iterate over each unique ID and each condition to make predictions
for unique_id in unique_ids:
    for condition in conditions:
        row_id = f"{unique_id}_{condition}"
        try:
            # Load and preprocess test images
            images = load_and_preprocess_images(test_images_dir, unique_id, df_test_desc, images_per_type=5)

            # Get the corresponding trained model
            model = trained_models[condition]

            # Make prediction
            prediction = model.predict(np.expand_dims(images, axis=0))

            # Update the submission DataFrame
            df_submission.loc[df_submission['row_id'] == row_id, ['normal_mild', 'moderate', 'severe']] = prediction[0]
        except Exception as e:
            print(f"Error processing {row_id}: {e}")

print(df_submission.head())


In [None]:
df_submission.to_csv('submission.csv', index=False)

m