In [None]:
import numpy as np
import pandas as pd
import os
import pydicom
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
# Load the competition metadata tables from the Kaggle input directory.
# train: wide severity table, reshaped to long format by reshape_dataframe below.
train  = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train.csv')
# label: per-label rows, joined below on study_id / condition / level.
label = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_label_coordinates.csv')
# train_desc / test_desc: series tables keyed by study_id and series_id,
# carrying the series_description used later in the notebook.
train_desc = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_series_descriptions.csv')
test_desc = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_series_descriptions.csv')

In [None]:
# Preview the data
train.head()

In [None]:
train_desc.head()

In [None]:
train.isnull().sum()

In [None]:
# Function to generate image paths based on directory structure
def generate_image_paths(df, data_dir):
    """Collect the path of every file stored under each (study, series) folder.

    Parameters
    ----------
    df : DataFrame or mapping
        Must provide equally long 'study_id' and 'series_id' columns.
    data_dir : str
        Root directory laid out as ``<data_dir>/<study_id>/<series_id>/<file>``.

    Returns
    -------
    list of str
        File paths in the row order of ``df``; within one series the entries
        are sorted by file name so the result is reproducible across runs.

    Raises
    ------
    FileNotFoundError
        Propagated from ``os.listdir`` when a series directory does not exist.
    """
    image_paths = []
    for study_id, series_id in zip(df['study_id'], df['series_id']):
        series_dir = os.path.join(data_dir, str(study_id), str(series_id))
        # os.listdir returns entries in arbitrary order; sort for determinism.
        image_paths.extend(
            os.path.join(series_dir, name) for name in sorted(os.listdir(series_dir))
        )
    return image_paths

# Generate image paths for train and test data
train_image_paths = generate_image_paths(train_desc, '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images')
test_image_paths = generate_image_paths(test_desc, '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images')

In [None]:
len(train_desc)

In [None]:
len(train_image_paths)

In [None]:
def reshape_dataframe(df):
    """Turn the wide severity table into one row per (study, condition, level).

    Every column not listed in the skip tuple is read as
    ``<condition words>_<level part>_<level part>``: the leading words become a
    capitalised, space-separated condition name and the last two parts become a
    capitalised ``A/B`` level. The cell value is kept as ``severity``.

    Returns a new DataFrame with the columns study_id, condition, level and
    severity, ordered row by row and, within a row, column by column.
    """
    skip = ('study_id', 'series_id', 'instance_number', 'x', 'y', 'series_description')

    def _condition_of(column_name):
        # All name parts except the trailing two level parts.
        return ' '.join(part.capitalize() for part in column_name.split('_')[:-2])

    def _level_of(column_name):
        # The trailing two name parts, e.g. "l1", "l2" -> "L1/L2".
        parts = column_name.split('_')
        return parts[-2].capitalize() + '/' + parts[-1].capitalize()

    label_columns = [name for name in df.columns if name not in skip]

    long_rows = []
    for _, wide_row in df.iterrows():
        for name in label_columns:
            entry = {'study_id': wide_row['study_id']}
            entry['condition'] = _condition_of(name)
            entry['level'] = _level_of(name)
            entry['severity'] = wide_row[name]
            long_rows.append(entry)

    return pd.DataFrame(long_rows)

# Reshape the DataFrame
new_train_df = reshape_dataframe(train)

# Display the first few rows of the reshaped DataFrame
new_train_df.head()

In [None]:
# Print columns in a neat way
print("\nColumns in new_train_df:")
print(",".join(new_train_df.columns))

print("\nColumns in label:")
print(",".join(label.columns))

print("\nColumns in test_desc:")
print(",".join(test_desc.columns))

In [None]:
# Attach the label rows: inner-join the long-format severities with the label
# table on the keys that identify one (study, condition, level) entry.
merged_df = pd.merge(new_train_df, label, on=['study_id', 'condition', 'level'], how='inner')
# Attach the series descriptions. Both frames carry study_id, so join on
# series_id AND study_id: joining on series_id alone would split study_id into
# study_id_x / study_id_y and break later lookups of final_merged_df['study_id'].
final_merged_df = pd.merge(merged_df, train_desc, on=['series_id', 'study_id'], how='inner')

In [None]:
# Merge the series descriptions in on the shared keys 'series_id' and 'study_id'
# (this rebuilds final_merged_df from merged_df and train_desc)
final_merged_df = pd.merge(merged_df, train_desc, on=['series_id','study_id'], how='inner')
# Display the first few rows of the final merged dataframe
final_merged_df.head()

In [None]:
final_merged_df[final_merged_df['study_id'] == 4003253].sort_values(['x','y'],ascending = True)

In [None]:
final_merged_df[final_merged_df['series_id'] == 2448190387].sort_values("instance_number")

In [None]:
# Filter the dataframe for the given study_id and sort by instance_number
filtered_df = final_merged_df[final_merged_df['study_id'] == 4003253].sort_values("instance_number")

# Display the resulting dataframe
filtered_df

In [None]:
# Rows of study 4003253, sorted by series_id, then series_description, then instance_number
sorted_final_merged_df = final_merged_df[final_merged_df['study_id'] == 4003253].sort_values(by=['series_id', 'series_description', 'instance_number'])
sorted_final_merged_df

In medical imaging for spinal conditions, specific MRI sequences are often used to identify different types of spinal stenosis:

- **Sagittal T1-weighted images** are primarily utilized to evaluate **Neural Foraminal Narrowing**. 
- **Axial T2-weighted images** are crucial for assessing **Subarticular Stenosis**. 
- **Sagittal T2-weighted or STIR (Short Tau Inversion Recovery) images** are typically employed to detect and analyze **Spinal Canal Stenosis**.

These imaging sequences are chosen for their ability to provide the most relevant anatomical and pathological information for each specific type of stenosis.

In [None]:
# Create the row_id column: "<study_id>_<condition>_<level>", lower-cased, with
# spaces in the condition and '/' in the level replaced by underscores
final_merged_df['row_id'] = (
    final_merged_df['study_id'].astype(str) + '_' +
    final_merged_df['condition'].str.lower().str.replace(' ', '_') + '_' +
    final_merged_df['level'].str.lower().str.replace('/', '_')
)

# Create the image_path column: <train_images>/<study_id>/<series_id>/<instance_number>.dcm
final_merged_df['image_path'] = (
    '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/' + 
    final_merged_df['study_id'].astype(str) + '/' +
    final_merged_df['series_id'].astype(str) + '/' +
    final_merged_df['instance_number'].astype(str) + '.dcm'
)

# NOTE: check the image paths. Each row points at the single labelled instance of
# its series, but a series folder holds many more images than the labelled instance numbers.

# Display the updated dataframe
final_merged_df.head()

In [None]:
# Number of rows labelled "Normal/Mild". Summing the boolean mask counts every
# matching row; DataFrame.value_counts() would silently skip rows containing NA.
(final_merged_df["severity"] == "Normal/Mild").sum()

In [None]:
# Number of rows labelled "Moderate". Summing the boolean mask counts every
# matching row; DataFrame.value_counts() would silently skip rows containing NA.
(final_merged_df["severity"] == "Moderate").sum()

In [None]:
# Root folder of the test images (<base_path>/<study_id>/<series_id>/<file>)
base_path = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images'

# List the files belonging to one series
def get_image_paths(row):
    """Return the paths of all regular files in the series folder of ``row``.

    ``row`` must provide 'study_id' and 'series_id'. An empty list is returned
    when the series folder does not exist under ``base_path``.
    """
    series_path = os.path.join(base_path, str(row['study_id']), str(row['series_id']))
    if not os.path.exists(series_path):
        return []

    file_paths = []
    for entry in os.listdir(series_path):
        candidate = os.path.join(series_path, entry)
        # Sub-directories and other non-file entries are ignored.
        if os.path.isfile(candidate):
            file_paths.append(candidate)
    return file_paths

# Mapping of series_description to the condition(s) read from that series:
# a {'left': ..., 'right': ...} dict for lateralised conditions, a plain string otherwise
condition_mapping = {
    'Sagittal T1': {'left': 'left_neural_foraminal_narrowing', 'right': 'right_neural_foraminal_narrowing'},
    'Axial T2': {'left': 'left_subarticular_stenosis', 'right': 'right_subarticular_stenosis'},
    'Sagittal T2/STIR': 'spinal_canal_stenosis'
}

# Create a list to store the expanded rows
expanded_rows = []

# Expand the dataframe: one row per (image file, condition) of every test series.
# Series whose description is not in condition_mapping contribute no rows.
for index, row in test_desc.iterrows():
    image_paths = get_image_paths(row)
    conditions = condition_mapping.get(row['series_description'], {})
    if isinstance(conditions, str):
        # Single, non-lateralised condition: keep exactly one entry. Mapping it
        # to both 'left' and 'right' would emit every image row twice.
        conditions = {'center': conditions}
    for condition in conditions.values():
        for image_path in image_paths:
            expanded_rows.append({
                'study_id': row['study_id'],
                'series_id': row['series_id'],
                'series_description': row['series_description'],
                'image_path': image_path,
                'condition': condition,
                # NOTE(review): unlike the training row_id built earlier
                # (<study_id>_<condition>_<level>), this id carries no level — confirm intended.
                'row_id': f"{row['study_id']}_{condition}"
            })

# Create a new dataframe from the expanded rows
expanded_test_desc = pd.DataFrame(expanded_rows)

# Display the resulting dataframe
expanded_test_desc.head(5)

In [None]:
# Replace null values in the 'severity' column with 'Normal/Mild'.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: that chained in-place form is deprecated in pandas 2.x and leaves
# the frame unchanged once Copy-on-Write is active.
final_merged_df['severity'] = final_merged_df['severity'].fillna('Normal/Mild')

In [None]:
test_data = expanded_test_desc
train_data = final_merged_df

In [None]:
train_data.isnull().sum()

In [None]:
train_data

Check whether the coordinates contain any errors or outliers that need to be removed.

# Data Visualizations

In [None]:
# Summary statistics (describe()) of the 'x' and 'y' coordinate columns
x_stats, y_stats = (train_data[axis].describe() for axis in ('x', 'y'))

# Each header is followed by its statistics on the next line
print("X Coordinate Statistics:", x_stats, sep="\n")
print("\nY Coordinate Statistics:", y_stats, sep="\n")

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create a histogram for 'x' values (nbinsx caps the number of bins at 30)
x_hist = go.Histogram(
    x=train_data['x'],
    nbinsx=30,
    name='X Coordinates',
    marker_color='blue',
    opacity=0.7
)

# Create a histogram for 'y' values (passed as `x` so the bars stay vertical)
y_hist = go.Histogram(
    x=train_data['y'],
    nbinsx=30,
    name='Y Coordinates',
    marker_color='green',
    opacity=0.7
)

# Create a figure with two side-by-side subplots, one per coordinate
fig = make_subplots(rows=1, cols=2, subplot_titles=('Distribution of X Coordinates', 'Distribution of Y Coordinates'))

# Add the histograms to the figure
fig.add_trace(x_hist, row=1, col=1)
fig.add_trace(y_hist, row=1, col=2)

# Update layout for a cleaner look; xaxis/yaxis label the first subplot,
# xaxis2/yaxis2 the second
fig.update_layout(
    title_text="Distribution of X and Y Coordinates",
    showlegend=False,
    xaxis_title="X Values",
    yaxis_title="Frequency",
    xaxis2_title="Y Values",
    yaxis2_title="Frequency",
    bargap=0.2,  # Gap between bars
)

# Show the plot
fig.show()

In [None]:
import plotly.express as px

# Count how many rows of train_data carry each value of the 'condition' column
condition_counts = train_data['condition'].value_counts()

# Create a bar chart using Plotly: one bar per condition, bar height = row count
fig = px.bar(
    x=condition_counts.index,
    y=condition_counts.values,
    labels={'x': 'Condition', 'y': 'Count'},
    title='Count of Each Condition'
)

# Display the interactive plot
fig.show()

In [None]:
# Count the occurrences of each severity level (also reused by the pie chart cell)
severity_counts = train_data['severity'].value_counts()

# Create a bar chart using Plotly: one bar per severity level
# NOTE(review): the 'index' / 'y' label keys may not match the axis names Plotly
# derives from the arrays passed here — confirm the axis titles in the rendered figure.
fig_bar = px.bar(
    severity_counts,
    x=severity_counts.index,
    y=severity_counts.values,
    labels={'index': 'Severity', 'y': 'Count'},
    title='Count of Each Severity Level'
)

# Display the bar chart
fig_bar.show()

In [None]:
# Create a pie chart using Plotly from the severity_counts computed in the previous cell
fig_pie = px.pie(
    severity_counts,
    names=severity_counts.index,
    values=severity_counts.values,
    title='Distribution of Severity Levels',
    hole=0.3  # For a donut chart, otherwise remove this parameter
)

# Display the pie chart
fig_pie.show()

In [None]:
import plotly.express as px

# Count the rows for every (condition, severity) pair; the result has one row
# per pair with the size stored in a 'count' column
severity_condition_counts = train_data.groupby(['condition', 'severity']).size().reset_index(name='count')

# Create a grouped bar chart: conditions on the x axis, one bar per severity
fig = px.bar(
    severity_condition_counts,
    x='condition',
    y='count',
    color='severity',
    barmode='group',
    title='Distribution of Severities for Each Condition',
    labels={'condition': 'Condition', 'count': 'Number of Cases', 'severity': 'Severity'},
    color_discrete_sequence=px.colors.qualitative.Set1  # Custom color sequence
)

# Update the layout for better presentation
fig.update_layout(
    xaxis_title='Condition',
    yaxis_title='Number of Cases',
    legend_title='Severity',
    bargap=0.15,
    bargroupgap=0.1
)

fig.show()

In [None]:
# Count the rows for every (condition, series_description) pair.
# NOTE: despite its name, severity_condition_counts holds series_description
# counts here (the name is reused from the severity chart cell).
severity_condition_counts = train_data.groupby(['condition', 'series_description']).size().reset_index(name='count')

# Create a grouped bar chart: conditions on the x axis, one bar per series description
fig = px.bar(
    severity_condition_counts,
    x='condition',
    y='count',
    color='series_description',
    barmode='group',
    title='Distribution of Condition for Respective Angle',
    labels={'condition': 'Condition', 'count': 'Number of Cases', 'series_description': 'Angle of MR Image'},
    color_discrete_sequence=px.colors.qualitative.Set1  # Custom color sequence
)

# Update the layout for better presentation
fig.update_layout(
    xaxis_title='Condition',
    yaxis_title='Number of Cases',
    legend_title='Angle',
    bargap=0.15,
    bargroupgap=0.1
)

fig.show()

In medical imaging for spinal conditions, specific MRI sequences are often used to identify different types of spinal stenosis:

- **Sagittal T1-weighted images** are primarily utilized to evaluate **Neural Foraminal Narrowing**. 
- **Axial T2-weighted images** are crucial for assessing **Subarticular Stenosis**. 
- **Sagittal T2-weighted or STIR (Short Tau Inversion Recovery) images** are typically employed to detect and analyze **Spinal Canal Stenosis**.

These imaging sequences are chosen for their ability to provide the most relevant anatomical and pathological information for each specific type of stenosis.

In [None]:
# Group by 'condition' and 'level' and count the rows in each pair
level_condition_counts = train_data.groupby(['condition', 'level']).size().reset_index(name='count')

# Create a grouped bar chart: conditions on the x axis, one bar per level
fig = px.bar(
    level_condition_counts,
    x='condition',
    y='count',
    color='level',
    barmode='group',
    title='Distribution of Levels for Each Condition',
    labels={'condition': 'Condition', 'count': 'Number of Cases', 'level': 'Level'},
    color_discrete_sequence=px.colors.qualitative.Set1  # Custom color sequence
)

# Update the layout for better presentation
fig.update_layout(
    xaxis_title='Condition',
    yaxis_title='Number of Cases',
    legend_title='Level',
    bargap=0.15,
    bargroupgap=0.1
)

fig.show()

In [None]:
# Count the rows for every (level, condition) pair; this overwrites the
# level_condition_counts built in the previous cell (same counts, different sort order)
level_condition_counts = train_data.groupby(['level', 'condition']).size().reset_index(name='count')

# Create a pivot table to structure the data for the heatmap:
# levels as rows, conditions as columns, counts as cell values
heatmap_data = level_condition_counts.pivot(index='level', columns='condition', values='count')

# Create the heatmap
fig = px.imshow(
    heatmap_data,
    labels={'x': 'Condition', 'y': 'Level', 'color': 'Count'},
    title='Heatmap of Levels by Condition',
    color_continuous_scale='Viridis'
)

fig.show()

In [None]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# List of unique levels and conditions (taken from the counts of the heatmap cell)
levels = level_condition_counts['level'].unique()
conditions = level_condition_counts['condition'].unique()

# Define a color scale for the conditions
colors = px.colors.qualitative.Plotly

# Create a subplot figure with 1 row for each level
fig = make_subplots(
    rows=len(levels), cols=1,
    subplot_titles=[f"Condition Distribution for {level}" for level in levels],
    shared_xaxes=True,
    vertical_spacing=0.1
)

# Add a bar chart for each level in a separate subplot
for i, level in enumerate(levels):
    # Filter data for the current level
    level_data = level_condition_counts[level_condition_counts['level'] == level]
    
    # Create the bar chart for each condition within the level;
    # the color index j keeps a condition's color identical across subplots
    for j, condition in enumerate(conditions):
        condition_data = level_data[level_data['condition'] == condition]
        bar = go.Bar(
            x=condition_data['condition'],
            y=condition_data['count'],
            name=condition if i == 0 else None,  # Only name bars in the first subplot
            showlegend=(i == 0),  # Only show legend items in the first subplot
            marker=dict(color=colors[j % len(colors)])
        )
        
        # Add the bar chart to the subplot (subplot rows are 1-based)
        fig.add_trace(bar, row=i+1, col=1)

# Update the layout for better presentation
# NOTE(review): xaxis_title / yaxis_title presumably label only the first subplot
# (the histogram cell uses xaxis2_title for its second one) — confirm the other rows.
fig.update_layout(
    height=600 + 200 * len(levels),  # Adjust height based on the number of levels
    title_text="Condition Distribution Across Levels",
    showlegend=True,  # Show the legend
    legend_title_text='Condition',
    xaxis_title='Condition',
    yaxis_title='Number of Cases'
)

# Display the figure
fig.show()

In [None]:
# Export the processed train and test tables to CSV files (relative paths, no index column).
# NOTE(review): later cells read '/kaggle/working/train_processed.csv', which
# assumes the working directory is /kaggle/working — confirm.
final_merged_df.to_csv('train_processed.csv', index=False)
test_data.to_csv('test_processed.csv', index=False)

## Setup & Data Loading

In [None]:
# Install EfficientNet
!pip install -U efficientnet

import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
import efficientnet.tfkeras as efn
import pydicom
import matplotlib.pyplot as plt
import os

## Visualize Samples

In [None]:
# Function to visualize samples
def visualize_samples(image_dir, filenames):
    """Show the given DICOM files side by side as grayscale images.

    Parameters
    ----------
    image_dir : str
        Directory joined in front of every entry of ``filenames``. Because
        ``os.path.join`` is used, it is ignored for entries that are absolute paths.
    filenames : sequence of str
        DICOM file names or paths; one panel is drawn per entry. Nothing is
        drawn for an empty sequence.
    """
    filenames = list(filenames)
    if not filenames:
        return

    # One panel per file (5 inches wide each, so three files give the 15x5 figure).
    # squeeze=False always returns a 2-D axes array, so a single file works too.
    fig, axes = plt.subplots(1, len(filenames), figsize=(5 * len(filenames), 5), squeeze=False)

    for i, (ax, file) in enumerate(zip(axes[0], filenames)):
        ds = pydicom.dcmread(os.path.join(image_dir, file))
        img = ds.pixel_array

        ax.imshow(img, cmap='gray')
        ax.axis('off')
        ax.set_title(f"Sample {i+1}")

    plt.show()

# Provide one sample file per series type (all taken from study 100206310)
sagittal_t1_sample = "/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/2092806862/4.dcm"
sagittal_t2_stir_sample = "/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/9.dcm"
axial_view_sample = "/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1012284084/55.dcm"

# Visualize the provided sample images. The 'images' directory argument has no
# effect here: the samples are absolute paths, so os.path.join discards it.
visualize_samples('images', [sagittal_t1_sample, sagittal_t2_stir_sample, axial_view_sample])

## Data Augmentation and Splitting

In [None]:
import pandas as pd
import numpy as np
import pydicom
import cv2
import os

# Load CSV with image paths and labels
df = pd.read_csv('/kaggle/working/train_processed.csv')

# Check the dataframe structure
print(df.head())

# Example of how to load a DICOM image
def load_dicom_image(filepath, target_size=(224, 224)):
    """Read a DICOM file and return it as a 3-channel float image in [0, 1].

    Parameters
    ----------
    filepath : str
        Path of the DICOM file.
    target_size : tuple of int
        Size handed to ``cv2.resize``.

    Returns
    -------
    numpy.ndarray
        float32 array with the resized pixel data replicated over a trailing
        axis of length 3 (assumes a single-frame grayscale image — TODO confirm).
    """
    dicom = pydicom.dcmread(filepath)
    img = dicom.pixel_array.astype(np.float32)
    img = cv2.resize(img, target_size)  # Resize to target size

    # Min-max scale instead of dividing by 255: the pixel data is not guaranteed
    # to be 8-bit, so a fixed divisor would not bound the values to [0, 1].
    lo, hi = float(img.min()), float(img.max())
    img = (img - lo) / (hi - lo) if hi > lo else np.zeros_like(img)

    return np.stack((img,) * 3, axis=-1)  # Convert grayscale to 3-channel image

# Check if a DICOM file loads correctly
test_image = load_dicom_image(df['image_path'].iloc[0])
print(test_image.shape)  # Should be (224, 224, 3)

## Data Generator for Loading DICOM Images

In [None]:
from tensorflow.keras.utils import Sequence
from sklearn.preprocessing import LabelEncoder

# Encode condition labels as integer class ids (stored in 'condition_encoded');
# label_encoder keeps the fitted mapping
label_encoder = LabelEncoder()
df['condition_encoded'] = label_encoder.fit_transform(df['condition'])

# Custom data generator to load DICOM images in batches
class DICOMDataGenerator(Sequence):
    """Keras Sequence yielding (images, one-hot condition labels) batches.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Needs an 'image_path' column and an integer 'condition_encoded' column
        whose values lie in ``range(num_classes)``.
    batch_size : int
        Number of rows per batch.
    target_size : tuple of int
        Size forwarded to ``load_dicom_image``.
    shuffle : bool
        Reshuffle the rows on construction and after every epoch.
    num_classes : int
        Width of the one-hot label vectors (default 5, as before).
    **kwargs
        Forwarded to the ``Sequence`` base class.
    """

    def __init__(self, dataframe, batch_size, target_size=(224, 224), shuffle=True, num_classes=5, **kwargs):
        super().__init__(**kwargs)
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.num_classes = num_classes
        self.on_epoch_end()

    def __len__(self):
        # Full batches only: a trailing partial batch is dropped.
        return int(np.floor(len(self.dataframe) / self.batch_size))

    def __getitem__(self, index):
        batch_data = self.dataframe.iloc[index * self.batch_size:(index + 1) * self.batch_size]
        images = np.array([load_dicom_image(filepath, self.target_size) for filepath in batch_data['image_path']])

        # One-hot encode by class id. pd.get_dummies on a batch only creates
        # columns for the classes present in that batch, so copying its columns
        # into the label array would put the ones in the wrong positions.
        class_ids = batch_data['condition_encoded'].to_numpy(dtype=int)
        labels = np.zeros((len(batch_data), self.num_classes))
        labels[np.arange(len(batch_data)), class_ids] = 1

        return images, labels

    def on_epoch_end(self):
        if self.shuffle:
            self.dataframe = self.dataframe.sample(frac=1).reset_index(drop=True)

## Splitting Data into Training and Validation Sets

In [None]:
# Split data into training and validation sets, stratified by condition.
# random_state pins the split (42, like the other splits in this notebook).
# NOTE(review): this is a row-level split, so rows of one study_id can end up in
# both sets — confirm that is acceptable for validation.
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['condition_encoded'], random_state=42)

# Create the generators
train_generator = DICOMDataGenerator(train_df, batch_size=16, target_size=(224, 224))
val_generator = DICOMDataGenerator(val_df, batch_size=16, target_size=(224, 224))

## Data Generators for Efficient Memory Usage

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Assuming you already have data loaded (e.g., 'data' for tabular and 'image_path' for images)
# NOTE(review): `data` (with its 'severity_encoded' column) is only created by a
# later cell of this notebook, and CombinedDataGenerator is not defined in the
# visible notebook — both must exist before this cell can run.
# Example: Split the dataset into training and testing sets (image and tabular data)
train_data, test_data = train_test_split(data, test_size=0.2, stratify=data['severity_encoded'], random_state=42)

# Load images from the file paths stored in the 'image_path' column
def load_images(image_paths, target_size=(224, 224)):
    """Load images into one array with pixel values scaled to [0, 1].

    Parameters
    ----------
    image_paths : iterable of str
        Files to load. Paths ending in '.dcm' are read with pydicom because
        Keras' ``load_img`` cannot decode DICOM; all others go through ``load_img``.
    target_size : tuple of int
        (height, width) of the returned images.

    Returns
    -------
    numpy.ndarray
        Stacked 3-channel images (DICOM branch assumes single-frame grayscale
        pixel data — TODO confirm).
    """
    images = []
    for path in image_paths:
        if str(path).lower().endswith('.dcm'):
            pixels = pydicom.dcmread(path).pixel_array.astype(np.float32)
            # Min-max scale: DICOM pixel data is not guaranteed to be 8-bit.
            lo, hi = float(pixels.min()), float(pixels.max())
            pixels = (pixels - lo) / (hi - lo) if hi > lo else np.zeros_like(pixels)
            # cv2.resize expects (width, height), target_size is (height, width).
            pixels = cv2.resize(pixels, (target_size[1], target_size[0]))
            img = np.stack([pixels] * 3, axis=-1)
        else:
            img = tf.keras.preprocessing.image.load_img(path, target_size=target_size)
            img = tf.keras.preprocessing.image.img_to_array(img) / 255.0  # Normalize pixel values
        images.append(img)
    return np.array(images)

# The processed table stores the paths in 'image_path' (singular)
train_images = load_images(train_data['image_path'])
test_images = load_images(test_data['image_path'])

# Preprocess tabular data: fit the scaler on the training rows only
scaler = StandardScaler()
train_tabular = scaler.fit_transform(train_data[['x', 'y']])
test_tabular = scaler.transform(test_data[['x', 'y']])

# Labels ('severity_encoded' is the target column)
train_labels = train_data['severity_encoded'].values
test_labels = test_data['severity_encoded'].values

# Now you can create the data generators using the preprocessed images, tabular data, and labels
train_gen = CombinedDataGenerator(train_images, train_tabular, train_labels, batch_size=8)
val_gen = CombinedDataGenerator(test_images, test_tabular, test_labels, batch_size=8)

## Model Setup with EfficientNetB0

In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Concatenate, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# --------------------- Image Branch using EfficientNetB0 ---------------------

# Load the EfficientNetB0 model (pre-trained) without the top layer
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Add custom layers on top of EfficientNetB0 for image data
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Global average pooling
x = Dense(256, activation='relu')(x)  # Custom fully connected layer

# Freeze the base model's pre-trained layers (only the layers added in this cell stay trainable)
for layer in base_model.layers:
    layer.trainable = False

# --------------------- Tabular Data Branch ---------------------

# Define the input shape for tabular data: one input per column of train_tabular
tabular_input = Input(shape=(train_tabular.shape[1],))

# Define a simple MLP (Multilayer Perceptron) for tabular data
y = Dense(16, activation='relu')(tabular_input)  # Dense layer for tabular data
y = Dropout(0.5)(y)  # Dropout layer to prevent overfitting

# --------------------- Combine the Branches ---------------------

# Concatenate image branch (x) and tabular branch (y)
combined = Concatenate()([x, y])

# Add more dense layers on top of the combined data
z = Dense(128, activation='relu')(combined)  # Fully connected layer
z = Dense(64, activation='relu')(z)

# Final output layer for classification (5 classes for spinal degeneration)
# NOTE(review): a 5-way softmax with categorical_crossentropy expects one-hot
# labels over 5 classes, while train_labels holds integer severity codes from a
# 3-value mapping — confirm which target this model is meant for.
predictions = Dense(5, activation='softmax')(z)  # Output layer for 5 unique conditions

# --------------------- Model Definition ---------------------

# Create the final model that takes both image and tabular inputs
# NOTE(review): `model` is reassigned by the later combined image/tabular model
# cell, so this definition is not the one that gets trained in notebook order.
model = Model(inputs=[base_model.input, tabular_input], outputs=predictions)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# --------------------- Model Summary ---------------------
# Show the model architecture
model.summary()

## Checking GPU Availability

In [None]:
import tensorflow as tf

# Check GPU availability (query the devices once and reuse the list)
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))

# Enable memory growth for GPU
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Memory growth enabled")
    except RuntimeError as e:
        # TensorFlow raises RuntimeError when memory growth is requested after
        # the GPUs have already been initialized; report it and carry on.
        print(e)

## Mixed Precision Training

In [None]:
# Enable mixed precision by setting the global Keras dtype policy to 'mixed_float16'.
# NOTE(review): in notebook order this runs after the first EfficientNetB0 model
# was built and before the combined model below; presumably only layers created
# afterwards pick up the policy — confirm, and consider moving this to the top.
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

## Label Encoding and Coordinate Scaling

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load dataset
file_path = '/kaggle/working/train_processed.csv'
data = pd.read_csv(file_path)

# Encode severity labels as integer class ids (values outside the mapping become NaN)
data['severity_encoded'] = data['severity'].map({
    'Normal/Mild': 0,
    'Moderate': 1,
    'Severe': 2
})

# Split the data into train and test sets, stratified by severity.
# .copy() gives independent frames, so the column assignments below neither
# touch `data` nor trigger SettingWithCopyWarning.
train_data, test_data = train_test_split(data, test_size=0.2, stratify=data['severity_encoded'], random_state=42)
train_data = train_data.copy()
test_data = test_data.copy()

# Scale the 'x' and 'y' coordinates using StandardScaler. The scaler is fitted on
# the training rows only and then applied to the test rows, so no statistics of
# the held-out rows leak into the features.
scaler = StandardScaler()
train_data[['x', 'y']] = scaler.fit_transform(train_data[['x', 'y']])
test_data[['x', 'y']] = scaler.transform(test_data[['x', 'y']])

# Display the first few rows of the train set
train_data.head()

## Load DICOM Images for Training & Testing

In [None]:
IMG_SIZE = (224, 224)

def load_and_preprocess_dicom(image_path):
    """Read one DICOM file and return an IMG_SIZE 3-channel image scaled to [0, 1].

    The pixel data is min-max normalised to 0..255, 2-D images are replicated to
    three channels, the result is resized to IMG_SIZE and divided by 255.
    Returns None (after printing the error) when anything in that chain fails.
    """
    try:
        dicom = pydicom.dcmread(image_path)
        img = dicom.pixel_array
        img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX)
        if len(img.shape) == 2:
            img = np.stack([img] * 3, axis=-1)
        img = cv2.resize(img, IMG_SIZE)
        img = img / 255.0
        return img
    except Exception as e:
        print(f"Error loading DICOM image: {image_path}, Error: {e}")
        return None


def _load_image_set(frame):
    """Load every file in frame['image_path']; return (images, loaded_mask).

    ``loaded_mask`` has one boolean per row of ``frame`` and is False where the
    image could not be loaded, so callers can drop the matching rows.
    """
    images, loaded = [], []
    for path in frame['image_path']:
        img = load_and_preprocess_dicom(path)
        loaded.append(img is not None)
        if img is not None:
            images.append(img)
    return np.array(images), np.array(loaded, dtype=bool)


# Prepare training and test images
# NOTE(review): all images are held in memory at once as float arrays — confirm
# this fits for the full table.
train_images, train_loaded = _load_image_set(train_data)
test_images, test_loaded = _load_image_set(test_data)

# Keep tabular features and labels only for rows whose image loaded; otherwise a
# single failed file would shift the images against their features and labels.
train_tabular = train_data.loc[train_loaded, ['x', 'y']].values
test_tabular = test_data.loc[test_loaded, ['x', 'y']].values

train_labels = train_data.loc[train_loaded, 'severity_encoded'].values
test_labels = test_data.loc[test_loaded, 'severity_encoded'].values

## Model Setup for Combined Image and Tabular Data

In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers, models

# Image branch: frozen ImageNet EfficientNetB0 backbone followed by global average pooling
efficientnet_input = layers.Input(shape=(224, 224, 3))
efficientnet = EfficientNetB0(include_top=False, input_tensor=efficientnet_input, weights='imagenet')
efficientnet.trainable = False

x = layers.GlobalAveragePooling2D()(efficientnet.output)

# Tabular branch: small MLP over the two coordinate features (x, y)
tabular_input = layers.Input(shape=(2,))
y = layers.Dense(128, activation='relu')(tabular_input)
y = layers.Dense(64, activation='relu')(y)

# Fuse both branches and classify into the 3 severity classes.
# The output layer is pinned to float32: the notebook enables the 'mixed_float16'
# policy before this cell, and a float16 softmax output is numerically fragile.
combined = layers.Concatenate()([x, y])
output = layers.Dense(3, activation='softmax', dtype='float32')(combined)

model = models.Model(inputs=[efficientnet_input, tabular_input], outputs=output)

# Integer class labels -> sparse categorical cross-entropy
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

## Early Stopping & Training the Model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# EarlyStopping callback: stop after 3 epochs without val_loss improvement and
# restore the best weights seen
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model with data generators
# NOTE(review): train_gen / val_gen come from CombinedDataGenerator, which is not
# defined in the visible part of the notebook — confirm they exist and feed
# [image, tabular] inputs with integer severity labels.
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=20,
    callbacks=[early_stopping], 
    verbose=2
)

# Evaluate the model
# NOTE: val_gen is also the validation data that drove early stopping above, so
# this "test" accuracy is not measured on untouched data.
test_loss, test_acc = model.evaluate(val_gen)
print(f"Test accuracy: {test_acc}")