In [1]:
import cv2
import plotly.express as px
import seaborn as sns
import os
import pandas as pd
import numpy as np
import pydicom
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense, Input
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm
import pydicom as dicom
from sklearn.model_selection import train_test_split

AttributeError: partially initialized module 'cv2' has no attribute 'gapi_wip_gst_GStreamerPipeline' (most likely due to a circular import)

In [7]:
# read data
# Root folder of the competition data. Defaults to the original local download
# location; set the RSNA_DATA_DIR environment variable to use another copy
# without editing the notebook.
DATA_DIR = os.environ.get(
    'RSNA_DATA_DIR',
    r'C:\Users\shaif\Downloads\Compressed\rsna-2024-lumbar-spine-degenerative-classification',
)

df_train_main = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
df_train_label = pd.read_csv(os.path.join(DATA_DIR, 'train_label_coordinates.csv'))
df_train_desc = pd.read_csv(os.path.join(DATA_DIR, 'train_series_descriptions.csv'))
df_test_desc = pd.read_csv(os.path.join(DATA_DIR, 'test_series_descriptions.csv'))
df_sub = pd.read_csv(os.path.join(DATA_DIR, 'sample_submission.csv'))

In [11]:


# Function to load and preprocess the DICOM images of one study
def load_and_preprocess_images(image_dir, unique_id, target_size=(32, 32), images_per_batch=5):
    """Load a fixed-size stack of preprocessed DICOM frames for one study.

    Every ``.dcm`` file found in the immediate sub-folders of
    ``image_dir/unique_id`` is read, rescaled to uint8 if needed, resized and
    converted to 3-channel RGB. Files that fail to load are reported with
    ``print`` and skipped.

    Args:
        image_dir: Directory that contains one folder per study.
        unique_id: Name of the study folder (string).
        target_size: ``(width, height)`` passed to ``cv2.resize``.
        images_per_batch: Number of frames to return.

    Returns:
        float32 array of shape ``(images_per_batch, height, width, 3)`` with
        values in ``[0, 1]``. If fewer frames than requested were loaded the
        stack is padded with all-zero frames; otherwise a random subset of
        ``images_per_batch`` frames is returned in the order they were loaded.

    Raises:
        OSError: if ``image_dir/unique_id`` cannot be scanned.
    """
    # cv2.resize takes dsize as (width, height) while the resulting array is
    # (height, width); keep both explicit so padding frames match real frames
    # even for non-square target sizes.
    width, height = target_size

    study_dir = os.path.join(image_dir, unique_id)
    series_dirs = [entry.path for entry in os.scandir(study_dir) if entry.is_dir()]

    images = []
    for series_dir in series_dirs:
        for file_name in os.listdir(series_dir):
            if not file_name.endswith('.dcm'):
                continue
            image_path = os.path.join(series_dir, file_name)
            try:
                # Named `dataset` (not `dicom`) so the module-level
                # `pydicom as dicom` alias is not shadowed.
                dataset = pydicom.dcmread(image_path)
                image = dataset.pixel_array

                # Convert image to uint8 if necessary
                if image.dtype != np.uint8:
                    image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
                image = cv2.resize(image, (width, height))
                image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)  # Convert to RGB

                images.append(image)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole study.
                print(f"Error loading image {image_path}: {e}")

    if len(images) < images_per_batch:
        # Too few frames: pad with blank frames of the same shape/dtype as real ones.
        blank = np.zeros((height, width, 3), dtype=np.uint8)
        images.extend([blank] * (images_per_batch - len(images)))
    else:
        # Random subset without replacement; indices are sorted so the kept
        # frames stay in load order instead of being shuffled.
        keep = np.sort(np.random.permutation(len(images))[:images_per_batch])
        images = [images[i] for i in keep]

    return np.array(images).astype('float32') / 255.0

# Load all data and preprocess
def load_data(df, image_dir, target_size=(32, 32), images_per_batch=5):
    """Build the image stacks and per-condition soft labels for every study in ``df``.

    Args:
        df: DataFrame with a ``study_id`` column; every other column is treated
            as a condition whose cells hold "Normal/Mild", "Moderate" or "Severe".
        image_dir: Directory containing one folder per study id.
        target_size: Forwarded to ``load_and_preprocess_images``.
        images_per_batch: Forwarded to ``load_and_preprocess_images``.

    Returns:
        ``(x_data, y_data)`` where ``x_data`` is ``np.array`` of the per-study
        image stacks and ``y_data`` is a list (one dict per row) mapping
        condition name to a 3-element soft-label vector. Conditions whose cell
        is missing or holds an unrecognised value are left out of the dict.
    """
    # Soft (smoothed) targets: 0.6 on the annotated class, 0.2 on the others.
    label_mapping = {
        "Normal/Mild": [0.6, 0.2, 0.2],
        "Moderate": [0.2, 0.6, 0.2],
        "Severe": [0.2, 0.2, 0.6]
    }

    # Select label columns by name rather than by position so the function
    # does not depend on `study_id` being the first column.
    condition_columns = [col for col in df.columns if col != 'study_id']

    x_data = []
    y_data = []
    for _, row in tqdm(df.iterrows(), total=len(df)):
        study_id = row['study_id']
        x_data.append(
            load_and_preprocess_images(image_dir, str(study_id), target_size, images_per_batch)
        )

        labels = {}
        for condition in condition_columns:
            # .get() yields None for NaN and for unknown label strings alike;
            # both are skipped so no None target ever reaches training.
            encoded = label_mapping.get(row[condition])
            if encoded is not None:
                labels[condition] = encoded
        y_data.append(labels)

    return np.array(x_data), y_data

# Define the function to train the model for a specific condition
def train_condition_model(condition_name, x_train, y_train, image_size=(32, 32), epochs=2, batch_size=10):
    """Train a small CNN-LSTM classifier for a single condition.

    Args:
        condition_name: Key to look up in each label dict of ``y_train``.
        x_train: Sequence of per-study image stacks.
        y_train: Sequence of dicts (parallel to ``x_train``) mapping condition
            name to a 3-element target vector.
        image_size: Fallback spatial size used for the model input only when
            the shape cannot be read from the data itself.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        The fitted Keras model.

    Raises:
        ValueError: if no sample carries a usable label for ``condition_name``.
    """
    # Keep only the samples that actually have a (non-None) label for this condition.
    filtered_data = [
        (x, labels[condition_name])
        for x, labels in zip(x_train, y_train)
        if labels.get(condition_name) is not None
    ]
    if not filtered_data:
        raise ValueError(f"No labelled samples found for condition '{condition_name}'")
    x_filtered, y_filtered = zip(*filtered_data)

    # Convert to numpy arrays
    x_filtered = np.array(x_filtered)
    y_filtered = np.array(y_filtered)

    # Define the CNN-LSTM model with smaller filter sizes and LSTM units
    def create_model(input_shape):
        model = Sequential()
        model.add(Input(shape=input_shape))
        model.add(TimeDistributed(Conv2D(4, (3, 3), activation='relu')))
        model.add(TimeDistributed(Conv2D(4, (3, 3), activation='relu')))
        # NOTE(review): two consecutive pooling layers are kept exactly as in
        # the original architecture - confirm this is intentional.
        model.add(TimeDistributed(MaxPooling2D((2, 2))))
        model.add(TimeDistributed(MaxPooling2D((2, 2))))
        model.add(TimeDistributed(Conv2D(8, (3, 3), activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2))))
        model.add(TimeDistributed(Flatten()))
        model.add(LSTM(50))
        model.add(Dense(3, activation='softmax'))

        model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    # 90/10 train/validation split in the given order; always keep at least
    # one training sample.
    split_index = max(1, int(0.9 * len(x_filtered)))
    x_train_split, x_val_split = x_filtered[:split_index], x_filtered[split_index:]
    y_train_split, y_val_split = y_filtered[:split_index], y_filtered[split_index:]
    # With a single sample there is nothing left to validate on.
    validation_data = (x_val_split, y_val_split) if len(x_val_split) else None

    # Take the input shape from the data so the sequence length is not tied to
    # a hard-coded 5 frames; fall back to the original default otherwise.
    if x_filtered.ndim == 5:
        input_shape = tuple(x_filtered.shape[1:])
    else:
        input_shape = (5, image_size[0], image_size[1], 3)
    model = create_model(input_shape)

    # Train the model
    model.fit(
        x_train_split,
        y_train_split,
        validation_data=validation_data,
        epochs=epochs,
        batch_size=batch_size,
    )

    return model

# Dataset root; override with the RSNA_DATA_DIR environment variable.
DATA_DIR = os.environ.get(
    'RSNA_DATA_DIR',
    r'C:\Users\shaif\Downloads\Compressed\rsna-2024-lumbar-spine-degenerative-classification',
)
# Preprocessing settings shared by data loading and model construction.
IMAGE_SIZE = (32, 32)
IMAGES_PER_STUDY = 5

# Load the DataFrame
df_train_main = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
image_dir = os.path.join(DATA_DIR, 'train_images')

# Load the entire dataset
x_train, y_train = load_data(
    df_train_main, image_dir, target_size=IMAGE_SIZE, images_per_batch=IMAGES_PER_STUDY
)
print("Data Loaded")

# Example usage
condition_name = 'spinal_canal_stenosis_l1_l2'
model = train_condition_model(
    condition_name, x_train, y_train, image_size=IMAGE_SIZE, epochs=1, batch_size=10
)


AttributeError: partially initialized module 'cv2' has no attribute 'gapi_wip_gst_GStreamerPipeline' (most likely due to a circular import)