In [2]:
# The wildcard import goes first: it may rebind common names (for example
# `glob`), so the explicit imports below must run after it to take precedence.
from fastai.vision.all import *

import re
from glob import glob

import numpy as np
import pandas as pd
import pydicom
from sklearn.model_selection import train_test_split


In [None]:
# Load a study's DICOM slices and resample them to a fixed number (e.g., 30)
def _natural_sort_key(path):
    """Sort key that compares digit runs numerically ('2.dcm' before '10.dcm')."""
    # re.split with a capturing group alternates text / digit chunks, so the
    # odd positions are always the digit runs and same-position chunks share a type.
    return [int(chunk) if pos % 2 else chunk
            for pos, chunk in enumerate(re.split(r'(\d+)', path))]


def load_study_slices(study_id, target_slices=30):
    """Load the DICOM slices of one study and resample them to a fixed count.

    All ``.dcm`` files found recursively under the study folder are ordered
    with a natural (numeric-aware) sort, ``target_slices`` of them are picked
    at evenly spaced positions (repeating slices when the study has fewer),
    and their pixel arrays are stacked along a new leading axis.

    Args:
        study_id: Name of the study sub-folder under ``train_images``.
        target_slices: Number of slices in the returned stack; must be >= 1.

    Returns:
        ``np.ndarray`` whose first axis has length ``target_slices``.

    Raises:
        ValueError: If ``target_slices`` is smaller than 1, or (from
            ``np.stack``) if the selected pixel arrays differ in shape.
        FileNotFoundError: If no ``.dcm`` file exists for the study.
    """
    if target_slices < 1:
        raise ValueError(f'target_slices must be >= 1, got {target_slices}')

    # Locate the DICOM files for the given study
    study_folder = f'rsna-2024-lumbar-spine-degenerative-classification/train_images/{study_id}'
    # A plain string sort would place '10.dcm' ahead of '2.dcm' and scramble
    # the slice order, hence the natural sort key.
    dicom_files = sorted(glob(f'{study_folder}/**/*.dcm', recursive=True),
                         key=_natural_sort_key)
    if not dicom_files:
        raise FileNotFoundError(f'No DICOM files found under {study_folder!r}')

    # Evenly spaced positions handle both cases: repeating slices when the
    # study is too short and sub-sampling when it is too long.
    n_files = len(dicom_files)
    if n_files == target_slices:
        picks = list(range(n_files))
    else:
        picks = np.linspace(0, n_files - 1, target_slices, dtype=int).tolist()

    # Decode each selected file once, even if its position is repeated.
    decoded = {i: pydicom.dcmread(dicom_files[i]).pixel_array for i in sorted(set(picks))}

    # NOTE(review): the recursive glob gathers files from every sub-folder of
    # the study; np.stack raises ValueError if their pixel arrays differ in shape.
    # Stack slices into a single array (depth first)
    return np.stack([decoded[i] for i in picks], axis=0)

In [None]:
# Split rows into train/validation sets by study_id, so a study never ends up in both
def study_based_splitter(df, valid_pct=0.2, seed=None):
    """Split the rows of ``df`` into train/validation positions, grouped by study.

    The unique values of ``df['study_id']`` are shuffled, the first
    ``int(n_studies * valid_pct)`` of them become the validation studies, and
    every row is assigned to the side its study landed on.

    Args:
        df: DataFrame with a ``study_id`` column.
        valid_pct: Fraction of studies (not rows) used for validation, in [0, 1].
        seed: Optional seed for a reproducible split. ``None`` (default) draws
            from NumPy's global random state, as before.

    Returns:
        ``(train_idx, valid_idx)``: two integer arrays of 0-based row positions.

    Raises:
        ValueError: If ``valid_pct`` is outside [0, 1].
    """
    if not 0 <= valid_pct <= 1:
        raise ValueError(f'valid_pct must be within [0, 1], got {valid_pct}')

    # Copy the unique IDs so the in-place shuffle can never touch data owned by df
    study_ids = np.array(df['study_id'].unique())

    # seed=None keeps the previous behaviour (global RNG); a seed gives a
    # private generator so the split is repeatable and leaves global state alone.
    rng = np.random if seed is None else np.random.RandomState(seed)
    rng.shuffle(study_ids)

    # Truncates toward zero: with few studies or a small valid_pct the
    # validation set can be empty.
    n_valid = int(len(study_ids) * valid_pct)
    valid_study_ids = study_ids[:n_valid]
    train_study_ids = study_ids[n_valid:]

    # Boolean row masks -> positional indices
    train_mask = df['study_id'].isin(train_study_ids)
    valid_mask = df['study_id'].isin(valid_study_ids)

    return np.where(train_mask)[0], np.where(valid_mask)[0]

In [None]:
# Read the study information table into a DataFrame
# (replace the placeholder path below with the real location of train.csv)
df = pd.read_csv(filepath_or_buffer='path_to_csv/train.csv')

In [None]:
# Define the DataBlock with a study-based split
def get_study_volume(row):
    """Return the resampled slice stack for the study referenced by ``row``."""
    # A named function (rather than a lambda) keeps the DataBlock picklable.
    return load_study_slices(row['study_id'], target_slices=30)  # Fixed to 30 slices per study


spine_block = DataBlock(
    # NOTE(review): get_x yields a stacked multi-slice array, while
    # ImageBlock(cls=PILImageBW) is a single-image block - confirm this block
    # type (and Resize below) can handle a volume.
    blocks=(ImageBlock(cls=PILImageBW), MultiCategoryBlock),
    get_x=get_study_volume,
    # NOTE(review): assumes df has these three label columns - adjust to your labels.
    get_y=ColReader(['normal_mild', 'moderate', 'severe']),
    # study_based_splitter returns (train_idx, valid_idx); IndexSplitter takes
    # only the validation indices, so pass element [1] rather than the tuple.
    splitter=IndexSplitter(study_based_splitter(df)[1]),
    item_tfms=Resize(224),  # Resize to 224x224
    batch_tfms=aug_transforms(flip_vert=True)
)



In [None]:
# Dataloader: build `dls` by calling spine_block.dataloaders on the label DataFrame `df`,
# requesting a batch size of 4.
dls = spine_block.dataloaders(df, bs=4)  # Adjust batch size (bs) as needed

# Define a simple 3D ResNet model for this task (e.g., MedicalNet or 3D ResNet)
def get_pretrained_resnet3d(num_classes):
    """Return a pretrained 3D ResNet with ``num_classes`` outputs (placeholder).

    This is a stub: plug in a model with pretrained weights (for example
    MedicalNet's ResNet or any custom 3D CNN) and return it configured for
    multi-label classification.

    Args:
        num_classes: Number of output units the returned model must have.

    Raises:
        NotImplementedError: Always, until a real model loader is supplied.
            Failing here is deliberate: silently returning ``None`` would only
            surface later as an obscure error inside the Learner.
    """
    raise NotImplementedError(
        'get_pretrained_resnet3d is a placeholder: load a pretrained 3D CNN '
        f'and return it with {num_classes} output units.'
    )

# Build the network with 3 outputs.
# NOTE(review): get_pretrained_resnet3d is still a placeholder, so `model` is not a
# usable network until that function is implemented.
model = get_pretrained_resnet3d(num_classes=3)

# FastAI Learner: combines the DataLoaders (`dls`) with the model and reports
# accuracy_multi as the metric. No loss function is passed explicitly here.
learn = Learner(dls, model, metrics=accuracy_multi)

# Fine-tune the pre-trained model (3 passed as the first argument, base_lr of 1e-3)
learn.fine_tune(3, base_lr=1e-3)
