In [1]:
# Install ultralytics from the local wheel directory only (--no-index disables PyPI lookups).
!pip install -q --no-index --find-links /kaggle/input/ultralyics ultralytics 

In [2]:
import os
import pydicom
from PIL import Image
import numpy as np
from multiprocessing import Pool, cpu_count

import sklearn.metrics
import torch
import cv2
import numpy as np 
import pandas as pd 
from tqdm.auto import tqdm

In [3]:
# --- Run mode ---------------------------------------------------------------
EVAL = False    # Change to True to compute the validation score
SAMPLE = False  # True for quick debugging
FOLD = 0        # fold selected from 5folds.csv when EVAL is True

# --- Image location and label vocabulary ------------------------------------
IMG_DIR = '/images'
SEVERITIES = [
    'Normal/Mild',
    'Moderate',
    'Severe',
]
LEVELS = [
    'l1_l2',
    'l2_l3',
    'l3_l4',
    'l4_l5',
    'l5_s1',
]

# --- Detector checkpoints, one list per condition family --------------------
SCS_WEIGHTS = [
    '/kaggle/input/yolotrain_scs/pytorch/v1/1/best_scs.pt',
]

SS_WEIGHTS = [
    '/kaggle/input/yolo_train_ss/pytorch/v1/1/best_ss.pt',
    '/kaggle/input/lsdc-yolo-ssv3/best.pt',
]

NFN_WEIGHTS = [
    '/kaggle/input/yolo_train_nf/pytorch/v1/1/best_nf.pt',
    '/kaggle/input/lsdc-yolo-nfnv3/best.pt',
]

In [4]:
# The scoring function is only needed for validation runs, so it is imported conditionally.
if EVAL:
    import sys
    # Make the `metrics` module located in the lsdc-utils input folder importable.
    sys.path.append('/kaggle/input/lsdc-utils')
    from metrics import score as lsdc_scoring

In [5]:
# Training label table: its column names enumerate the conditions to predict, and its
# values are the ground truth used by the evaluation cell when EVAL is True.
train_val_df = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train.csv')

In [6]:
# `des` lists the series of each study together with their series_description.
# Validation runs read the training version and also load the label coordinates.
if not EVAL:
    des = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_series_descriptions.csv')
else:
    des = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_series_descriptions.csv')
    train_xy = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_label_coordinates.csv')

In [7]:
def read_dcm(src_path):
    """Load a DICOM file and min-max scale its pixels towards the 0-255 range.

    Args:
        src_path: Path of the DICOM file to read.

    Returns:
        Floating-point array shaped like the stored pixel array. The smallest
        pixel maps to 0 and the largest to just under 255; a constant image
        maps to all zeros.
    """
    pixels = pydicom.dcmread(src_path).pixel_array
    lowest = pixels.min()
    highest = pixels.max()
    # The 1e-6 term keeps the division defined when every pixel is equal.
    scaled = (pixels - lowest) / (highest - lowest + 1e-6)
    return scaled * 255

def convert_dcm_to_jpg(file_path):
    """Convert one DICOM file to a JPEG that mirrors its location in the input tree.

    Relies on the module-level ``input_directory`` and ``output_directory``
    globals: the path of ``file_path`` relative to ``input_directory`` is
    recreated under ``output_directory`` with its extension changed to ``.jpg``.

    Args:
        file_path: Path of the DICOM file to convert.

    Returns:
        Path of the written JPEG, or ``None`` if reading, directory creation
        or writing failed. Failures are printed instead of raised so that a
        batch conversion keeps going.
    """
    try:
        # Read the DICOM file
        image_array = read_dcm(file_path)

        # Define the output path. Only the final extension is swapped;
        # str.replace would also rewrite a '.dcm' inside a directory name.
        relative_path = os.path.relpath(file_path, start=input_directory)
        relative_stem, _ = os.path.splitext(relative_path)
        output_path = os.path.join(output_directory, relative_stem + '.jpg')

        # Create the output directory if it doesn't exist
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # cv2.imwrite reports failure through its return value, so it has to
        # be checked explicitly to avoid claiming success for a missing file.
        if not cv2.imwrite(output_path, image_array):
            raise IOError(f"cv2.imwrite could not write {output_path}")

        return output_path
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

def process_files(dcm_files):
    """Convert every DICOM path in ``dcm_files`` to JPEG with a pool of worker processes.

    Args:
        dcm_files: List of DICOM file paths; each one is handed to
            ``convert_dcm_to_jpg``.

    The pool is sized with ``cpu_count()`` and a tqdm bar tracks completed
    files. Nothing is returned; the per-file results are discarded.
    """
    n_files = len(dcm_files)
    with Pool(cpu_count()) as workers:
        converted = workers.imap(convert_dcm_to_jpg, dcm_files)
        # Consuming the imap iterator through tqdm is what advances the progress bar.
        for _ in tqdm(converted, total=n_files):
            pass
def get_dcm_files(directory):
    """Recursively collect the paths of all ``.dcm`` files below ``directory``.

    Args:
        directory: Root folder to walk.

    Returns:
        List of paths, each joined onto the folder it was found in, in
        ``os.walk`` traversal order. Empty when nothing matches.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith('.dcm')
    ]

In [8]:
# Make sure JPEG images exist under IMG_DIR, which is where the inference loop looks for them.
# Replace these with your input and output directories
if not EVAL:
    # convert_dcm_to_jpg reads input_directory / output_directory as module-level
    # globals, so both must be assigned before process_files is called.
    input_directory = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images'

    output_directory = IMG_DIR

    # Get all .dcm files in the input directory
    dcm_files = get_dcm_files(input_directory)

    # Process the files using multiprocessing
    process_files(dcm_files)

    print(f"Conversion completed. Images saved to {output_directory}")
else:
    # Validation run: extract the image archive into / unless IMG_DIR already exists
    # (presumably the archive unpacks to /images — verify against the zip layout).
    if not os.path.exists(IMG_DIR):
        print('Unziping data..')
        !unzip -q -d / /kaggle/input/lsdc-get-all-images/images.zip
        print('Done unziping data')

  0%|          | 0/97 [00:00<?, ?it/s]

Conversion completed. Images saved to /images


In [9]:
# Build the table of series to run inference on.
if not EVAL:
    # The entries of test_images are taken as study ids and cast to int.
    test_df = pd.DataFrame(
        os.listdir('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images'),
        columns=['study_id'],
    ).astype({'study_id': int})
else:
    # Validation run: keep only the rows of the selected fold.
    fold_df = pd.read_csv('/kaggle/input/5folds/5folds.csv')
    test_df = fold_df.loc[fold_df['fold'] == FOLD]

# Attach the series rows of `des` to every study.
test_df = pd.merge(test_df, des, on=['study_id'])

In [10]:
def gen_label_map(CONDITIONS):
    """Build the class-name <-> class-id mappings for a group of conditions.

    Class names have the form ``<condition>_<level>_<severity>`` where the
    condition is lower-cased with spaces replaced by underscores and the
    severity is lower-cased. Ids count up from 0 with the condition as the
    slowest-varying part, then the level (``LEVELS``), then the severity
    (``SEVERITIES``).

    Args:
        CONDITIONS: Iterable of condition names such as
            ``'Spinal Canal Stenosis'``.

    Returns:
        Tuple ``(label2id, id2label)`` of dicts mapping name -> id and
        id -> name.
    """
    class_names = [
        f"{cond.lower().replace(' ', '_')}_{level}_{severity.lower()}"
        for cond in CONDITIONS
        for level in LEVELS
        for severity in SEVERITIES
    ]
    label2id = {name: class_id for class_id, name in enumerate(class_names)}
    id2label = dict(enumerate(class_names))
    return label2id, id2label
                
# One pair of mappings per detector family. Left and right conditions share a
# mapping, so the right-side class ids continue after the left-side ones.
# NOTE(review): the ids must match the class indices the YOLO weights were trained with — confirm.
scs_label2id, scs_id2label = gen_label_map(['Spinal Canal Stenosis'])
ss_label2id, ss_id2label = gen_label_map(['Left Subarticular Stenosis', 'Right Subarticular Stenosis'])
nfn_label2id, nfn_id2label = gen_label_map(['Left Neural Foraminal Narrowing', 'Right Neural Foraminal Narrowing'])

In [11]:
from ultralytics import YOLO

# Load one YOLO model per weight file. When a family has several weight files,
# the inference loop pools the detections of all of them.
scs_models = [YOLO(weight_path) for weight_path in SCS_WEIGHTS]
ss_models = [YOLO(weight_path) for weight_path in SS_WEIGHTS]
nfn_models = [YOLO(weight_path) for weight_path in NFN_WEIGHTS]

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [12]:
# Label column names of train.csv: every column after the first one. They are
# read from the header, so this does not depend on the frame having a first row.
all_label_set = train_val_df.columns[1:].tolist()
# The positional slices assume the column order is 5 spinal-canal columns, then
# 10 neural-foraminal, then the subarticular ones — TODO confirm against train.csv.
scs_label_set = all_label_set[:5]
nfn_label_set = all_label_set[5:15]
ss_label_set = all_label_set[15:]

In [13]:
# One tuple per detector family:
#   (series_description to run on, models, class-id -> class-name map,
#    condition columns to emit, confidence threshold)
# The threshold is passed to YOLO as `conf` and is also the fallback score for a
# severity that received no detection.
settings = [
    ( 'Sagittal T2/STIR', scs_models, scs_id2label, scs_label_set, 0.01),
    ( 'Axial T2', ss_models, ss_id2label, ss_label_set, 0.01),
    ( 'Sagittal T1', nfn_models, nfn_id2label, nfn_label_set, 0.1)
]

In [14]:
from collections import defaultdict

In [15]:
# Study-level predictions: one dict per (study, condition) holding the row_id
# and a probability for each severity.
pred_rows = []

# Every study must get a row for every condition, including studies that have
# no series of the modality a detector family runs on.
all_study_ids = np.sort(test_df['study_id'].unique())

for modality, models, id2label, label_set, thresh in settings:
    mod_df = test_df[test_df.series_description == modality]

    if SAMPLE:
        # DataFrame.sample raises when asked for more rows than are available.
        mod_df = mod_df.sample(min(20, len(mod_df)), random_state=610)
        study_ids = np.sort(mod_df['study_id'].unique())
    else:
        study_ids = all_study_ids

    # study_id -> series ids of this modality (no entry when the study has none)
    series_by_study = {
        study_id: group['series_id'].tolist()
        for study_id, group in mod_df.groupby('study_id')
    }

    # for each study, at each level and condition, get the maximum probability score
    for study_id in tqdm(study_ids):
        # class name -> confidences of all detections, pooled over images, series and models
        predictions = defaultdict(list)
        for series_id in series_by_study.get(study_id, []):
            # predict on all images from all the series
            series_dir = os.path.join(IMG_DIR, str(study_id), str(series_id))
            if not os.path.isdir(series_dir):
                # No images for this series; fall back to the default scores
                # instead of aborting the whole run.
                print(f'Missing image directory {series_dir}; skipping series')
                continue
            for model in models:
                results = model(series_dir, conf=thresh, verbose=False)
                for res in results:
                    for pred_class, conf in zip(res.boxes.cls, res.boxes.conf):
                        # .item() yields a float class index; cast it for the dict lookup
                        _class = id2label[int(pred_class.item())]
                        predictions[_class].append(conf.item())

        # aggregate the result on images to obtain study-level prediction
        for condition in label_set:
            res_dict = {'row_id': f'{study_id}_{condition}'}

            score_vec = []
            for severity in SEVERITIES:
                confidences = predictions.get(f'{condition}_{severity.lower()}', [])
                if len(confidences) > 0:
                    score = np.max(confidences)
                else:
                    # nothing detected above the threshold: use the threshold itself,
                    # which also keeps the normalizing sum strictly positive
                    score = thresh
                score_vec.append(score)

            # normalize score to sum to 1
            score_vec = torch.tensor(score_vec)
            score_vec = score_vec / score_vec.sum()

            for idx, severity in enumerate(SEVERITIES):
                res_dict[severity.replace('/', '_').lower()] = score_vec[idx].item()

            pred_rows.append(res_dict)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

In [16]:
# Turn the collected rows into the submission table: row_id plus one probability
# column per severity (normal_mild, moderate, severe).
pred_df = pd.DataFrame(pred_rows)
pred_df

Unnamed: 0,row_id,normal_mild,moderate,severe
0,44036939_spinal_canal_stenosis_l1_l2,0.43686,0.553363,0.009777
1,44036939_spinal_canal_stenosis_l2_l3,0.0722,0.51403,0.41377
2,44036939_spinal_canal_stenosis_l3_l4,0.106059,0.473474,0.420467
3,44036939_spinal_canal_stenosis_l4_l5,0.77178,0.174966,0.053253
4,44036939_spinal_canal_stenosis_l5_s1,0.436761,0.454776,0.108463
5,44036939_left_subarticular_stenosis_l1_l2,0.564523,0.416813,0.018664
6,44036939_left_subarticular_stenosis_l2_l3,0.218913,0.271122,0.509965
7,44036939_left_subarticular_stenosis_l3_l4,0.245754,0.151069,0.603177
8,44036939_left_subarticular_stenosis_l4_l5,0.236753,0.309356,0.453891
9,44036939_left_subarticular_stenosis_l5_s1,0.285944,0.51407,0.199986


In [17]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
pred_df.to_csv('/kaggle/working/submission.csv', index=False)

In [18]:
def sample_weight(row):
    """Return the weight of a one-hot encoded severity row.

    Args:
        row: Mapping with ``normal_mild``, ``moderate`` and ``severe`` entries.

    Returns:
        1, 2 or 4 for the first of ``normal_mild``, ``moderate``, ``severe``
        (checked in that order) whose value equals 1.

    Raises:
        ValueError: If none of the three entries equals 1.
    """
    weight_by_column = (
        ('normal_mild', 1),
        ('moderate', 2),
        ('severe', 4),
    )
    for column, weight in weight_by_column:
        if row[column] == 1:
            return weight
    raise ValueError('No such value')
    
def get_class(row):
    """Return the index of the largest severity value in ``row``.

    Args:
        row: Mapping with ``normal_mild``, ``moderate`` and ``severe`` entries.

    Returns:
        0, 1 or 2 for normal_mild, moderate or severe; ties resolve to the
        lowest index, as ``np.argmax`` does.
    """
    severity_columns = ('normal_mild', 'moderate', 'severe')
    scores = [row[column] for column in severity_columns]
    return np.argmax(scores)

In [19]:
# Validation only: score pred_df against the ground truth in train_val_df.
if EVAL:
    # Long format: one row per (study, condition). dropna() runs before the melt,
    # so a study with any missing label is dropped entirely.
    gt_df = train_val_df.dropna().melt(id_vars=['study_id'], value_vars=all_label_set)
    gt_df['row_id'] = gt_df['study_id'].astype(str) + '_' + gt_df['variable']
    gt_df= gt_df[['row_id', 'value']]
    # One-hot encode the severity label.
    gt_df = pd.get_dummies(gt_df, columns=['value'], dtype=int)
    # Assumes get_dummies emitted the dummy columns in the order
    # Moderate, Normal/Mild, Severe — TODO confirm before trusting the rename.
    gt_df.columns = ['row_id', 'moderate', 'normal_mild', 'severe']
    gt_df = gt_df[['row_id', 'normal_mild', 'moderate', 'severe']]
    gt_df['sample_weight'] = gt_df.apply(sample_weight, axis=1)

    # Keep only row_ids present on both sides and sort both frames identically so
    # that they can be compared positionally.
    gt_df1 = gt_df.merge(pred_df['row_id'], how='inner', on='row_id').sort_values('row_id').reset_index(drop=True)
    pred_df1 = pred_df.merge(gt_df1['row_id'], how='inner', on='row_id').sort_values('row_id').reset_index(drop=True)
    # For gt_df1 the 'pred_cls' column holds the true class (argmax of the one-hot row).
    gt_df1['pred_cls'] = gt_df1.apply(get_class, axis=1)
    pred_df1['pred_cls'] = pred_df1.apply(get_class, axis=1)

    # NOTE(review): the next two expressions select the misclassified rows but the
    # results are neither stored nor shown (they are not the last line of the cell).
    gt_df1[(gt_df1['pred_cls'] != pred_df1['pred_cls'])]
    pred_df1[(gt_df1['pred_cls'] != pred_df1['pred_cls'])]
    print('Label count:\n', gt_df1['pred_cls'].value_counts(normalize=True))
    print('Prediction accuracy:', (gt_df1['pred_cls'] == pred_df1['pred_cls']).mean())
    print()

    # NOTE(review): target_levels is not used anywhere in the visible code.
    target_levels = ['normal_mild', 'moderate', 'severe']
    loss = lsdc_scoring(gt_df1.drop(['pred_cls'], axis=1), pred_df1.drop(['pred_cls'], axis=1), row_id_column_name='row_id', any_severe_scalar=1)
    print('Total weighted log loss:', loss)