In [10]:
# Install ultralytics without network access: --no-index disables PyPI lookups and
# --find-links makes pip resolve the package from the attached Kaggle input directory.
# dataset link: https://www.kaggle.com/code/namgalielei/ultralytics
!pip install -q --no-index --find-links /kaggle/input/ultralytics ultralytics

In [11]:
import os
import pydicom
from PIL import Image
import numpy as np
from multiprocessing import Pool, cpu_count

import sklearn.metrics
import torch
import cv2
import numpy as np 
import pandas as pd 
from tqdm.auto import tqdm

In [12]:
# Root directory that receives the converted JPEGs and from which inference reads them.
IMG_DIR = '/images'
# Severity grades and spinal levels; together with a condition name they form the
# detector class names (see gen_label_map).
SEVERITIES = ['Normal/Mild', 'Moderate', 'Severe']
LEVELS = ['l1_l2', 'l2_l3', 'l3_l4', 'l4_l5', 'l5_s1']

# YOLO checkpoints per condition family. Every checkpoint in a list is run on the same
# images and their detections are pooled during inference.
# Spinal canal stenosis checkpoints.
SCS_WEIGHTS = [
    '/kaggle/input/yolo-model/scs_fold0_best.pt',
    '/kaggle/input/yolo-model/scs_fold2_best.pt',
    '/kaggle/input/yolo-model/scs_fold3_best.pt',
    ]
# Neural foraminal narrowing checkpoints.
NFN_WEIGHTS = [
    '/kaggle/input/yolo-model/nfn_fold0_best.pt',
    '/kaggle/input/yolo-model/nfn_fold2_best.pt',
    ]
# Subarticular stenosis checkpoints.
SS_WEIGHTS =  [
    '/kaggle/input/yolo-model/ss_fold0_best.pt',
    '/kaggle/input/yolo-model/ss_fold4_best.pt',
    ]

In [13]:
# Training labels; in the cells shown here only its column names are used, to derive
# the list of condition/level label names.
train_val_df = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train.csv')
# Series descriptions for the test studies; merged onto the study list on study_id.
des = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_series_descriptions.csv')

In [14]:
def read_dcm(src_path):
    """Load a DICOM file and min-max rescale its pixel data towards the 0-255 range.

    Args:
        src_path: Path of the DICOM file to read.

    Returns:
        A floating-point array shaped like the stored pixel array, where the
        minimum pixel maps to 0 and the maximum to just under 255.
    """
    pixels = pydicom.dcmread(src_path).pixel_array
    lowest = pixels.min()
    highest = pixels.max()
    # The small epsilon keeps the division defined when the image is constant.
    return (pixels - lowest) / (highest - lowest + 1e-6) * 255

def convert_dcm_to_jpg(file_path):
    """Convert one DICOM file to a JPEG stored under ``output_directory``.

    The output location mirrors the file's path relative to the module-level
    ``input_directory``, with the file extension replaced by ``.jpg``. Both
    ``input_directory`` and ``output_directory`` are read as globals at call time.

    Args:
        file_path: Path of a DICOM file located under ``input_directory``.

    Returns:
        The path of the written JPEG, or ``None`` when reading, converting or
        writing failed. Failures are printed instead of raised so that one bad
        file does not abort a batch conversion.
    """
    try:
        # Read and rescale the DICOM pixel data.
        image_array = read_dcm(file_path)

        # Mirror the input layout below the output directory. Only the extension is
        # swapped; str.replace would also rewrite a '.dcm' occurring elsewhere in the path.
        relative_path = os.path.relpath(file_path, start=input_directory)
        stem, _ext = os.path.splitext(os.path.join(output_directory, relative_path))
        output_path = stem + '.jpg'

        # Create the output directory if it doesn't exist.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # cv2.imwrite can report a failed write through its boolean return value
        # instead of raising, so the result has to be checked explicitly.
        if not cv2.imwrite(output_path, image_array):
            raise IOError(f"cv2.imwrite failed for {output_path}")

        return output_path
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

def process_files(dcm_files):
    """Convert every path in ``dcm_files`` to JPEG using a pool of worker processes.

    The pool is sized by ``cpu_count()`` and a tqdm bar tracks completed files.
    The per-file results of ``convert_dcm_to_jpg`` are discarded.

    Args:
        dcm_files: Sequence of DICOM file paths to convert.
    """
    total = len(dcm_files)
    with Pool(processes=cpu_count()) as workers:
        converted = workers.imap(convert_dcm_to_jpg, dcm_files)
        # imap is lazy: draining the iterator is what advances the progress bar
        # and waits for all conversions to finish.
        for _ in tqdm(converted, total=total):
            pass

def get_dcm_files(directory):
    """Recursively collect the paths of all ``.dcm`` files below ``directory``.

    Args:
        directory: Root directory to walk.

    Returns:
        A list of file paths (root joined with file name) in ``os.walk`` order;
        empty when nothing matches.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith('.dcm')
    ]

In [None]:
# Source tree of test DICOMs. convert_dcm_to_jpg reads this global to compute each
# file's relative path.
input_directory = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images'

# Destination root for the JPEGs; also read as a global by convert_dcm_to_jpg.
output_directory = IMG_DIR

# Get all .dcm files in the input directory
dcm_files = get_dcm_files(input_directory)

# Process the files using multiprocessing
process_files(dcm_files)

# Printed unconditionally: per-file failures are only reported by convert_dcm_to_jpg.
print(f"Conversion completed. Images saved to {output_directory}")

In [16]:
# Each entry of the test image directory is named after a study id. Joining the ids
# with the series descriptions yields one row per series of every listed study.
study_dirs = os.listdir('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images')
test_df = (
    pd.DataFrame(study_dirs, columns=['study_id'])
    .astype({'study_id': int})
    .merge(des, on=['study_id'])
)

In [17]:
def gen_label_map(CONDITIONS, levels=None, severities=None):
    """Build the class-name <-> class-id mappings for a set of conditions.

    Class names have the form ``<condition>_<level>_<severity>`` where the
    condition is lower-cased with spaces replaced by underscores and the severity
    is lower-cased. Ids are assigned consecutively from 0, iterating conditions,
    then levels, then severities.

    Args:
        CONDITIONS: Iterable of condition names, e.g. ``'Spinal Canal Stenosis'``.
        levels: Iterable of level names; defaults to the module-level ``LEVELS``.
        severities: Iterable of severity names; defaults to the module-level
            ``SEVERITIES``.

    Returns:
        A ``(label2id, id2label)`` tuple of dicts mapping class name to id and
        id to class name.
    """
    # Fall back to the notebook-wide constants only when no override is given.
    levels = LEVELS if levels is None else levels
    severities = SEVERITIES if severities is None else severities

    label2id = {}
    id2label = {}
    i = 0
    for cond in CONDITIONS:
        # The condition slug does not depend on level or severity; compute it once.
        cond_key = cond.lower().replace(' ', '_')
        for level in levels:
            for severity in severities:
                cls_ = f"{cond_key}_{level}_{severity.lower()}"
                label2id[cls_] = i
                id2label[i] = cls_
                i += 1
    return label2id, id2label
                
# One (name -> id, id -> name) pair per detector family; the left/right conditions
# share a single id space within their family.
# NOTE(review): presumably the id order matches the class indices the YOLO
# checkpoints were trained with - confirm against the training configuration.
scs_label2id, scs_id2label = gen_label_map(['Spinal Canal Stenosis'])
ss_label2id, ss_id2label = gen_label_map(['Left Subarticular Stenosis', 'Right Subarticular Stenosis'])
nfn_label2id, nfn_id2label = gen_label_map(['Left Neural Foraminal Narrowing', 'Right Neural Foraminal Narrowing'])

In [18]:
from ultralytics import YOLO

# Instantiate one YOLO model per checkpoint path; each list holds the models that
# are run together for one condition family.
scs_models = [YOLO(path) for path in SCS_WEIGHTS]
ss_models = [YOLO(path) for path in SS_WEIGHTS]
nfn_models = [YOLO(path) for path in NFN_WEIGHTS]

In [19]:
# Label names are the training CSV's column names without the first column.
# Reading them from .columns avoids materialising a data row and also works when
# the frame has no rows.
all_label_set = train_val_df.columns[1:].tolist()

# Positional split into the three condition families.
scs_label_set = all_label_set[:5]
nfn_label_set = all_label_set[5:15]
ss_label_set = all_label_set[15:]

# The slices above hard-code the column layout; fail loudly if it ever changes.
assert (len(scs_label_set), len(nfn_label_set), len(ss_label_set)) == (5, 10, 10), \
    "unexpected number of label columns in train.csv"

In [None]:
from collections import defaultdict
import glob

def infer(modality, models, id2label, label_set, thresh, series_range):
    """Run a group of detectors on one series type and build per-condition severity scores.

    For every study in the module-level ``test_df`` that has at least one series whose
    ``series_description`` equals ``modality``, each model is run on the selected
    JPEG slices of each such series (read from below ``IMG_DIR``). Detection
    confidences are pooled per class name across slices, series and models. For
    each condition in ``label_set`` the highest pooled confidence per severity is
    taken (``thresh`` when that class was never detected) and the three scores are
    normalized to sum to 1.

    Args:
        modality: Value of ``series_description`` selecting the series to use.
        models: Iterable of model callables, invoked as
            ``model(paths, conf=..., verbose=..., augment=...)``; each result must
            expose ``boxes.cls`` and ``boxes.conf``.
        id2label: Mapping from class id to class name of the form
            ``<condition>_<severity lower-cased>``.
        label_set: Condition names used both for ``row_id`` and for the class-name
            lookup.
        thresh: Confidence threshold passed to the models; also the fallback score
            for severities without any detection.
        series_range: ``(start, stop)`` fractions of the sorted slice list to keep.

    Returns:
        A DataFrame sorted by ``row_id`` with the columns ``row_id`` plus one score
        column per entry of ``SEVERITIES``. It has no rows (but the same columns)
        when no study has the requested modality.
    """
    severity_cols = [severity.replace('/', '_').lower() for severity in SEVERITIES]
    pred_rows = []
    # NOTE(review): studies without any series of this modality produce no rows at
    # all - confirm whether the submission needs fallback rows for them.
    mod_df = test_df[test_df.series_description == modality]

    for study_id, group in tqdm(mod_df.groupby('study_id')):
        predictions = defaultdict(list)
        for _, row in group.iterrows():
            # Predict on the images of every matching series of the study.
            series_dir = os.path.join(IMG_DIR, str(row['study_id']), str(row['series_id']))
            # Order slices numerically by file name (e.g. 2.jpg before 10.jpg).
            jpgs_path = sorted(
                glob.glob(f"{series_dir}/*.jpg"),
                key=lambda x: int(os.path.splitext(os.path.basename(x))[0]),
            )
            n_images = len(jpgs_path)
            jpgs_path = jpgs_path[int(n_images * series_range[0]):int(n_images * series_range[1])]
            if not jpgs_path:
                # Nothing to run on: the series has no converted images, or it is so
                # short that the range cut removed every slice. An empty list is not
                # a usable model input, so skip instead of calling the models.
                continue
            for model in models:
                results = model(
                    jpgs_path,
                    conf=thresh,
                    verbose=False,
                    augment=True,
                )
                for res in results:
                    for pred_class, conf in zip(res.boxes.cls, res.boxes.conf):
                        # Class ids arrive as float tensors; cast so the lookup uses
                        # the integer key explicitly.
                        _class = id2label[int(pred_class.item())]
                        predictions[_class].append(conf.item())

        # Iterate over all study-level labels.
        for condition in label_set:
            res_dict = {'row_id': f'{study_id}_{condition}'}

            score_vec = []
            for severity in SEVERITIES:
                # .get avoids creating empty entries in the defaultdict.
                scores = predictions.get(f'{condition}_{severity.lower()}')
                # Best detection confidence, or the threshold when nothing was detected.
                score_vec.append(np.max(scores) if scores else thresh)

            # normalize score to sum to 1
            score_vec = torch.tensor(score_vec)
            score_vec = score_vec / score_vec.sum()

            res_dict.update(zip(severity_cols, score_vec.tolist()))
            pred_rows.append(res_dict)

    # Passing the columns explicitly keeps the schema stable even without rows, so
    # the sort below cannot fail on a missing 'row_id' column.
    pred_df = pd.DataFrame(pred_rows, columns=['row_id', *severity_cols])
    pred_df = pred_df.sort_values(by=['row_id']).reset_index(drop=True)
    return pred_df

# One inference pass per condition family, each on its own series type.
# Arguments: modality, models, id -> class-name map, label names, confidence
# threshold (also the fallback score), and the (start, stop) fraction of slices kept.
# (0.05, 0.95) drops the first and last 5% of slices of each Sagittal T2/STIR series;
# (0.00, 1.00) keeps every slice.
scs_pred_df = infer('Sagittal T2/STIR', scs_models, scs_id2label, scs_label_set, 0.01, (0.05, 0.95))
nfn_pred_df = infer('Sagittal T1',      nfn_models, nfn_id2label, nfn_label_set, 0.01, (0.00, 1.00))
ss_pred_df  = infer('Axial T2',         ss_models,  ss_id2label,  ss_label_set,  0.01, (0.00, 1.00))

In [None]:
# Report the (rows, columns) shape of each prediction frame as a quick sanity check.
for frame_name, frame in (
    ('scs_pred_df', scs_pred_df),
    ('nfn_pred_df', nfn_pred_df),
    ('ss_pred_df', ss_pred_df),
):
    print(f"{frame_name}.shape = {frame.shape!r}")

In [None]:
# Stack the three prediction frames with a fresh 0..n-1 index and write the
# submission file without the index column.
prediction_frames = [scs_pred_df, nfn_pred_df, ss_pred_df]
submission_df = pd.concat(prediction_frames, ignore_index=True)
submission_df.to_csv('/kaggle/working/submission.csv', index=False)

submission_df