In [162]:
%pip install --quiet/kaggle/input/timm_3d_deps/other/initial/ 3 /pydicom/pydicom/pydicom-2.4.4-py3-none-any.whl
%pip install timm_3d --no-index --quiet --find-links= /kaggle/input/timm_3d_deps/other/initial/ 3 /timm_3d/timm_3d/
%pip install torchio --no-index --quiet --find-links= /kaggle/input/timm_3d_deps/other/initial/ 3 /torchio/torchio/
%pip install itk --no-index --quiet --find-links= /kaggle/input/timm_3d_deps/other/initial/ 3 /itk/itk itk


Usage:   
  /opt/homebrew/Caskroom/miniconda/base/envs/pythonProject/bin/python -m pip install [options] <requirement specifier> [package-index-options] ...
  /opt/homebrew/Caskroom/miniconda/base/envs/pythonProject/bin/python -m pip install [options] -r <requirements file> [package-index-options] ...
  /opt/homebrew/Caskroom/miniconda/base/envs/pythonProject/bin/python -m pip install [options] [-e] <vcs project url> ...
  /opt/homebrew/Caskroom/miniconda/base/envs/pythonProject/bin/python -m pip install [options] [-e] <local project path> ...
  /opt/homebrew/Caskroom/miniconda/base/envs/pythonProject/bin/python -m pip install [options] <archive url/path> ...

no such option: --quiet/kaggle/input/timm_3d_deps/other/initial/
Note: you may need to restart the kernel to use updated packages.
[31mERROR: Invalid requirement: '/kaggle/input/timm_3d_deps/other/initial/'
Hint: It looks like a path. File '/kaggle/input/timm_3d_deps/other/initial/' does not exist.[0m[31m
[0mNote:

In [163]:
data_path = "/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/"

In [164]:
from rsna_dataloader import *
import torchio as tio

DATA_BASEPATH = "../data/rsna-2024-lumbar-spine-degenerative-classification/"
TRAINING_DATA = retrieve_coordinate_training_data(DATA_BASEPATH)

transform_3d_val = tio.Compose([
    tio.Resize((128, 128, 128), image_interpolation="bspline"),
    tio.RescaleIntensity(out_min_max=(0, 1)),
])

(trainloader, valloader, test_loader,
 trainset, valset, testset) = create_subject_level_datasets_and_loaders(TRAINING_DATA,
                                                                        transform_3d_train=transform_3d_val,
                                                                        transform_3d_val=transform_3d_val,
                                                                        base_path=os.path.join(
                                                                            DATA_BASEPATH,
                                                                            "train_images"),
                                                                        num_workers=0,
                                                                        split_factor=0.3,
                                                                        batch_size=1,
                                                                        )

The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


In [165]:
TEST_DATA = TRAINING_DATA[TRAINING_DATA["study_id"].isin(testset.subjects["study_id"].values)]

In [166]:
def convert_train_data_to_solution(train_df):
    ret = train_df.loc[:,["row_id", "severity"]]
    ret["normal_mild"] = 0
    ret["moderate"] = 0
    ret["severe"] = 0
    ret["sample_weight"] = 1
    
    for index, row in ret.iterrows():
        ret.loc[index, row["severity"]] = 1
        if row["severity"] == "normal_mild":
            ret.loc[index, "sample_weight"] = 1
        elif row["severity"] == "moderate":
            ret.loc[index, "sample_weight"] = 2
        elif row["severity"] == "severe":
            ret.loc[index, "sample_weight"] = 3
        else:
            print(row["severity"])

            
    return ret[["row_id", "normal_mild", "moderate", "severe", "sample_weight"]]


test_solution = convert_train_data_to_solution(TEST_DATA)
test_solution[test_solution["severe"] == 1]

Unnamed: 0,row_id,normal_mild,moderate,severe,sample_weight
1934,169651375_spinal_canal_stenosis_l2_l3,0,0,1,3
1947,169651375_right_neural_foraminal_narrowing_l5_s1,0,0,1,3
1951,169651375_left_subarticular_stenosis_l4_l5,0,0,1,3
1955,169651375_right_subarticular_stenosis_l3_l4,0,0,1,3
3263,283265383_left_neural_foraminal_narrowing_l4_l5,0,0,1,3
...,...,...,...,...,...
48533,4279881930_left_subarticular_stenosis_l2_l3,0,0,1,3
48534,4279881930_left_subarticular_stenosis_l3_l4,0,0,1,3
48535,4279881930_left_subarticular_stenosis_l4_l5,0,0,1,3
48539,4279881930_right_subarticular_stenosis_l3_l4,0,0,1,3


In [167]:
import os


def retrieve_image_paths(base_path, study_id, series_id):
    series_dir = os.path.join(base_path, str(study_id), str(series_id))
    images = os.listdir(series_dir)
    image_paths = [os.path.join(series_dir, img) for img in images]
    return image_paths

In [168]:
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torchio as tio
import albumentations
import cv2
import pydicom
import itk

CONDITIONS = {
    "Sagittal T2/STIR": ["Spinal Canal Stenosis"],
    "Axial T2": ["Left Subarticular Stenosis", "Right Subarticular Stenosis"],
    "Sagittal T1": ["Left Neural Foraminal Narrowing", "Right Neural Foraminal Narrowing"],
}


def read_series_as_volume(dirName, verbose=False):
    PixelType = itk.ctype("signed short")
    Dimension = 3

    ImageType = itk.Image[PixelType, Dimension]

    namesGenerator = itk.GDCMSeriesFileNames.New()
    namesGenerator.SetUseSeriesDetails(True)
    namesGenerator.AddSeriesRestriction("0008|0021")
    namesGenerator.SetGlobalWarningDisplay(False)
    namesGenerator.SetDirectory(dirName)

    seriesUID = namesGenerator.GetSeriesUIDs()

    if verbose:
        if len(seriesUID) < 1:
            print("No DICOMs in: " + dirName)

        print("The directory: " + dirName)
        print("Contains the following DICOM Series: ")
        for uid in seriesUID:
            print(uid)

    reader = None
    dicomIO = None
    for i in range(10):
        for uid in seriesUID:
            seriesIdentifier = uid
            if verbose:
                print("Reading: " + seriesIdentifier)
            fileNames = namesGenerator.GetFileNames(seriesIdentifier)

            reader = itk.ImageSeriesReader[ImageType].New()
            dicomIO = itk.GDCMImageIO.New()
            reader.SetImageIO(dicomIO)
            reader.SetFileNames(fileNames)
            reader.ForceOrthogonalDirectionOff()
        if reader is not None:
            break

    if reader is None or dicomIO is None:
        raise FileNotFoundError(f"Empty path? {os.path.abspath(dirName)}")
    reader.Update()
    data = itk.GetArrayFromImage(reader.GetOutput())

    del namesGenerator
    del dicomIO
    del reader

    return data


class PatientLevelTestset(Dataset):
    def __init__(self,
                 base_path: str,
                 dataframe: pd.DataFrame,
                 transform_3d=None):
        self.base_path = base_path

        self.dataframe = (dataframe[['study_id', "series_id", "series_description"]]
                          .drop_duplicates())

        self.subjects = self.dataframe[['study_id']].drop_duplicates().reset_index(drop=True)

        self.transform_3d = transform_3d

    def __len__(self):
        return len(self.subjects)

    def __getitem__(self, index):
        curr = self.subjects.iloc[index]
        images_basepath = os.path.join(self.base_path, str(curr["study_id"]))
        images = []

        for series_desc in CONDITIONS.keys():
            # !TODO: Multiple matching series
            series = self.dataframe.loc[
                (self.dataframe["study_id"] == curr["study_id"]) &
                (self.dataframe["series_description"] == series_desc)].sort_values("series_id")['series_id'].iloc[0]

            series_path = os.path.join(images_basepath, str(series))
            series_images = read_series_as_volume(series_path)

            if self.transform_3d is not None:
                series_images = self.transform_3d(np.expand_dims(series_images, 0))  #.data

            images.append(torch.Tensor(series_images).squeeze(0))

        return torch.stack(images), curr["study_id"]

In [169]:
transform_3d = tio.Compose([
    tio.Resize((128, 128, 128), image_interpolation="bspline"),
    tio.RescaleIntensity(out_min_max=(0, 1)),
])


In [170]:
def create_subject_level_testset_and_loader(df: pd.DataFrame,
                                            transform_3d,
                                            base_path: str,
                                            batch_size=1,
                                            num_workers=0):
    testset = PatientLevelTestset(base_path, df, transform_3d=transform_3d)
    test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return testset, test_loader

In [171]:
import os

dataset, dataloader = create_subject_level_testset_and_loader(TEST_DATA, transform_3d,
                                                              os.path.join(DATA_BASEPATH, "train_images"))

In [172]:
import torch

device = torch.device("cuda") if torch.cuda.is_available() else "cpu"

In [173]:
import torch.nn as nn
import timm_3d


class CNN_Model_3D(nn.Module):
    def __init__(self, backbone="efficientnet_lite0", in_chans=3, out_classes=75, pretrained=True):
        super(CNN_Model_3D, self).__init__()
        self.out_classes = out_classes

        self.encoder = timm_3d.create_model(
            backbone,
            num_classes=out_classes,
            features_only=False,
            drop_rate=0,
            drop_path_rate=0,
            pretrained=pretrained,
            in_chans=in_chans,
        ).to(device)

    def forward(self, x):
        # return self.encoder(x).reshape((-1, self.out_classes, 3))
        return self.encoder(x)


In [174]:
model = torch.load(
    "/Users/victorsahin/Downloads/efficientnet_b4_128_3d_comp_oversampling_7.pt", map_location=torch.device('cpu')).to(
    device)

In [175]:
CONDITIONS = {
    "Sagittal T2/STIR": ["spinal_canal_stenosis"],
    "Axial T2": ["left_subarticular_stenosis", "right_subarticular_stenosis"],
    "Sagittal T1": ["left_neural_foraminal_narrowing", "right_neural_foraminal_narrowing"],
}

ALL_CONDITIONS = sorted(["spinal_canal_stenosis", "left_subarticular_stenosis", "right_subarticular_stenosis",
                         "left_neural_foraminal_narrowing", "right_neural_foraminal_narrowing"])
LEVELS = ["l1_l2", "l2_l3", "l3_l4", "l4_l5", "l5_s1"]

results_df = pd.DataFrame({"row_id": [], "normal_mild": [], "moderate": [], "severe": []})

ALL_CONDITIONS

['left_neural_foraminal_narrowing',
 'left_subarticular_stenosis',
 'right_neural_foraminal_narrowing',
 'right_subarticular_stenosis',
 'spinal_canal_stenosis']

In [176]:
# Pre-populate results df
import glob
import os

study_ids = TEST_DATA["study_id"].values
# study_ids = glob.glob("/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images/*")
# study_ids = [os.path.basename(e) for e in study_ids]

results_df = pd.DataFrame({"row_id": [], "normal_mild": [], "moderate": [], "severe": []})
for study_id in study_ids:
    for condition in ALL_CONDITIONS:
        for level in LEVELS:
            row_id = f"{study_id}_{condition}_{level}"
            results_df = results_df._append(
                {"row_id": row_id, "normal_mild": 1 / 3, "moderate": 1 / 3, "severe": 1 / 3}, ignore_index=True)

In [177]:
import numpy as np
import pandas as pd
import pandas.api.types
import sklearn.metrics


class ParticipantVisibleError(Exception):
    pass


def get_condition(full_location: str) -> str:
    # Given an input like spinal_canal_stenosis_l1_l2 extracts 'spinal'
    for injury_condition in ['spinal', 'foraminal', 'subarticular']:
        if injury_condition in full_location:
            return injury_condition
    raise ValueError(f'condition not found in {full_location}')


def score(
        solution: pd.DataFrame,
        submission: pd.DataFrame,
        row_id_column_name: str,
        any_severe_scalar: float
    ) -> float:
    '''
    Pseudocode:
    1. Calculate the sample weighted log loss for each medical condition:
    2. Derive a new any_severe label.
    3. Calculate the sample weighted log loss for the new any_severe label.
    4. Return the average of all of the label group log losses as the final score, normalized for the number of columns in each group.
       This mitigates the impact of spinal stenosis having only half as many columns as the other two conditions.
    '''

    target_levels = ['normal_mild', 'moderate', 'severe']

    # Run basic QC checks on the inputs
    if not pandas.api.types.is_numeric_dtype(submission[target_levels].values):
        raise ParticipantVisibleError('All submission values must be numeric')

    if not np.isfinite(submission[target_levels].values).all():
        raise ParticipantVisibleError('All submission values must be finite')

    if solution[target_levels].min().min() < 0:
        raise ParticipantVisibleError('All labels must be at least zero')
    if submission[target_levels].min().min() < 0:
        raise ParticipantVisibleError('All predictions must be at least zero')

    solution['study_id'] = solution['row_id'].apply(lambda x: x.split('_')[0])
    solution['location'] = solution['row_id'].apply(lambda x: '_'.join(x.split('_')[1:]))
    solution['condition'] = solution['row_id'].apply(get_condition)

    # del solution[row_id_column_name]
    # del submission[row_id_column_name]
    # assert sorted(submission.columns) == sorted(target_levels)
    
    submission = submission.sort_values(by="row_id")
    solution = solution.sort_values(by="row_id")

    submission['study_id'] = solution['study_id']
    submission['location'] = solution['location']
    submission['condition'] = solution['condition']

    condition_losses = []
    condition_weights = []
    for condition in ['spinal', 'foraminal', 'subarticular']:
        condition_indices = solution.loc[solution['condition'] == condition].index.values
        condition_loss = sklearn.metrics.log_loss(
            y_true=solution.loc[condition_indices, target_levels].values,
            y_pred=submission.loc[condition_indices, target_levels].values,
            sample_weight=solution.loc[condition_indices, 'sample_weight'].values
        )
        condition_losses.append(condition_loss)
        condition_weights.append(1)

    any_severe_spinal_labels = pd.Series(solution.loc[solution['condition'] == 'spinal'].groupby('study_id')['severe'].max())
    any_severe_spinal_weights = pd.Series(solution.loc[solution['condition'] == 'spinal'].groupby('study_id')['sample_weight'].max())
    any_severe_spinal_predictions = pd.Series(submission.loc[submission['condition'] == 'spinal'].groupby('study_id')['severe'].max())
    any_severe_spinal_loss = sklearn.metrics.log_loss(
        y_true=any_severe_spinal_labels,
        y_pred=any_severe_spinal_predictions,
        sample_weight=any_severe_spinal_weights
    )
    condition_losses.append(any_severe_spinal_loss)
    condition_weights.append(any_severe_scalar)
    return np.average(condition_losses, weights=condition_weights)

In [178]:
results_df = results_df.drop_duplicates(subset=["row_id"]).reset_index(drop=True)
test_solution = test_solution.drop_duplicates(subset=["row_id"]).reset_index(drop=True)

score(test_solution, results_df, "row_id", 1.0)

0.9770528950624475

In [179]:
softmax = nn.Softmax(dim=2)

In [180]:
results_df

Unnamed: 0,row_id,normal_mild,moderate,severe
0,107698245_left_neural_foraminal_narrowing_l1_l2,0.333333,0.333333,0.333333
1,107698245_left_neural_foraminal_narrowing_l2_l3,0.333333,0.333333,0.333333
2,107698245_left_neural_foraminal_narrowing_l3_l4,0.333333,0.333333,0.333333
3,107698245_left_neural_foraminal_narrowing_l4_l5,0.333333,0.333333,0.333333
4,107698245_left_neural_foraminal_narrowing_l5_s1,0.333333,0.333333,0.333333
...,...,...,...,...
3345,4279881930_spinal_canal_stenosis_l1_l2,0.333333,0.333333,0.333333
3346,4279881930_spinal_canal_stenosis_l2_l3,0.333333,0.333333,0.333333
3347,4279881930_spinal_canal_stenosis_l3_l4,0.333333,0.333333,0.333333
3348,4279881930_spinal_canal_stenosis_l4_l5,0.333333,0.333333,0.333333


In [181]:
from tqdm import tqdm

with torch.no_grad():
    model.eval()

    for item_index, item in tqdm(enumerate(testset)):
        images, _  = item
        output = model(images.unsqueeze(0).to(device))
        output = output.reshape((-1, 25, 3))
        output = softmax(output)
        output = output.detach().cpu().numpy()[0]
        study_id = testset.subjects["study_id"].values[item_index]
        for index, level in enumerate(output):
            row_id = f"{str(study_id)}_{ALL_CONDITIONS[index // 5]}_{LEVELS[index % 5]}"
            results_df.loc[results_df.row_id == row_id, 'normal_mild'] = level[0]
            results_df.loc[results_df.row_id == row_id, 'moderate'] = level[1]
            results_df.loc[results_df.row_id == row_id, 'severe'] = level[2]

134it [06:05,  2.73s/it]


In [182]:
score(test_solution, results_df, "row_id", 1.0)

The y_pred values do not sum to one. Make sure to pass probabilities.
The y_pred values do not sum to one. Make sure to pass probabilities.
The y_pred values do not sum to one. Make sure to pass probabilities.


8.388581940809484

In [185]:
with torch.no_grad():
    model.eval()

    for item_index, item in tqdm(enumerate(testset)):
        images, _ = item
        output = model(images.unsqueeze(0).to(device))
        output = output.reshape((-1, 25, 3))
        output = softmax(output * (1 / np.array([1, 2, 4])))
        output = output.detach().cpu().numpy()[0]
        study_id = testset.subjects["study_id"].values[item_index]
        for index, level in enumerate(output):
            row_id = f"{str(study_id)}_{ALL_CONDITIONS[index // 5]}_{LEVELS[index % 5]}"
            results_df.loc[results_df.row_id == row_id, 'normal_mild'] = level[0]
            results_df.loc[results_df.row_id == row_id, 'moderate'] = level[1]
            results_df.loc[results_df.row_id == row_id, 'severe'] = level[2]


134it [06:07,  2.74s/it]


In [186]:
score(test_solution, results_df, "row_id", 1.0)

7.846171254651535