In [1]:
# packages

# standard
import numpy as np
import pandas as pd
import os
import time

# plots
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

# dicom
import pydicom as dicom
from sklearn.model_selection import train_test_split

In [2]:
# Load the competition CSV tables into DataFrames.
# All five files sit in the same dataset folder, so the folder is named once
# here instead of being repeated inside every path.
DATA_DIR = '../input/rsna-2024-lumbar-spine-degenerative-classification'

df_train_main = pd.read_csv(f'{DATA_DIR}/train.csv')
df_train_label = pd.read_csv(f'{DATA_DIR}/train_label_coordinates.csv')
df_train_desc = pd.read_csv(f'{DATA_DIR}/train_series_descriptions.csv')
df_test_desc = pd.read_csv(f'{DATA_DIR}/test_series_descriptions.csv')
df_sub = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [132]:
# Preview the first two rows of the training label table.
df_train_main.head(2)

Unnamed: 0,study_id,spinal_canal_stenosis_l1_l2,spinal_canal_stenosis_l2_l3,spinal_canal_stenosis_l3_l4,spinal_canal_stenosis_l4_l5,spinal_canal_stenosis_l5_s1,left_neural_foraminal_narrowing_l1_l2,left_neural_foraminal_narrowing_l2_l3,left_neural_foraminal_narrowing_l3_l4,left_neural_foraminal_narrowing_l4_l5,...,left_subarticular_stenosis_l1_l2,left_subarticular_stenosis_l2_l3,left_subarticular_stenosis_l3_l4,left_subarticular_stenosis_l4_l5,left_subarticular_stenosis_l5_s1,right_subarticular_stenosis_l1_l2,right_subarticular_stenosis_l2_l3,right_subarticular_stenosis_l3_l4,right_subarticular_stenosis_l4_l5,right_subarticular_stenosis_l5_s1
0,4003253,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,...,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild
1,4646740,Normal/Mild,Normal/Mild,Moderate,Severe,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,...,Normal/Mild,Normal/Mild,Normal/Mild,Severe,Normal/Mild,Normal/Mild,Moderate,Moderate,Moderate,Normal/Mild


In [133]:
# NOTE(review): identical to the preview in the preceding cell — looks like a
# leftover duplicate; confirm and remove.
df_train_main.head(2)

Unnamed: 0,study_id,spinal_canal_stenosis_l1_l2,spinal_canal_stenosis_l2_l3,spinal_canal_stenosis_l3_l4,spinal_canal_stenosis_l4_l5,spinal_canal_stenosis_l5_s1,left_neural_foraminal_narrowing_l1_l2,left_neural_foraminal_narrowing_l2_l3,left_neural_foraminal_narrowing_l3_l4,left_neural_foraminal_narrowing_l4_l5,...,left_subarticular_stenosis_l1_l2,left_subarticular_stenosis_l2_l3,left_subarticular_stenosis_l3_l4,left_subarticular_stenosis_l4_l5,left_subarticular_stenosis_l5_s1,right_subarticular_stenosis_l1_l2,right_subarticular_stenosis_l2_l3,right_subarticular_stenosis_l3_l4,right_subarticular_stenosis_l4_l5,right_subarticular_stenosis_l5_s1
0,4003253,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,...,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild
1,4646740,Normal/Mild,Normal/Mild,Moderate,Severe,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,...,Normal/Mild,Normal/Mild,Normal/Mild,Severe,Normal/Mild,Normal/Mild,Moderate,Moderate,Moderate,Normal/Mild


In [148]:
# Reshape the wide label table to long form: one row per (study_id, condition),
# with that cell's label stored in 'status'.
df_unpivoted = df_train_main.melt(id_vars='study_id', var_name='condition', value_name='status')

# Relative frequency of each status within each condition: one row per
# condition, one column per status, 0 where a status never occurs for a
# condition. reset_index turns 'condition' back into a regular column and the
# rename maps the label names onto the submission column names.
frequency_table = (
    df_unpivoted
    .groupby('condition')['status']
    .value_counts(normalize=True)
    .unstack(fill_value=0)
    .reset_index()
    .rename(columns={'Moderate': 'moderate', 'Normal/Mild': 'normal_mild', 'Severe': 'severe'})
)

# The condition is everything after the first underscore of row_id.
# expand=False makes str.extract return a Series instead of a one-column
# DataFrame. A left merge keeps every submission row (an inner merge would
# silently drop rows whose condition has no frequency entry), and
# validate='many_to_one' guards against duplicated conditions in the lookup.
df_sub = (
    df_sub[['row_id']]
    .assign(condition=df_sub['row_id'].str.extract(r'_(.*)', expand=False))
    .merge(frequency_table, on='condition', how='left', validate='many_to_one')
    .loc[:, ['row_id', 'normal_mild', 'moderate', 'severe']]
)

# Fail loudly rather than carry NaN probabilities into a submission.
_unmatched = df_sub[['normal_mild', 'moderate', 'severe']].isna().any(axis=1)
if _unmatched.any():
    raise ValueError(
        f"{int(_unmatched.sum())} submission rows have no matching condition in frequency_table"
    )


In [3]:
# List the column names of the training label table.
df_train_main.columns

Index(['study_id', 'spinal_canal_stenosis_l1_l2',
       'spinal_canal_stenosis_l2_l3', 'spinal_canal_stenosis_l3_l4',
       'spinal_canal_stenosis_l4_l5', 'spinal_canal_stenosis_l5_s1',
       'left_neural_foraminal_narrowing_l1_l2',
       'left_neural_foraminal_narrowing_l2_l3',
       'left_neural_foraminal_narrowing_l3_l4',
       'left_neural_foraminal_narrowing_l4_l5',
       'left_neural_foraminal_narrowing_l5_s1',
       'right_neural_foraminal_narrowing_l1_l2',
       'right_neural_foraminal_narrowing_l2_l3',
       'right_neural_foraminal_narrowing_l3_l4',
       'right_neural_foraminal_narrowing_l4_l5',
       'right_neural_foraminal_narrowing_l5_s1',
       'left_subarticular_stenosis_l1_l2', 'left_subarticular_stenosis_l2_l3',
       'left_subarticular_stenosis_l3_l4', 'left_subarticular_stenosis_l4_l5',
       'left_subarticular_stenosis_l5_s1', 'right_subarticular_stenosis_l1_l2',
       'right_subarticular_stenosis_l2_l3',
       'right_subarticular_stenosis_l3_l4',
 

In [9]:
import pandas as pd
import os

# Build a uniform baseline submission: one row per (test id, condition) pair,
# with the same constant score in each of the three class columns.

# Directory whose entry names provide the ids used as row_id prefixes.
test_images_dir = "/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images"

# Keep the part of each entry name before the first '.', then de-duplicate.
# sorted() makes the row order reproducible: iterating a bare set of strings
# can yield a different order after a kernel restart.
test_ids = [filename.split('.')[0] for filename in os.listdir(test_images_dir)]
unique_ids = sorted(set(test_ids))

# The 25 condition names are every family combined with every level suffix.
# Generating them avoids typos in a hand-written list; the resulting order
# (family-major, level-minor) is the same as the explicit list it replaces.
condition_families = [
    'left_neural_foraminal_narrowing',
    'left_subarticular_stenosis',
    'right_neural_foraminal_narrowing',
    'right_subarticular_stenosis',
    'spinal_canal_stenosis',
]
level_suffixes = ['l1_l2', 'l2_l3', 'l3_l4', 'l4_l5', 'l5_s1']
conditions = [
    f"{family}_{level}"
    for family in condition_families
    for level in level_suffixes
]

# row_id format is "<id>_<condition>"; the loop variable is named study_id so
# the builtin id() is not shadowed.
row_ids = [f"{study_id}_{condition}" for study_id in unique_ids for condition in conditions]

# Assemble the submission frame with the same constant in every class column.
df_submission = pd.DataFrame(row_ids, columns=['row_id'])
df_submission['normal_mild'] = 0.333333
df_submission['moderate'] = 0.333333
df_submission['severe'] = 0.333333

# Show only a preview instead of dumping the whole frame.
df_submission.head(10)

Unnamed: 0,row_id,normal_mild,moderate,severe
0,44036939_left_neural_foraminal_narrowing_l1_l2,0.333333,0.333333,0.333333
1,44036939_left_neural_foraminal_narrowing_l2_l3,0.333333,0.333333,0.333333
2,44036939_left_neural_foraminal_narrowing_l3_l4,0.333333,0.333333,0.333333
3,44036939_left_neural_foraminal_narrowing_l4_l5,0.333333,0.333333,0.333333
4,44036939_left_neural_foraminal_narrowing_l5_s1,0.333333,0.333333,0.333333
5,44036939_left_subarticular_stenosis_l1_l2,0.333333,0.333333,0.333333
6,44036939_left_subarticular_stenosis_l2_l3,0.333333,0.333333,0.333333
7,44036939_left_subarticular_stenosis_l3_l4,0.333333,0.333333,0.333333
8,44036939_left_subarticular_stenosis_l4_l5,0.333333,0.333333,0.333333
9,44036939_left_subarticular_stenosis_l5_s1,0.333333,0.333333,0.333333


In [10]:
# Write df_submission to 'submission.csv' (relative to the working directory),
# without the DataFrame index.
# NOTE(review): this saves the constant-0.333333 baseline; the frequency-based
# df_sub built from the training labels is never written out — confirm which
# one is meant to be submitted.
df_submission.to_csv('submission.csv', index=False)