In [1]:
import pandas as pd
import numpy as np
import json


# Levels that are looked up for every (series_id, condition) group.
required_levels = ['L1/L2', 'L2/L3', 'L3/L4', 'L4/L5', 'L5/S1']

# Read the label-coordinate table from the competition input directory.
file_path = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_label_coordinates.csv'
df = pd.read_csv(file_path)

# Build one indicator column per distinct 'condition' value ...
condition_dummies = pd.get_dummies(df.loc[:, 'condition'])

# ... and append those indicator columns to the right of the loaded frame.
df = pd.concat((df, condition_dummies), axis='columns')

# Group by 'series_id' and 'condition'
grouped = df.groupby(['series_id', 'condition'])


def _level_coordinates(group, level):
    """Return the JSON-ready coordinate entry for ``level`` within ``group``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single (series_id, condition) group; must provide the
        columns 'level', 'x', 'y' and 'instance_number'.
    level : str
        Level label to look up in the 'level' column.

    Returns
    -------
    dict
        ``{"level", "x", "y", "z"}`` where "z" is taken from
        'instance_number'. When several rows match, the first one is used;
        when none match, x, y and z are all NaN.
    """
    level_rows = group[group['level'] == level]
    if level_rows.empty:
        x, y, z = np.nan, np.nan, np.nan
    else:
        x = level_rows['x'].values[0]
        y = level_rows['y'].values[0]
        z = level_rows['instance_number'].values[0]

    # Convert unconditionally instead of checking for specific NumPy scalar
    # types: json cannot serialise NumPy integers (or float32), so a column
    # stored with any width other than int64/float64 would otherwise reach
    # json.dump unconverted and raise TypeError. "z" is emitted as a float so
    # that present and missing (NaN) values share one type.
    return {"level": level, "x": float(x), "y": float(y), "z": float(z)}


# One dictionary per (series_id, condition) group, each listing every
# required level in order.
data_list = []

for (series_id, condition), group in grouped:
    data_list.append({
        "series_id": int(series_id),  # Convert to native Python int
        "condition": condition,
        "coordinates": [_level_coordinates(group, level) for level in required_levels],
    })



In [2]:

# Collect every record whose series_id is 9334563.
filtered_list = list(filter(lambda item: item['series_id'] == 9334563, data_list))

# Print the first matching record, or report that there is none.
if not filtered_list:
    print("No entry with series_id 9334563 found.")
else:
    result = filtered_list[0]
    print(result)

{'series_id': 9334563, 'condition': 'Spinal Canal Stenosis', 'coordinates': [{'level': 'L1/L2', 'x': nan, 'y': nan, 'z': nan}, {'level': 'L2/L3', 'x': nan, 'y': nan, 'z': nan}, {'level': 'L3/L4', 'x': 415.7647593087013, 'y': 429.25688073394497, 'z': 8.0}, {'level': 'L4/L5', 'x': 435.0971323013306, 'y': 510.534375, 'z': 8.0}, {'level': 'L5/S1', 'x': 465.71588230133057, 'y': 573.103125, 'z': 8.0}]}


In [3]:
# Keep every record except the "Spinal Canal Stenosis" entry of series
# 3951475160: a record survives if either its series_id or its condition
# differs from that pair.
filtered_data = [
    record
    for record in data_list
    if record["series_id"] != 3951475160
    or record["condition"] != "Spinal Canal Stenosis"
]


In [4]:
# From here on, `data_list` refers to the filtered records.
data_list = filtered_data

In [5]:
# Write the records to the working directory as JSON indented by 4 spaces.
# NOTE(review): json.dump's default allow_nan=True writes missing coordinates
# as bare NaN tokens, which strict JSON parsers reject — confirm that every
# consumer of this file accepts them.
output_path = '/kaggle/working/json_label_coordinates.json'
with open(output_path, mode='w') as json_file:
    json.dump(data_list, fp=json_file, indent=4)

# Last expression of the cell: displays the destination path.
output_path

'/kaggle/working/json_label_coordinates.json'