# RSNA EDA: mapping segmentation files to patient series

In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import os

### Exploring the train series metadata CSV

In [2]:
# Root folder of the competition data. Set the RSNA_DATA_DIR environment variable
# to point at another copy of the dataset; the fallback is the original local path.
data_dir = os.environ.get("RSNA_DATA_DIR", "D:/Downloads/rsna-2023-abdominal-trauma-detection")
metadata_path = data_dir + "/train_series_meta.csv"

In [3]:
# Load the series metadata CSV into a DataFrame and preview its first rows.
train_metadata = pd.read_csv(metadata_path)
train_metadata.head()

Unnamed: 0,patient_id,series_id,aortic_hu,incomplete_organ
0,10004,21057,146.0,0
1,10004,51033,454.75,0
2,10005,18667,187.0,0
3,10007,47578,329.0,0
4,10026,29700,327.0,0


In [4]:
# Print column dtypes, non-null counts and memory usage of the metadata frame.
train_metadata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4711 entries, 0 to 4710
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   patient_id        4711 non-null   int64  
 1   series_id         4711 non-null   int64  
 2   aortic_hu         4711 non-null   float64
 3   incomplete_organ  4711 non-null   int64  
dtypes: float64(1), int64(3)
memory usage: 147.3 KB


In [5]:
# Distinct patient ids found in the metadata; the cell displays how many there are.
patients = train_metadata["patient_id"].unique()
len(patients) 

3147

**So all 3147 patients are present in this metadata CSV.**

Counting the number of series for each patient:

In [6]:
# Count the series rows recorded for each patient: group by patient first,
# then keep only the series_id column (as a one-column DataFrame) and count it.
patient_series = train_metadata.groupby("patient_id")[["series_id"]].count()
patient_series.head(n=5)

Unnamed: 0_level_0,series_id
patient_id,Unnamed: 1_level_1
19,1
26,2
33,1
43,2
96,1


In [7]:
# Summary statistics of the per-patient series counts (min/max give the range of series per patient).
patient_series.describe()

Unnamed: 0,series_id
count,3147.0
mean,1.496981
std,0.50007
min,1.0
25%,1.0
50%,1.0
75%,2.0
max,2.0


Maximum number of series per patient = **2**

### Explore the segmentations folder and match it to the metadata through `series_id`

In [14]:
# The segmentations folder sits next to the metadata CSV, so derive its location
# from metadata_path instead of repeating the absolute dataset path.
segmentations_path = os.path.dirname(metadata_path) + "/segmentations"

# Entries are expected to be named after a series_id. Drop anything after the
# first dot (e.g. a ".nii" or ".nii.gz" extension) and skip entries whose stem is
# not purely numeric (e.g. ".DS_Store"), so a stray file cannot abort the cell
# with a ValueError raised by int().
segmentation_stems = [entry.split(".", 1)[0] for entry in os.listdir(segmentations_path)]
segmentations = [int(stem) for stem in segmentation_stems if stem.isdecimal()]
len(segmentations)

206

Only 206 segmentations are available.

In [15]:
# All series_id values from the metadata as a plain Python list (one entry per metadata row).
series = train_metadata["series_id"].tolist()

In [16]:
# Number of series listed in the metadata.
len(series)

4711

In [18]:
# Keep the segmentation ids that also appear in the metadata, preserving their order.
# A set makes each membership test O(1) instead of scanning the whole `series` list,
# and the comprehension replaces the explicit loop with its no-op `else: pass` branch.
known_series = set(series)
matched_series = [segmentation for segmentation in segmentations if segmentation in known_series]

In [20]:
# Number of segmentations whose series_id was found in the metadata.
len(matched_series)

206

We now have the matched series, but we also want the patients those series belong to.

In [24]:
# Metadata rows whose series has a segmentation: build the boolean mask first,
# then select the matching rows with .loc and preview them.
has_segmentation = train_metadata["series_id"].isin(matched_series)
patients_segment = train_metadata.loc[has_segmentation]
patients_segment.head(n=5)

Unnamed: 0,patient_id,series_id,aortic_hu,incomplete_organ
0,10004,21057,146.0,0
1,10004,51033,454.75,0
21,10217,16066,208.0,0
22,10228,30522,145.0,0
23,10228,40471,291.0,0


In [27]:
# Distinct patient ids among the metadata rows that have a segmentation.
patients_with_segmentations = patients_segment["patient_id"].unique()
patients_with_segmentations

array([10004, 10217, 10228, 10300, 10917, 10937, 11177, 11652, 13623,
       13741, 13794, 14309, 14321, 14343, 14950, 15117,  1523, 15876,
       16140, 16327, 16436, 16494, 16645, 16682, 17265, 18682, 18697,
       18877, 19249, 19410, 20621, 20951, 21141, 21411, 22623, 22755,
       24439, 24524, 25093, 25102, 25347, 28095, 28569, 28925, 28976,
       29565, 30446, 31636, 32272, 32425, 33007, 33251, 33848, 34409,
       35056,  3506, 35653, 36022, 36105, 36289, 37169, 37429, 37436,
       37459, 37551, 37802, 38045, 38238, 38427, 38541, 38982, 39330,
        3983,  4093, 41306, 41360, 42008, 42436, 43399, 43492, 43551,
       44136, 44507, 44693, 45488, 46904, 47065, 47263,  4791, 48710,
       50486, 50518, 50753, 50820, 50873, 51520, 51545, 52416, 53108,
       53294, 54183, 54361, 54722, 54832, 55567, 55710, 55888, 56050,
       57444, 57563, 58729, 60058, 60744, 60836, 61547, 62116, 62360,
       62397, 62763, 62845, 64194, 64256, 65326, 65456, 65504,  7642,
        8848,  9813]

In [28]:
# Number of distinct patients that have at least one segmented series.
len(patients_with_segmentations)

128

In [30]:
# Load train.csv from the same folder as the metadata CSV; the folder is derived
# from metadata_path rather than repeating the absolute dataset path.
train_data = pd.read_csv(os.path.dirname(metadata_path) + "/train.csv")

In [32]:
# Rows of train_data for the patients that have at least one segmentation.
segmented_patient_mask = train_data["patient_id"].isin(patients_with_segmentations)
train_data.loc[segmented_patient_mask]

Unnamed: 0,patient_id,bowel_healthy,bowel_injury,extravasation_healthy,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high,any_injury
0,10004,1,0,0,1,0,1,0,1,0,0,0,0,1,1
16,10217,1,0,0,1,1,0,0,0,1,0,0,0,1,1
17,10228,1,0,1,0,1,0,0,0,1,0,0,1,0,1
24,10300,1,0,1,0,1,0,0,0,1,0,0,1,0,1
48,10917,0,1,1,0,1,0,0,1,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2972,65456,0,1,1,0,1,0,0,1,0,0,0,0,1,1
2975,65504,1,0,1,0,1,0,0,0,1,0,0,0,1,1
3038,7642,0,1,1,0,1,0,0,0,1,0,0,1,0,1
3091,8848,1,0,1,0,1,0,0,0,1,0,0,1,0,1
