In [1]:
import os
import json
import numpy as np
import pandas as pd

In [2]:
# Load the sample table of paired X-ray / DXA records. The path is relative,
# so the CSV must sit in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='sample_kub_pair_data.csv')

In [3]:
# Print a structural summary of the frame: index range, column names,
# non-null counts, dtypes, and approximate memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   sop_inst_uid    2 non-null      object 
 1   path            2 non-null      object 
 2   clinic          2 non-null      object 
 3   his_patient_id  2 non-null      int64  
 4   age             2 non-null      float64
 5   gender          2 non-null      object 
 6   race            2 non-null      object 
 7   height          2 non-null      float64
 8   weight          2 non-null      float64
 9   dxa_uid         2 non-null      object 
 10  dxa_measures    2 non-null      object 
 11  date_xray       2 non-null      object 
 12  date_dxa        2 non-null      object 
 13  study_diff      2 non-null      int64  
dtypes: float64(3), int64(2), object(9)
memory usage: 352.0+ bytes


# Column Description
- `sop_inst_uid`: The SOP (Service Object Pair) instance UID from the DICOM file of the paired X-ray.
- `path`: The file path of the DICOM X-ray file.
- `clinic`: The clinic or medical facility where the DICOM is from.
- `his_patient_id`: The patient ID associated with the DICOM.
- `age`: The age of the patient when the X-ray was taken, calculated as the difference in years between the date of the X-ray and the patient's birthdate.
- `gender`: The biological sex of the patient, either "F" for female or "M" for male.
- `race`: The race or ethnicity of the patient, either "Asian" or "White".
- `height`: The height of the patient in centimeters.
- `weight`: The weight of the patient in kilograms.
- `dxa_uid`: The DXA (Dual-energy X-ray absorptiometry) study UID, formatted as `{clinic}_{his_patient_id}_{date_dxa}`.
- `dxa_measures`: The measurement results from the DXA, which may include `LeftHIP`, `RightHIP`, or `LSPINE`. Further details regarding these measurements can be found in the section below.
- `date_xray`: The date when the X-ray was taken.
- `date_dxa`: The date when the DXA was taken.
- `study_diff`: The number of days between the dates of the paired X-ray and DXA studies.

## Example for `dxa_measures`
- A dictionary (JSON object) serialized as a string.
- The first level is the body part that was measured.
- The second level is the specific measurement position within that body part:
    - `RightHIP` or `LeftHIP` : contains `neck_bmd`, `gtr_bmd`, `shaft_bmd`, `ward_bmd`, and `all_bmd`.
    - `LSPINE` : contains `L1`, `L2`, `L3`, and `L4`

In [4]:
# Decode the first record's `dxa_measures` JSON string into a nested dict.
# Left as the cell's last expression so the notebook displays the result.
json.loads(df['dxa_measures'].iloc[0])

{'RightHIP': {'neck_bmd': 0.65071292210287,
  'ward_bmd': 0.4880841750969,
  'gtr_bmd': 0.53562286206283,
  'shaft_bmd': 0.7690510971402998,
  'all_bmd': 0.65940673087341},
 'LeftHIP': {'neck_bmd': 0.67406870943629,
  'ward_bmd': 0.56020339771895,
  'gtr_bmd': 0.51825857378416,
  'shaft_bmd': 0.77971307198891,
  'all_bmd': 0.6619477504162},
 'LSPINE': {'L1': 0.97561638685822,
  'L2': 1.0208276536751,
  'L3': 1.0957400092447,
  'L4': 1.0291571755536}}