In [2]:
import pandas as pd
import numpy as np
import pydicom
import glob

In [3]:
## First, collect the paths of all DICOM files in the working directory.
## glob makes no ordering guarantee (it depends on the file system), so the
## paths are sorted to keep `mydicoms[0]` and the row order of the final table
## reproducible across runs and machines.
mydicoms = sorted(glob.glob("*.dcm"))

### Let's look at the contents of the first DICOM:

In [4]:
# Parse the first entry of `mydicoms` with pydicom; the bare `dcm1` expression
# on the last line makes the notebook display the dataset's header elements.
dcm1 = pydicom.dcmread(mydicoms[0])
dcm1

(0008, 0016) SOP Class UID                       UI: Secondary Capture Image Storage
(0008, 0018) SOP Instance UID                    UI: 1.3.6.1.4.1.11129.5.5.162426174634548301003630270411628292460952
(0008, 0060) Modality                            CS: 'DX'
(0008, 1030) Study Description                   LO: 'Effusion|Nodule|Pleural_Thickening|Mass'
(0010, 0020) Patient ID                          LO: '29579'
(0010, 0040) Patient's Sex                       CS: 'F'
(0010, 1010) Patient's Age                       AS: '24'
(0020, 000d) Study Instance UID                  UI: 1.3.6.1.4.1.11129.5.5.113025392650823751977671880960497589856674
(0020, 000e) Series Instance UID                 UI: 1.3.6.1.4.1.11129.5.5.168055162156043936178718006100964727334210
(0028, 0002) Samples per Pixel                   US: 1
(0028, 0004) Photometric Interpretation          CS: 'MONOCHROME2'
(0028, 0010) Rows                                US: 1024
(0028, 0011) Columns                             US:

In [5]:
## Explore how individual header attributes can be read from the pydicom
## dataset before building the full table.

print(dcm1.PatientAge, dcm1.PatientSex, dcm1.PhotometricInterpretation)



24 F MONOCHROME2


## Now, let's create the dataframe that we want, and populate it in a loop with all of our DICOMS:

To complete this exercise, create a single dataframe that has the following columns:
- Patient ID
- Patient Age (as an integer)
- Patient Sex (M/F)
- Imaging Modality
- Type of finding in the image
- Number of rows in the image
- Number of columns in the image

Save this dataframe as a .CSV file.

In [102]:
# Placeholder table: every column starts out holding the integers 0..6
# (one slot per expected row), each column with its own independent list.
df = {
    column_name: list(range(7))
    for column_name in (
        "Patient ID",
        "Patient Age",
        "Patient Sex",
        "Imaging Modality",
        "Type of finding",
        "N-rows",
        "N-columns",
    )
}

In [103]:
# Inspect the placeholder dict before it is converted to a DataFrame.
df

{'Patient ID': [0, 1, 2, 3, 4, 5, 6],
 'Patient Age': [0, 1, 2, 3, 4, 5, 6],
 'Patient Sex': [0, 1, 2, 3, 4, 5, 6],
 'Imaging Modality': [0, 1, 2, 3, 4, 5, 6],
 'Type of finding': [0, 1, 2, 3, 4, 5, 6],
 'N-rows': [0, 1, 2, 3, 4, 5, 6],
 'N-columns': [0, 1, 2, 3, 4, 5, 6]}

In [104]:
# Convert the placeholder dict into a DataFrame; `columns` pins the
# left-to-right column order of the resulting table.
df = pd.DataFrame(
    data=df,
    columns=[
        "Patient ID",
        "Patient Age",
        "Patient Sex",
        "Imaging Modality",
        "Type of finding",
        "N-rows",
        "N-columns",
    ],
)

In [105]:
# Display the DataFrame built from the placeholder values.
df

Unnamed: 0,Patient ID,Patient Age,Patient Sex,Imaging Modality,Type of finding,N-rows,N-columns
0,0,0,0,0,0,0,0
1,1,1,1,1,1,1,1
2,2,2,2,2,2,2,2
3,3,3,3,3,3,3,3
4,4,4,4,4,4,4,4
5,5,5,5,5,5,5,5
6,6,6,6,6,6,6,6


In [106]:
# Build one record per DICOM file and create the DataFrame in a single step.
# Compared with overwriting placeholder cells via `.iloc`, this
#   * handles however many files `mydicoms` contains (no hard-coded count of 7,
#     so no IndexError with fewer files and no silently ignored extras),
#   * avoids writing strings into integer-typed placeholder columns, and
#   * stores the patient age as an integer, as the exercise requires.
records = []
for path in mydicoms:
    dcm = pydicom.dcmread(path)
    records.append(
        {
            "Patient ID": dcm.PatientID,
            # The header stores the age as text (e.g. '24' in the files shown
            # above); int() raises ValueError if a file uses another format.
            "Patient Age": int(dcm.PatientAge),
            "Patient Sex": dcm.PatientSex,
            "Imaging Modality": dcm.Modality,
            # In this dataset the findings are carried in Study Description.
            "Type of finding": dcm.StudyDescription,
            "N-rows": dcm.Rows,
            "N-columns": dcm.Columns,
        }
    )

# Passing `columns` keeps the column order fixed and still yields a correctly
# labelled (empty) table when no DICOM files were found.
df = pd.DataFrame(
    records,
    columns=[
        "Patient ID",
        "Patient Age",
        "Patient Sex",
        "Imaging Modality",
        "Type of finding",
        "N-rows",
        "N-columns",
    ],
)

In [107]:
# Display the table populated from the DICOM headers.
df

Unnamed: 0,Patient ID,Patient Age,Patient Sex,Imaging Modality,Type of finding,N-rows,N-columns
0,29579,24,F,DX,Effusion|Nodule|Pleural_Thickening|Mass,1024,1024
1,1688,59,F,DX,Infiltration|Nodule,1024,1024
2,13659,62,F,DX,Consolidation|Mass|Pneumonia|Pneumothorax,1024,1024
3,13118,69,M,DX,Atelectasis,1024,1024
4,10172,59,F,DX,Atelectasis|Effusion,1024,1024
5,5066,52,M,DX,Cardiomegaly|Effusion|Infiltration,1024,1024
6,23075,31,M,DX,Mass,1024,1024
