In [21]:
import pandas as pd
import numpy as np
import pydicom
import glob
import os

In [2]:
## First, collect the paths of all DICOM files in the working directory.
## glob returns matches in arbitrary order, so sort them to keep
## `mydicoms[0]` and the row order of the exported CSV reproducible.
mydicoms = sorted(glob.glob("*.dcm"))

### Let's look at the contents of the first DICOM:

In [3]:
# Parse the first file found by the glob and display its data elements.
first_dicom_path = mydicoms[0]
dcm1 = pydicom.dcmread(first_dicom_path)
dcm1

(0008, 0016) SOP Class UID                       UI: Secondary Capture Image Storage
(0008, 0018) SOP Instance UID                    UI: 1.3.6.1.4.1.11129.5.5.139539879914217162512411239901306132962191
(0008, 0060) Modality                            CS: 'DX'
(0008, 1030) Study Description                   LO: 'Atelectasis'
(0010, 0020) Patient ID                          LO: '13118'
(0010, 0040) Patient's Sex                       CS: 'M'
(0010, 1010) Patient's Age                       AS: '69'
(0020, 000d) Study Instance UID                  UI: 1.3.6.1.4.1.11129.5.5.120992059193772113283592409393507044871674
(0020, 000e) Series Instance UID                 UI: 1.3.6.1.4.1.11129.5.5.110922964580080663514009950443538578354984
(0028, 0002) Samples per Pixel                   US: 1
(0028, 0004) Photometric Interpretation          CS: 'MONOCHROME2'
(0028, 0010) Rows                                US: 1024
(0028, 0011) Columns                             US: 1024
(0028, 0100) Bits Allo

In [4]:
## Explore how individual header attributes can be read with pydicom
## before building the full table; start by checking the object's type.
type(dcm1)

pydicom.dataset.FileDataset

In [20]:
# Header elements are exposed as attributes on the dataset object.
[dcm1.PatientID, dcm1.PatientAge]

['13118', '69']

## Now, let's create the dataframe that we want, and populate it in a loop with all of our DICOMS:

To complete this exercise, create a single dataframe that has the following columns:
- Patient ID
- Patient Age (as an integer)
- Patient Sex (M/F)
- Imaging Modality
- Type of finding in the image
- Number of rows in the image
- Number of columns in the image

Save this dataframe as a .CSV file.

In [63]:
def _age_as_int(age):
    """Convert a DICOM age value to an int when it is a plain number of years.

    Handles bare digit strings such as '69' as well as year-suffixed values
    such as '069Y'. Anything else (for example a value expressed in months,
    or an empty string) is returned unchanged so no information is lost.
    """
    text = str(age).strip()
    if text[-1:] in ('Y', 'y'):
        text = text[:-1]
    try:
        return int(text)
    except ValueError:
        return age


def read_header(files, path=''):
    """Collect selected header fields from DICOM files and save them as CSV.

    Parameters
    ----------
    files : iterable of str
        Paths of the DICOM files to read.
    path : str, optional
        Directory in which 'dicoms.csv' is written. The default ('') writes
        to the current working directory.

    Returns
    -------
    pandas.DataFrame
        One row per file with the columns 'Patient ID', 'Patient Age',
        'Patient Sex', 'Imaging Modality', 'Study Type', 'Rows' and 'Cols'.
        The same table is written to 'dicoms.csv'.
    """
    head = ['Patient ID', 'Patient Age', 'Patient Sex', 'Imaging Modality',
            'Study Type', 'Rows', 'Cols']
    records = []
    for f in files:
        # Only header elements are needed, so skip parsing the pixel data.
        dcm = pydicom.dcmread(f, stop_before_pixels=True)
        records.append([
            dcm.PatientID,
            _age_as_int(dcm.PatientAge),  # the exercise asks for an integer age
            dcm.PatientSex,
            dcm.Modality,
            dcm.StudyDescription,
            dcm.Rows,
            dcm.Columns,
        ])
    # Naming the columns at construction keeps the header intact even when
    # `files` is empty; passing aliases to to_csv fails on a 0-column frame.
    header_df = pd.DataFrame(records, columns=head)
    header_df.to_csv(os.path.join(path, 'dicoms.csv'), index=False)
    return header_df

In [64]:
# Export the header fields of every discovered DICOM file to 'dicoms.csv'
# in the current working directory (the default `path`).
read_header(mydicoms)