In [None]:
!pip install pydicom
!conda install gdcm -c conda-forge

Collecting pydicom
[?25l  Downloading https://files.pythonhosted.org/packages/72/7b/6ed88f82dd33a32cdb43432dab7f84fcd40c49d63251442b3cfe0be983d4/pydicom-2.1.1-py3-none-any.whl (1.9MB)
[K     |████████████████████████████████| 1.9MB 5.4MB/s 
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-2.1.1
/bin/bash: conda: command not found


In [None]:
import pydicom
from pydicom.data import get_testdata_file
import matplotlib.pyplot as plt
import pandas as pd
import csv
import os

In [None]:
# Load the pipe-delimited table that lists the DICOM tags.
# Later cells read its `Tag` and `Keyword` columns.
dicom_tags = pd.read_csv('dicom_tags.data', delimiter="|")
dicom_tags

In [None]:
# Tags whose values are recorded in the CSV, in the order they appear in dicom_tags
tags_to_record = [tag for tag in dicom_tags.Tag]

# Drop the PixelData tag: its content is stored as images in a separate
# folder rather than as a CSV column
tags_to_record.remove('(7FE0,0010)')

# Reverse lookup: tag -> position of that tag in tags_to_record
dicom_tags_index_reverse = dict(zip(tags_to_record, range(len(tags_to_record))))
dicom_tags_index_reverse

In [None]:
 
# Path to folder with DICOM files
path = '/content/drive/My Drive/dcm_files/'

# Path to folder to store generated files
# NOTE(review): not used yet - the CSV and the image folders below are written
# under `path`, which is also where the CSV is read back from later on.
pathx = '/content/drive/My Drive/dcm_files/'

csv_filename =  'dicom_test.csv'

# Tag of the PixelData element, written the same way as the keys of
# dicom_tags_index_reverse. Its content is saved as images, not in the CSV.
pixel_data_tag = '(7FE0,0010)'


def save_image(pixels, file):
    """Save one image array to `file` as a borderless grayscale rendering.

    A fresh figure is created and closed for every image, so frames are not
    drawn on top of each other and figures do not pile up in memory.
    """
    fig, ax = plt.subplots()
    ax.axis('off')
    ax.imshow(pixels, cmap=plt.cm.gray)
    print('Saving', file)
    fig.savefig(file, dpi=500, bbox_inches='tight', pad_inches=0)
    plt.close(fig)


# New CSV file. newline='' is what the csv module expects for its file object;
# the explicit encoding keeps the result independent of the platform default.
with open(path + csv_filename, 'w', newline='', encoding='utf-8') as csvfile:

    # Create CSV writer
    csvwriter = csv.writer(csvfile)

    # Elements in the header of the CSV: DICOM filename, one column per
    # recorded tag keyword, and a trailing column for the unlisted elements.
    # PixelData is left out by *tag* (not by keyword text) so the columns stay
    # aligned with tags_to_record, which drops the same tag.
    header_row = ['Filename']
    header_row.extend(
        keyword
        for tag, keyword in zip(dicom_tags.Tag, dicom_tags.Keyword)
        if tag != pixel_data_tag
    )
    header_row.append('Private')

    # Column i+1 holds the value of tags_to_record[i]; the last one is 'Private'
    assert len(header_row) == len(tags_to_record) + 2, "header and tags_to_record are out of sync"

    csvwriter.writerow(header_row)

    print('Writing to file ', csv_filename)

    # for each file (sorted so that the row order is reproducible)
    for filename in sorted(os.listdir(path)):

        # Only DICOM files are parsed. The directory also contains the CSV
        # being written and the image folders created by this cell.
        if not filename.lower().endswith('.dcm') or not os.path.isfile(path + filename):
            continue

        print('Currently reading', filename)

        newline = [None] * len(header_row)    # One for each attribute to be stored

        newline[0] = filename    # Set filename attribute

        dcm_file = pydicom.dcmread(path + filename)

        # The features defined in the file (copied, since elements are accessed while looping)
        features_present = list(dcm_file.keys())

        # Path to store image files; exist_ok lets the cell be re-run
        folder_path = path + os.path.splitext(filename)[0]
        os.makedirs(folder_path, exist_ok=True)

        private_tags = []    # Elements whose tag is not listed in dicom_tags (e.g. private tags)

        for x in features_present:

            # Edit to match the keys of the dicom_tags_index_reverse dict
            xtag = str(x).upper().replace(" ", "")

            # If PixelData
            if xtag == pixel_data_tag:

                p = dcm_file.pixel_array

                print('Parsing image(s)')

                # A 3-D array is only a stack of frames when the file declares
                # more than one frame; a single colour image is also 3-D
                # (rows, columns, samples) and must be saved as one picture.
                n_frames = int(getattr(dcm_file, 'NumberOfFrames', 1) or 1)

                if n_frames > 1 and p.ndim > 2:
                    print("Multiple image files present")
                    for i in range(p.shape[0]):
                        save_image(p[i], folder_path + '/' + 'Image' + str(i) + '.jpg')
                else:
                    print("Single image file present")
                    save_image(p, folder_path + '/' + 'Image.jpg')

            elif xtag not in dicom_tags_index_reverse:
                # Unlisted tag: keep the whole element for the 'Private' column
                private_tags.append(dcm_file[x])

            else:
                # Store value at its column (+1 because column 0 is the filename)
                newline[dicom_tags_index_reverse[xtag]+1] = dcm_file[x].value

        # Store the unlisted elements in the last column
        newline[-1] = str(private_tags)

        # Write to csv file
        csvwriter.writerow(newline)

In [None]:

# Read the CSV written under `path` back into a DataFrame and display it
pd.read_csv(filepath_or_buffer=path + 'dicom_test.csv')

Unnamed: 0,Filename,Length​ToEnd,Specific​Character​Set,Language​Code​Sequence,Image​Type,Recognition​Code,Instance​Creation​Date,Instance​Creation​Time,Instance​CreatorUID,Instance​Coercion​Date​Time,SOP​ClassUID,SOP​InstanceUID,Related​GeneralSOP​ClassUID,Original​SpecializedSOP​ClassUID,Study​Date,Series​Date,Acquisition​Date,Content​Date,Overlay​Date,Curve​Date,Acquisition​Date​Time,Study​Time,Series​Time,Acquisition​Time,Content​Time,Overlay​Time,Curve​Time,Data​Set​Type,Data​Set​Subtype,Nuclear​Medicine​Series​Type,Accession​Number,Issuer​OfAccession​Number​Sequence,Query​Retrieve​Level,Query​Retrieve​View,RetrieveAE​Title,Instance​Availability,FailedSOP​InstanceUID​List,Modality,Modalities​InStudy,SOP​Classes​InStudy,...,Overlay​Bits​For​Code​Word,Overlay​Activation​Layer,Overlay​Descriptor​Gray,Overlay​Descriptor​Red,Overlay​Descriptor​Green,Overlay​Descriptor​Blue,Overlays​Gray,Overlays​Red,Overlays​Green,Overlays​Blue,ROI​Area,ROI​Mean,ROI​Standard​Deviation,Overlay​Label,Overlay​Data,Overlay​Comments,Coefficients​SDVN,Coefficients​SDHN,Coefficients​SDDN,Variable​Pixel​Data,Variable​Next​Data​Group,Variable​Coefficients​SDVN,Variable​Coefficients​SDHN,Variable​Coefficients​SDDN,Digital​Signatures​Sequence,Data​Set​Trailing​Padding,Item,Item​Delimitation​Item,Sequence​Delimitation​Item,File​Meta​Information​Group​Length,File​Meta​Information​Version,Media​StorageSOP​ClassUID,Media​StorageSOP​InstanceUID,Transfer​SyntaxUID,Implementation​ClassUID,Implementation​Version​Name,Source​Application​Entity​Title,Private​Information​CreatorUID,Private​Information,Private
0,0002.dcm,,,,"['DERIVED', 'PRIMARY', 'SINGLE PLANE', 'SINGLE...",,,,,,1.2.840.10008.5.1.4.1.1.12.1,1.3.12.2.1107.5.4.3.321890.19960124.162922.29,,,19941013,,,,,,,141917.0,,,,,,,,,,,,,,,,XA,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"[(0009, 0010) Private Creator ..."
1,MRBRAIN.dcm,,ISO_IR 100,,"['ORIGINAL', 'PRIMARY', 'OTHER', 'R', 'IR']",,19960823.0,93801.0,1.3.46.670589.11.0.5,,1.2.840.10008.5.1.4.1.1.4,1.3.46.670589.11.0.4.1996082307380006,,,19950330,19950330.0,,,,,,150829.0,155614.63,,,,,,,,,,,,,,,MR,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00...,,,,,,,,,,,,,,[]
