In [29]:
import sqlite3
import json
import pydicom
import os
from pydicom.tag import Tag
import re

In [15]:


# Step 1: Load the JSON file to get the list of required DICOM tags
json_file_path = 'C:/src/midi_b_challange/docs/ps3.3_profile_attrs.json'
# Decode explicitly as UTF-8. Without `encoding`, open() uses the locale's
# preferred encoding (e.g. cp1252 on Windows), which can mis-decode or fail
# on non-ASCII characters in the JSON text.
with open(json_file_path, 'r', encoding='utf-8') as file:
    required_tags = json.load(file)

def parse_dicom_tag(tag_str):
    """Convert a textual DICOM tag such as ``"(0010, 0020)"`` into a ``Tag``.

    Args:
        tag_str: Tag written as ``(GGGG, EEEE)`` where both parts are
            hexadecimal. Surrounding whitespace and any amount of
            whitespace (including none) around the comma are accepted.

    Returns:
        The ``Tag`` built from the parsed group and element numbers.

    Raises:
        ValueError: If the string does not contain exactly one comma or
            either part is not valid hexadecimal.
    """
    # Drop outer whitespace first so the parentheses are really at the ends.
    inner = tag_str.strip().strip('()')

    # Split on the bare comma rather than ', ' so "(0010,0020)" parses too.
    group_hex, element_hex = inner.split(',')

    group = int(group_hex.strip(), 16)
    element = int(element_hex.strip(), 16)

    return Tag(group, element)

# Flatten every list of tag strings in the profile into one list of Tag objects.
required_tags_values = [
    parse_dicom_tag(tag_text)
    for tag_group in required_tags.values()
    for tag_text in tag_group
]


# Step 2: Read the DICOM file
dicom_file_path = 'C:/src/midi_b_challange/data/images/manifest-1617826555824/Pseudo-PHI-DICOM-Data/292821506/07-13-2013-NA-XR CHEST AP PORTABLE for Douglas Davidson-46198/1001.000000-NA-37718/1-1.dcm'

test_dcm = pydicom.dcmread(dicom_file_path)

# Step 3: For each required tag present in the sample file, record the
# element's name keyed by its tag
extracted_values = {
    tag: test_dcm[tag].name
    for tag in required_tags_values
    if tag in test_dcm
}

# Print or use the extracted values as needed
print(extracted_values)

{(0020, 000e): 'Series Instance UID', (0008, 0018): 'SOP Instance UID', (0020, 000d): 'Study Instance UID', (0008, 0022): 'Acquisition Date', (0008, 0023): 'Content Date', (0010, 0030): "Patient's Birth Date", (0008, 0021): 'Series Date', (0008, 0020): 'Study Date', (0018, 700a): 'Detector ID', (0010, 0020): 'Patient ID', (0020, 0010): 'Study ID', (0008, 0050): 'Accession Number', (0010, 0010): "Patient's Name", (0010, 0040): "Patient's Sex", (0008, 0090): "Referring Physician's Name", (0008, 0030): 'Study Time', (0008, 2111): 'Derivation Description', (0008, 0081): 'Institution Address', (0010, 1010): "Patient's Age", (0008, 1050): "Performing Physician's Name", (0008, 0092): "Referring Physician's Address", (0008, 1030): 'Study Description', (0008, 0032): 'Acquisition Time', (0018, 1400): 'Acquisition Device Processing Description', (0008, 0031): 'Series Time', (0008, 0033): 'Content Time', (0018, 0010): 'Contrast/Bolus Agent', (0008, 0080): 'Institution Name'}


In [28]:
# Display only the element names (the dict's values) collected from the sample file.
extracted_values.values()

dict_values(['Series Instance UID', 'SOP Instance UID', 'Study Instance UID', 'Acquisition Date', 'Content Date', "Patient's Birth Date", 'Series Date', 'Study Date', 'Detector ID', 'Patient ID', 'Study ID', 'Accession Number', "Patient's Name", "Patient's Sex", "Referring Physician's Name", 'Study Time', 'Derivation Description', 'Institution Address', "Patient's Age", "Performing Physician's Name", "Referring Physician's Address", 'Study Description', 'Acquisition Time', 'Acquisition Device Processing Description', 'Series Time', 'Content Time', 'Contrast/Bolus Agent', 'Institution Name'])

In [59]:
conn = sqlite3.connect('C:/src/midi_b_challange/data/answers.db')
c = conn.cursor()

# Element names are turned into column names by removing spaces,
# apostrophes and slashes.
RX = re.compile(r"([ '/])")
# dict.fromkeys de-duplicates while keeping first-seen order, so the column
# order follows extracted_values and is identical on every run. A set of
# strings iterates in an order that can change between interpreter sessions.
unique_compr_values = list(dict.fromkeys(
    RX.sub(r'', tn) for tn in extracted_values.values()
))

# Create the answer_data table: an autoincrement id plus one TEXT column per
# element name. Identifiers are double-quoted (embedded quotes doubled) so a
# name containing characters RX does not strip, such as '-', cannot break the
# statement. Joining a list also avoids a dangling comma when there are no
# extra columns.
column_defs = ['id INTEGER PRIMARY KEY AUTOINCREMENT'] + [
    '"{}" TEXT'.format(col.replace('"', '""')) for col in unique_compr_values
]
c.execute('''
    CREATE TABLE IF NOT EXISTS answer_data (
        {}
    )
'''.format(',\n        '.join(column_defs)))
conn.commit()


In [58]:
# Drop the answer_data table if it is present.
c.execute("DROP TABLE IF EXISTS answer_data")

<sqlite3.Cursor at 0x181c8bcc0c0>

In [56]:
# Read back every row currently stored in answer_data and print them.
cursor = conn.execute('SELECT * FROM answer_data')
results = list(cursor)
print(results)

[]


In [69]:
# Counters for the directory being walked and the file within it. They are
# tracked here but not written to the table.
group_id = 0
index_id = 0

# Derive the column names and their tags from ONE pass over extracted_values
# so the i-th value of each row lands in the i-th named column. Previously
# the names came from set(names) and the tags from set(tags); two sets
# iterate in unrelated orders, so values were stored under the wrong columns.
column_to_tag = {RX.sub(r'', name): tag for tag, name in extracted_values.items()}
columns = ', '.join('"{}"'.format(col.replace('"', '""')) for col in column_to_tag)
columns_tagids = list(column_to_tag.values())
placeholders = ', '.join(['?'] * len(columns_tagids))

# The statement is identical for every file, so build it once.
insert_sql = f'''
    INSERT INTO answer_data ({columns})
    VALUES ({placeholders})
'''

current_subdir = ""
# Using the connection as a context manager commits all inserts when the walk
# finishes and rolls them back if reading a file or inserting raises.
with conn:
    for subdir, dirs, files in os.walk("C:/src/midi_b_challange/data/images/manifest-1617826555824/Pseudo-PHI-DICOM-Data/"):
        if current_subdir != subdir:
            group_id += 1
            index_id = 0
            current_subdir = subdir
        for file in files:
            if not file.endswith('.dcm'):
                continue
            dcm = pydicom.dcmread(os.path.join(subdir, file))

            # Elements absent from this file are stored as empty strings.
            # The loop variable is named `tag` so it does not read like the
            # cursor `c` used just below.
            values = [str(dcm[tag].value) if tag in dcm else "" for tag in columns_tagids]

            c.execute(insert_sql, values)
            index_id += 1
        


KeyError: (0018, 1400)

In [67]:
# Display the `values` list left over from the most recent iteration of the insert loop.
values

[(0018, 1400) Acquisition Device Processing Descr LO: 'CHEST AP PORT X-WISE GRID',
 (0008, 0081) Institution Address                 ST: '334 Michael Manor Sarahview, PA 56560',
 (0008, 0080) Institution Name                    LO: 'Scott Community Hospital',
 (0018, 700a) Detector ID                         SH: '',
 (0020, 000d) Study Instance UID                  UI: 2.25.106461954783291641048254423668956446198,
 (0020, 000e) Series Instance UID                 UI: 2.25.159938781348401988370074200042204937718,
 (0020, 0010) Study ID                            SH: '',
 (0010, 0010) Patient's Name                      PN: 'DAVIDSON^DOUGLAS',
 (0008, 0090) Referring Physician's Name          PN: 'HUGHES^KATHLEEN',
 (0008, 2111) Derivation Description              ST: 'G1.0e#1.60+0.20,MDR0.2AM0.6',
 (0010, 1010) Patient's Age                       AS: '037Y',
 (0008, 0092) Referring Physician's Address       ST: '0544 Green Inlet Jeffreyland, HI 66060',
 (0018, 0010) Contrast/Bolus Agent