In [1]:
import pydicom
import os
from glob import glob

In [2]:
pwd

'/root/notebooks'

In [3]:
data_path = input("Enter data path: ")

Enter data path: /root/notebooks/NEW


In [4]:
#To Check if the files present in the folder consist DICOMS Files
g = glob(data_path + '/*.dcm')
print ("Total of %d DICOM images.\nFirst 5 filenames:" % len(g))
print ('\n'.join(g[:5]))

Total of 5 DICOM images.
First 5 filenames:
/root/notebooks/NEW/2.dcm
/root/notebooks/NEW/5.dcm
/root/notebooks/NEW/1.dcm
/root/notebooks/NEW/3.dcm
/root/notebooks/NEW/4.dcm


# Create mapping table:

In [70]:
import sqlite3 #Using the SQLite3 library
conn = sqlite3.connect(':memory:') #Making a connection with the databsse
c = conn.cursor()

In [71]:
#UNMASKED & MASKED TABLE:
unmasked_table= """CREATE TABLE unmasked(
                    PatientID text,
                    PatientName text,
                    PatientAddress text,
                    PatientDOB text,
                    PatientSex text,
                    PhysicianName text,
                    Manufacturer text,
                    PRIMARY KEY (PatientID)
                    )"""
masked_table="""CREATE TABLE masked( 
                    Pat_id integer, 
                    Mask_name text, 
                    Mask_address text, 
                    Mask_Birth_Date text, 
                    Mask_Sex text,  
                    Mask_Manufacturer text, 
                    Mask_Physician_Name text, 
                    FOREIGN KEY(Pat_id) REFERENCES unmasked (PatientID)
                )"""
c.execute(unmasked_table)
c.execute(masked_table)
conn.commit()

In [72]:
for s in os.listdir(data_path):
        if 'ipynb_checkpoints' not in s.split('.'):
            print (s)
            slices = pydicom.read_file(data_path + '/' + s)
            c.execute(f"""INSERT INTO unmasked VALUES ('{slices.PatientID}','{slices.PatientName}',
                      '{slices[0x0010, 0x1040].value}','{slices[0x0010, 0x0030].value}','{slices[0x0010, 0x0040].value}',
                      '{slices[0x0008, 0x0090].value}','{slices[0x0008, 0x0070].value}')""")
            c.execute(f"""INSERT INTO masked VALUES('{slices.PatientID}','XXXX','XXXX','XXXXXX','XX','XXXX','XXXX')""")
            conn.commit()

2.dcm
5.dcm
1.dcm
3.dcm
4.dcm


In [73]:
c.execute('SELECT * from unmasked')
c.fetchall()

[('2',
  'Jack Joe',
  'MARSHAM, START LANE, WHALEY BRIDGE, SK23 7BP',
  '19620726',
  'M',
  'WHITTINGHAM VM^-',
  'Philips Medical Systems'),
 ('5',
  'Rohan Gupta',
  'MARSHAM, START LANE, WHALEY BRIDGE, SK23 7BP',
  '19620726',
  'M',
  'WHITTINGHAM VM^-',
  'Philips Medical Systems'),
 ('1',
  'Joe William',
  'MARSHAM, START LANE, WHALEY BRIDGE, SK23 7BP',
  '19620726',
  'M',
  'WHITTINGHAM VM^-',
  'Philips Medical Systems'),
 ('3',
  'Rahul Subra',
  'MARSHAM, START LANE, WHALEY BRIDGE, SK23 7BP',
  '19620726',
  'M',
  'WHITTINGHAM VM^-',
  'Philips Medical Systems'),
 ('4',
  'Josh Mehta',
  'MARSHAM, START LANE, WHALEY BRIDGE, SK23 7BP',
  '19620726',
  'M',
  'WHITTINGHAM VM^-',
  'Philips Medical Systems')]

In [74]:
c.execute('SELECT * from masked')
c.fetchall()

[(2, 'XXXX', 'XXXX', 'XXXXXX', 'XX', 'XXXX', 'XXXX'),
 (5, 'XXXX', 'XXXX', 'XXXXXX', 'XX', 'XXXX', 'XXXX'),
 (1, 'XXXX', 'XXXX', 'XXXXXX', 'XX', 'XXXX', 'XXXX'),
 (3, 'XXXX', 'XXXX', 'XXXXXX', 'XX', 'XXXX', 'XXXX'),
 (4, 'XXXX', 'XXXX', 'XXXXXX', 'XX', 'XXXX', 'XXXX')]

# Mask the proper nouns in metadata

In [75]:
def anonymize(path):
    for i,s in enumerate(os.listdir(path)):
        if 'ipynb_checkpoints' not in s.split('.'):
            print (s)
            slices = pydicom.read_file(path + '/' + s)
            cur= c.execute('SELECT * from masked')
            for rows in cur:
                slices.PatientName=rows[1]
                slices[0x0010, 0x1040].value=rows[2] #address
                slices[0x0010, 0x0030].value=rows[3] #DOB
                slices[0x0010, 0x0040].value=rows[4] #SEX
                slices[0x0008, 0x0090].value=rows[5] #Manu
                slices[0x0008, 0x0070].value=rows[6] #Physician's Name   
            slices.save_as(f'masked{slices.PatientID}.dcm')
                

In [76]:
def deanonymize(path):
    for i,s in enumerate(os.listdir(path)):
        if 'ipynb_checkpoints' not in s.split('.'):
            print (s)
            slices = pydicom.dcmread(path + '/' + s)
            cur= c.execute(f'SELECT * from unmasked where PatientID = {slices.PatientID}')
            for rows in cur:
                slices.PatientName=rows[1]
                slices[0x0010, 0x1040].value=rows[2] #address
                slices[0x0010, 0x0030].value=rows[3] #DOB
                slices[0x0010, 0x0040].value=rows[4] #SEX
                slices[0x0008, 0x0090].value=rows[6] #Manu
                slices[0x0008, 0x0070].value=rows[5] #Physician's Name   
            slices.save_as(f'unmasked{slices.PatientID}.dcm')
              

In [77]:
anonymize(data_path)

2.dcm
5.dcm
1.dcm
3.dcm
4.dcm


In [78]:
deanonymize('/root/notebooks/Unmasked')

masked2.dcm
masked5.dcm
masked1.dcm
masked3.dcm
masked4.dcm


In [208]:
slices = pydicom.read_file('/root/notebooks/NEW/5.dcm')

In [209]:
slices['0x0010','0x0010'].value

'Rohan Gupta'

In [242]:
#def anonymize_any_tag(tag1,tag2):
#    PatID=[]
#    org_dict={}
#    for i,s in enumerate(os.listdir(data_path)):
#        if 'ipynb_checkpoints' not in s.split('.'):
#            print (s)
#            slices = pydicom.dcmread(data_path + '/' + s)
#            PatID.append(slices.PatientID) 
#            L1=tuple(PatID)
#            PatID.pop(0)
#            dict={L1:{f'{tag1},{tag2}': ''}}
#            dict[L1][f'{tag1},{tag2}']=slices[tag1,tag2].value
#            org_dict[slices.PatientID]={f'{tag1},{tag2}':slices[tag1,tag2].value}
#            slices[tag1,tag2].value='XXXX'
#    print(org_dict)
    
    
    
    

In [276]:
#anonymize_any_tag('0x0010','0x0010')

In [277]:
slices = pydicom.read_file('/root/notebooks/NEW/5.dcm')
slices['0x0010','0x0010'].value

'Rohan Gupta'

In [272]:
PatID=[]
org_dict={}
for i,s in enumerate(os.listdir(data_path)):
    if 'ipynb_checkpoints' not in s.split('.'):
        #print (s)
        slices = pydicom.dcmread(data_path + '/' + s)
        PatID.append(slices.PatientID) 
        L1=tuple(PatID)
        PatID.pop(0)
def anoy(tag1,tag2):
    for i,s in enumerate(os.listdir(data_path)):
        if 'ipynb_checkpoints' not in s.split('.'):
            #print (s)
            slices = pydicom.dcmread(data_path + '/' + s)
            dict={L1:{f'{tag1},{tag2}': ''}}
            dict[L1][f'{tag1},{tag2}']=slices[tag1,tag2].value
            org_dict[slices.PatientID]={f'{tag1},{tag2}':slices[tag1,tag2].value}
            slices[tag1,tag2].value='XXXX'
            print(slices[tag1,tag2].value)
            
def deanoy(tag1,tag2):
    for i,s in enumerate(os.listdir(data_path)):
        if 'ipynb_checkpoints' not in s.split('.'):
            slices = pydicom.dcmread(data_path + '/' + s)
            slices[tag1,tag2].value=org_dict[slices.PatientID][f'{tag1},{tag2}']
            print(slices[tag1,tag2].value)
            
            
    

In [273]:
anoy('0x0010','0x0010')

XXXX
XXXX
XXXX
XXXX
XXXX


In [274]:
print(org_dict)

{'2': {'0x0010,0x0010': 'Jack Joe'}, '5': {'0x0010,0x0010': 'Rohan Gupta'}, '1': {'0x0010,0x0010': 'Joe William'}, '3': {'0x0010,0x0010': 'Rahul Subra'}, '4': {'0x0010,0x0010': 'Josh Mehta'}}


In [275]:
deanoy('0x0010','0x0010')

Jack Joe
Rohan Gupta
Joe William
Rahul Subra
Josh Mehta
