In [None]:
import glob
from pathlib import Path
import matplotlib.pyplot as plt
import pydicom
from presidio_image_redactor import DicomImageRedactorEngine
from presidio_analyzer import PatternRecognizer, Pattern
import numpy as np 
import cv2
from pydicom.pixel_data_handlers.util import apply_voi_lut
from skimage import exposure
import dotenv
import os

### This notebook performs pixel-level de-identification of DICOM images, primarily using Microsoft's open-source Presidio library.

In [None]:
def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize an image to a target width or height while keeping its aspect ratio.

    Args:
        image: Array whose first two ``shape`` entries are (height, width).
        width: Target width in pixels. When given, it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels, used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        ``image`` itself when neither ``width`` nor ``height`` is given,
        otherwise the array returned by ``cv2.resize``.
    """
    src_h, src_w = image.shape[:2]

    if width is not None:
        # Width drives the scale; the height follows (truncated to an int).
        scale = width / float(src_w)
        target_size = (width, int(src_h * scale))
    elif height is not None:
        # Only a height was requested; the width follows (truncated to an int).
        scale = height / float(src_h)
        target_size = (int(src_w * scale), height)
    else:
        # Nothing to scale against.
        return image

    # cv2.resize takes the size as (width, height).
    return cv2.resize(image, target_size, interpolation=inter)

# Testing

In [None]:

# Scratch test: rebuild a minimal DICOM instance around the VOI-LUT-adjusted
# pixels of a sample file, run it through the Presidio redactor and preview it.
engine = DicomImageRedactorEngine()
input_path = 'poster_image.dcm'
ds = pydicom.dcmread(input_path)

# `Dataset` is reached through the already-imported `pydicom` package; the bare
# name is never imported in this notebook.
new_ds = pydicom.dataset.Dataset()

raw_pixels = ds.pixel_array
image = apply_voi_lut(raw_pixels, ds)

# apply_voi_lut can return a different dtype than the stored pixels (e.g. floats
# when a window is applied). PixelData has to agree with BitsAllocated, so bring
# the result back to the stored dtype before serialising it.
# NOTE(review): values outside the stored dtype's range are clipped - confirm
# this is acceptable for the images being tested.
if image.dtype != raw_pixels.dtype:
    if np.issubdtype(raw_pixels.dtype, np.integer):
        limits = np.iinfo(raw_pixels.dtype)
        image = np.clip(np.rint(image), limits.min, limits.max)
    image = image.astype(raw_pixels.dtype)

resize = ResizeWithAspectRatio(image, width=1280)
ocr_kwargs = {"ocr_threshold": 0}

# Ad-hoc recognizers are built here so this cell runs on a fresh kernel; the
# reusable recognizer classes with the same patterns are defined further down.
test_recognizers = [
    PatternRecognizer(
        supported_entity="PERSON_NAME",
        patterns=[Pattern(name="NAME_PATTERN", regex=r"[A-Za-z]+,\s[A-Za-z]+\s[A-Za-z]", score=0.8)],
    ),
    PatternRecognizer(
        supported_entity="PHONE_NUMBER",
        patterns=[Pattern(name="PHONE_PATTERN", regex=r" Mobile: ", score=0.99)],
    ),
]

bits_allocated = ds.BitsAllocated
bits_stored = ds.BitsStored
high_bit = ds.HighBit

new_ds.BitsAllocated = bits_allocated
new_ds.BitsStored = bits_stored
new_ds.HighBit = high_bit

# Remaining Image Pixel attributes needed to turn PixelData back into an array
# (and PhotometricInterpretation, which the redactor reads as well).
for keyword in (
    "Rows",
    "Columns",
    "SamplesPerPixel",
    "PhotometricInterpretation",
    "PixelRepresentation",
    "NumberOfFrames",
):
    if keyword in ds:
        setattr(new_ds, keyword, getattr(ds, keyword))
if new_ds.get("SamplesPerPixel", 1) > 1:
    # pixel_array is laid out (..., rows, columns, samples), i.e. colour-by-pixel.
    new_ds.PlanarConfiguration = 0

new_ds.PixelData = image.tobytes()

# Encode the element stream as Implicit VR Little Endian so it agrees with the
# transfer syntax declared after the file is read back below.
new_ds.is_little_endian = True
new_ds.is_implicit_VR = True

# Save the new DICOM instance to a file
pydicom.dcmwrite("new_file.dcm", new_ds)

# The file was written without a preamble or file meta group, hence force=True.
new_ds = pydicom.dcmread("new_file.dcm", force=True)

# The redactor inspects file_meta.TransferSyntaxUID, so declare it explicitly.
new_ds.file_meta.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian

redacted_dicom_instance = engine.redact(new_ds, fill="contrast", ocr_kwargs=ocr_kwargs,
                                        ad_hoc_recognizers=test_recognizers)

# Preview inline with matplotlib: cv2.imshow/cv2.waitKey(0) opens a native
# window and blocks the notebook kernel until a key is pressed.
_, preview_ax = plt.subplots()
preview_ax.imshow(resize, cmap="gray")
preview_ax.set_title("dicom")
preview_ax.axis("off")
plt.show()

In [None]:
def compare_dicom_images(
    instance_original: pydicom.dataset.FileDataset,
    instance_redacted: pydicom.dataset.FileDataset,
    figsize: tuple = (250, 250)) -> None:
    """Plot the pixel arrays of an original and a redacted DICOM instance side by side.

    Args:
        instance_original (pydicom.dataset.FileDataset): A single DICOM instance (with text PHI).
        instance_redacted (pydicom.dataset.FileDataset): A single DICOM instance (redacted PHI).
        figsize (tuple): Figure size in inches (width, height).
    """
    # Decode both pixel arrays up front, before any figure is created.
    panels = (
        ("Original", instance_original.pixel_array),
        ("Redacted", instance_redacted.pixel_array),
    )

    # One row, two columns: original on the left, redacted on the right.
    _, axes = plt.subplots(1, 2, figsize=figsize)
    for axis, (title, pixels) in zip(axes, panels):
        axis.imshow(pixels, cmap="gray")
        axis.set_title(title)

In [None]:
# Custom recognizer for person names
class NameRecognizer(PatternRecognizer):
    """Pattern-based recognizer that reports its matches as ``PERSON_NAME``.

    The single pattern matches one or more letters, a comma, one whitespace
    character, one or more letters, one whitespace character and one further
    letter (for example ``Doe, John A``), with a score of 0.8.
    """

    def __init__(self):
        name_patterns = [
            Pattern(
                name="NAME_PATTERN",
                regex=r"[A-Za-z]+,\s[A-Za-z]+\s[A-Za-z]",
                score=0.8,
            )
        ]
        super().__init__(supported_entity="PERSON_NAME", patterns=name_patterns)


# Module-level instance passed to the redactor as an ad-hoc recognizer
name_recognizer = NameRecognizer()

# Custom recognizer for the phone-number label
class PhoneNumberRecognizer(PatternRecognizer):
    """Pattern-based recognizer that reports its matches as ``PHONE_NUMBER``.

    The single pattern matches the literal text `` Mobile: `` (including the
    surrounding spaces) with a score of 0.99.
    NOTE(review): the pattern covers only the label, not any digits after it -
    confirm this is the intended redaction target.
    """

    def __init__(self):
        phone_patterns = [
            Pattern(
                name="PHONE_PATTERN",
                regex=r" Mobile: ",
                score=0.99,
            )
        ]
        super().__init__(supported_entity="PHONE_NUMBER", patterns=phone_patterns)


# Module-level instance passed to the redactor as an ad-hoc recognizer
phone_number_recognizer = PhoneNumberRecognizer()

In [None]:

# Load environment variables from the .env file. The stray argument-less
# os.getenv() call that followed was removed: it raises TypeError because
# os.getenv requires a key.
dotenv.load_dotenv("../config/.env")

# Set input and output paths
INPUT_PATH = os.getenv("INPUT_PATH")
INPUT_PATH2 = os.getenv("INPUT_PATH2")
OUTPUT_DIR = os.getenv("OUTPUT_DIR")
INPUT_DIR = os.getenv("INPUT_DIR")

# os.getenv returns None for unset variables; fail early with a clear message
# instead of passing None on to dcmread.
if not INPUT_PATH2:
    raise ValueError(
        "INPUT_PATH2 is not set; define it in ../config/.env or in the environment"
    )

# Initialize the engine
engine = DicomImageRedactorEngine()
ocr_kwargs = {"ocr_threshold": 0}

# Option 1: Redact from a loaded DICOM image
dicom_instance = pydicom.dcmread(INPUT_PATH2)

redacted_dicom_instance = engine.redact(dicom_instance, fill="contrast", ocr_kwargs=ocr_kwargs,
                                        ad_hoc_recognizers=[name_recognizer, phone_number_recognizer])

# Show the original and redacted pixel data side by side
compare_dicom_images(dicom_instance, redacted_dicom_instance)
