# Anonymizing Medical Images

This notebook focuses on techniques for anonymizing medical images, including methods for removing patient-identifying data.

## Types of Patient Identifying Data

In medical imaging, patient identifying data can include:
- Patient name
- Patient ID
- Date of birth
- Other demographic information

It is crucial to anonymize this data to protect patient privacy.

## Text on Images

Medical images may contain text overlays that can reveal patient information. This notebook will demonstrate how to remove or obscure such text.

In [1]:
import matplotlib.pyplot as plt
import SimpleITK as sitk
from skimage import io
from ipywidgets import interact, fixed
from IPython.display import clear_output
import os

# Read the T1 volume as 32-bit floats; this stack is shown below as the sagittal view.
sag_image = sitk.ReadImage(
    "data/sitk/A1_grayT1.nrrd",
    outputPixelType=sitk.sitkFloat32,
)
# Reverse the axis order (0, 1, 2) -> (2, 1, 0); the result is shown below as the coronal view.
cor_image = sitk.PermuteAxes(sag_image, order=[2, 1, 0])

# General display function for any two 3D images
def display_images(image1_z, image2_z, image1_npa, image2_npa, title1="Image 1", title2="Image 2"):
    """Show one slice from each of two 3D arrays side by side in grayscale.

    Args:
        image1_z: Index along the first array axis of the slice taken from ``image1_npa``.
        image2_z: Index along the first array axis of the slice taken from ``image2_npa``.
        image1_npa: 3D array displayed in the left panel.
        image2_npa: 3D array displayed in the right panel.
        title1: Title of the left panel.
        title2: Title of the right panel.
    """
    _, (left_ax, right_ax) = plt.subplots(1, 2, figsize=(10, 8))

    # Both panels are drawn the same way, so describe them once and loop.
    panels = (
        (left_ax, image1_npa, image1_z, title1),
        (right_ax, image2_npa, image2_z, title2),
    )
    for ax, volume, slice_index, panel_title in panels:
        ax.imshow(volume[slice_index, :, :], cmap=plt.cm.Greys_r)
        ax.set_title(panel_title)
        ax.axis('off')

    plt.show()

# Interactive viewer for the original volume: one slice slider per panel,
# sagittal stack on the left and coronal stack on the right.
interact(
    display_images,
    image1_z=(0, sag_image.GetDepth() - 1),
    image2_z=(0, cor_image.GetDepth() - 1),
    image1_npa=fixed(sitk.GetArrayViewFromImage(sag_image)),
    image2_npa=fixed(sitk.GetArrayViewFromImage(cor_image)),
    title1=fixed("Sagittal Cut"),
    title2=fixed("Coronal Cut"),
)

## Removing Soft Tissue

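We will use SimpleITK to remove some of the soft tissue in the image: threshold the low intensities, clean the resulting mask with a dilation followed by an erosion, keep its largest connected component, and then mask the image with the inverse of that component.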

In [2]:
# Build a mask of the low-intensity voxels. The assumption is that the black
# background around the brain is zero or a low value, so thresholding the range
# [lower_thresh, upper_thresh] picks it out before dilating and cleaning.
lower_thresh, upper_thresh = 0, 100
brain_mask = sitk.BinaryThreshold(
    sag_image,
    lowerThreshold=lower_thresh,
    upperThreshold=upper_thresh,
)

# Clean the mask morphologically: dilate first, then erode, both with [5, 5, 5]
brain_mask_cleaned = sitk.BinaryErode(
    sitk.BinaryDilate(brain_mask, [5, 5, 5]),
    [5, 5, 5],
)

# Compare the raw and the cleaned mask slice by slice with the general display function
interact(
    display_images,
    image1_z=(0, brain_mask.GetDepth() - 1),
    image2_z=(0, brain_mask_cleaned.GetDepth() - 1),
    image1_npa=fixed(sitk.GetArrayViewFromImage(brain_mask)),
    image2_npa=fixed(sitk.GetArrayViewFromImage(brain_mask_cleaned)),
    title1=fixed("Original Mask"),
    title2=fixed("Cleaned Mask"),
)

In [3]:
def keep_largest_component(mask_image):
    """Return a binary mask that contains only the largest connected component.

    The number of connected components found before and after filtering is
    printed as a quick sanity check.

    Args:
        mask_image: SimpleITK image used as a binary mask. It is cast to
            8-bit unsigned integers before labelling.

    Returns:
        A SimpleITK image with value 1 inside the component that has the
        largest physical size and 0 everywhere else.

    Raises:
        ValueError: If no connected component is found in ``mask_image``
            (for example, an all-zero mask).
    """
    # Cast to 8-bit unsigned so the mask works with the labelling step and
    # uses as little memory as possible.
    mask_image = sitk.Cast(mask_image, sitk.sitkUInt8)

    # Label connected components in the mask image
    labeled_image = sitk.ConnectedComponent(mask_image)

    # Measure the size of each labeled component
    label_shape_statistics = sitk.LabelShapeStatisticsImageFilter()
    label_shape_statistics.Execute(labeled_image)

    # Fetch the labels once; they are needed for both the count and the search
    labels = label_shape_statistics.GetLabels()
    print(f"Number of connected components before filtering: {len(labels)}")

    # Without this guard an empty mask reaches max() on an empty sequence and
    # fails with a message that says nothing about the actual problem.
    if not labels:
        raise ValueError(
            "mask_image contains no connected components; there is no largest component to keep"
        )

    # Find the label with the largest size
    largest_label = max(labels, key=label_shape_statistics.GetPhysicalSize)

    # Create a new mask with only the largest component
    largest_component_mask = sitk.BinaryThreshold(
        labeled_image,
        lowerThreshold=largest_label,
        upperThreshold=largest_label,
        insideValue=1,
        outsideValue=0,
    )

    # Verify the result by counting the components in the resulting image
    labeled_result = sitk.ConnectedComponent(largest_component_mask)
    label_shape_statistics.Execute(labeled_result)
    result_component_count = len(label_shape_statistics.GetLabels())
    print(f"Number of connected components after filtering: {result_component_count}")

    return largest_component_mask

# Reduce the cleaned mask to its single largest connected region
largest_component_mask = keep_largest_component(brain_mask_cleaned)

# That region is the part we want to drop, so flip the mask before applying it
inverted_mask = sitk.BinaryNot(largest_component_mask)
brain_only = sitk.Mask(sag_image, maskImage=inverted_mask)

# Show the masked image next to the mask that was inverted to produce it
interact(
    display_images,
    image1_z=(0, brain_only.GetDepth() - 1),
    image2_z=(0, largest_component_mask.GetDepth() - 1),
    image1_npa=fixed(sitk.GetArrayViewFromImage(brain_only)),
    image2_npa=fixed(sitk.GetArrayViewFromImage(largest_component_mask)),
    title1=fixed("new image"),
    title2=fixed("mask"),
)

## Seed Point Guessing

We will guess a seed point inside the brain by starting at the center of the image volume, and then grow a region from that seed to build a mask.

In [4]:
def guess_seed_point(img):
    """
    Guess a single seed point for the brain near the center of the image.

    The central index is returned when the pixel there is greater than zero.
    Otherwise the index is shifted along the first axis by one tenth of that
    axis' size and returned without checking the pixel at the new location.
    """
    size = img.GetSize()
    center_x, center_y, center_z = size[0] // 2, size[1] // 2, size[2] // 2

    # A positive value at the center is taken as good enough
    if img.GetPixel(center_x, center_y, center_z) > 0:
        return center_x, center_y, center_z

    # just move over a bit and hope for better
    return center_x + size[0] // 10, center_y, center_z
# Sanity check: look at the intensity stored at the guessed seed point
seed_point = guess_seed_point(sag_image)
pixel_value = sag_image.GetPixel(*seed_point)
print(pixel_value)

In [5]:
# Grow a region from the guessed seed, accepting connected voxels whose
# intensity lies between the two thresholds.
seed_point = guess_seed_point(sag_image)
lower_threshold, upper_threshold = 370, 480

# let's dilate up a bit: the grown region is dilated with [5, 5, 5] right away
seed_mask = sitk.BinaryDilate(
    sitk.ConnectedThreshold(
        sag_image,
        seedList=[seed_point],
        lower=lower_threshold,
        upper=upper_threshold,
    ),
    [5, 5, 5],
)

# apply the mask to the image
brain_only = sitk.Mask(sag_image, maskImage=seed_mask)

# display the original next to the masked result to see what happened
interact(
    display_images,
    image1_z=(0, sag_image.GetDepth() - 1),
    image2_z=(0, brain_only.GetDepth() - 1),
    image1_npa=fixed(sitk.GetArrayViewFromImage(sag_image)),
    image2_npa=fixed(sitk.GetArrayViewFromImage(brain_only)),
    title1=fixed("Original"),
    title2=fixed("Seeded and Masked"),
)

## Other Parts of Images

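Identifying information is not limited to the pixels. We will also look at other parts of image files, such as DICOM metadata, and show how to read and overwrite an identifying element (the patient name).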

In [6]:
import pydicom
from pydicom import dcmread
# Read the sample DICOM file and print everything it contains
fpath = "data/anonym/our_sample_dicom.dcm"
ds = pydicom.dcmread(fpath)
print(ds)

In [7]:
# Modify elements of our DICOM metadata.
# Look up the data element by its (group, element) tag; (0010,0010) is the
# Patient's Name tag in the DICOM data dictionary. Print the current value
# before changing anything.
elem = ds[0x0010, 0x0010]
print(elem.value)

In [8]:
# Overwrite the value through the element handle `elem`, then print the
# whole element to confirm the change.
elem.value = 'Citizen^Almoni'
print(elem)

In [9]:
# Set the name again, this time through the dataset's `PatientName` keyword
# attribute, then print `elem`.
# NOTE(review): the printout only reflects this assignment if `elem` is still
# the element object that `ds` holds for (0010,0010) — presumably it is, but
# confirm; printing `ds.PatientName` would not depend on that.
ds.PatientName = 'Almoni^Shmalmoni'
print(elem)