# Fulmine Labs mini-PACS

## Overview

Fulmine Labs will use medical images for various quality- and testing-related machine learning (ML) initiatives.
The best practice for managing this data is to use images that comply with the Digital Imaging and Communications in Medicine (DICOM) standard, together with a system similar to a Picture Archiving and Communication System (PACS).

The code in this project implements and tests a basic PACS with the following architecture:

```
[ Orthanc Repository (Open Source component) ]
       |
       | (DICOM Images) <----------------------------------------->  [ OHIF Viewer (Open Source component) ]
       v
[ Fulmine-Labs-Mini-PACS - Data Setup Script ]      
       |								
       | (Metadata and generated images)   
       |                                          
[ SQLite Database ]							
       |								
       | (API Requests)						 
       v
[ Flask Application ]
       |
       | (HTTP Requests for Data)
       v
[ Client (Pytest, Browser) ]
       |
       | (Model Training Data)
       v
[ Anomaly Detection Model Training Script (separate repository) ]

```

The data setup script traverses all folders in a specified location, identifies the DICOM images and adds their metadata to an SQLite database. Images that have appropriate Window Center and Window Width DICOM header information are also converted to PNG files at another specified location, and the PNG file path and conversion parameters are recorded against the image in the database.

The database maintains the Patient -> Study -> Series -> Image relationship, as well as tracking the output image file names and parameters used in their creation, allowing PACS-like SQL queries to be constructed. 


## Author
Duncan Henderson
Fulmine Labs LLC

In [1]:
import os
import random
import pydicom
import sqlite3
import shutil 
import logging
from datetime import datetime
import numpy as np
from PIL import Image


In [2]:
# Run configuration

# When True, log messages are echoed to the screen as well as the log file.
verbose = False

# Folder that is walked to find DICOM files, and folder that receives the
# train/validate/test copies of those files.
source_dir = r'D:\\Orthanc'
target_dir = r'D:\\training'

# Fractions of the discovered files assigned to the training and validation
# sets; the remaining files form the test set.
training_ratio = 0.7
validation_ratio = 0.15

# SQLite database file, and whether an existing file is removed before the run.
db_path = 'medical_imaging.db'
delete_db = True

In [3]:
# Log to a log file that is specific for the test run and also to the screen if verbose is set

class CustomLogger:
    """Send run messages to a timestamped log file and, optionally, to the screen.

    Creating an instance calls ``logging.basicConfig`` with a file name of the
    form ``log_YYYYMMDD_HHMMSS.log`` and the INFO level.
    """

    def __init__(self, verbose=False):
        # When true, every message is also printed to standard output.
        self.verbose = verbose

        run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        logging.basicConfig(filename=f'log_{run_stamp}.log', level=logging.INFO)

    def _echo(self, message):
        """Print the message, but only when verbose output is enabled."""
        if self.verbose:
            print(message)

    def iprint(self, message):
        """Report an informational message."""
        self._echo(message)
        logging.info(message)

    def eprint(self, message):
        """Report an error message."""
        self._echo(message)
        logging.error(message)

In [4]:
def setup_database(db_path):
    """Create the Patients, Studies, Series and Images tables if they are missing.

    Connecting creates the SQLite file when it does not exist yet. Each table
    references its parent through a foreign key, giving the
    Patient -> Study -> Series -> Image hierarchy.

    Args:
        db_path: Path of the SQLite database file.

    Database errors are reported through the module-level ``logger`` and are
    not re-raised.
    """
    # Bound before the try block so that the cleanup in ``finally`` is safe
    # even when the connection attempt itself fails.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # Create tables
        cursor.execute('''CREATE TABLE IF NOT EXISTS Patients (
                            PatientID TEXT PRIMARY KEY,
                            PatientInfo TEXT)''')

        cursor.execute('''CREATE TABLE IF NOT EXISTS Studies (
                            StudyID TEXT PRIMARY KEY,
                            PatientID TEXT,
                            StudyDate TEXT,
                            StudyDescription TEXT,
                            BodyPartExamined TEXT,
                            FOREIGN KEY (PatientID) REFERENCES Patients (PatientID))''')

        cursor.execute('''CREATE TABLE IF NOT EXISTS Series (
                            SeriesID TEXT PRIMARY KEY,
                            StudyID TEXT,
                            SeriesDate TEXT,
                            SeriesDescription TEXT,
                            Modality TEXT,
                            FOREIGN KEY (StudyID) REFERENCES Studies (StudyID))''')

        cursor.execute('''CREATE TABLE IF NOT EXISTS Images (
                            ImageID TEXT PRIMARY KEY,
                            SeriesID TEXT,
                            FilePath TEXT,
                            State TEXT,
                            PngFilePath TEXT,
                            WindowCenter TEXT,
                            WindowWidth TEXT,
                            RescaleIntercept TEXT,
                            RescaleSlope TEXT,
                            InstanceNumber TEXT,
                            FOREIGN KEY (SeriesID) REFERENCES Series (SeriesID))''')

        # Make the schema durable explicitly instead of relying on the
        # driver's implicit transaction handling for DDL statements.
        conn.commit()

    except sqlite3.DatabaseError as e:
        logger.eprint(f"Database error: {e}")
    finally:
        if conn is not None:
            conn.close()

In [5]:
def query_database(query, args=(), db_path="medical_imaging.db"):
    """Run one SQL statement against the database and return all result rows.

    Args:
        query: SQL text; use ``?`` placeholders for values.
        args: Sequence of values bound to the placeholders.
        db_path: SQLite database file to open. Defaults to the file name this
            function previously had hard-coded.

    Returns:
        The list of row tuples produced by ``cursor.fetchall()``.
    """
    conn = sqlite3.connect(db_path)
    try:
        # ``with conn`` only commits or rolls back the transaction; it does not
        # close the connection, so closing is done explicitly in ``finally``.
        with conn:
            cursor = conn.cursor()
            cursor.execute(query, args)
            return cursor.fetchall()
    finally:
        conn.close()

In [6]:
def is_dicom_file(file_path):
    """Return True when pydicom can read the file's header, False otherwise.

    Pixel data is not loaded. Any exception raised while reading is reported
    through the module-level ``logger`` and treated as "not a DICOM file".
    """
    try:
        pydicom.dcmread(file_path, stop_before_pixels=True)
    except Exception as read_error:
        logger.eprint(f"Error reading DICOM file {file_path}: {read_error}")
        return False
    else:
        return True

In [7]:
def copy_files(file_list, destination):
    """Copy each listed file into ``destination``, keeping its base name.

    The destination folder is created when missing. A missing file or a
    permission problem is reported through the module-level ``logger`` and
    ends the copy at that point; it is not re-raised.
    """
    try:
        os.makedirs(destination, exist_ok=True)
        for source_path in file_list:
            target_path = os.path.join(destination, os.path.basename(source_path))
            shutil.copy(source_path, target_path)
    except (FileNotFoundError, PermissionError) as io_error:
        logger.eprint(f"File I/O error: {io_error}")

In [8]:
def extract_metadata(dicom_file_path):
    """Read a DICOM header and return the fields stored in the database.

    Pixel data is not loaded. PatientID and the study, series and SOP instance
    UIDs are read unconditionally; every other attribute is replaced by the
    string 'N/A' when it is not present in the dataset.

    Returns:
        A tuple ``(patient_id, study_id, series_id, image_id, modality,
        body_part_examined, instance_number, study_description,
        series_description, study_date, series_date)``.
    """
    ds = pydicom.dcmread(dicom_file_path, stop_before_pixels=True)

    def optional(keyword):
        # Attributes that may be missing fall back to a placeholder string.
        return getattr(ds, keyword) if keyword in ds else 'N/A'

    return (
        ds.PatientID,
        ds.StudyInstanceUID,
        ds.SeriesInstanceUID,
        ds.SOPInstanceUID,
        optional('Modality'),
        optional('BodyPartExamined'),
        optional('InstanceNumber'),
        optional('StudyDescription'),
        optional('SeriesDescription'),
        optional('StudyDate'),
        optional('SeriesDate'),
    )

In [9]:
def insert_metadata_into_db(cursor, patient_id, study_id, series_id, image_id, modality, body_part_examined, instance_number, study_description, series_description, study_date, series_date, file_path, state):
    """Insert one image and its patient, study and series rows.

    Every statement is ``INSERT OR IGNORE``, so rows whose primary key already
    exists are left untouched. Nothing is committed here; that is left to the
    owner of ``cursor``.
    """
    # One (SQL, parameters) pair per table, ordered from parent to child.
    statements = (
        ("INSERT OR IGNORE INTO Patients (PatientID) VALUES (?)",
         (patient_id,)),
        ("INSERT OR IGNORE INTO Studies (StudyID, PatientID, StudyDate, StudyDescription, BodyPartExamined) VALUES (?, ?, ?, ?, ?)",
         (study_id, patient_id, study_date, study_description, body_part_examined)),
        ("INSERT OR IGNORE INTO Series (SeriesID, StudyID, SeriesDate, SeriesDescription, Modality) VALUES (?, ?, ?, ?, ?)",
         (series_id, study_id, series_date, series_description, modality)),
        ("INSERT OR IGNORE INTO Images (ImageID, SeriesID, FilePath, State, InstanceNumber) VALUES (?, ?, ?, ?, ?)",
         (image_id, series_id, file_path, state, instance_number)),
    )
    for sql, params in statements:
        cursor.execute(sql, params)

In [10]:
def apply_rescale(dcm):
    """Apply the rescale slope and intercept to the DICOM image data.

    Args:
        dcm: Object exposing ``pixel_array`` and, optionally, ``RescaleSlope``
            and ``RescaleIntercept`` attributes.

    Returns:
        A float64 array equal to ``pixel_array * slope + intercept``.

    A missing (or empty) slope is treated as 1 and a missing intercept as 0,
    so images without these attributes pass through unchanged instead of
    raising ``AttributeError``.
    """
    slope = getattr(dcm, 'RescaleSlope', None)
    intercept = getattr(dcm, 'RescaleIntercept', None)

    # Convert to plain floats so the arithmetic does not depend on the numeric
    # wrapper type the attribute happens to be stored as.
    slope = 1.0 if slope is None else float(slope)
    intercept = 0.0 if intercept is None else float(intercept)

    rescaled_image = dcm.pixel_array.astype(np.float64) * slope + intercept
    return rescaled_image

In [11]:
def window_image(image, window_center, window_width):
    """Clip the image to the window described by its center and width.

    Values below ``center - width / 2`` or above ``center + width / 2`` are
    replaced by the respective bound.
    """
    half_width = window_width / 2
    lower_bound = window_center - half_width
    upper_bound = window_center + half_width
    return np.clip(image, lower_bound, upper_bound)

In [12]:
def normalize_image(image):
    """Normalize the image data to 0-255 and convert to uint8.

    The minimum value maps to 0 and the maximum to 255; fractional results are
    truncated by the uint8 conversion.

    Args:
        image: Array-like of numeric pixel values.

    Returns:
        A uint8 array with the same shape as ``image``. A constant image (for
        example one lying entirely outside the window) yields all zeros, and
        an empty image yields an empty array.
    """
    # Work in float64 so that subtracting the minimum cannot overflow narrow
    # integer dtypes such as int16.
    pixels = np.asarray(image, dtype=np.float64)
    if pixels.size == 0:
        return pixels.astype(np.uint8)

    shifted = pixels - np.min(pixels)
    peak = np.max(shifted)
    if peak == 0:
        # Constant image: dividing by the zero range would produce NaNs.
        return np.zeros(shifted.shape, dtype=np.uint8)

    return (shifted / peak * 255).astype(np.uint8)

In [13]:
def get_first_value(dicom_value):
    """Return a DICOM element value as a float, using the first item of a multi-value.

    A ``pydicom.multival.MultiValue`` contributes its first element; any other
    value is converted directly.
    """
    is_multi_valued = isinstance(dicom_value, pydicom.multival.MultiValue)
    return float(dicom_value[0] if is_multi_valued else dicom_value)

In [14]:
def convert_dicom_to_png(dicom_dir, output_dir, state, cursor):
    """Convert the DICOM files in a folder to PNG and record the result in the database.

    For every regular file directly inside ``dicom_dir`` the pixel data is
    rescaled, windowed with the file's first Window Center / Window Width
    values, normalized to 8 bits and saved as ``<file name>.png`` in
    ``output_dir``. The matching ``Images`` row (looked up by SOP Instance UID)
    is then updated with the PNG path and the parameters used.

    Args:
        dicom_dir: Folder containing the DICOM files to convert.
        output_dir: Folder that receives the PNG files; created when missing.
        state: Data-set split label. Accepted for interface compatibility;
            not used by this function.
        cursor: Open database cursor used for the UPDATE statements. Nothing
            is committed here.

    Files without both window attributes are skipped. A failure on one file is
    logged and does not stop the remaining files. Errors are reported through
    the module-level ``logger`` and are not re-raised.
    """
    # Folder-level setup is handled separately from per-file work so that its
    # error messages never refer to a file path that has not been set yet.
    try:
        os.makedirs(output_dir, exist_ok=True)
        entries = os.listdir(dicom_dir)
    except OSError as e:
        logger.eprint(f"I/O error preparing conversion from {dicom_dir} to {output_dir}: {e}")
        return
    except Exception as e:
        logger.eprint(f"Unexpected error: {e}")
        return

    for entry in entries:
        logger.iprint("Processing DICOM file: " + entry)
        dicom_path = os.path.join(dicom_dir, entry)
        if not os.path.isfile(dicom_path):
            continue  # Only regular files are converted

        try:
            dcm = pydicom.dcmread(dicom_path)

            # Apply Rescale Slope and Intercept
            rescaled_image = apply_rescale(dcm)

            # Windowing needs both attributes; without them the file is skipped
            if not (hasattr(dcm, 'WindowCenter') and hasattr(dcm, 'WindowWidth')):
                logger.iprint("No window center/width or unable to process windowing for file " + entry + ", skipping.")
                continue

            window_center = get_first_value(dcm.WindowCenter)
            window_width = get_first_value(dcm.WindowWidth)
            windowed_image = window_image(rescaled_image, window_center, window_width)
            normalized_image = normalize_image(windowed_image)

            # Generate PNG filename and save the PNG image
            png_path = os.path.join(output_dir, entry + '.png')
            Image.fromarray(normalized_image).save(png_path)

            # Collect the parameters recorded alongside the PNG path
            image_id = dcm.SOPInstanceUID
            rescale_intercept = str(dcm.RescaleIntercept if "RescaleIntercept" in dcm else "N/A")
            rescale_slope = str(dcm.RescaleSlope if "RescaleSlope" in dcm else "N/A")

            # Update the database
            cursor.execute("UPDATE Images SET PngFilePath = ?, WindowCenter = ?, WindowWidth = ?, RescaleIntercept = ?, RescaleSlope = ? WHERE ImageID = ?",
                           (png_path, window_center, window_width, rescale_intercept, rescale_slope, image_id))

        except pydicom.errors.InvalidDicomError as e:
            logger.eprint(f"Invalid DICOM file {dicom_path}: {e}")
        except OSError as e:
            logger.eprint(f"I/O error with file {dicom_path}: {e}")
        except Exception as e:
            # Best-effort batch: log the actual exception text and carry on
            # with the next file.
            logger.eprint(f"Failed to convert: {dicom_path}, Error: {e}")

In [15]:
# Main code execution: create the logger shared by all functions above
logger = CustomLogger(verbose)

# When requested, start from a clean slate by removing an existing database file
if delete_db:
    if os.path.exists(db_path):
        os.remove(db_path)
        logger.iprint("Existing database removed.")

In [16]:
# Create the tables if they do not exist yet.
setup_database(db_path)
# Open the connection and cursor used by the remaining cells of this run.
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

In [17]:
# Find the DICOM files in the image archive, split them into train/validate/test
# sets, add them to the database and copy them to the target folders
candidate_paths = (
    os.path.join(root, name)
    for root, _dirs, names in os.walk(source_dir)
    for name in names
)
dicom_files = [path for path in candidate_paths if is_dicom_file(path)]

# Random split: the first two slices are sized by the configured ratios and
# the test set receives whatever remains
random.shuffle(dicom_files)
total_files = len(dicom_files)
training_count = int(total_files * training_ratio)
validation_count = int(total_files * validation_ratio)
validation_end = training_count + validation_count

training_files = dicom_files[:training_count]
validation_files = dicom_files[training_count:validation_end]
test_files = dicom_files[validation_end:]

splits = (
    ('train', training_files),
    ('validate', validation_files),
    ('test', test_files),
)

# extract_metadata returns its fields in the order insert_metadata_into_db
# expects them after the cursor, so the tuple is passed straight through
for split_state, split_files in splits:
    for file_path in split_files:
        insert_metadata_into_db(cursor, *extract_metadata(file_path), file_path, split_state)

conn.commit()

for split_state, split_files in splits:
    copy_files(split_files, os.path.join(target_dir, split_state + '\\valid'))

logger.iprint(f"Total DICOM files: {total_files}")
logger.iprint(f"Training files: {len(training_files)}")
logger.iprint(f"Validation files: {len(validation_files)}")
logger.iprint(f"Test files: {len(test_files)}")



In [18]:
# After inserting DICOM metadata...
# Create the PNG files for each data-set split
# NOTE(review): the output root is the hard-coded sibling folder 'training_png',
# which only follows target_dir when its last component is 'training' - confirm
# before changing target_dir.
for split_state in ('train', 'validate', 'test'):
    dicom_split_dir = os.path.join(target_dir, split_state + '\\valid')
    png_split_dir = os.path.join(target_dir, '..', 'training_png', split_state, 'valid')
    convert_dicom_to_png(dicom_split_dir, png_split_dir, split_state, cursor)

conn.commit()

In [19]:
# Release the database connection opened for this run.
conn.close()

In [20]:
# Log the commands for the next steps: starting the API and running its tests.
logger.iprint ("Start DB API with python flask_API.py")
logger.iprint ("Run tests with pytest -v test_API.py")