# Redis + DICOM Demo
This notebook reads a sample DICOM data set, stores each file's metadata as a Redis JSON document, and stores each file's bytes as a series of binary Redis strings (one string per chunk).

## Install Necessary Python Modules

In [None]:
!pip install redis pydicom

## Start Redis Enterprise
This starts a 3-node Redis Enterprise cluster and creates one sharded database (2 shards) with Search and JSON enabled.

In [None]:
# Run the start script that sits next to this notebook (shell escape).
# NOTE(review): the script's contents are not visible here; it is expected to
# bring up the Redis database that the connection cell below targets — confirm.
!./start.sh

## Import modules, Establish Redis connectivity

In [None]:
import redis
from redis.commands.search.field import TagField, TextField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.aggregation import AggregateRequest, Desc
from redis.commands.search.query import Query
from redis.commands.search import reducers
import pydicom
import os
from time import perf_counter
import json
import re
import shutil

CHUNK_SIZE = 5 * 1024  # bytes stored per Redis String chunk (5 kilobytes)
OUTPUT_DIR = './dicom_out'  # directory the restored DICOM files are written to

# Connection settings for the demo database.
# NOTE(review): presumably these are the values provisioned by start.sh — confirm.
client = redis.Redis(
    host='localhost',
    port=12000,
    username='default',
    password='redis'
)

# Start from a clean slate so the notebook can be re-run from the top:
# empty the database and remove any output left by a previous run.
client.flushdb()
# ignore_errors=True tolerates a missing directory (e.g. on the first run)
# without a bare `except:` that would also swallow KeyboardInterrupt and
# other unrelated failures.
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)

## Create Redis Index

In [None]:
# The index covers JSON documents whose keys start with 'file:'.
idx_def = IndexDefinition(prefix=['file:'], index_type=IndexType.JSON)

# Each entry maps a JSONPath inside the document to the attribute name used in
# queries (e.g. '$.studyDate' is queried as '@studyDate').
schema = [
    TextField(name='$.protocolName', as_name='protocolName'),
    TagField(name='$.patientSex', as_name='patientSex'),
    TagField(name='$.studyDate', as_name='studyDate'),
    TextField(name='$.manufacturer', as_name='manufacturer'),
]

dicom_index = client.ft('dicom_idx')
dicom_index.create_index(schema, definition=idx_def)

## Load DICOM Files
Write the DICOM file set to Redis as JSON objects.  Each object contains metadata about the file and an array of Redis String keys.  Each Redis String holds a chunk of up to 5 KB from the original DICOM file.

In [None]:
def load_chunks(key, file, chunk_size):
    """Store the contents of a file in Redis as a sequence of String keys.

    The file is read in binary mode, `chunk_size` bytes at a time. Each piece
    is written with SET under the key 'chunk:<key>:<n>', where <n> counts up
    from 0 in read order.

    Args:
        key: Identifier embedded in every chunk key name.
        file: Path of the file to read.
        chunk_size: Maximum number of bytes per chunk.

    Returns:
        The list of chunk key names, in the order the chunks were read.
        An empty file yields an empty list and no Redis writes.
    """
    stored_keys = []
    with open(file, 'rb') as stream:
        # iter() with a b'' sentinel keeps reading until end of file.
        pieces = iter(lambda: stream.read(chunk_size), b'')
        for index, piece in enumerate(pieces):
            name = f'chunk:{key}:{index}'
            client.set(name, piece)
            stored_keys.append(name)
    return stored_keys

count = 0    # files successfully written to Redis
skipped = 0  # files that could not be read or lacked the required attributes
# Have pydicom raise on validation problems while reading; files that trigger
# this are skipped by the loop below.
pydicom.config.settings.reading_validation_mode = pydicom.config.RAISE
for file in pydicom.data.get_testdata_files():
    # Only parsing and metadata extraction are best-effort: a test file that
    # cannot be read, or lacks one of the attributes used here, is skipped.
    # `except Exception` (not a bare `except:`) lets KeyboardInterrupt through
    # so the cell can still be interrupted.
    try:
        ds = pydicom.dcmread(file)
        image_name = os.path.basename(file)
        # Collapse runs of whitespace in the protocol name to a single space.
        protocol_name = re.sub(r'\s+', ' ', ds.ProtocolName)
        patient_sex = ds.PatientSex
        study_date = ds.StudyDate
        manufacturer = ds.Manufacturer.upper()
    except Exception:
        skipped += 1
        continue

    # Redis writes happen outside the try block so that connection or server
    # errors surface instead of being silently counted as skipped files.
    key = f'file:{image_name}'
    chunk_keys = load_chunks(key, file, CHUNK_SIZE)

    client.json().set(key, '$', {
        'imageName': image_name,
        'protocolName': protocol_name,
        'patientSex': patient_sex,
        'studyDate': study_date,
        'manufacturer': manufacturer,
        'chunks': chunk_keys
    })
    count += 1
print(f'Files loaded: {count}')
print(f'Files skipped: {skipped}')

## Restore DICOM Files
Iterate across the JSON objects and restore the files from their byte chunks.

In [None]:
def get_bytes(chunks):
    """Fetch the given Redis String keys and concatenate their values.

    Args:
        chunks: Iterable of Redis key names, in the order their values
            should be joined.

    Returns:
        A bytearray holding the values of all keys, back to back. An empty
        iterable yields an empty bytearray without contacting Redis.
    """
    # One GET per key, joined in iteration order.
    return bytearray().join(client.get(chunk_key) for chunk_key in chunks)

# exist_ok=True lets this cell be re-run without failing on the directory
# created by a previous execution.
os.makedirs(OUTPUT_DIR, exist_ok=True)
count = 0
for key in client.scan_iter(match='file:*', count=10):
    # Keys have the form b'file:<name>'. Split on the first ':' only, so a
    # file name that itself contains ':' is recovered intact.
    file = str(key, encoding='utf-8').split(':', 1)[1]
    chunks = client.json().get(key, '$.chunks')
    # The JSONPath result is indexed at [0] to obtain the stored list of
    # chunk keys for this document.
    chunk_bytes = get_bytes(chunks[0])
    with open(os.path.join(OUTPUT_DIR, file), 'wb') as outfile:
        outfile.write(chunk_bytes)
    count += 1
print(f'Files written: {count}')

## Validate Restored DICOM File
Show file integrity is maintained after the write/read of DICOM bytes into Redis.

In [None]:
# Print the boolean returned by pydicom's is_dicom() for one restored file.
# NOTE(review): is_dicom() is understood to check the DICOM file marker only,
# not byte-for-byte equality with the original — confirm against pydicom docs.
print(pydicom.misc.is_dicom(f'{OUTPUT_DIR}/J2K_pixelrep_mismatch.dcm'))

## Scenario 1
Hypothetical Business Problem: Retrieve all the bytes of an image given a known file name

In [None]:
# Known file name whose bytes are fetched by direct key lookup.
file_name = 'JPGExtended.dcm'

# Timed section: read the chunk-key list from the JSON document, then pull
# every chunk and concatenate it.
t1 = perf_counter()
results = client.json().get(
    f'file:{file_name}',
    '$.chunks',
)
total_bytes = get_bytes(results[0])
t2 = perf_counter()

print(f'Exec time: {round((t2-t1)*1000,2)} ms')
print(f'Bytes Retrieved: {len(total_bytes)}')

## Scenario 2
Hypothetical Business Problem: Find a DICOM image with the 'protocolName' of '194' and 'studyDate' in 2019.  Retrieve all file bytes.

In [None]:

# Search on protocolName containing '194' and a studyDate tag starting with
# '2019'; return only the chunk-key list and the image name.
query = Query('@protocolName:194 @studyDate:{2019*}')\
    .return_field('$.chunks', as_field='chunks')\
    .return_field('$.imageName', as_field='imageName')
t1 = perf_counter()
result = client.ft('dicom_idx').search(query)
# Use the first hit only; None when the search matched nothing.
first_doc = result.docs[0] if len(result.docs) > 0 else None
total_bytes = bytearray()
if first_doc is not None:
    # The returned 'chunks' field is a JSON-encoded array of chunk keys.
    total_bytes = get_bytes(json.loads(first_doc.chunks))
t2 = perf_counter()

print(f'Exec time: {round((t2-t1)*1000,2)} ms')
# Guard the image-name lookup as well: indexing result.docs[0] unconditionally
# raised IndexError whenever the search returned no documents.
if first_doc is not None:
    print(f'Image name: {first_doc.imageName}')
else:
    print('Image name: (no matching image found)')
print(f'Bytes Retrieved: {len(total_bytes)}')

## Scenario 3
Hypothetical Business Problem: Find the count of DICOM images by protocol

In [None]:
# Group all indexed documents by protocolName, count each group, and sort the
# groups by that count in descending order.
request = AggregateRequest('*')\
    .group_by('@protocolName', reducers.count().alias('count'))\
    .sort_by(Desc('@count'))

t1 = perf_counter()
results = client.ft('dicom_idx').aggregate(request)
t2 = perf_counter()
print(f'Exec time: {round((t2-t1)*1000,2)} ms')
for row in results.rows:
    # Prints "<row[1]>: <row[3]>".
    # NOTE(review): assumes each row is a flat [name, value, name, value] list
    # of bytes, i.e. protocol name at index 1 and count at index 3 — confirm.
    print(f'{str(row[1], "utf-8")}: {str(row[3], "utf-8")}')

## Shut Down
Shut down the Redis Enterprise Cluster

In [None]:
# Run the stop script that sits next to this notebook (shell escape).
# NOTE(review): the script's contents are not visible here; it is expected to
# tear down what start.sh created — confirm.
!./stop.sh