## Demo of working with microscopy data using DataJoint
The project is hosted at https://github.com/ActiveBrainAtlas/Orofacial_Collaboration

The project uses cloud storage, with a relational database and S3 for bulk storage.

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
import matplotlib
import numpy as np

import tempfile
import datajoint as dj
from minio import Minio
from os import path
import json
import re

glymur is the Python API for OpenJPEG. The installation instructions are at https://glymur.readthedocs.io/en/latest/detailed_installation.html

It requires installing openjpeg.

If you use Homebrew on macOS, the installation can be done as follows:

$ brew install openjpeg
$ pip install glymur

In [None]:
import glymur

In [None]:
# Schema object used as the `@schema` decorator on the table classes below.
schema = dj.schema('common_distributed')   # please replace it with your database 

In [None]:
def get_s3_client():
    """Create a Minio client for ``s3.amazonaws.com`` over a secure connection.

    The JSON object stored in ``s3-creds.json`` (read from the current working
    directory) is forwarded to ``Minio`` as keyword arguments.
    """
    with open('s3-creds.json') as creds_file:
        minio_kwargs = json.loads(creds_file.read())
    return Minio('s3.amazonaws.com', secure=True, **minio_kwargs)

In [None]:
@schema
class BaseImage(dj.Manual):
    definition = '''
    image_id:          int            # image id
    ---
    image_location:    varchar(1024)  # image location in bucket
    filesize : bigint  # (bytes)
    '''

    # S3 bucket that holds the image files.
    bucket_name = 'mousebraindata-open'

    @classmethod
    def fill(cls):
        """
        Insert one row per ``.jp2`` object listed under the ``MD657/`` prefix.

        ``image_id`` is the object's position in the bucket listing, counted
        over all listed objects (not only the ``.jp2`` ones), so ids may have
        gaps.
        """
        listing = get_s3_client().list_objects(BaseImage.bucket_name, prefix='MD657/')
        rows = []
        for position, entry in enumerate(listing):
            if not entry.object_name.endswith('.jp2'):
                continue
            rows.append(dict(
                image_id=position,
                image_location=entry.object_name,
                filesize=entry.size))
        cls.insert(rows)

In [None]:
# Fill the image list only when the BaseImage table evaluates as falsy
# (presumably: it has no rows yet).
if not BaseImage(): 
    # load directory from the repository
    BaseImage.fill()

In [None]:
@schema
class BaseProperties(dj.Computed):
    definition = """
    # Image properties inferred from file name
    -> BaseImage
    ---
    prep : varchar(16)
    image_datetime : datetime
    letter_number : char(8)
    major : smallint
    minor : smallint
    lossy : bool
    """

    def make(self, key):
        """
        Parse the image location of ``key`` and insert the derived properties.

        The location is expected to look like
        ``<prep>/<prep>-<letter_number>-<YYYY.MM.DD>-<HH.MM.SS>_<prep>_<major>_<minor>_<lossy|lossless>.jp2``.

        :param key: primary key of the BaseImage entry to process
        :raises ValueError: if the image location does not follow that scheme
        """
        # Named `location` so the module-level `from os import path` is not shadowed.
        location = (BaseImage & key).fetch1('image_location')
        match = re.match(
            r'(?P<prep>\w*)/(?P=prep)-(?P<letter_number>\w*)-'
            r'(?P<date>\d{4}\.\d\d\.\d\d)-(?P<time>\d\d\.\d\d\.\d\d)_(?P=prep)_'
            r'(?P<major>\d+)_(?P<minor>\d+)_(?P<lossy>(lossy|lossless))\.jp2', location)
        if match is None:
            # Fail with the offending location instead of an AttributeError on None.
            raise ValueError(
                'image location {!r} does not match the expected naming scheme'.format(location))
        props = match.groupdict()
        # 'YYYY.MM.DD' + 'HH.MM.SS' -> 'YYYY-MM-DD HH:MM:SS'
        props['image_datetime'] = (
            props['date'].replace('.', '-') + ' ' + props['time'].replace('.', ':'))
        props['lossy'] = props['lossy'] == 'lossy'
        # 'date' and 'time' are not table attributes; ignore_extra_fields drops them.
        self.insert1(dict(key, **props), ignore_extra_fields=True)

In [None]:
# Run BaseProperties.make over the table's pending keys, showing a progress bar.
BaseProperties.populate(display_progress=True)

In [None]:
# `*` binds tighter than `&`: join properties with image records first,
# then restrict the result to entries where lossy is true.
BaseProperties * BaseImage & {'lossy': True}

In [None]:
@schema
class Thumbnail(dj.Imported):
    definition = """
    # Downsampled images from BaseImage
    -> BaseProperties
    ---
    image_width :  int  # (Voxels) original image width 
    image_height :  int  # (voxels) original image height
    image_depth : int # (voxels) original image depth (e.g. 3 for RGB)
    downsample : smallint  
    thumbnail : longblob
    """

    def make(self, key):
        """
        Download the JPEG 2000 image of ``key`` and store a downsampled copy.

        The image is fetched from the bucket, written to a temporary file
        (glymur opens images by file name) and sampled at every 32nd pixel
        along the first two axes. The temporary file is removed afterwards.

        :param key: primary key of the BaseProperties entry to process
        """
        # Named `location` so the module-level `from os import path` stays usable.
        location = (BaseImage & key).fetch1('image_location')
        client = get_s3_client()
        response = client.get_object(BaseImage.bucket_name, location)
        try:
            data = response.data
        finally:
            # Return the HTTP connection to the pool even if reading fails.
            response.close()
            response.release_conn()
        d = 32
        # The directory and the image copy in it are deleted on exit, so
        # repeated populate calls do not accumulate full-size temporary files.
        with tempfile.TemporaryDirectory() as tmp_dir:
            filename = path.join(tmp_dir, 'image.jp2')
            with open(filename, 'wb') as f:
                f.write(data)
            img = glymur.Jp2k(filename)
            shape = img.shape
            # Pixel data must be read while the temporary file still exists.
            thumbnail = np.uint8(np.clip(img[::d, ::d, :], 0, 255))
        self.insert1(dict(
            key,
            **dict(zip(('image_height', 'image_width', 'image_depth'), shape)),
            downsample=d, thumbnail=thumbnail))

In [None]:
# Display the diagram of the tables declared in this schema.
dj.ERD(schema)

In [None]:
# Populate thumbnails restricted to lossy images only. `limit=3` keeps this demo
# run short; `reserve_jobs=True` presumably coordinates work through the
# `schema.jobs` table shown in the next cell — confirm against the DataJoint docs.
Thumbnail.populate('lossy=1', display_progress=True, reserve_jobs=True, limit=3)

In [None]:
# Show the schema's job table.
schema.jobs

In [None]:
# Preview the contents of the Thumbnail table.
Thumbnail()

In [None]:
# Show stored thumbnails on a 4x3 grid of axes; zip stops at whichever runs
# out first, the 12 axes or the Thumbnail entries.
fig, ax = plt.subplots(4, 3, figsize=(12, 16))
for a, thumb in zip(ax.flatten(), Thumbnail()):
    clipped = np.clip(thumb['thumbnail'], 0, 255)  # clamp pixel values to 0..255
    a.imshow(clipped)