In [None]:
import datajoint as dj
from minio import Minio
from os import path
import json

In [None]:
# Bucket that holds the images; used by every S3 call in this notebook.
bucket_name = 'mousebraindata-open'

# Read the S3 credentials from a local JSON file instead of hardcoding them.
# s3-creds.json must look like:
#     {"access_key": "...", "secret_key": "..."}
# The parsed dict is later expanded into keyword arguments for the S3 client.
with open('s3-creds.json') as f:
    creds = json.load(f)

In [None]:
def get_client():
    """Return a new Minio client for the AWS S3 endpoint.

    The connection uses TLS (``secure=True``) and the entries of the
    module-level ``creds`` dict are passed through as keyword arguments.
    """
    endpoint = 's3.amazonaws.com'
    return Minio(endpoint, secure=True, **creds)

In [None]:
# NOTE(review): `schema` is not defined in the visible cells; it must be created
# (e.g. with dj.schema(...)) before this cell is run -- confirm.
@schema
class BaseImage(dj.Manual):
    """Catalog of the .jp2 images found in the S3 bucket."""

    definition = '''
    image_id:          int            # image id
    ---
    image_location:    varchar(1024)  # image location in bucket
    filesize : bigint  # (bytes)
    '''

    @classmethod
    def fill(cls):
        """
        Fill the image list from the bucket.

        Lists the objects under the ``MD657/`` prefix of ``bucket_name`` and
        inserts one row per object whose name ends with ``.jp2``.

        ``image_id`` is the object's position in the listing, counted before
        the ``.jp2`` filter is applied, so ids are not necessarily contiguous.

        Rows whose ``image_id`` is already in the table are skipped, so this
        method can be re-run (e.g. after a kernel restart) without raising a
        duplicate-key error. Existing rows are never modified.
        """
        client = get_client()
        objects = client.list_objects(bucket_name, prefix='MD657/')
        # Lazily build the rows; enumerate() runs before the filter on purpose
        # so that ids match the original numbering scheme.
        rows = (
            dict(image_id=i, image_location=obj.object_name, filesize=obj.size)
            for i, obj in enumerate(objects)
            if obj.object_name.endswith('.jp2'))
        cls.insert(rows, skip_duplicates=True)

In [None]:
# Populate the BaseImage table with the .jp2 objects listed in the S3 bucket.
BaseImage.fill()

In [None]:
# Instantiate the table as the cell's last expression so the notebook displays it.
BaseImage()

In [None]:
# NOTE(review): `schema` is not defined in the visible cells; it must be created
# (e.g. with dj.schema(...)) before this cell is run -- confirm.
@schema
class Contrast(dj.Imported):
    """Contrast of each image in BaseImage, computed from the image data in S3."""

    definition = '''
    -> BaseImage
    ---
    contrast: float
    '''

    def make(self, key):
        """Compute and store the contrast for the image identified by ``key``.

        Downloads the object stored at the image's ``image_location`` from
        ``bucket_name``, decodes it, and inserts the standard deviation of the
        decoded image as ``contrast``.

        ``key`` is the primary key of one BaseImage row.
        """
        # Named image_location (not `path`) to avoid shadowing `os.path`
        # imported at the top of the notebook.
        image_location = (BaseImage & key).fetch1('image_location')
        client = get_client()
        response = client.get_object(bucket_name, image_location)
        try:
            data = response.data
        finally:
            # Always hand the HTTP connection back, even if the read fails.
            response.close()
            response.release_conn()
        img = convert_binary_jp2_data_into_image(data)   # This function needs to be defined
        self.insert1(dict(key, contrast=img.std()))

In [None]:
# Compute Contrast for the BaseImage entries that do not have one yet.
# reserve_jobs=True makes this the distributed job: the same cell can be run on
# many instances at once. suppress_errors=True lets the run continue past
# entries whose make() call fails instead of stopping at the first error.
Contrast.populate(reserve_jobs=True, suppress_errors=True)

In [None]:
# View the job reservations made by populate(reserve_jobs=True).
# NOTE(review): `schema` is not defined in the visible cells -- confirm it exists.
schema.jobs