In [16]:
import datajoint as dj
from datajoint import schema
from minio import Minio
from os import path
import json

In [7]:
# load S3 access_key and secret_key
# The file s3-creds.json should contain the following:
# {"access_key": "...", "secret_key": "..."}

with open('s3-creds.json') as f:
    creds = json.load(f)
    
bucket_name = 'mousebraindata-open'

In [8]:
def get_client():
    return Minio(
        's3.amazonaws.com',
        secure=True, **creds)

In [10]:
client = get_client()
print('got client',client)

got client <minio.api.Minio object at 0x1214aa518>


In [14]:
objects = client.list_objects(bucket_name, prefix='MD657/')
for object in objects:
    print(object.object_name)

MD657/
MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.png
MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.tif
MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001_lossless.jp2
MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001_lossless.lock-ip-172-30-1-84
MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001_lossless_patches.tgz
MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001_lossy.jp2
MD657/MD657-F1-2017.02.17-17.39.26_MD657_2_0002.png
MD657/MD657-F1-2017.02.17-17.39.26_MD657_2_0002.tif
MD657/MD657-F1-2017.02.17-17.39.26_MD657_2_0002_lossless.jp2
MD657/MD657-F1-2017.02.17-17.39.26_MD657_2_0002_lossless.lock-ip-172-30-1-84
MD657/MD657-F1-2017.02.17-17.39.26_MD657_2_0002_lossless_patches.tgz
MD657/MD657-F1-2017.02.17-17.39.26_MD657_2_0002_lossy.jp2
MD657/MD657-F1-2017.02.17-17.39.26_MD657_3_0003.png
MD657/MD657-F1-2017.02.17-17.39.26_MD657_3_0003.tif
MD657/MD657-F1-2017.02.17-17.39.26_MD657_3_0003_lossless.jp2
MD657/MD657-F1-2017.02.17-17.39.26_MD657_3_0003_lossless.lock-ip-172-30-1-39
MD657/MD657-F

MD657/MD657-F75-2017.02.18-16.21.28_MD657_2_0224_lossless.jp2
MD657/MD657-F75-2017.02.18-16.21.28_MD657_2_0224_lossless.lock-ip-172-30-1-211
MD657/MD657-F75-2017.02.18-16.21.28_MD657_2_0224_lossless_patches.tgz
MD657/MD657-F75-2017.02.18-16.21.28_MD657_2_0224_lossy.jp2
MD657/MD657-F76-2017.02.18-16.37.33_MD657_1_0226.png
MD657/MD657-F76-2017.02.18-16.37.33_MD657_1_0226.tif
MD657/MD657-F76-2017.02.18-16.37.33_MD657_1_0226_lossless.jp2
MD657/MD657-F76-2017.02.18-16.37.33_MD657_1_0226_lossless.lock-ip-172-30-1-66
MD657/MD657-F76-2017.02.18-16.37.33_MD657_1_0226_lossless_patches.tgz
MD657/MD657-F76-2017.02.18-16.37.33_MD657_1_0226_lossy.jp2
MD657/MD657-F76-2017.02.18-16.37.33_MD657_2_0227.png
MD657/MD657-F76-2017.02.18-16.37.33_MD657_2_0227.tif
MD657/MD657-F76-2017.02.18-16.37.33_MD657_2_0227_lossless.jp2
MD657/MD657-F76-2017.02.18-16.37.33_MD657_2_0227_lossless.lock-ip-172-30-1-44
MD657/MD657-F76-2017.02.18-16.37.33_MD657_2_0227_lossless_patches.tgz
MD657/MD657-F76-2017.02.18-16.37.33_MD6

In [17]:
dj.Manual

datajoint.user_tables.Manual

In [18]:
@schema
class BaseImage(dj.Manual):
    definition = '''
    image_id:          int            # image id
    ---
    image_location:    varchar(1024)  # image location in bucket
    filesize : bigint  # (bytes)
    '''
    
    @classmethod
    def fill(cls):   
        """
        fill the image list from the bucket
        """
        client = get_client()
        print('got client',client)
        objects = client.list_objects(bucket_name, prefix='MD657/')
        cls.insert(
            dict(image_id=i, image_location=obj.object_name, filesize=obj.size) 
            for i, obj in enumerate(objects) if obj.object_name.endswith('.jp2'))

DataJointError: Error in query:
SHOW DATABASES LIKE '<class '__main__.BaseImage'>'
Please check spelling, syntax, and existence of tables and attributes.
When restricting a relation by a condition in a string, enclose attributes in backquotes.

In [None]:
# load directory from the repository
BaseImage.fill()

In [None]:
BaseImage()

In [None]:
@schema
class Contrast(dj.Imported):
    definition = '''
    -> BaseImage
    ---
    contrast: float
    '''
    
    def make(self, key):
        path = (BaseImage & key).fetch1('image_location')
        client = get_client()
        data = client.get_object(bucket_name, path).data
        img = convert_binary_jp2_data_into_image(data)   # This function needs to be defined
        self.insert1(dict(key, contrast=img.std()))

In [None]:
Contrast.populate(reserve_jobs=True, skip_errors=True)   
   # This does the distributed job -- run on many instances

In [None]:
# View job reservations
schema.jobs