## Connect to DataJoint server
#### Downloading data from S3 will require your own AWS credentials

In [94]:
import datajoint as dj
import numpy as np
import json
from utilities import *
# Connect to datajoint server
dj.conn()

DataJoint connection (connected) alex@ucsd-demo-db.datajoint.io:3306

In [95]:
# Define which schema you're using
schema = dj.schema('common_atlas')
schema.spawn_missing_classes()

## Load dictionary of brain names from utilities.py

In [96]:
# From utilities.py
all_stacks = list( brain_names_dic.keys() )
print( all_stacks )

['MD585', 'MD589', 'MD590', 'MD591', 'MD592', 'MD593', 'MD594', 'MD595', 'MD598', 'MD599', 'MD602', 'MD603', 'CHATM2', 'CHATM3', 'CSHL2', 'MD658', 'MD661', 'MD662', 'MD635', 'MD636', 'MD639', 'MD642', 'MD652', 'MD653', 'MD657', 'MD175', 'UCSD001']


## Access Raw Data
#### - Example accessing MD585 data

In [13]:
# Going through each brain, prints out brains that do NOT have raw data
for stack in all_stacks:
    # rawstack_info keys: ['mouse', 'aws_bucket', 'processed_stack']
    raw_stack_info = (RawStack()&dict(mouse=stack)).fetch( as_dict=True )[0]
    if raw_stack_info['raw_stack']=='':
        print(raw_stack_info['mouse'])
        print( '  - Has no raw data on S3' )

CHATM2
  - Has no raw data on S3
CHATM3
  - Has no raw data on S3
CSHL2
  - Has no raw data on S3
MD639
  - Has no raw data on S3
MD175
  - Has no raw data on S3


In [26]:
stack = 'MD585'
raw_stack_info = (RawStack()&dict(mouse=stack)).fetch( as_dict=True )[0]

bucket =  raw_stack_info['aws_bucket']
filename_list = raw_stack_info['raw_stack'].split('|')

print( 'The 100th slice of '+stack+' with the bucket appended to the front:\n' )
fp = bucket + '://' + filename_list[100]
print( fp )

The 100th slice of MD585 with the bucket appended to the front:

mousebrainatlas-rawdata://CSHL_data/MD585/MD585-IHC42-2015.08.19-14.26.30_MD585_1_0124_lossless.jp2


## Access Processed Data
#### - Example accessing MD585 data

In [14]:
# Going through each brain, prints out brains that do NOT have processed data
for stack in all_stacks:
    # rawstack_info keys: ['mouse', 'aws_bucket', 'processed_stack']
    processed_stack_info = (ProcessedStack()&dict(mouse=stack)).fetch( as_dict=True )[0]
    if processed_stack_info['processed_stack']=='':
        print(processed_stack_info['mouse'])
        print( '  - Has no processed data on S3' )

CHATM2
  - Has no processed data on S3
CHATM3
  - Has no processed data on S3
CSHL2
  - Has no processed data on S3
MD636
  - Has no processed data on S3
MD639
  - Has no processed data on S3
MD175
  - Has no processed data on S3
UCSD001
  - Has no processed data on S3


In [25]:
stack = 'MD585'
processed_stack_info = (ProcessedStack()&dict(mouse=stack)).fetch( as_dict=True )[0]

bucket =  processed_stack_info['aws_bucket']
filename_list = processed_stack_info['processed_stack'].split('|')

print( 'The 100th slice of '+stack+' with the bucket appended to the front:\n' )
fp = bucket + '://' + filename_list[100]
print( fp )

The 100th slice of MD585 with the bucket appended to the front:

mousebrainatlas-data://CSHL_data_processed/MD585/MD585_prep2_lossless/MD585-IHC50-2015.07.16-18.02.54_MD585_2_0149_prep2_lossless.tif


# Retrieving and viewing image from S3
(Relies on `bucket` and `filename_list` from previous cell)

In [None]:
from minio import Minio

# CHANGE THIS LINE TO ACCESS S3
with open('/path_to_aws_credentials/aws_credentials.json') as f:
        creds = json.load(f)

def get_client():
    return Minio( 's3.amazonaws.com', secure=True, **creds)

client = get_client()

In [32]:
# Load the lossless, processed image from S3
lossless_processed_image = client.get_object(bucket, filename_list[100])
thumbnail_processed_image = lossless_processed_image[::32,::32]
del lossless_processed_image

TypeError: 'HTTPResponse' object is not subscriptable

In [38]:
lossless_processed_image.__dict__

{'headers': HTTPHeaderDict({'x-amz-id-2': 'x2HU370axOwBMsyiEbVVhFBO8a+dRZpdBpjbnaPfWYcLEuIO37u3R275p6b80N2hJ+G1W7OFtnU=', 'x-amz-request-id': 'E28B3A93CF0C8AD7', 'Date': 'Thu, 03 Jan 2019 03:56:39 GMT', 'Last-Modified': 'Sat, 18 Mar 2017 20:15:23 GMT', 'x-amz-restore': 'ongoing-request="false", expiry-date="Sat, 05 Jan 2019 00:00:00 GMT"', 'ETag': '"dd61808df3f6dbe1b10a8444f6dd675a-147"', 'x-amz-storage-class': 'GLACIER', 'Accept-Ranges': 'bytes', 'Content-Type': 'image/jp2', 'Content-Length': '1232394417', 'Server': 'AmazonS3'}),
 'status': 200,
 'version': 11,
 'reason': 'OK',
 'strict': 0,
 'decode_content': True,
 'retries': Retry(total=5, connect=None, read=None, redirect=None, status=None),
 'enforce_content_length': False,
 '_decoder': None,
 '_body': None,
 '_fp': <http.client.HTTPResponse at 0x7f14de29cf28>,
 '_original_response': <http.client.HTTPResponse at 0x7f14de29cf28>,
 '_fp_bytes_read': 0,
 'msg': None,
 '_request_url': None,
 '_pool': <urllib3.connectionpool.HTTPSConn

In [None]:
client = get_s3_client()
objects = client.list_objects(BaseImage.bucket_name, prefix='MD657/')
cls.insert(
        dict(image_id=i, image_location=obj.object_name, filesize=obj.size) 
        for i, obj in enumerate(objects) if obj.object_name.endswith('.jp2'))

In [71]:
objects = client.list_objects('mousebraindata-open', prefix='MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.png')
for i, obj in enumerate(objects):
    print(i)
    print(obj.size/1000000)

0
0.900864


In [97]:
img = client.get_object('mousebraindata-open', 'MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.png')

In [98]:
img.__dict__

{'headers': HTTPHeaderDict({'x-amz-id-2': 'G9TqRJsN1bZgK+rv9skN2CTe4Q5JTXmRrgGg8zCHQygcWVMdU2lNxeLNbkX6LkkE+k/FZkxiR3U=', 'x-amz-request-id': 'AE6D54A1DE8C2069', 'Date': 'Thu, 03 Jan 2019 20:19:25 GMT', 'Last-Modified': 'Wed, 29 Aug 2018 04:16:33 GMT', 'ETag': '"2ea51d17c3b6ad95209ec65aa59325cc"', 'Accept-Ranges': 'bytes', 'Content-Type': 'image/png', 'Content-Length': '900864', 'Server': 'AmazonS3'}),
 'status': 200,
 'version': 11,
 'reason': 'OK',
 'strict': 0,
 'decode_content': True,
 'retries': Retry(total=5, connect=None, read=None, redirect=None, status=None),
 'enforce_content_length': False,
 '_decoder': None,
 '_body': None,
 '_fp': <http.client.HTTPResponse at 0x7f14de201e10>,
 '_original_response': <http.client.HTTPResponse at 0x7f14de201e10>,
 '_fp_bytes_read': 0,
 'msg': None,
 '_request_url': None,
 '_pool': <urllib3.connectionpool.HTTPSConnectionPool at 0x7f14de1fa0b8>,
 '_connection': <urllib3.connection.VerifiedHTTPSConnection at 0x7f14de201630>,
 'chunked': False,
 

In [99]:
img_data = img.data

In [100]:
img.chunked

False

In [102]:
img_data # 738 x 664

b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02\xe2\x00\x00\x02\x98\x08\x02\x00\x00\x00\xb98\n\xcf\x00\x00\x00&sCAL\x001.000000000000e+00\x001.000000000000e+00\xecx\xd7\x04\x00\x00 \x00IDATx\x9cD\xbbY\xac%Yv\x9e\xf7\xaf\x7f\xef\x883\xdc9of\xde\xca\xcc\xca\xac\xb9*\xabzd\xf5\xc8\xa6H\xaa\x9bt\xb7\x08\x8a\xa6$\x1a\xb6%\x086@\x0f\x80 \xeb\xc1\x06\x04\xc30\x0c\x0b\xf0\xb3m\xf8\xcd0`\xfb\xc9\xd6\x835\xc2\xa6\xe46\xd5\x14\xc9f7\xc9n\x0e\xc5\xee\x9a\xba+\xabk\xc8\xcc\xbay\xeb\xe6\xcd;\x9e!"\xd6\xfa\x97\x1fN\xb6\xfd\x128/\x11g\xc7\x8a\xb5\xd7\xf0\xafo\xdbKW\xbf\xd2\xbaD\x07\x16\x02/\xb0\xe5\xcd\xde\x0c\xfd\xf3\xd3/\x9f.\xff\x87\xf1\xf0\xe4\xa3\'\xff\xc3z\xfeOn\\\xfb\x95\xb7\x0f\xfeq\xbd\xf6\xf7\xaf]\xfc\x93\x93\xb7\xff!\xb5\xe9xT\xc7W\xf1\xfc\xdf\xc5\x9d\x7f4~\xf5\x97\x0e\xde\xfb\xa3v\xeb\xb9\xc9\xc1\x07>{\x13\xacX\x9eh\xbc\xcd\xba\xa6W\xfe\xcb\xa9\x1f\xcd_\xfb\xcfY/\t\x15\x00%P\x0bG%\n\x11\x02@\x10p\x95Z\x079|\x8e\xad\xcf7\xb3\xd7\x07\xb2Q\r\n\x92@\xc2E6\xed\xeepq\x88J\x90X.\xd9\xb6\xbd\xb4

In [104]:
dt = np.dtype(int)
dt = dt.newbyteorder('>')
data = np.frombuffer(img_data, dtype=dt)

In [106]:
np.shape(data)

(112608,)

In [108]:
data

array([-8552249625308161526,          57064047698,        3169685865112,
       ...,  6140657129700226223,  7635279566734360576,
        5279712195050102914])

In [92]:
s3_http_prefix = 'https://s3-us-west-1.amazonaws.com/mousebraindata-open/'
url = s3_http_prefix + 'MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001_lossy.jp2'

! wget -N -P /'./' $url

--2019-01-02 21:05:59--  https://s3-us-west-1.amazonaws.com/mousebraindata-open/MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001_lossy.jp2
Resolving s3-us-west-1.amazonaws.com (s3-us-west-1.amazonaws.com)... 52.219.20.17
Connecting to s3-us-west-1.amazonaws.com (s3-us-west-1.amazonaws.com)|52.219.20.17|:443... connected.
HTTP request sent, awaiting response... 403 Forbidden
2019-01-02 21:05:59 ERROR 403: Forbidden.



In [None]:
img_test = Image.fromstring('L', (738,664), rawData, 'raw', 'F;16')


In [112]:
112608/4

28152.0