## Connect to DataJoint server
#### Downloading data from S3 will require your own AWS credentials

In [1]:
import datajoint as dj
import numpy as np
import json
from utilities import *
# Connect to datajoint server
dj.conn()

Connecting alex@ucsd-demo-db.datajoint.io:3306


DataJoint connection (connected) alex@ucsd-demo-db.datajoint.io:3306

In [2]:
# Define which schema you're using
schema = dj.schema('common_atlas')
schema.spawn_missing_classes()

## Load dictionary of brain names from utilities.py

In [3]:
# From utilities.py
all_stacks = list( brain_names_dic.keys() )
print( all_stacks )

['MD585', 'MD589', 'MD590', 'MD591', 'MD592', 'MD593', 'MD594', 'MD595', 'MD598', 'MD599', 'MD602', 'MD603', 'CHATM2', 'CHATM3', 'CSHL2', 'MD658', 'MD661', 'MD662', 'MD635', 'MD636', 'MD639', 'MD642', 'MD652', 'MD653', 'MD657', 'MD175', 'UCSD001']


## Access Raw Data
#### - Example accessing MD585 data

In [13]:
# Going through each brain, prints out brains that do NOT have raw data
for stack in all_stacks:
    # rawstack_info keys: ['mouse', 'aws_bucket', 'processed_stack']
    raw_stack_info = (RawStack()&dict(mouse=stack)).fetch( as_dict=True )[0]
    if raw_stack_info['raw_stack']=='':
        print(raw_stack_info['mouse'])
        print( '  - Has no raw data on S3' )

CHATM2
  - Has no raw data on S3
CHATM3
  - Has no raw data on S3
CSHL2
  - Has no raw data on S3
MD639
  - Has no raw data on S3
MD175
  - Has no raw data on S3


In [26]:
stack = 'MD585'
raw_stack_info = (RawStack()&dict(mouse=stack)).fetch( as_dict=True )[0]

bucket =  raw_stack_info['aws_bucket']
filename_list = raw_stack_info['raw_stack'].split('|')

print( 'The 100th slice of '+stack+' with the bucket appended to the front:\n' )
fp = bucket + '://' + filename_list[100]
print( fp )

The 100th slice of MD585 with the bucket appended to the front:

mousebrainatlas-rawdata://CSHL_data/MD585/MD585-IHC42-2015.08.19-14.26.30_MD585_1_0124_lossless.jp2


## Access Processed Data
#### - Example accessing MD585 data

In [14]:
# Going through each brain, prints out brains that do NOT have processed data
for stack in all_stacks:
    # rawstack_info keys: ['mouse', 'aws_bucket', 'processed_stack']
    processed_stack_info = (ProcessedStack()&dict(mouse=stack)).fetch( as_dict=True )[0]
    if processed_stack_info['processed_stack']=='':
        print(processed_stack_info['mouse'])
        print( '  - Has no processed data on S3' )

CHATM2
  - Has no processed data on S3
CHATM3
  - Has no processed data on S3
CSHL2
  - Has no processed data on S3
MD636
  - Has no processed data on S3
MD639
  - Has no processed data on S3
MD175
  - Has no processed data on S3
UCSD001
  - Has no processed data on S3


In [25]:
stack = 'MD585'
processed_stack_info = (ProcessedStack()&dict(mouse=stack)).fetch( as_dict=True )[0]

bucket =  processed_stack_info['aws_bucket']
filename_list = processed_stack_info['processed_stack'].split('|')

print( 'The 100th slice of '+stack+' with the bucket appended to the front:\n' )
fp = bucket + '://' + filename_list[100]
print( fp )

The 100th slice of MD585 with the bucket appended to the front:

mousebrainatlas-data://CSHL_data_processed/MD585/MD585_prep2_lossless/MD585-IHC50-2015.07.16-18.02.54_MD585_2_0149_prep2_lossless.tif


# Retrieving and viewing image from S3
(Relies on `bucket` and `filename_list` from previous cell)

In [None]:
from minio import Minio

# CHANGE THIS LINE TO ACCESS S3
with open('/path_to_aws_credentials/aws_credentials.json') as f:
        creds = json.load(f)

def get_client():
    return Minio( 's3.amazonaws.com', secure=True, **creds)

client = get_client()

In [32]:
# Load the lossless, processed image from S3
lossless_processed_image = client.get_object(bucket, filename_list[100])
thumbnail_processed_image = lossless_processed_image[::32,::32]
del lossless_processed_image

TypeError: 'HTTPResponse' object is not subscriptable

In [38]:
lossless_processed_image.__dict__

{'headers': HTTPHeaderDict({'x-amz-id-2': 'x2HU370axOwBMsyiEbVVhFBO8a+dRZpdBpjbnaPfWYcLEuIO37u3R275p6b80N2hJ+G1W7OFtnU=', 'x-amz-request-id': 'E28B3A93CF0C8AD7', 'Date': 'Thu, 03 Jan 2019 03:56:39 GMT', 'Last-Modified': 'Sat, 18 Mar 2017 20:15:23 GMT', 'x-amz-restore': 'ongoing-request="false", expiry-date="Sat, 05 Jan 2019 00:00:00 GMT"', 'ETag': '"dd61808df3f6dbe1b10a8444f6dd675a-147"', 'x-amz-storage-class': 'GLACIER', 'Accept-Ranges': 'bytes', 'Content-Type': 'image/jp2', 'Content-Length': '1232394417', 'Server': 'AmazonS3'}),
 'status': 200,
 'version': 11,
 'reason': 'OK',
 'strict': 0,
 'decode_content': True,
 'retries': Retry(total=5, connect=None, read=None, redirect=None, status=None),
 'enforce_content_length': False,
 '_decoder': None,
 '_body': None,
 '_fp': <http.client.HTTPResponse at 0x7f14de29cf28>,
 '_original_response': <http.client.HTTPResponse at 0x7f14de29cf28>,
 '_fp_bytes_read': 0,
 'msg': None,
 '_request_url': None,
 '_pool': <urllib3.connectionpool.HTTPSConn

In [None]:
client = get_s3_client()
objects = client.list_objects(BaseImage.bucket_name, prefix='MD657/')
cls.insert(
        dict(image_id=i, image_location=obj.object_name, filesize=obj.size) 
        for i, obj in enumerate(objects) if obj.object_name.endswith('.jp2'))

In [71]:
objects = client.list_objects('mousebraindata-open', prefix='MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.png')
for i, obj in enumerate(objects):
    print(i)
    print(obj.size/1000000)

0
0.900864


In [5]:
img = client.get_object('mousebraindata-open', 'MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.png')

In [6]:
img.__dict__

{'headers': HTTPHeaderDict({'x-amz-id-2': 'xtSLHGcqk+7tWiqwiMGNJ62CvES/GNXb2jZqeTBhjz//Ga+r4Oaf8P7XlvuT4Q2JAzx6/otjsPI=', 'x-amz-request-id': 'DA4506291EAA3552', 'Date': 'Thu, 03 Jan 2019 21:04:02 GMT', 'Last-Modified': 'Wed, 29 Aug 2018 04:16:33 GMT', 'ETag': '"2ea51d17c3b6ad95209ec65aa59325cc"', 'Accept-Ranges': 'bytes', 'Content-Type': 'image/png', 'Content-Length': '900864', 'Server': 'AmazonS3'}),
 'status': 200,
 'version': 11,
 'reason': 'OK',
 'strict': 0,
 'decode_content': True,
 'retries': Retry(total=5, connect=None, read=None, redirect=None, status=None),
 'enforce_content_length': False,
 '_decoder': None,
 '_body': None,
 '_fp': <http.client.HTTPResponse at 0x7f6840fe35c0>,
 '_original_response': <http.client.HTTPResponse at 0x7f6840fe35c0>,
 '_fp_bytes_read': 0,
 'msg': None,
 '_request_url': None,
 '_pool': <urllib3.connectionpool.HTTPSConnectionPool at 0x7f67f650fb70>,
 '_connection': <urllib3.connection.VerifiedHTTPSConnection at 0x7f67f650f358>,
 'chunked': False,
 

In [7]:
img_data = img.data

In [8]:
img.chunked

False

In [40]:
#img_data # 738 x 664

In [11]:
dt = np.dtype(int)
dt = dt.newbyteorder('>')
data = np.frombuffer(img_data, dtype=dt)

In [12]:
np.shape(data)

(112608,)

In [13]:
data

array([-8552249625308161526,          57064047698,        3169685865112,
       ...,  6140657129700226223,  7635279566734360576,
        5279712195050102914])

In [92]:
s3_http_prefix = 'https://s3-us-west-1.amazonaws.com/mousebraindata-open/'
url = s3_http_prefix + 'MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001_lossy.jp2'

! wget -N -P /'./' $url

--2019-01-02 21:05:59--  https://s3-us-west-1.amazonaws.com/mousebraindata-open/MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001_lossy.jp2
Resolving s3-us-west-1.amazonaws.com (s3-us-west-1.amazonaws.com)... 52.219.20.17
Connecting to s3-us-west-1.amazonaws.com (s3-us-west-1.amazonaws.com)|52.219.20.17|:443... connected.
HTTP request sent, awaiting response... 403 Forbidden
2019-01-02 21:05:59 ERROR 403: Forbidden.



In [38]:
from PIL import Image
img = client.get_object('mousebraindata-open', 'MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.png')
img_data = img.data
img_test = Image.frombytes('L', (738,664), img_data, 'raw')
#img_test

In [79]:
from PIL import Image
img = client.get_object('mousebraindata-open', 'MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.tif')
img_data = img.data
img_test = Image.frombytes('L', (738,664), img_data)
img_test

data = np.asarray( img_test, dtype="int32" )

In [1]:
# Normalize data
data = data

NameError: name 'data' is not defined

In [81]:
maxN = max(data[0])
for i in range(np.shape(data)[0]):
    if max(data[i])>maxN:
        maxN = max(data[i])

In [86]:
fh = open("TEST.png", "wb")
fh.write(img_data.decode('base64'))
fh.close()

LookupError: 'base64' is not a text encoding; use codecs.decode() to handle arbitrary codecs

In [45]:
import subprocess


def download_to_demo(fp):
    fn = 'MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.tif'
    
    bucket = 'mousebraindata-open'
    s3_http_prefix = 'https://s3-us-west-1.amazonaws.com/'+bucket+'/'
    
    url = s3_http_prefix + fn
    local_fp = fp + fn
    
#     subprocess.call(['wget','-N','-P',local_fp,url])
    print('wget','-N','-P',local_fp,url)
    ! wget -N -P \
    /mnt/c/Users/Alex/Documents/MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.tif \
    https://s3-us-west-1.amazonaws.com/mousebraindata-open/MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.tif
    #execute_command('wget -N -P \"%s\" \"%s\"' % (local_fp, url))
    return local_fp

In [46]:
download_to_demo('/mnt/c/Users/Alex/Documents/')

wget -N -P /mnt/c/Users/Alex/Documents/MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.tif https://s3-us-west-1.amazonaws.com/mousebraindata-open/MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.tif
--2019-01-06 18:42:45--  https://s3-us-west-1.amazonaws.com/mousebraindata-open/MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.tif
Resolving s3-us-west-1.amazonaws.com (s3-us-west-1.amazonaws.com)... 52.219.20.1
Connecting to s3-us-west-1.amazonaws.com (s3-us-west-1.amazonaws.com)|52.219.20.1|:443... connected.
HTTP request sent, awaiting response... 403 Forbidden
2019-01-06 18:42:45 ERROR 403: Forbidden.



'/mnt/c/Users/Alex/Documents/MD657/MD657-F1-2017.02.17-17.39.26_MD657_1_0001.tif'

In [13]:
from subprocess import call
call(["ls"])

0

In [24]:
! ls -A

 .ipynb_checkpoints	       config.ipynb		    db_manager.ipynb
'Accessing Atlas Data.ipynb'   creator_atlas.ipynb	    dj_local_conf.json
 TEST.png		       creator_atlas_neu.ipynb	    tests.ipynb
 __pycache__		       data_retriever_atlas.ipynb   utilities.py


In [16]:
import subprocess
subprocess.call(['ls'])

0

In [49]:
subprocess.check_output(['aws','s3'])

CalledProcessError: Command '['aws', 's3']' returned non-zero exit status 255.

In [51]:
! aws s3


usage: aws [options] <command> <subcommand> [parameters]
aws: error: too few arguments
