## Imports
utilities.py contains a number of useful functions including loading credentials from a json file, retrieving files from S3, and defining thecurrent list of brains that are associated with the Active Atlas.

In [1]:
import datajoint as dj
import numpy as np
import json
from utilities import *
from subprocess import call

## Load Credentials, Connect to Database
User must specify their own filepath to their aws S3 credentials and Datajoint credentials in json format.

In [20]:
%%writefile credFiles.yaml
aws_fp: /Users/yoavfreund/VaultBrain/s3-creds.json
dj_fp: /Users/yoavfreund/VaultBrain/dj_local_conf.json

Writing credFiles.yaml


In [22]:
import yaml
credFiles=yaml.load(open('credFiles.yaml','r'))
credFiles

{'aws_fp': '/Users/yoavfreund/VaultBrain/s3-creds.json',
 'dj_fp': '/Users/yoavfreund/VaultBrain/dj_local_conf.json'}

In [23]:
# Load AWS Credentials
# `creds` needs the following fields: 'access_key', 'secret_access_key'
creds = load_aws_creds( fp=credFiles['aws_fp'] )

In [24]:
# Load Datajoint Credentials
# `dj_creds` needs the following fields: 'user', 'passwd'
dj_creds = load_dj_creds(fp=credFiles['dj_fp'])

In [26]:
dj.config['database.user'] = dj_creds['database.user']
dj.config['database.password'] = dj_creds['database.password']
dj.config['database.host'] =  dj_creds['database.host']
dj.config['database.port'] = dj_creds['database.port']

# Connect to datajoint server
dj.conn()

DataJoint connection (connected) yoav@ucsd-demo-db.datajoint.io:3306

In [5]:
# Define which schema you're using
schema = dj.schema('common_atlas_v2')
schema.spawn_missing_classes()

In [27]:
#dj.ERD(schema)

['dot', '-Tsvg', '/var/folders/80/c2kfvdvx5cx570r4vlzqgb840000gq/T/tmpbr4all9h'] return code: -11

stdout, stderr:
 b''
b''



AssertionError: -11

<datajoint.erd.ERD at 0x11f88bb70>

## Printing all table headers (every stack in the db)

In [29]:
brainStackTables = (BrainStack()).fetch( as_dict=False )
brainStackTables.describe()

AttributeError: 'numpy.ndarray' object has no attribute 'describe'

In [7]:
brainStackTables = (BrainStack()).fetch( as_dict=True )
for brainStackTable in brainStackTables:
    print( brainStackTable['mouse'] )

CHATM2
CHATM3
CSHL2
MD175
MD585
MD589
MD590
MD591
MD592
MD593
MD594
MD595
MD598
MD599
MD602
MD603
MD635
MD636
MD639
MD642
MD652
MD653
MD657
MD658
MD661
MD662
UCSD001


In [18]:
brainStackTables[0].describe

AttributeError: 'collections.OrderedDict' object has no attribute 'describe'

In [8]:
# From utilities.py
all_stacks = list( brain_names_dic.keys() )
print( all_stacks )

['MD585', 'MD589', 'MD590', 'MD591', 'MD592', 'MD593', 'MD594', 'MD595', 'MD598', 'MD599', 'MD602', 'MD603', 'CHATM2', 'CHATM3', 'CSHL2', 'MD658', 'MD661', 'MD662', 'MD635', 'MD636', 'MD639', 'MD642', 'MD652', 'MD653', 'MD657', 'MD175', 'UCSD001']


## List all Section-names & Section-numbers for a given stack
#### - Example displaying MD585 sections
`Placeholder` indicates a damaged section, not used in the pipeline

In [9]:
stack = 'MD585'
MD585_stack_info = (BrainStack()&dict(mouse=stack)).fetch( as_dict=True )[0]
print( 'Keys for <STACK>_stack_info:')
print( MD585_stack_info.keys() )

# `MD585_sections` contains a list of every filename, <space>, and the section number
#    If filename == 'Placeholder' then the section is unusable and not used in the pipeline
MD585_sections = MD585_stack_info['sorted_filenames'].split('|')

Keys for <STACK>_stack_info:
odict_keys(['mouse', 'stack_name', 'num_slices', 'num_valid_slices', 'channels', 'sorted_filenames', 'human_annotated', 'planar_resolution_um', 'section_thickness_um'])


## Access Raw Data
#### - Example accessing MD585 data

In [10]:
# Going through each brain, prints out brains that do NOT have raw data
for stack in all_stacks:
    # rawstack_info keys: ['mouse', 'aws_bucket', 'processed_stack']
    raw_stack_info = (RawSlices()&dict(mouse=stack)).fetch( as_dict=True )[0]
    if raw_stack_info['raw_stack']=='':
        print(raw_stack_info['mouse'])
        print( '  - Has no raw data on S3' )

CHATM2
  - Has no raw data on S3
CHATM3
  - Has no raw data on S3
CSHL2
  - Has no raw data on S3
MD639
  - Has no raw data on S3
MD175
  - Has no raw data on S3


In [11]:
stack = 'MD585'
# Load RawStack table (as dict) for a particular stack
raw_stack_info = (RawSlices()&dict(mouse=stack)).fetch( as_dict=True )[0]

bucket =  raw_stack_info['aws_bucket']
filename_list = raw_stack_info['raw_stack'].split('|')

print( 'The 100th slice of '+stack+' with the bucket appended to the front:\n' )
fp = bucket + '://' + filename_list[100]
print( fp )

The 100th slice of MD585 with the bucket appended to the front:

mousebrainatlas-rawdata://CSHL_data/MD585/MD585-IHC42-2015.08.19-14.26.30_MD585_1_0124_lossless.jp2


## Access Processed Data
#### - Example accessing MD585 data

In [13]:
# Going through each brain, prints out brains that do NOT have processed data
for stack in all_stacks:
    # rawstack_info keys: ['mouse', 'aws_bucket', 'processed_stack']
    processed_stack_info = (ProcessedSlices()&dict(mouse=stack)).fetch( as_dict=True )[0]
    print(processed_stack_info['mouse'], end='')
    if processed_stack_info['processed_stack']=='':
        print( '  - Has no processed data on S3' )
    else:
        print( '  - File on S3: ', processed_stack_info['processed_stack'].split('|')[:4])
        

MD585  - File on S3:  ['CSHL_data_processed/MD585/MD585_prep2_lossless/MD585-IHC16-2015.08.19-13.14.51_MD585_3_0048_prep2_lossless.tif', 'CSHL_data_processed/MD585/MD585_prep2_lossless/MD585-IHC17-2015.07.16-15.32.03_MD585_1_0049_prep2_lossless.tif', 'CSHL_data_processed/MD585/MD585_prep2_lossless/MD585-IHC17-2015.07.16-15.32.03_MD585_2_0050_prep2_lossless.tif', 'CSHL_data_processed/MD585/MD585_prep2_lossless/MD585-IHC17-2015.07.16-15.32.03_MD585_3_0051_prep2_lossless.tif']
MD589  - File on S3:  ['CSHL_data_processed/MD589/MD589_prep2_lossless/MD589-IHC16-2015.07.30-22.20.07_MD589_3_0048_prep2_lossless.tif', 'CSHL_data_processed/MD589/MD589_prep2_lossless/MD589-IHC17-2015.07.30-22.23.59_MD589_1_0049_prep2_lossless.tif', 'CSHL_data_processed/MD589/MD589_prep2_lossless/MD589-IHC17-2015.07.30-22.23.59_MD589_2_0050_prep2_lossless.tif', 'CSHL_data_processed/MD589/MD589_prep2_lossless/MD589-IHC17-2015.07.30-22.23.59_MD589_3_0051_prep2_lossless.tif']
MD590  - File on S3:  ['CSHL_data_processe

MD652  - File on S3:  ['CSHL_data_processed/MD652/MD652_prep2_lossless/MD652-F15-2016.12.16-20.45.52_MD652_3_0045_prep2_lossless.tif', 'CSHL_data_processed/MD652/MD652_prep2_lossless/MD652-F16-2016.12.16-21.00.10_MD652_1_0046_prep2_lossless.tif', 'CSHL_data_processed/MD652/MD652_prep2_lossless/MD652-F16-2016.12.16-21.00.10_MD652_2_0047_prep2_lossless.tif', 'CSHL_data_processed/MD652/MD652_prep2_lossless/MD652-F16-2016.12.16-21.00.10_MD652_3_0048_prep2_lossless.tif']
MD653  - File on S3:  ['CSHL_data_processed/MD653/MD653_prep2_lossless/MD653-F15-2016.12.20-21.04.12_MD653_1_0043_prep2_lossless.tif', 'CSHL_data_processed/MD653/MD653_prep2_lossless/MD653-F15-2016.12.20-21.04.12_MD653_2_0044_prep2_lossless.tif', 'CSHL_data_processed/MD653/MD653_prep2_lossless/MD653-F15-2016.12.20-21.04.12_MD653_3_0045_prep2_lossless.tif', 'CSHL_data_processed/MD653/MD653_prep2_lossless/MD653-F16-2016.12.20-21.19.34_MD653_1_0046_prep2_lossless.tif']
MD657  - File on S3:  ['CSHL_data_processed/MD657/MD657_pr

In [16]:
stack = 'MD585'
# Load ProcessedStack table (as dict) for a particular stack
processed_stack_info = (ProcessedSlices()&dict(mouse=stack)).fetch( as_dict=True )[0]

bucket =  processed_stack_info['aws_bucket']
filename_list = processed_stack_info['processed_stack'].split('|')

print( 'The 100th slice of '+stack+' with the bucket appended to the front:\n' )
fp = bucket + '/' + filename_list[100]
print( fp )

The 100th slice of MD585 with the bucket appended to the front:

mousebrainatlas-data/CSHL_data_processed/MD585/MD585_prep2_lossless/MD585-IHC50-2015.07.16-18.02.54_MD585_2_0149_prep2_lossless.tif


# Download Images from S3 to Local Computer
(Relies on `bucket` and `filename_list` from previous cell)

In [3]:
from subprocess import call

# `fp` includes the S3 bucket as shown belows
def download_from_s3(local_root, fp):
    s3_url = "s3://"+fp
    local_fp = local_root+fp
    
    print('Downloading file to ' + local_fp)

    call(["aws",\
          "s3",\
          "cp",\
          s3_url,\
          local_fp]) # need to check whether download worked.

In [18]:
stack = 'MD585'
# Contains all information on downloading processed files (post-preprocessing images)
processed_stack_info = (ProcessedSlices()&dict(mouse=stack)).fetch( as_dict=True )[0]
bucket =  processed_stack_info['aws_bucket']
filename_list = processed_stack_info['processed_stack'].split('|')
num_slices = len(filename_list)

# Using `download_from_s3` on my desktop to download first 10 files of MD585
#   and then display a downsampled version of it
local_root_alex_pc = "/mnt/c/Users/Alex/Documents/"

# Downloading valid sections 150-151
for i in range(150,152):
    fp = bucket+'/'+filename_list[i]
    download_from_s3( local_root_alex_pc, fp)

Downloading file to /mnt/c/Users/Alex/Documents/mousebrainatlas-data/CSHL_data_processed/MD585/MD585_prep2_lossless/MD585-N22-2015.07.18-07.10.40_MD585_1_0064_prep2_lossless.tif
Downloading file to /mnt/c/Users/Alex/Documents/mousebrainatlas-data/CSHL_data_processed/MD585/MD585_prep2_lossless/MD585-N22-2015.07.18-07.10.40_MD585_2_0065_prep2_lossless.tif
