## Connect to DataJoint server
#### Downloading data from S3 will require your own AWS credentials

In [1]:
import datajoint as dj
import numpy as np
import json
from utilities import *
from subprocess import call

# Connect to datajoint server
dj.conn()

Connecting alex@ucsd-demo-db.datajoint.io:3306


DataJoint connection (connected) alex@ucsd-demo-db.datajoint.io:3306

In [2]:
# Define which schema you're using
schema = dj.schema('common_atlas')
schema.spawn_missing_classes()

## Load dictionary of brain names from utilities.py

In [3]:
# From utilities.py
all_stacks = list( brain_names_dic.keys() )
print( all_stacks )

['MD585', 'MD589', 'MD590', 'MD591', 'MD592', 'MD593', 'MD594', 'MD595', 'MD598', 'MD599', 'MD602', 'MD603', 'CHATM2', 'CHATM3', 'CSHL2', 'MD658', 'MD661', 'MD662', 'MD635', 'MD636', 'MD639', 'MD642', 'MD652', 'MD653', 'MD657', 'MD175', 'UCSD001']


## List all Section-names & Section-numbers for a given stack
#### - Example displaying MD585 sections
`Placeholder` indicates a damaged section, not used in the pipeline

In [66]:
stack = 'MD585'
MD585_stack_info = (BrainStackInfo()&dict(mouse=stack)).fetch( as_dict=True )[0]
print( MD585_stack_info.keys() )
MD585_stack_info['sorted_filenames'].split('|')

odict_keys(['mouse', 'stack_name', 'num_slices', 'num_valid_slices', 'channels', 'sorted_filenames', 'human_annotated', 'planar_resolution_um', 'section_thickness_um'])


['Placeholder 1',
 'Placeholder 2',
 'MD585-N3-2015.07.16-19.48.34_MD585_1_0007 3',
 'MD585-IHC3-2015.07.16-15.26.44_MD585_1_0007 4',
 'MD585-N3-2015.07.16-19.48.34_MD585_2_0008 5',
 'MD585-IHC3-2015.07.16-15.26.44_MD585_2_0008 6',
 'MD585-N4-2015.07.16-19.50.10_MD585_1_0010 7',
 'Placeholder 8',
 'MD585-N4-2015.07.16-19.50.10_MD585_2_0011 9',
 'MD585-IHC4-2015.07.18-07.01.12_MD585_1_0010 10',
 'MD585-N4-2015.07.16-19.50.10_MD585_3_0012 11',
 'MD585-IHC4-2015.07.18-07.01.12_MD585_2_0011 12',
 'MD585-N5-2015.07.16-19.53.21_MD585_1_0013 13',
 'MD585-IHC5-2015.07.18-07.03.14_MD585_1_0013 14',
 'MD585-N5-2015.07.16-19.53.21_MD585_2_0014 15',
 'MD585-IHC5-2015.07.18-07.03.14_MD585_2_0014 16',
 'MD585-N5-2015.07.16-19.53.21_MD585_3_0015 17',
 'MD585-IHC5-2015.07.18-07.03.14_MD585_3_0015 18',
 'MD585-N6-2015.07.16-19.56.32_MD585_1_0016 19',
 'MD585-IHC6-2015.07.16-14.42.32_MD585_1_0016 20',
 'MD585-N6-2015.07.16-19.56.32_MD585_2_0017 21',
 'MD585-IHC6-2015.07.16-14.42.32_MD585_2_0017 22',
 'M

## Access Raw Data
#### - Example accessing MD585 data

In [13]:
# Going through each brain, prints out brains that do NOT have raw data
for stack in all_stacks:
    # rawstack_info keys: ['mouse', 'aws_bucket', 'processed_stack']
    raw_stack_info = (RawStack()&dict(mouse=stack)).fetch( as_dict=True )[0]
    if raw_stack_info['raw_stack']=='':
        print(raw_stack_info['mouse'])
        print( '  - Has no raw data on S3' )

CHATM2
  - Has no raw data on S3
CHATM3
  - Has no raw data on S3
CSHL2
  - Has no raw data on S3
MD639
  - Has no raw data on S3
MD175
  - Has no raw data on S3


In [26]:
stack = 'MD585'
raw_stack_info = (RawStack()&dict(mouse=stack)).fetch( as_dict=True )[0]

bucket =  raw_stack_info['aws_bucket']
filename_list = raw_stack_info['raw_stack'].split('|')

print( 'The 100th slice of '+stack+' with the bucket appended to the front:\n' )
fp = bucket + '://' + filename_list[100]
print( fp )

The 100th slice of MD585 with the bucket appended to the front:

mousebrainatlas-rawdata://CSHL_data/MD585/MD585-IHC42-2015.08.19-14.26.30_MD585_1_0124_lossless.jp2


## Access Processed Data
#### - Example accessing MD585 data

In [59]:
# Going through each brain, prints out brains that do NOT have processed data
for stack in all_stacks:
    # rawstack_info keys: ['mouse', 'aws_bucket', 'processed_stack']
    processed_stack_info = (ProcessedStack()&dict(mouse=stack)).fetch( as_dict=True )[0]
    if processed_stack_info['processed_stack']=='':
        print(processed_stack_info['mouse'])
        print( '  - Has no processed data on S3' )

CHATM2
  - Has no processed data on S3
CHATM3
  - Has no processed data on S3
CSHL2
  - Has no processed data on S3
MD636
  - Has no processed data on S3
MD639
  - Has no processed data on S3
MD175
  - Has no processed data on S3
UCSD001
  - Has no processed data on S3


In [60]:
stack = 'MD585'
processed_stack_info = (ProcessedStack()&dict(mouse=stack)).fetch( as_dict=True )[0]

bucket =  processed_stack_info['aws_bucket']
filename_list = processed_stack_info['processed_stack'].split('|')

print( 'The 100th slice of '+stack+' with the bucket appended to the front:\n' )
fp = bucket + '/' + filename_list[100]
print( fp )

The 100th slice of MD585 with the bucket appended to the front:

mousebrainatlas-data/CSHL_data_processed/MD585/MD585_prep2_lossless/MD585-IHC50-2015.07.16-18.02.54_MD585_2_0149_prep2_lossless.tif


# Easily Retrieving Images from S3
(Relies on `bucket` and `filename_list` from previous cell)

In [68]:
from subprocess import call

# `fp` includes the S3 bucket as shown belows
def download_from_s3(local_root, fp):
    s3_url = "s3://"+fp
    local_fp = "/mnt/c/Users/Alex/Documents/"+fp
    
    print('Downloading file to ' + local_fp)

    call(["aws",\
          "s3",\
          "cp",\
          s3_url,\
          local_fp])

In [73]:
stack = 'MD585'
# Contains all information on downloading processed files (post-preprocessing images)
processed_stack_info = (ProcessedStack()&dict(mouse=stack)).fetch( as_dict=True )[0]
bucket =  processed_stack_info['aws_bucket']
filename_list = processed_stack_info['processed_stack'].split('|')

# Using `download_from_s3` on my desktop to download first 10 files of MD585
#   and then display a downsampled version of it
local_root_alex_pc = "/mnt/c/Users/Alex/Documents/"

# Downloading sections 150-151
for i in range(150,152):
    fp = bucket+'/'+filename_list[i]
    download_from_s3( local_root_alex_pc, fp)

Downloading file to /mnt/c/Users/Alex/Documents/mousebrainatlas-data/CSHL_data_processed/MD585/MD585_prep2_lossless/MD585-N22-2015.07.18-07.10.40_MD585_1_0064_prep2_lossless.tif
Downloading file to /mnt/c/Users/Alex/Documents/mousebrainatlas-data/CSHL_data_processed/MD585/MD585_prep2_lossless/MD585-N22-2015.07.18-07.10.40_MD585_2_0065_prep2_lossless.tif
