In [1]:
import datajoint as dj
import numpy as np
from minio import Minio
import json
import yaml
import sys, os

sys.path.append('./lib')
from utilities import *
from initialization_of_db import *

In [2]:
# The path to the file-pointers file is assumed to be 'setup/credFiles.yaml'; otherwise pass it directly.
# NOTE(review): get_s3_client / get_dj_creds are not defined in this notebook; presumably they come
#   from the star imports in the first cell -- confirm which module provides them.
# Load AWS credentials
# `creds` needs the following fields: 'access_key', 'secret_access_key'
s3_client = get_s3_client()
# Load DataJoint credentials
# `dj_creds` needs the following fields: 'user', 'passwd'
dj_creds = get_dj_creds()

In [3]:
# Connect to the DataJoint server.
# NOTE(review): `dj_creds` loaded in the previous cell is not passed here; presumably
#   get_dj_creds() (or a saved DataJoint config) already set the host/user/password -- confirm.
dj.conn()

Connecting dbadmin@ucsd-demo-db.datajoint.io:3306


DataJoint connection (connected) dbadmin@ucsd-demo-db.datajoint.io:3306

In [4]:
# Define which schema you're using. `schema` is the decorator applied to every table class below.
schema = dj.schema('common_atlas_v3')
# Create Python classes in this namespace for tables that already exist in the schema
# but have no class defined here yet.
schema.spawn_missing_classes()

In [7]:
# Displays a graphical representation of the schema. NOT NECESSARY
# dj.ERD(schema)

In [20]:
# BaseImage.drop()
# BaseImageYoav.drop()

`common_atlas_v3`.`base_image` (0 tuples)
Proceed? [yes, No]: yes
Tables dropped.  Restart kernel.
`common_atlas_v3`.`base_image_yoav` (926 tuples)
Proceed? [yes, No]: yes
Tables dropped.  Restart kernel.


## Create Tables

In [5]:
@schema
class Mouse(dj.Manual):
    """Manually entered table with one row per lab mouse, keyed by `mouse`.

    Parent of Perfusion, Injection and Histology, which all reference it
    through `-> Mouse`.
    """
    # NOTE(review): the `mouse` column comment says "max 8 chars" but the declared type is
    #   char(18); the `weight` comment says "(int)" but the declared type is double.
    #   The declared types are what the database enforces -- confirm which was intended.
    # The trailing backslash on the `bred` line is a Python line continuation inside the
    # string, so the following '#purchased by vendor)' text becomes part of that column's comment.
    definition = """
    mouse : char(18)                   # Name for lab mouse, max 8 chars
    -------
    date_of_birth  : date              # (date) the mouse's date of birth
    sex            : enum('M','F') # (M/F) either 'M' for male, 'F' for female
    genotype       : enum('C57','U')     # (Lookup) indicating the genotype
    weight         : double            # (int) weight of the mouse in grams. -1 if unknown
    bred           : varchar(20)       # (Str) Vendor where the mouse was bred (bred in house, \
    #purchased by vendor)
    """
    
@schema
class Perfusion(dj.Manual): # Everyone should be doing the same type of perfusion
    """Manually entered perfusion / fixation record for a mouse.

    The primary key is inherited from Mouse alone, so there is at most one
    Perfusion row per mouse. `assessment` is the only attribute with a
    default (empty string); every other attribute must be supplied on insert.
    """
    # NOTE(review): the comments on `-> Mouse` and `injection_date` talk about an
    #   "injection" although this is the Perfusion table -- they look copy-pasted from
    #   Injection; confirm whether `injection_date` is really meant to be stored here.
    definition = """
    -> Mouse                        # One injection per mouse
    ----------
    injection_date  : date          # (date) what day was the injection performed

    post_fixation_condition_hours  : int   # (int) How long kept in fix (overnight)
    percent_sucrose_of_fix         : int   # (int) 10 or 20 percent for CSHL stuff

    date_frozen    : date     # (date) The date the brain was frozen
    date_sectioned : date     # (date) The date the brain was sectioned

    injection_type  : varchar(30)   # (Str) what kind of tracer/injection
    perfusion_lab   : varchar(30)   # (Str) Which lab perfused the mouse? This lab also kept the mouse
    
    assessment=''   : varchar(1000) # (Str) optional, qualitative assessment of injection
    """
    
@schema
class Injection(dj.Manual): # Viral injections
    """Manually entered record of one injection given to a mouse.

    The primary key is (mouse, injection_number), so a mouse may have several
    injections distinguished by `injection_number`. `assessment` defaults to
    the empty string.
    """
    # NOTE(review): the `-> Mouse` comment says "One injection per mouse", but the extra
    #   primary-key attribute `injection_number` allows several per mouse.
    # NOTE(review): the meaning and unit of `injection_length` are marked UNSURE by the author.
    definition = """
    -> Mouse                        # One injection per mouse
    injection_number : int          # iterative, how many injections have already been performed
    -------
    injection_date  : date          # (date) what day was the injection performed
    injection_type  : varchar(30)   # (Str) what kind of tracer/injection (flourescent?)
    injection_length: int           # UNSURE. Assumed: the length of time the virus was allowed to propagate
    
    assessment=''   : varchar(1000) # (Str) qualitative assessment of injection
    """
    
@schema
class Histology(dj.Manual):
    """Manually entered description of how a mouse brain was sectioned and stained.

    The primary key is inherited from Mouse alone, so there is at most one
    Histology row per mouse. Parent of Stack.
    """
    # NOTE(review): the `-> Mouse` comment says "per injection per mouse", but the table
    #   does not reference Injection; the key is the mouse only.
    # NOTE(review): the `orientation` enum declares 'horozontal' (sic). Inserts must use that
    #   exact spelling; 'horizontal' is not an allowed value. Fixing the spelling
    #   changes the schema, so check existing tables and callers before changing it.
    definition = """
    -> Mouse                        # One Histology per injection per mouse
    ------------
    region         : varchar(10)    # (Str) [UNSURE]
    thickness      : int            # (int) thickness of each slice in microns
    orientation    : enum('sagittal','coronal','horozontal')    # (Str) horizontal, sagittal, coronal
    counter_stain  : varchar(30)    # (Str) what stain was used on the brain (thionin or NeuroTrace)
    lab            : varchar(20)    # (Str) Which lab did the histology
    series         : enum('all','every other','unknown') # Every section OR alternate sections
    """
# AFTER sectioning, the reporter can either be visualized directly with fluorescence or needs to be
#  amplified with immunostaining.
# Hannah, with the Axio Scanner, will manually select the level of exposure to reduce saturation while
#  making sure the fluorescent molecules are visible.
#    - add: CSHL_did_their_own_blackbox_preprocessing : True or False
# Assume calibration

@schema 
class Stack(dj.Manual):
    """Manually entered summary of the image stack produced from one Histology.

    The primary key is inherited from Histology (and therefore from Mouse), so
    there is at most one Stack per mouse. `stack_name` is not part of the
    primary key but carries a unique index, so no two stacks may share a name.
    Parent of Slice, whose make() looks stacks up by this key.
    """
    # NOTE(review): `stack_name` is varchar(10); longer names will not fit.
    definition = """
    -> Histology            # One Histology per injection per mouse
    ------------
    stack_name       : varchar(10)   # (Str) unique designation for each mouse
    num_slices       : int           # (int) total number of histology slices
    num_valid_slices : int           # (int) total number of useable histology slices
    channels         : int           # (int) number of channels for each slice
    human_annotated  : boolean       # (bool) does this stack have human annotations

    planar_resolution_um : double    # (double) 0.325 for AxioScanner, 0.46 from CSHL
    section_thickness_um : double    # (double) typically 20um
    
    unique index (stack_name)   # Adds constraint, stack name must be unique accross brains
    """


In [6]:
@schema
class Slice(dj.Imported):
    """One row per section (slice) of a Stack, with its raw and processed S3 paths.

    Rows are created by make(), which populate() calls once for every Stack
    key that has no Slice rows yet.
    """
    definition = """
    -> Stack
    slice_num       : int           # (int) the unique index of the brain slice. Thickness found in Histology table
    ---
    slice_name      : varchar(100)  # (str) the name of the slice. Naming scheme may vary from lab to lab
    valid           : boolean       # (bool) if false, the slice does not exist
    raw_s3_fp       : varchar(200)  # (str)
    processed_s3_fp : varchar(200)  # (str)
    """
    def make(self, key):
        """Insert every slice listed in the stack's sorted-filenames file.

        Parameters
        ----------
        key : dict
            Primary key of one Stack row, as supplied by populate().

        For each entry returned by get_sorted_filenames() a row is built with:
          * slice_num / slice_name taken from the sorted-filenames mapping,
          * valid = False when the name is the literal 'Placeholder', else True,
          * processed_s3_fp / raw_s3_fp = the first listed S3 path that contains
            the slice name as a substring, or '' when none does.

        All rows of a stack are inserted in a single call, so a failure cannot
        leave the stack with only some of its slices (which a later populate()
        would treat as already done). Errors while reading the sorted filenames
        or inserting are printed and the stack is reported as skipped; they are
        deliberately not re-raised so that populate() continues with the
        remaining stacks.
        """
        stack_name = (Stack() & key).fetch(as_dict=True)[0]["stack_name"]

        # Listings of the files available on S3 for this stack. These calls sit
        # outside the try block on purpose: a failure here propagates to the caller.
        processed_files = get_processed_files(s3_client,
                                              stack=stack_name,
                                              prep_id="2",
                                              version="",
                                              resol="raw",
                                              returntype="list")
        raw_files = get_raw_files(s3_client,
                                  stack=stack_name,
                                  returntype="list")

        try:
            # Load the sorted_filenames.txt into a dictionary {slice_num: slice_name}
            sorted_fns, _, _ = get_sorted_filenames(s3_client,
                                                    stack_name=stack_name,
                                                    return_type='dictionary')

            rows = []
            for slice_num in sorted_fns:
                slice_name = str(sorted_fns[slice_num])
                # A fresh dict per slice; the key handed in by populate() is left untouched.
                rows.append(dict(
                    key,
                    slice_num=slice_num,
                    slice_name=slice_name,
                    valid=(slice_name != 'Placeholder'),
                    # First path containing the slice name, '' when there is none.
                    processed_s3_fp=next((fp for fp in processed_files if slice_name in fp), ''),
                    raw_s3_fp=next((fp for fp in raw_files if slice_name in fp), ''),
                ))

            if rows:
                self.insert(rows)

        except Exception as e:
            # Best effort: report and move on, but do not claim the stack finished.
            print(e)
            print(stack_name+' skipped \n')
            return

        print(stack_name+' finished \n')
        

In [9]:
# Mouse.drop()
# Slice.drop()

# Fill manual tables from S3

In [7]:
# Values shared by every brain inserted below.
UCSD_BRAIN_NAME = 'UCSD001'        # the only brain that gets the UCSD-specific values
SECTION_THICKNESS_UM = 20          # used for Histology.thickness and Stack.section_thickness_um
UCSD_PLANAR_RESOLUTION_UM = 0.325  # per the Stack definition: 0.325 for AxioScanner
DEFAULT_PLANAR_RESOLUTION_UM = 0.46  # per the Stack definition: 0.46 from CSHL
UNKNOWN_SLICE_COUNT = -1           # stored when sorted_filenames.txt cannot be read

for brain_name in brain_names_list:

    print("\nAdding "+brain_name+' to the database')

    is_ucsd = (brain_name == UCSD_BRAIN_NAME)

    # Layout of a brain_names_dic entry, as used below:
    #   [0] counter_stain, [1] lab, [2] human_annotated, [3] orientation
    # Looked up before any insert so an unknown brain fails before anything is written.
    brain_info = brain_names_dic[brain_name]
    counter_stain = brain_info[0]
    lab = brain_info[1]
    human_annotated = brain_info[2]
    orientation = brain_info[3]

    # Fill in MOUSE info. Only the date of birth differs between UCSD and non-UCSD mice.
    # NOTE(review): both dates, weight=-1 and bred='Unknown' look like placeholders -- confirm.
    Mouse.insert1(dict(mouse=brain_name,
                       date_of_birth='2020-01-01' if is_ucsd else '2017-12-05',
                       sex='M',
                       genotype='C57',
                       weight=-1,
                       bred='Unknown'),
                  skip_duplicates=True)

    # Fill in HISTOLOGY info. Attributes are named explicitly so the insert does not
    # depend on the column order of the table definition.
    Histology.insert1(dict(mouse=brain_name,
                           region='Unknown',
                           thickness=SECTION_THICKNESS_UM,
                           orientation=orientation,
                           counter_stain=counter_stain,
                           lab=lab,
                           series='unknown'),
                      skip_duplicates=True)

    # Try to load STACK_sorted_filenames.txt from AWS S3, on failure the default values are filled
    try:
        _unused, total_slices, valid_slices = get_sorted_filenames(s3_client,
                                                                   stack_name=brain_name,
                                                                   return_type="string")
    except Exception as e:
        total_slices = UNKNOWN_SLICE_COUNT
        valid_slices = UNKNOWN_SLICE_COUNT
        print('No sorted_filenames.txt exists for '+brain_name)
        print(e)

    # Fill in STACK info. Only the planar resolution differs between UCSD and non-UCSD brains.
    # The channel count is derived from the counter stain: one channel per '/'-separated stain.
    Stack.insert1(dict(mouse=brain_name,
                       stack_name=brain_name,
                       num_slices=total_slices,
                       num_valid_slices=valid_slices,
                       channels=counter_stain.count('/') + 1,
                       human_annotated=human_annotated,
                       planar_resolution_um=(UCSD_PLANAR_RESOLUTION_UM if is_ucsd
                                             else DEFAULT_PLANAR_RESOLUTION_UM),
                       section_thickness_um=SECTION_THICKNESS_UM),
                  skip_duplicates=True)


Adding MD585 to the database
447

Adding MD589 to the database
448

Adding MD590 to the database
429

Adding MD591 to the database
456

Adding MD592 to the database
455

Adding MD593 to the database
454

Adding MD594 to the database
434

Adding MD595 to the database
445

Adding MD598 to the database
438

Adding MD599 to the database
449

Adding MD602 to the database
445

Adding MD603 to the database
438

Adding CHATM2 to the database
328

Adding CHATM3 to the database
413

Adding CSHL2 to the database
No sorted_filenames.txt exists for CSHL2
NoSuchKey: message: The specified key does not exist.
-1

Adding MD658 to the database
451

Adding MD661 to the database
439

Adding MD662 to the database
439

Adding MD635 to the database
445

Adding MD636 to the database
No sorted_filenames.txt exists for MD636
NoSuchKey: message: The specified key does not exist.
-1

Adding MD639 to the database
No sorted_filenames.txt exists for MD639
NoSuchKey: message: The specified key does not exist.
-1

A

In [8]:
# Run Slice.make() for every Stack that does not yet have Slice rows.
# make() prints one status line per stack (see the output below).
Slice.populate()

CHATM2 finished 

CHATM3 finished 

NoSuchKey: message: The specified key does not exist.
CSHL2 finished 

NoSuchKey: message: The specified key does not exist.
MD175 finished 

MD585 finished 

MD589 finished 

MD590 finished 

MD591 finished 

MD592 finished 

MD593 finished 

MD594 finished 

MD595 finished 

MD598 finished 

MD599 finished 

MD602 finished 

MD603 finished 

MD635 finished 

NoSuchKey: message: The specified key does not exist.
MD636 finished 

NoSuchKey: message: The specified key does not exist.
MD639 finished 

MD642 finished 

MD652 finished 

MD653 finished 

MD657 finished 

MD658 finished 

MD661 finished 

MD662 finished 

UCSD001 finished 

