## Ingest_lightsheet_data
The idea here is to use the Google Sheets API to ingest the information stored in the core facility's clearing Google Sheets into the braincogs00 database. We will make a new database table for each clearing protocol spreadsheet. These tables will be linked to the Experiments() table in the database by the experiment_id primary key.

In [1]:
import pickle
import os.path, sys
from datetime import datetime
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pandas as pd
import numpy as np
import datajoint as dj

## Google Spreadsheet API setup

We will be using the Google Sheets API: https://developers.google.com/sheets/api/quickstart/python

For details on how to set this up on your machine, see this notebook: [Microscope_management.ipynb](Microscope_management.ipynb).

In [2]:
# ID of the Google spreadsheet that every retrieve_google_sheet() call in this notebook reads from.
SPREADSHEET_ID = '15NmKBIPfSSpjTFoHS6K2jREsbMZHueyQ5psub-bctjI' # The copy of the clearing spreadsheet, where I made some formatting changes to make it more consistent.
# Set the scope to be read only since we are not adding anything to the google sheet, just reading it
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']

# Client secrets file handed to the OAuth flow inside retrieve_google_sheet()
secrets_file = 'credentials.json' # Has to be called "credentials.json"  
# Fail early if the secrets file is not present in the current working directory
assert os.path.exists(secrets_file)

In [3]:
def retrieve_google_sheet(spreadsheet_id,range_query):
    """
    ---PURPOSE---
    Fetch the values of a range of cells from a google sheet.
    ---INPUT---
    spreadsheet_id    ID of the google spreadsheet to read
    range_query       Range to fetch, in the form Sheet_name!cell_start:cell_end
    ---OUTPUT---
    values            The 'values' entry of the API response,
                      or an empty list if the response has no 'values' entry
    ---NOTES---
    Credentials are cached in 'token.pickle' in the current working directory.
    When no usable cached credentials exist, they are refreshed if possible or
    else obtained through the local-server login flow using secrets_file and
    SCOPES; in both cases the cache file is rewritten afterwards.
    """
    token_path = 'token.pickle'

    # Start from the cached credentials, if a previous run saved any
    creds = None
    if os.path.exists(token_path):
        with open(token_path, 'rb') as cached_token:
            creds = pickle.load(cached_token)

    needs_new_credentials = not creds or not creds.valid
    if needs_new_credentials:
        can_refresh = creds and creds.expired and creds.refresh_token
        if can_refresh:
            creds.refresh(Request())
        else:
            # Nothing to refresh: let the user log in
            login_flow = InstalledAppFlow.from_client_secrets_file(secrets_file, SCOPES)
            creds = login_flow.run_local_server(port=0)
        # Cache the new credentials for the next run
        with open(token_path, 'wb') as cached_token:
            pickle.dump(creds, cached_token)

    # Query the Sheets API for the requested range
    sheets_service = build('sheets', 'v4', credentials=creds)
    values_request = sheets_service.spreadsheets().values().get(
        spreadsheetId=spreadsheet_id, range=range_query)
    response = values_request.execute()
    return response.get('values', [])

## Connect to the db

In [4]:
# Point DataJoint at the database host, then open the connection.
# When run interactively dj.conn() prompts for a username and password (see the cell output below).
dj.config['database.host'] = 'datajoint00.pni.princeton.edu'
dj.conn()

Please enter DataJoint username: ahoag
Please enter DataJoint password: ········
Connecting ahoag@datajoint00.pni.princeton.edu:3306


DataJoint connection (connected) ahoag@datajoint00.pni.princeton.edu:3306

In [5]:
# Need to port forward 3306 from jtb3-dev@pni.princeton.edu first (set this up externally on the machine)
# Virtual module for the 'ahoag_lightsheet_demo' schema; its tables are accessed
# below as attributes, e.g. db_lightsheet.Request() and db_lightsheet.Request.Sample().
db_lightsheet = dj.create_virtual_module('ahoag_lightsheet_demo','ahoag_lightsheet_demo')

## Read in Form Responses sheet and the various clearing sheets from google sheets

In [6]:
# Read the request-form responses into a DataFrame.
# We are in readonly (see SCOPES) so we cannot destroy it by accident.
form_responses_range_query = 'Form Responses!A4:T54' # Syntax is Sheet_name!cell_start:cell_end
form_responses_values = retrieve_google_sheet(spreadsheet_id=SPREADSHEET_ID,range_query=form_responses_range_query)
# No header row is assigned here: the columns keep their default integer labels
# and ingest_request_form() reads each row by position.
df_form_responses = pd.DataFrame(form_responses_values)

In [7]:
# Load each clearing-protocol sheet into its own DataFrame.
# Every section follows the same pattern: fetch a cell range, use the first
# fetched row as the column names, then drop the leading non-data rows.
# NOTE(review): the number of leading rows dropped differs between sheets (1, 2 or 3) — confirm each against the sheet layout.

# iDISCO+
idisco_plus_range_query = 'iDISCO+!A3:BF20' # Syntax is Sheet_name!cell_start:cell_end
idisco_plus_values = retrieve_google_sheet(spreadsheet_id=SPREADSHEET_ID,range_query=idisco_plus_range_query)
df_idisco_plus = pd.DataFrame(idisco_plus_values)
# Assign column names based on first row, then remove top two rows as first is header and second is an example row
new_header = df_idisco_plus.iloc[0]
df_idisco_plus = df_idisco_plus[2:]
df_idisco_plus.columns = new_header

# iDISCO abbreviated clearing
idisco_abbrev_range_query = 'iDISCO_NOF!A2:T27' # Syntax is Sheet_name!cell_start:cell_end
idisco_abbrev_values = retrieve_google_sheet(spreadsheet_id=SPREADSHEET_ID,range_query=idisco_abbrev_range_query)
df_idisco_abbrev = pd.DataFrame(idisco_abbrev_values)
# Assign column names based on first row, then remove the top three rows
# NOTE(review): one more row is dropped here than the header + example row dropped for iDISCO+ — confirm what the third row is
new_header = df_idisco_abbrev.iloc[0]
df_idisco_abbrev = df_idisco_abbrev[3:]
df_idisco_abbrev.columns = new_header

# iDISCO abbreviated clearing (rat)
idisco_abbrev_rat_range_query = 'iDISCO_NOF_Rat!A2:X12' # Syntax is Sheet_name!cell_start:cell_end
idisco_abbrev_rat_values = retrieve_google_sheet(spreadsheet_id=SPREADSHEET_ID,range_query=idisco_abbrev_rat_range_query)
df_idisco_abbrev_rat = pd.DataFrame(idisco_abbrev_rat_values)
# Assign column names based on first row, then remove top two rows as first is header and second is an example row
new_header = df_idisco_abbrev_rat.iloc[0]
df_idisco_abbrev_rat = df_idisco_abbrev_rat[2:]
df_idisco_abbrev_rat.columns = new_header

# uDISCO 
udisco_range_query = 'uDISCO!A3:O8' # Syntax is Sheet_name!cell_start:cell_end
udisco_values = retrieve_google_sheet(spreadsheet_id=SPREADSHEET_ID,range_query=udisco_range_query)
df_udisco = pd.DataFrame(udisco_values)
# Assign column names based on first row, then remove top two rows as first is header and second is an example row
new_header = df_udisco.iloc[0]
df_udisco = df_udisco[2:]
df_udisco.columns = new_header

# iDISCO+_EdU 
idisco_edu_range_query = 'iDISCO+_EdU!A3:AV6' # Syntax is Sheet_name!cell_start:cell_end
idisco_edu_values = retrieve_google_sheet(spreadsheet_id=SPREADSHEET_ID,range_query=idisco_edu_range_query)
df_idisco_edu = pd.DataFrame(idisco_edu_values)
# Assign column names based on first row, then remove the top three rows
# NOTE(review): the fetched range A3:AV6 has at most four rows, so at most one data row remains — confirm
new_header = df_idisco_edu.iloc[0]
df_idisco_edu = df_idisco_edu[3:]
df_idisco_edu.columns = new_header

# iDISCO+_rat 
idisco_rat_range_query = 'iDISCO+_Rat!A3:BH6' # Syntax is Sheet_name!cell_start:cell_end
idisco_rat_values = retrieve_google_sheet(spreadsheet_id=SPREADSHEET_ID,range_query=idisco_rat_range_query)
df_idisco_rat = pd.DataFrame(idisco_rat_values)
# Assign column names based on first row, then remove only that header row
# NOTE(review): unlike the other sheets no example row is dropped here — confirm this sheet has none
new_header = df_idisco_rat.iloc[0]
df_idisco_rat = df_idisco_rat[1:]
df_idisco_rat.columns = new_header


## Do the ingestion

In [8]:
# some setup
# Maps the clearing protocol name as written in the request form (column index 2 of each form row)
# to the protocol name stored as 'clearing_protocol' in the database rows.
clearing_protocol_dict = {'iDISCO+ (immunostaining)':'iDISCO+_immuno',
                         'iDISCO for non-oxidizable fluorophores (abbreviated clearing)':'iDISCO abbreviated clearing',
                         'uDISCO':'uDISCO',
                         'Wang Lab iDISCO Protocol-EdU':'iDISCO_EdU'}
# Maps each database protocol name (including the two rat variants that ingest_request_form()
# substitutes when the species is rat) to the link stored as 'link_to_clearing_spreadsheet'.
# NOTE(review): these links use a different spreadsheet ID than SPREADSHEET_ID — presumably the original sheet rather than the copy; confirm.
clearing_protocol_link_dict = {'iDISCO+_immuno':'https://docs.google.com/spreadsheets/d/1A83HVyy1bEhctqArwt4EiT637M8wBxTFodobbt1jrXI/edit#gid=0',
                              'iDISCO abbreviated clearing':'https://docs.google.com/spreadsheets/d/1A83HVyy1bEhctqArwt4EiT637M8wBxTFodobbt1jrXI/edit#gid=895577002',
                              'iDISCO abbreviated clearing (rat)': 'https://docs.google.com/spreadsheets/d/1A83HVyy1bEhctqArwt4EiT637M8wBxTFodobbt1jrXI/edit#gid=782871049',
                              'uDISCO':'https://docs.google.com/spreadsheets/d/1A83HVyy1bEhctqArwt4EiT637M8wBxTFodobbt1jrXI/edit#gid=1195842433',
                              'iDISCO+_rat':'https://docs.google.com/spreadsheets/d/1A83HVyy1bEhctqArwt4EiT637M8wBxTFodobbt1jrXI/edit#gid=1114714575',
                              'iDISCO_EdU':'https://docs.google.com/spreadsheets/d/1A83HVyy1bEhctqArwt4EiT637M8wBxTFodobbt1jrXI/edit#gid=746482133'}
def request_fxn(x,length):
    """ Normalize a string so it can be compared against a request name.

    Takes at most the first 64 characters of x, replaces every space
    with an underscore and returns the first `length` characters of
    the result. Meant to be applied element-wise to a pandas column."""
    underscored = x[:64].replace(' ','_')
    return underscored[:length]

In [9]:
def ingest_request_form(insert=False):
    """
    ---PURPOSE---
    Convert every row of df_form_responses into rows for the User(), Request(),
    Request.ClearingBatch(), Request.Sample(), Request.ImagingRequest(),
    Request.ImagingResolutionRequest() and Request.ProcessingRequest() tables
    of db_lightsheet, and optionally insert them.
    ---INPUT---
    insert    If True, insert the assembled rows into the database, skipping
              duplicates. If False (default), only assemble the rows, which
              serves as a dry run that checks every form row can be parsed.
    ---NOTES---
    Form columns are read by position:
        row[0]   submission timestamp, 'm/d/Y' date and time separated by a space
        row[1]   species
        row[2]   clearing protocol, a key of clearing_protocol_dict
        row[3]   request name (truncated to 64 characters, spaces -> underscores)
        row[4]   number of samples (falls back to 1 if not an integer)
        row[5]   description (truncated to 250 characters)
        row[9]   requested imaging resolution(s)
        row[11]  notes for the imager
        row[12]  integer flag, nonzero if the samples were imaged
        row[13]  location of the processed data
        row[-4]  email, row[-3] perfusion date, row[-2] handoff date, row[-1] lab name
    Blank rows are skipped, as are rows whose email is neither a princeton.edu
    address nor one of the two known personal addresses handled below.
    The (username, request name) pair of every ingested row is printed.
    """
    user_insert_list = [{'username':'ahoag','princeton_email':'ahoag@princeton.edu'},
                        {'username':'zmd','princeton_email':'zmd@princeton.edu'},
                        {'username':'jduva','princeton_email':'jduva@princeton.edu'},
                        {'username':'kellyms','princeton_email':'kellyms@princeton.edu'}] # admins
    request_insert_list = []
    clearing_batch_insert_list = []
    sample_insert_list = []
    imaging_request_insert_list = []
    imaging_resolution_request_insert_list = []
    processing_request_insert_list = []
    for row in df_form_responses.values.tolist():
        if not any(row):
            print("blank row, skipping.")
            # skip blank rows
            continue
        request_insert_dict = {}
        # User() table
        # handle email and user
        email = row[-4]
        requested_by = ''
        if not email.endswith('princeton.edu'):
            # Known personal addresses are mapped onto princeton accounts;
            # any other non-princeton address is skipped.
            if email == 'zahra.dhanerawala@gmail.com':
                email = 'marlies.oostland@princeton.edu'
                requested_by = 'zmd'
            elif email == 'emilyjanedennis@gmail.com':
                email = 'ejdennis@princeton.edu'
            else:
                continue
        email = email.lower()
        username = email.split('@')[0]
        user_insert_dict = {'username':username,'princeton_email':email}
        user_insert_list.append(user_insert_dict)
        request_insert_dict['username'] = username
        # Request() table
        # handle request_name
        request_name = row[3][0:64].strip()
        print(username,request_name)
        request_name = '_'.join(request_name.split(' '))
        request_insert_dict['request_name'] = request_name
        # requested_by
        if requested_by != 'zmd':
            requested_by = username
        request_insert_dict['requested_by'] = requested_by
        # date_submitted and time_submitted
        date_input,time_input = row[0].split(' ')
        date_submitted = datetime.strptime(date_input,'%m/%d/%Y').strftime('%Y-%m-%d')
        time_submitted = time_input
        request_insert_dict['date_submitted'] = date_submitted
        request_insert_dict['time_submitted'] = time_submitted
        # labname
        labname = row[-1]
        if not labname:
            labname = 'not provided'
        request_insert_dict['labname'] = labname
        # subject_fullname
        request_insert_dict['subject_fullname'] = ''
        # correspondence email
        request_insert_dict['correspondence_email'] = email
        # description
        description = row[5][0:250]
        request_insert_dict['description'] = description
        # species
        species = row[1].lower()
        request_insert_dict['species'] = species
        # number_of_samples
        number_of_samples = row[4]
        try:
            number_of_samples = int(number_of_samples)
        except (TypeError, ValueError):
            # Missing (None) or free-text entries fall back to a single sample
            number_of_samples = 1
        request_insert_dict['number_of_samples'] = number_of_samples
        # flag this request as an archival request
        request_insert_dict['is_archival'] = True
        request_insert_list.append(request_insert_dict)

        # ClearingBatch() table
        clearing_batch_insert_dict = {
            'username':username,
            'request_name':request_name}
        # clearing protocol
        clearing_protocol_sheet = row[2]
        clearing_protocol = clearing_protocol_dict[clearing_protocol_sheet]
        if species == 'rat':
            if clearing_protocol == 'iDISCO abbreviated clearing':
                clearing_protocol = 'iDISCO abbreviated clearing (rat)'
            elif clearing_protocol == 'iDISCO+_immuno':
                clearing_protocol = 'iDISCO+_rat'

        clearing_batch_insert_dict['clearing_protocol'] = clearing_protocol

        # Default to no antibodies for every row. Only the iDISCO+_immuno branch
        # looks them up; without this reset the other protocols would reuse the
        # antibodies of the previously processed row, or raise UnboundLocalError
        # if such a row came first.
        antibody1,antibody2 = '',''
        if clearing_protocol == 'iDISCO+_immuno':
            df_clearing = df_idisco_plus
            df_clearing = df_clearing.rename(columns={'Sample Name/#':'Sample Name'})
            # Match the clearing-sheet row whose normalized sample name starts with this request name
            mask = df_clearing['Sample Name'].apply(request_fxn,args=(len(request_name),)) == request_name
            assert len(df_clearing[mask]) == 1
            df_clearing_this_request = df_clearing[mask]
            antibody1,antibody2 = df_clearing_this_request.iloc[0][['Primary antibody+conc','Secondary antibody+conc']].to_numpy()
        elif clearing_protocol == 'iDISCO abbreviated clearing':
            df_clearing = df_idisco_abbrev
        elif clearing_protocol == 'iDISCO abbreviated clearing (rat)':
            df_clearing = df_idisco_abbrev_rat
        elif clearing_protocol == 'iDISCO+_rat':
            df_clearing = df_idisco_rat
        elif clearing_protocol == 'uDISCO':
            df_clearing = df_udisco
        elif clearing_protocol == 'iDISCO_EdU':
            df_clearing = df_idisco_edu
        else:
            sys.exit(f'Clearing protocol {clearing_protocol} is not accepted')
        # link to clearing sheet
        clearing_link = clearing_protocol_link_dict[clearing_protocol]
        clearing_batch_insert_dict['link_to_clearing_spreadsheet'] = clearing_link
        clearing_batch_insert_dict['antibody1'] = antibody1
        clearing_batch_insert_dict['antibody2'] = antibody2

        # clearing batch number
        clearing_batch_number = 1 # always only 1 batch if submitted from the google form
        clearing_batch_insert_dict['clearing_batch_number'] = clearing_batch_number
        # clearing progress
        clearing_progress = 'complete' # if it is in the spreadsheet
        clearing_batch_insert_dict['clearing_progress'] = clearing_progress
        # number in batch
        clearing_batch_insert_dict['number_in_batch'] = number_of_samples
        # perfusion date
        perfusion_date_input = row[-3]
        if perfusion_date_input: # can be NULL, so just don't insert if it is empty in sheet
            perfusion_date_submitted = datetime.strptime(perfusion_date_input,'%m/%d/%Y').strftime('%Y-%m-%d')
            clearing_batch_insert_dict['perfusion_date_submitted'] = perfusion_date_submitted
        # handoff date
        handoff_date_input = row[-2]
        if handoff_date_input: # can be NULL, so just don't insert if it is empty in sheet
            handoff_date_submitted = datetime.strptime(handoff_date_input,'%m/%d/%Y').strftime('%Y-%m-%d')
            clearing_batch_insert_dict['handoff_date_submitted'] = handoff_date_submitted
        # clearer - not currently possible to tell so leave blank - OK since NULLable column

        # notes for clearer - there was not a space for users to submit this so leaving blank.
        notes_for_clearer = ''
        clearing_batch_insert_dict['notes_for_clearer'] = notes_for_clearer

        clearing_batch_insert_list.append(clearing_batch_insert_dict)

        # Sample(), ImagingRequest(), ImagingResolutionRequest(), ProcessingRequest() tables
        notes_for_imager = row[11]
        was_imaged = int(row[12])
        processed_data_location_bucket = row[13]
        if processed_data_location_bucket:
            notes_from_imaging = f'Processed files are here: {processed_data_location_bucket}'
        else:
            notes_from_imaging = ''
        # The *_master_dict templates hold the values shared by all samples of
        # this request; each sample gets a copy with its own sample_name.
        sample_master_dict = {
            'username':username,'request_name':request_name,
            'clearing_protocol':clearing_protocol,
            'antibody1':antibody1,'antibody2':antibody2,
            'clearing_batch_number':clearing_batch_number,
        }

        imaging_request_master_dict = {
            'username':username,'request_name':request_name,
            'imaging_request_number':1,
            'imaging_progress':'complete',
            'imaging_request_date_submitted':date_submitted,
            'imaging_request_time_submitted':time_submitted
        }
        imaging_resolution_request_master_dict = {
            'username':username,'request_name':request_name,
            'imaging_request_number':1,
            'notes_for_imager':notes_for_imager,
            'notes_from_imaging':notes_from_imaging
        }
        processing_request_master_dict = {
            'username':username,'request_name':request_name,
            'imaging_request_number':1,
            'processing_request_number':1,
            'processor':'zmd',
            'processing_request_date_submitted':date_submitted,
            'processing_request_time_submitted':time_submitted,
            'processing_progress':'complete'
        }

        # Resolutions are detected by substring in the free-text form entry
        imaging_resolution_input_str = row[9]
        image_resolutions = [resolution for resolution in ('1.3x','1.1x','4x','2x')
                             if resolution in imaging_resolution_input_str]
        assert len(image_resolutions) > 0

        for ii in range(number_of_samples):
            # Samples are named sample-001, sample-002, ...
            sample_name = f'sample-{ii+1:03d}'
            # Sample() table
            sample_insert_dict = sample_master_dict.copy()
            sample_insert_dict['sample_name'] = sample_name
            sample_insert_list.append(sample_insert_dict)
            if was_imaged:
                # ImagingRequest() table
                imaging_request_insert_dict = imaging_request_master_dict.copy()
                imaging_request_insert_dict['sample_name'] = sample_name
                imaging_request_insert_list.append(imaging_request_insert_dict)
                # ProcessingRequest() table
                processing_request_insert_dict = processing_request_master_dict.copy()
                processing_request_insert_dict['sample_name'] = sample_name
                processing_request_insert_list.append(processing_request_insert_dict)
                # ImagingResolutionRequest() table
                for image_resolution in image_resolutions:
                    imaging_resolution_request_insert_dict = imaging_resolution_request_master_dict.copy()
                    imaging_resolution_request_insert_dict['sample_name'] = sample_name
                    imaging_resolution_request_insert_dict['image_resolution'] = image_resolution
                    imaging_resolution_request_insert_list.append(imaging_resolution_request_insert_dict)

    if insert:
        # Insert parents before the tables that reference them
        db_lightsheet.User().insert(user_insert_list,skip_duplicates=True)
        db_lightsheet.Request().insert(request_insert_list,skip_duplicates=True)
        db_lightsheet.Request.ClearingBatch().insert(clearing_batch_insert_list,skip_duplicates=True)
        db_lightsheet.Request.Sample().insert(sample_insert_list,skip_duplicates=True)
        db_lightsheet.Request.ImagingRequest().insert(imaging_request_insert_list,skip_duplicates=True)
        db_lightsheet.Request.ImagingResolutionRequest().insert(
            imaging_resolution_request_insert_list,skip_duplicates=True)
        db_lightsheet.Request.ProcessingRequest().insert(processing_request_insert_list,skip_duplicates=True)
        
# Run the ingestion and write the assembled rows to the database.
# The (username, request name) lines in the output below come from the print() inside the row loop.
ingest_request_form(insert=True)

jverpeut DREADDymaze
blank row, skipping.
willmore 20190304_LW_m340
jverpeut an1-31
apv2 20190313_IBL_DiI_1
marlies.oostland 20181217_marlies, 20181013_marlies_M12
jverpeut EAAT4- 14 samples
mioffe a1_20190327 a2_20190327 a3_20190327
jverpeut LindsayCrusI_ymaze_cfos (25 samples)
jverpeut AdultChronicD_MLI_Lawrence (1-12 each batch)
pbibawi 20190405_pb_X015, 20190405_pb_X045, 20190405_pb_A233,20190405_pb
willmore 20190510_lw_059
marlies.oostland not sure - check when samples are given to us?
ejdennis 201905_atlas00x where x=1:n
mioffe five samples, h1 and h2, r1,r2,r3
soline Mouse_058
ejdennis 20190606_atlas00x where x=11-20
rmallarino 171
marlies.oostland Marlies_190614_M21
jverpeut cruslat_ymaze_TiffanyP_6.20.19 (12 samples)
jverpeut cruslat_ymaze_TiffanyP_6.20.19 (13 samples)
marlies.oostland Marlies_190618_M30
jverpeut opto_ai27D_lobVI
soline 190725 _Target_Practice
ejdennis W118, K292, K293, K295, K301, K302, K303, K304, K305, K306, K307
ejdennis 10-13 brains, names TBD
willmore 20

In [None]:
# Summarize clearing and imaging progress for a single request.
username = 'ejdennis'
request_name = 'X050'

# Attributes carried through unchanged by the aggr()/proj() steps below
replicated_args = dict(number_of_samples='number_of_samples',description='description',
        species='species',datetime_submitted='datetime_submitted')

# Restrictions selecting the rows that belong to this request
request_name_restriction = f'request_name="{request_name}"'
username_restriction = f'username="{username}"'
request_contents = db_lightsheet.Request() & request_name_restriction & username_restriction
clearing_batch_contents = db_lightsheet.Request.ClearingBatch() & \
    request_name_restriction & username_restriction
imaging_request_contents = db_lightsheet.Request.ImagingRequest() & \
    request_name_restriction & username_restriction

# Step 1: one row per (username, request_name), with the number of samples in
# batches whose clearing is complete, reported as "n_cleared/number_of_samples"
clearing_aggregate_attrs = dict(
    number_of_samples='number_of_samples',
    number_in_batch='number_in_batch',
    description='description',
    species='species',
    datetime_submitted='TIMESTAMP(date_submitted,time_submitted)',
    n_cleared='CONVERT(SUM(IF(clearing_progress="complete",number_in_batch,0)),char)')
clearing_aggregate = dj.U('username','request_name').aggr(
    request_contents * clearing_batch_contents,
    **clearing_aggregate_attrs)
sample_joined_contents = clearing_aggregate.proj(
    **replicated_args,
    fraction_cleared='CONCAT(n_cleared,"/",CONVERT(number_of_samples,char))')

# Step 2: add the fraction of imaging requests that are complete.
# keep_all_rows=True is passed so that requests without imaging requests are
# kept; for those n_imaged is NULL and the fraction is reported as "0/0".
imaging_aggregate = sample_joined_contents.aggr(
    imaging_request_contents,
    **replicated_args,
    fraction_cleared='fraction_cleared',
    n_imaged='CONVERT(SUM(imaging_progress="complete"),char)',
    total_imaging_requests='CONVERT(COUNT(*),char)',
    keep_all_rows=True)
imaging_joined_contents = imaging_aggregate.proj(
    **replicated_args,
    fraction_cleared='fraction_cleared',
    fraction_imaged='IF(n_imaged is NULL,"0/0",CONCAT(n_imaged,"/",total_imaging_requests))')
imaging_joined_contents

In [None]:
# Scratch query: aggregate the joined sample/imaging rows per (username, request_name).
# NOTE(review): no aggregate attributes are requested here, so this presumably returns only the grouping keys — confirm.
(dj.U('username','request_name')*sample_joined_contents).aggr(
sample_joined_contents * imaging_request_contents)

In [None]:
# Display the current sample_joined_contents query for inspection.
sample_joined_contents

In [None]:
# Scratch query: count imaging requests over the joined sample/imaging rows.
# NOTE(review): unlike every other aggr() call in this notebook, no table to
# aggregate over is passed as the first argument — confirm this cell runs as written.
(sample_joined_contents * imaging_request_contents).aggr(
    fraction_cleared='fraction_cleared',
    n_imaged='CONVERT(SUM(imaging_progress="complete"),char)',
    total_imaging_requests='CONVERT(COUNT(*),char)',
    keep_all_rows=True)

In [None]:
# NOTE(review): combined_contents is not assigned in any cell visible in this notebook,
# so this cell presumably depends on a deleted cell — confirm or remove.
combined_contents

In [None]:
# Per-sample summary of imaging and processing requests for a single request.
# Attributes carried through unchanged by the aggr()/proj() steps below
replicated_args = dict(number_of_samples='number_of_samples',description='description',
        species='species')
username='ejdennis'
request_name='X050'
# username='soline'
# request_name='Mouse092,_MouseK01'
request_contents = db_lightsheet.Request() & f'request_name="{request_name}"' & \
            f'username="{username}"'
# Keep only the attributes needed below and merge the date and time into one timestamp
request_contents = request_contents.proj('description','species','number_of_samples',
    datetime_submitted='TIMESTAMP(date_submitted,time_submitted)')
sample_contents = db_lightsheet.Request.Sample() & f'request_name="{request_name}"' & f'username="{username}"'
clearing_batch_contents = db_lightsheet.Request.ClearingBatch() & \
f'request_name="{request_name}"' & f'username="{username}"'
imaging_request_contents = db_lightsheet.Request.ImagingRequest() & \
 f'request_name="{request_name}"' & f'username="{username}"'
processing_request_contents = db_lightsheet.Request.ProcessingRequest() & \
 f'request_name="{request_name}"' & f'username="{username}"'
sample_joined_contents = request_contents * sample_contents * clearing_batch_contents
# Count imaging requests per joined sample row. keep_all_rows=True is passed so
# rows without imaging requests are kept; for those n_imaged is NULL, the total
# is reported as "0" and imaging_request_number is set to the placeholder 499.
imaging_joined_contents = sample_joined_contents.aggr(
    imaging_request_contents,
    **replicated_args,
    imaging_request_number='imaging_request_number',
    n_imaged='CONVERT(SUM(imaging_progress="complete"),char)',
    total_imaging_requests='COUNT(*)',
    keep_all_rows=True
    ).proj(**replicated_args,
           total_imaging_requests='IF(n_imaged is NULL, "0",total_imaging_requests)',
           imaging_request_number='IF(imaging_request_number is NULL, 499,imaging_request_number)'
        # fraction_imaged='CONCAT(n_imaged,"/",total_imaging_requests)'
        )
# Count processing requests on top of the imaging summary.
# A stray '' literal used to sit directly before **replicated_args; Python parsed
# the pair as the power expression '' ** replicated_args, which raises TypeError.
# It has been removed so the dict is unpacked as keyword arguments.
processing_joined_contents = (dj.U('username','request_name') * imaging_joined_contents).aggr(
    processing_request_contents,
    **replicated_args,
    total_imaging_requests='total_imaging_requests',
    n_processed='CONVERT(SUM(processing_progress="complete"),char)',
    total_processing_requests='CONVERT(COUNT(processing_progress),char)',
    keep_all_rows=True
    ).proj(
        **replicated_args,
        total_imaging_requests='total_imaging_requests',
        total_processing_requests='IF(n_processed is NULL,0,total_processing_requests)',

        )
processing_joined_contents
# imaging_joined_contents

# sample_joined_contents = sample_contents.aggr(
#         request_contents * clearing_batch_contents,
#         number_of_samples='number_of_samples',
#         number_in_batch='number_in_batch',
#         description='description',
#         species='species',
#         ).proj(
#             **replicated_args,
#             fraction_cleared='CONCAT(n_cleared,"/",CONVERT(number_of_samples,char))')
# sample_joined_contents

In [None]:
# Per-sample summary of imaging and processing requests for one request.
# username and request_name are not set in this cell, so the values assigned
# by a previously executed cell are used.
# username='ejdennis'
# request_name='X050'
# username='soline'
# request_name='Mouse092,_MouseK01'
request_name_restriction = f'request_name="{request_name}"'
username_restriction = f'username="{username}"'
request_contents = db_lightsheet.Request() & request_name_restriction & username_restriction
sample_contents = db_lightsheet.Request.Sample() & request_name_restriction & username_restriction
clearing_batch_contents = db_lightsheet.Request.ClearingBatch() & request_name_restriction & username_restriction
imaging_request_contents = db_lightsheet.Request.ImagingRequest() & request_name_restriction & username_restriction
processing_request_contents = db_lightsheet.Request.ProcessingRequest() & request_name_restriction & username_restriction

# Attributes carried through unchanged by the aggr()/proj() steps below
replicated_args = dict(description='description',
        species='species')

sample_joined_contents = request_contents * sample_contents * clearing_batch_contents

# Imaging summary: rows without imaging requests get a total of "0" and the
# placeholder imaging_request_number -1.
imaging_aggregate = sample_joined_contents.aggr(
    imaging_request_contents,
    **replicated_args,
    imaging_request_number='imaging_request_number',
    n_imaged='CONVERT(SUM(imaging_progress="complete"),char)',
    total_imaging_requests='COUNT(*)',
    keep_all_rows=True)
imaging_joined_contents = imaging_aggregate.proj(
    **replicated_args,
    total_imaging_requests='IF(n_imaged is NULL, "0",total_imaging_requests)',
    imaging_request_number='IF(imaging_request_number is NULL, -1,imaging_request_number)')

# Processing summary: the -1 placeholder and missing processing request
# numbers are both reported as "N/A".
processing_aggregate = (dj.U('username','request_name') * imaging_joined_contents).aggr(
    processing_request_contents,
    'imaging_request_number',
    **replicated_args,
    processing_request_number='processing_request_number',
    total_imaging_requests='total_imaging_requests',
    n_processed='CONVERT(SUM(processing_progress="complete"),char)',
    total_processing_requests='CONVERT(COUNT(processing_progress),char)',
    keep_all_rows=True)
processing_joined_contents = processing_aggregate.proj(
    **replicated_args,
    imaging_request_number='IF(imaging_request_number = -1,"N/A",imaging_request_number)',
    processing_request_number='IF(processing_request_number is NULL, "N/A",processing_request_number)',
    total_imaging_requests='total_imaging_requests',
    total_processing_requests='IF(n_processed is NULL,0,total_processing_requests)')
processing_joined_contents

In [None]:
# Same kind of summary as the single-request cells, but over all requests (no username/request_name restriction).
request_contents = db_lightsheet.Request()
sample_contents = db_lightsheet.Request.Sample()
clearing_batch_contents = db_lightsheet.Request.ClearingBatch()
imaging_request_contents = db_lightsheet.Request.ImagingRequest()
processing_request_contents = db_lightsheet.Request.ProcessingRequest()
# Join samples, requests and clearing batches, keeping the listed attributes
# and merging the submission date and time into a single timestamp.
clearing_joined_contents = (sample_contents * request_contents * clearing_batch_contents).proj(
        request_name='request_name',sample_name='sample_name',
        species='species',clearing_protocol='clearing_protocol',
        clearing_progress='clearing_progress',
        datetime_submitted='TIMESTAMP(date_submitted,time_submitted)')
''' Now figure out what fraction of imaging requests have been fulfilled '''    
# Attributes passed through the aggr() call below.
# NOTE(review): 'imaging_request_number' appears both here and as a positional attribute
# in that aggr() call, and 'antibody1'/'antibody2'/'clearing_batch_number' are not among the
# attributes named in the proj() above — confirm this cell runs as written.
replicated_args = dict(species='species',clearing_protocol='clearing_protocol',
imaging_request_number='imaging_request_number',imager='imager',
imaging_progress='imaging_progress',
clearing_progress='clearing_progress',
antibody1='antibody1',antibody2='antibody2',
clearing_batch_number='clearing_batch_number',
datetime_submitted='datetime_submitted')

# imaging_joined_contents = dj.U('username','request_name','sample_name').aggr(
#     clearing_joined_contents*imaging_request_contents,
#     **replicated_args)

# processing_joined_contents = (dj.U('username','request_name')*imaging_joined_contents).aggr(   
# processing_request_contents,
# **replicated_args,processor='processor',processing_progress='processing_progress',
# processing_request_number='processing_request_number',
# keep_all_rows=True
# )
# sample_joined_contents = request_contents * sample_contents * clearing_joined_contents 
# Aggregate imaging requests per row of clearing_joined_contents; the final proj()
# requests only 'imaging_request_number' from the aggregate.
imaging_joined_contents = clearing_joined_contents.aggr(
    imaging_request_contents,
    'imaging_request_number',
    **replicated_args,
    n_imaged='CONVERT(SUM(imaging_progress="complete"),char)',
    total_imaging_requests='COUNT(*)',
    keep_all_rows=True
    ).proj(
    'imaging_request_number')
imaging_joined_contents 
# processing_joined_contents = (dj.U('username','request_name') * imaging_joined_contents).aggr(   
#         processing_request_contents,
#         'imaging_request_number',
#         **replicated_args,
#         processing_request_number='processing_request_number',
#         total_imaging_requests='total_imaging_requests',
#         n_processed='CONVERT(SUM(processing_progress="complete"),char)',
#         total_processing_requests='CONVERT(COUNT(processing_progress),char)',
#         keep_all_rows=True).proj(
#             **replicated_args,
#             imaging_request_number='IF(imaging_request_number = -1,"N/A",imaging_request_number)',
#             processing_request_number='IF(processing_request_number is NULL, "N/A",processing_request_number)',
#             total_imaging_requests='total_imaging_requests',
#             total_processing_requests='IF(n_processed is NULL,0,total_processing_requests)', 
#             )
# processing_joined_contents


In [None]:
# Imaging and processing summary over all requests (no username/request_name restriction).
request_contents = db_lightsheet.Request()
sample_contents = db_lightsheet.Request.Sample()
clearing_batch_contents = db_lightsheet.Request.ClearingBatch()
imaging_request_contents = db_lightsheet.Request.ImagingRequest()
processing_request_contents = db_lightsheet.Request.ProcessingRequest()


# Attributes carried through unchanged by the aggr()/proj() steps below
replicated_args = dict(number_of_samples='number_of_samples',description='description',
    species='species')
sample_joined_contents = request_contents * sample_contents * clearing_batch_contents
# Count imaging requests per joined sample row. keep_all_rows=True is passed so
# rows without imaging requests are kept; for those n_imaged is NULL, the total
# is reported as "0" and imaging_request_number as "N/A".
imaging_joined_contents = sample_joined_contents.aggr(
    imaging_request_contents,
    **replicated_args,
    imaging_request_number='imaging_request_number',
    n_imaged='CONVERT(SUM(imaging_progress="complete"),char)',
    total_imaging_requests='COUNT(*)',
    keep_all_rows=True
    ).proj(**replicated_args,
           total_imaging_requests='IF(n_imaged is NULL, "0",total_imaging_requests)',
           imaging_request_number='IF(imaging_request_number is NULL, "N/A",imaging_request_number)'
        # fraction_imaged='CONCAT(n_imaged,"/",total_imaging_requests)'
        )
# Count processing requests on top of the imaging summary.
# NOTE(review): imaging_request_number is computed in the aggr() but not listed
# in the final proj() — confirm whether it should appear in the result.
processing_joined_contents = (dj.U('username','request_name') * imaging_joined_contents).aggr(   
    processing_request_contents,
    **replicated_args,
    imaging_request_number='imaging_request_number',
    processing_request_number='processing_request_number',
    total_imaging_requests='total_imaging_requests',
    n_processed='CONVERT(SUM(processing_progress="complete"),char)',
    total_processing_requests='CONVERT(COUNT(processing_progress),char)',
    keep_all_rows=True
    ).proj(
        **replicated_args,
        processing_request_number='IF(processing_request_number is NULL, "N/A",processing_request_number)',
        total_imaging_requests='total_imaging_requests',
        total_processing_requests='IF(n_processed is NULL,0,total_processing_requests)', 
        )
processing_joined_contents