In [1]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import bokeh
import bokeh.palettes
from bokeh.plotting import figure, output_notebook, show, output_file, gridplot
from bokeh.io import export_svg, export_png
from bokeh.models import Legend, LegendItem
import os
from neuprint import Client, fetch_neurons, NeuronCriteria as NC, fetch_adjacencies


# Read the neuPrint auth token from the first line of the token file.
# The context manager closes the file handle even if reading fails.
with open("flybrain.auth.txt", 'r') as auth_token_file:
    auth_token = auth_token_file.readline().strip()

# Best-effort connection: np_client is left as None when the client cannot be
# created, so the failure is reported here instead of being silently dropped.
try:
    np_client = Client('neuprint.janelia.org', dataset='hemibrain:v1.2.1', token=auth_token)
except Exception as client_error:
    # `except Exception` (not a bare `except`) lets KeyboardInterrupt/SystemExit propagate.
    print(f"Could not create the neuPrint client ({client_error!r}); np_client is None")
    np_client = None

output_notebook()

# Turn off the warnings
# NOTE: this silences every warning category for the rest of the session.
import warnings
warnings.filterwarnings('ignore')




In [2]:
# Fetch every body whose status is 'Traced'. The call returns two objects,
# unpacked into `all_neuron_data` and `all_roi`; only the first is displayed.
traced_criteria = NC(status='Traced')
all_neuron_data, all_roi = fetch_neurons(traced_criteria)
all_neuron_data

Unnamed: 0,bodyId,instance,type,pre,post,downstream,upstream,mito,size,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
0,198469830,,,0,0,0,0,2,6665418,Traced,True,Leaves,,299.0,"[33045, 15655, 3280]",{},,[],[]
1,198810827,,,0,0,0,0,1,9211633,Traced,True,Leaves,,299.0,"[33014, 16221, 3088]",{},,[],[]
2,198815212,,,0,0,0,0,3,27720313,Traced,True,Leaves,,299.0,"[33342, 16128, 3216]",{},,[],[]
3,199147536,,,0,0,0,0,3,58580023,Traced,True,Leaves,,299.0,"[32412, 16453, 3356]",{},,[],[]
4,199156210,,,0,0,0,0,4,82402138,Traced,True,Leaves,,299.0,"[33401, 16741, 3240]",{},,[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97895,7112622044,LAL137(PVL05)_L,LAL137,1376,2087,11635,2087,534,1529252938,Traced,False,Roughly traced,,,,"{'SNP(R)': {'pre': 304, 'post': 130, 'downstre...",VeLC-(LC)(c)Sm(b),"[CRE(-ROB,-RUB)(R), CRE(-RUB)(L), CRE(L), CRE(...","[CRE(-ROB,-RUB)(R), CRE(-RUB)(L), CRE(L), CRE(..."
97896,7112622236,,,5,79,22,79,23,37146103,Traced,False,Prelim Roughly traced,,,,"{'SNP(R)': {'pre': 5, 'post': 79, 'downstream'...",,"[SLP(R), SMP(R), SNP(R)]","[SLP(R), SMP(R), SNP(R)]"
97897,7112623021,,,104,503,445,503,74,97081386,Traced,True,Leaves,,,,"{'AL(L)': {'pre': 104, 'post': 503, 'downstrea...",,"[AL(L), AL-D(L), AL-DA2(L), AL-DA3(L), AL-DA4m...","[AL(L), AL-D(L), AL-DA2(L), AL-DA3(L), AL-DA4m..."
97898,7112623023,,,28,17,205,17,17,26237427,Traced,True,Leaves,,,,"{'AL(L)': {'pre': 28, 'post': 17, 'downstream'...",,"[AL(L), AL-D(L), AL-DC1(L), AL-DC4(L), AL-DL5(L)]","[AL(L), AL-D(L), AL-DC1(L), AL-DC4(L), AL-DL5(L)]"


In [4]:
# roi data
# Load the connection table from the working directory; later cells read its
# 'bodyId_post', 'roi' and 'weight' columns.
roi_all = pd.read_csv('conn_all.csv')

In [6]:
# Number of distinct 'bodyId_post' values in the connection table.
len(pd.unique(roi_all['bodyId_post']))

90650

In [8]:
# Total weight per (bodyId_post, roi) pair, with the body column renamed to
# 'bodyId' so it can be joined against the neuron table.
roi_all_types = (
    roi_all[['bodyId_post', 'roi', 'weight']]
    .groupby(['bodyId_post', 'roi'])
    .sum()
    .reset_index()
    .rename(columns={'bodyId_post': 'bodyId'})
)
roi_all_types

Unnamed: 0,bodyId,roi,weight
0,200326126,AVLP(R),2
1,200326126,IB,16
2,200326126,ICL(R),241
3,200326126,LH(R),6
4,200326126,PLP(R),375
...,...,...,...
218450,7112622236,SLP(R),42
218451,7112622236,SMP(R),24
218452,7112623021,AL(L),468
218453,7112623023,AL(L),14


In [9]:
# Attach each body's 'type' to its per-ROI weights (default inner join on 'bodyId').
neuron_types = all_neuron_data[['bodyId', 'type']]
neuron_roi_all = neuron_types.merge(roi_all_types, on='bodyId')
neuron_roi_all

Unnamed: 0,bodyId,type,roi,weight
0,200326126,CL141,AVLP(R),2
1,200326126,CL141,IB,16
2,200326126,CL141,ICL(R),241
3,200326126,CL141,LH(R),6
4,200326126,CL141,PLP(R),375
...,...,...,...,...
218450,7112622236,,SLP(R),42
218451,7112622236,,SMP(R),24
218452,7112623021,,AL(L),468
218453,7112623023,,AL(L),14


In [10]:
# Keep only the columns needed downstream, then discard rows with any missing
# value and renumber the index from 0.
roi_filtered = neuron_roi_all.loc[:, ['roi', 'type', 'weight']]
roi_dropped_filtered = (
    roi_filtered
    .dropna()
    .reset_index(drop=True)
)
roi_dropped_filtered

Unnamed: 0,roi,type,weight
0,AVLP(R),CL141,2
1,IB,CL141,16
2,ICL(R),CL141,241
3,LH(R),CL141,6
4,PLP(R),CL141,375
...,...,...,...
110458,SMP(R),LAL137,105
110459,VES(L),LAL137,111
110460,VES(R),LAL137,4
110461,bL(L),LAL137,1


In [11]:
# Filter out the unknown celltypes
# A type is blanked out (set to '') when its first three or first two
# characters match one of the prefixes below.
unknown_prefixes_3 = ['CRE', 'SMP', 'SIP', 'SLP', 'LAL', 'AOT', 'PLP', 'WED', 'AVL', 'ATL', 'VES', 'SAD']
unknown_prefixes_2 = ['LT', 'CL', 'IB', 'PL', 'PS', 'PV']

type_names = roi_dropped_filtered['type']
is_unknown_type = (
    type_names.str[:3].isin(unknown_prefixes_3)
    | type_names.str[:2].isin(unknown_prefixes_2)
)

# One vectorized .loc assignment replaces the row-by-row chained assignment
# (`df['type'][i] = ''`), which pandas cannot guarantee writes back to the
# frame and which is a silent no-op when Copy-on-Write is enabled.
roi_dropped_filtered.loc[is_unknown_type, 'type'] = ''
roi_dropped_filtered

Unnamed: 0,roi,type,weight
0,AVLP(R),,2
1,IB,,16
2,ICL(R),,241
3,LH(R),,6
4,PLP(R),,375
...,...,...,...
110458,SMP(R),,105
110459,VES(L),,111
110460,VES(R),,4
110461,bL(L),,1


In [12]:
# Location of the ROI hierarchy workbook. The original absolute path stays as
# the default; set the ALL_ROI_XLSX environment variable to point elsewhere
# when running on another machine.
all_roi_path = os.environ.get('ALL_ROI_XLSX', '/Users/rhessa/oviIN-inputs/data/all_roi_df.xlsx')
all_roi_h = pd.read_excel(all_roi_path)
all_roi_h

Unnamed: 0,Super,Super_split,L1,L2,L3,L4,Color,Color2
0,AL,AL(L),AL(L),AL(L),,,cadetblue,
1,AL,,,AL-D(L),,,,cadetblue
2,AL,,,AL-DA2(L),,,,cadetblue
3,AL,,,AL-DA3(L),,,,cadetblue
4,AL,,,AL-DA4m(L),,,,cadetblue
...,...,...,...,...,...,...,...,...
225,VMNP,,,VES(L),,,,firebrick
226,VMNP,,,VES(R),,,,firebrick
227,AL,AL(L),mALT(L),mALT(L),,,cadetblue,
228,AL,AL(R),mALT(R),mALT(R),,,cadetblue,


In [13]:
# Build an 'L2' -> 'Super' lookup from the hierarchy table (a later duplicate
# 'L2' entry overwrites an earlier one), then tag every row with its super ROI.
# ROIs missing from the lookup get NaN.
roi_super_dict = {
    l2_name: super_name
    for l2_name, super_name in zip(all_roi_h['L2'], all_roi_h['Super'])
}
roi_dropped_filtered['super_roi'] = roi_dropped_filtered['roi'].map(roi_super_dict)
roi_dropped_filtered

Unnamed: 0,roi,type,weight,super_roi
0,AVLP(R),,2,VLNP
1,IB,,16,INP
2,ICL(R),,241,INP
3,LH(R),,6,LH
4,PLP(R),,375,VLNP
...,...,...,...,...
110458,SMP(R),,105,SNP
110459,VES(L),,111,VMNP
110460,VES(R),,4,VMNP
110461,bL(L),,1,MB


In [14]:
# Relabel the type column: blanked-out types become "unknown", everything else "known".
# Rows already labelled 'unknown' are treated as unknown as well, so re-running
# this cell does not flip them to 'known'.
is_unknown_type = roi_dropped_filtered['type'].isin(['', 'unknown'])

# Whole-column assignment instead of per-row chained assignment
# (`df['type'][i] = ...`), which is not guaranteed to write back to the frame.
roi_dropped_filtered['type'] = np.where(is_unknown_type, 'unknown', 'known')

roi_dropped_filtered

Unnamed: 0,roi,type,weight,super_roi
0,AVLP(R),unknown,2,VLNP
1,IB,unknown,16,INP
2,ICL(R),unknown,241,INP
3,LH(R),unknown,6,LH
4,PLP(R),unknown,375,VLNP
...,...,...,...,...
110458,SMP(R),unknown,105,SNP
110459,VES(L),unknown,111,VMNP
110460,VES(R),unknown,4,VMNP
110461,bL(L),unknown,1,MB


In [63]:
# Number of rows with a non-null weight for every (super_roi, type) pair.
# The resulting 'weight' column therefore holds row counts, not summed weights.
roi_dropped_filtered_grouped = (
    roi_dropped_filtered
    .groupby(['super_roi', 'type'])['weight']
    .count()
    .reset_index()
)

In [64]:
# Cast the per-group counts stored in 'weight' to int and show their grand total.
roi_dropped_filtered_grouped = roi_dropped_filtered_grouped.astype({'weight': int})
roi_dropped_filtered_grouped['weight'].sum()

106121

In [65]:
# Show the grouped table ordered by super_roi (descending) with a fresh 0..n-1 index.
(
    roi_dropped_filtered_grouped
    .sort_values('super_roi', ascending=False)
    .reset_index(drop=True)
)

Unnamed: 0,super_roi,type,weight
0,VMNP,unknown,9088
1,VMNP,known,1306
2,VLNP,unknown,12061
3,VLNP,known,6676
4,SNP,unknown,11750
5,SNP,known,6354
6,PENP,unknown,1143
7,PENP,known,351
8,OL,unknown,338
9,OL,known,3020


In [66]:
# Proportion of known and unknown in each super_roi
roi_here = roi_dropped_filtered_grouped['super_roi'].unique()

# Per-row total of 'weight' within the row's super_roi; transform('sum') keeps
# the original index, so the division lines up row by row and replaces the
# explicit loop over super ROIs.
super_roi_totals = roi_dropped_filtered_grouped.groupby('super_roi')['weight'].transform('sum')
roi_dropped_filtered_grouped['prop'] = roi_dropped_filtered_grouped['weight'] / super_roi_totals

In [67]:
# Turn the fractions in 'prop' into percentages (in place), then order the rows
# by type and prop, both descending, with a fresh index.
# NOTE: re-running this cell multiplies 'prop' by 100 again.
roi_dropped_filtered_grouped['prop'] *= 100
roi_grouping_final = (
    roi_dropped_filtered_grouped
    .sort_values(by=['type', 'prop'], ascending=False)
    .reset_index(drop=True)
)

In [68]:
# Display the per-super_roi known/unknown table, sorted by type and then prop (both descending).
roi_grouping_final

Unnamed: 0,super_roi,type,weight,prop
0,VMNP,unknown,9088,87.435059
1,GNG,unknown,908,85.741265
2,PENP,unknown,1143,76.506024
3,INP,unknown,13087,72.737884
4,SNP,unknown,11750,64.902784
5,VLNP,unknown,12061,64.369963
6,LX,unknown,2498,62.465616
7,NotPrimary,unknown,4509,36.784141
8,LH,unknown,1007,34.712168
9,MB,unknown,1020,14.697406
