# Ride or die neurons in the connectome

The goal of this notebook is to find the "ride or die" contingents in oviIN's connectome. These are neurons that stick together throughout all of the modularity resolutions. 
1. Set the maximum resolution to use to assess whether neurons stuck together or not. 
2. Within each module at the maximum resolution, determine whether neurons consistently traveled through the same modules at lower resolutions. If yes, they are a "ride or die" contingent. If no, then the entire module is discarded.
3. Plot a Sankey of the ride or die contingent to check the results.

I expect the results from this analysis to be slightly different from the results of the analysis that Rhessa is working on. There, she is disqualifying modules at every resolution wherever the modules had recombined (crossing partitions). 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [2]:
ovi_in_node_df = pd.read_csv('ovi_preprocessed/preprocessed_inputs-v1.2.1/preprocessed_nodes.csv')
ovi_in_node_df

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
0,1003215282,1,1,1,1,1,1,1,CL229_R,CL229,...,Traced,False,Roughly traced,PDM19,301.0,"[23044, 14981, 11600]","{'INP': {'pre': 87, 'post': 351, 'downstream':...",,"['EPA(R)', 'GOR(R)', 'IB', 'ICL(R)', 'INP', 'S...","['GOR(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S..."
1,1005952640,2,2,1,1,2,2,2,IB058_R,IB058,...,Traced,False,Roughly traced,PVL20,,,"{'INP': {'pre': 464, 'post': 1327, 'downstream...",,"['ATL(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S...","['ATL(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S..."
2,1006928515,3,1,1,1,3,3,3,CL300_R,CL300,...,Traced,False,Roughly traced,PVL13,236.0,"[12083, 10523, 16816]","{'INP': {'pre': 79, 'post': 126, 'downstream':...",,"['ATL(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S...","['ATL(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S..."
3,1007260806,4,2,1,1,4,4,4,CL301_R,CL301,...,Traced,False,Roughly traced,PVL13,236.0,"[13524, 10108, 16480]","{'INP': {'pre': 40, 'post': 128, 'downstream':...",,"['GOR(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S...","['IB', 'ICL(R)', 'INP', 'PLP(R)', 'SCL(R)', 'S..."
4,1008024276,5,3,2,2,5,5,5,FB5N_R,FB5N,...,Traced,False,Roughly traced,AVM08,472.5,"[19178, 29711, 37312]","{'SNP(L)': {'post': 5, 'upstream': 5, 'mito': ...",SMPCREFB5_4,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,987273073,2507,3,8,8,409,604,629,(PVL05)_L,,...,Traced,False,Roughly traced,,,,"{'SNP(R)': {'pre': 65, 'post': 52, 'downstream...",,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2507,987842109,2508,3,9,23,533,780,815,,,...,Orphan,,Orphan hotknife,,,,"{'SNP(R)': {'pre': 2, 'post': 13, 'downstream'...",,"['SMP(R)', 'SNP(R)']","['SMP(R)', 'SNP(R)']"
2508,988567837,2509,2,3,4,16,58,63,FB4G_R,FB4G,...,Traced,False,Roughly traced,AVM08,,,"{'SNP(R)': {'pre': 6, 'post': 73, 'downstream'...",CRELALFB4_3,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
2509,988909130,2510,2,3,4,389,559,572,FB5V_R,FB5V,...,Traced,False,Roughly traced,AVM10,296.5,"[13226, 32024, 18600]","{'SNP(R)': {'pre': 1, 'post': 28, 'downstream'...",CRELALFB5,"['AB(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX',...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."


In [3]:
# take only the columns with modularity data to test function
ovi_HB_chunk = ovi_in_node_df[['0.0','0.05','0.1']].copy()

# Sankey function that is broken
I tried turning my Sankey plotting into this neato function but there is a problem with it that I haven't figured out yet. 

In [None]:
import plotly.graph_objects as go
def create_Sankey(df0):
    """Girl, turn all that code into a function! Only pass a dataframe that has resolution columns and no other columns."""

    # make a copy of the dataframe since we will be modifying it in the function
    df = df0.copy()

    # get the columns of the dataframe
    columns = df.columns

    # add a column of ones to ovi_HB_chunk
    df['counts'] = 1

    # these are for the nodes and links that will be used in the sankey diagram
    nodes = []
    links = pd.DataFrame()

    # append _r0.0 to values in column '0.0' and so on
    for col in columns:
        nodes = nodes + df[col].unique().tolist()
        print(col)
        if col != columns[-1]:
            # create the Sankey levels
            df2 = df.groupby([col,columns[columns.get_loc(col)+1]])['counts'].count().reset_index()
            df2.columns = ['source','target','value']
            links = pd.concat([links, df2], axis=0)
        
        # rename columns after doing the above
        #df[col] = df[col].astype(str) + '_r' + col
        
    # this is basically a mapping dictionary of nodes enumerated
    mapping_dict = {k: v for v, k in enumerate(nodes)}

    # replace source and target with enumerated values
    links['source'] = links['source'].map(mapping_dict)
    links['target'] = links['target'].map(mapping_dict)

    # turn this table into a dictionary for making the sankey diagram
    links_dict = links.to_dict(orient='list')

    # plot it
    fig = go.Figure(data=[go.Sankey(
        node = dict(
            pad = 15,
            thickness=20,
            #line=dict(color='blue', width=0.5),
            label = nodes,
            color='green'
        ),
        link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
        )
    )
    ])
    #fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
    fig.show()

    #return nodes, links

In [None]:
create_Sankey(ovi_HB_chunk)

In [None]:
mynodes, mylinks = create_Sankey(ovi_HB_chunk)

# Sankey of oviINr inputs connectome for only 0.0, 0.05, and 0.1 res
I want to see a Sankey first. I'm going to try this out using resolution 0.1 as the maximum resolution.

In [4]:
# take only the columns with modularity data
ovi_HB_chunk = ovi_in_node_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

In [5]:
# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1
ovi_HB_chunk

Unnamed: 0,0.0,0.05,0.1,0.5,0.75,1.0,counts
0,1_r0.0,1_r0.05,1_r0.1,1_r0.5,1_r0.75,1_r1.0,1
1,2_r0.0,1_r0.05,1_r0.1,2_r0.5,2_r0.75,2_r1.0,1
2,1_r0.0,1_r0.05,1_r0.1,3_r0.5,3_r0.75,3_r1.0,1
3,2_r0.0,1_r0.05,1_r0.1,4_r0.5,4_r0.75,4_r1.0,1
4,3_r0.0,2_r0.05,2_r0.1,5_r0.5,5_r0.75,5_r1.0,1
...,...,...,...,...,...,...,...
2506,3_r0.0,8_r0.05,8_r0.1,409_r0.5,604_r0.75,629_r1.0,1
2507,3_r0.0,9_r0.05,23_r0.1,533_r0.5,780_r0.75,815_r1.0,1
2508,2_r0.0,3_r0.05,4_r0.1,16_r0.5,58_r0.75,63_r1.0,1
2509,2_r0.0,3_r0.05,4_r0.1,389_r0.5,559_r0.75,572_r1.0,1


In [6]:
# these are the nodes that will be used in the sankey diagram
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [7]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

In [8]:
# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [9]:
#links = pd.concat([df2, df3, df4, df5, df6], axis=0)
links = pd.concat([df2, df3], axis=0)

In [10]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [11]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [12]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [13]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

Looking by eye, there seems to be a lot of stuff that rides together through the resolutions. For the algorithm, we will loop through each module of the 0.1 resolution and test for togetherness in the coarser resolutions.

# Ride or die from 0.1 max res

In [14]:
# the max res to use
max_res = '0.1'

# the df to use since we don't need body ids
test = ovi_in_node_df[['0.0','0.05','0.1']].copy()

# get a list of module ids
module_ids = test[max_res].unique().tolist()


In [15]:
# create an empty list to store the module ids of the riders
ride_or_die_modules = []

for i in module_ids:
    # grab the rows that have the module id
    test_mod = test[test[max_res]==i]

    # first test whether there is more than one row since singletons won't count
    if test_mod.shape[0] > 1:

        # tests for number of unique rows
        if test_mod.drop_duplicates().shape[0] == 1:
            # if all rows are duplicates of each other, then append the module id to the list
            ride_or_die_modules.append(i)

# retrieve the rows of the original dataframe that have ride or die modules
ride_or_die_df = ovi_in_node_df[ovi_in_node_df[max_res].isin(ride_or_die_modules)]
ride_or_die_df


Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
51,1036503560,52,2,7,10,31,35,35,LHPV3a1_R,LHPV3a1,...,Traced,False,Roughly traced,PVL10,286.0,"[2035, 16392, 14088]","{'SNP(R)': {'post': 3, 'upstream': 3}, 'SLP(R)...",,"['AVLP(R)', 'EPA(L)', 'EPA(R)', 'GC', 'INP', '...","['AVLP(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EP..."
219,1223388206,220,2,7,10,31,35,35,WEDPN17_a_R,WEDPN17_a,...,Traced,False,Roughly traced,AVL09,,,"{'VLNP(R)': {'pre': 244, 'post': 127, 'downstr...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LAL(-G...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LAL(-G..."
229,1232041867,230,2,7,10,31,124,134,,,...,Traced,True,Leaves,,,,"{'INP': {'pre': 6, 'post': 16, 'downstream': 7...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(L)', 'EPA...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(R)', 'INP..."
242,1283173461,243,2,7,22,102,130,140,PS127_R,PS127,...,Traced,False,Roughly traced,PDM17,301.0,"[25778, 15112, 15296]","{'INP': {'pre': 374, 'post': 368, 'downstream'...",,"['IB', 'ICL(R)', 'INP', 'IPS(R)', 'LAL(-GA)(R)...","['IB', 'ICL(R)', 'INP', 'IPS(R)', 'PLP(R)', 'S..."
256,1313849992,257,2,7,22,102,130,140,PS098_L,PS098,...,Traced,False,Roughly traced,AVM19,334.0,"[25538, 32016, 17080]","{'INP': {'pre': 267, 'post': 144, 'downstream'...",,"['EPA(R)', 'IB', 'ICL(R)', 'INP', 'IPS(R)', 'P...","['EPA(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S..."
307,1448929584,308,2,7,10,31,124,134,,,...,Traced,True,Leaves,,,,"{'INP': {'pre': 10, 'post': 7, 'downstream': 7...",,"['EPA(L)', 'EPA(R)', 'GC', 'ICL(R)', 'INP', 'L...","['EPA(L)', 'EPA(R)', 'GC', 'ICL(R)', 'INP', 'L..."
367,1633078450,368,2,1,29,149,196,195,,,...,Traced,True,Leaves,,,,"{'LX(R)': {'pre': 165, 'post': 63, 'downstream...",,"['LAL(-GA)(R)', 'LAL(L)', 'LAL(R)', 'LX(L)', '...","['EPA(R)', 'LAL(-GA)(R)', 'LAL(L)', 'LAL(R)', ..."
403,1758962229,404,2,1,29,149,196,195,,,...,Orphan,,Orphan,,,,"{'LX(R)': {'pre': 1, 'post': 10, 'downstream':...",,"['LAL(-GA)(R)', 'LAL(R)', 'LX(R)']","['LAL(-GA)(R)', 'LAL(R)', 'LX(R)']"
546,2254523290,547,2,60,57,227,287,249,,,...,Orphan,,Orphan,,,,"{'VMNP': {'post': 3, 'upstream': 3, 'mito': 2,...",,"['FLA(R)', 'PENP', 'VES(R)', 'VMNP']","['FLA(R)', 'PENP']"
1259,5813016625,1260,2,7,10,31,124,134,,,...,Traced,True,Leaves,,,,"{'LX(R)': {'pre': 69, 'post': 28, 'downstream'...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(L)', 'EPA...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(L)', 'EPA..."


In [16]:
ride_or_die_modules

[10, 22, 29, 57, 65, 69, 75]

Make a Sankey.

In [18]:
# take only the columns with modularity data
ovi_HB_chunk = ride_or_die_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1

In [19]:
# these are the nodes that will be used in the sankey diagram
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [20]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

In [21]:
# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [22]:
#links = pd.concat([df2, df3, df4, df5, df6], axis=0)
links = pd.concat([df2, df3], axis=0)

In [23]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [24]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [25]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [26]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

Interestingly, only stuff from coarse clusters 2 and 4 survived. I would've expected something from coarse cluster 3 to pull through but looking at cluster 2 from the 0.1 resolution, most of the stuff in there rides together but it only takes a few errant nodes to contaminate the entire module. This makes me wonder whether chosing a finer maximum resolution might actually retain more stuff.

In [27]:
# test
test = ovi_in_node_df[['0.0','0.05','0.1']].copy()
test = test[test[max_res]==2]
test 

Unnamed: 0,0.0,0.05,0.1
4,3,2,2
7,3,2,2
8,3,2,2
9,3,2,2
12,3,2,2
...,...,...,...
2487,3,2,2
2488,3,2,2
2490,3,2,2
2491,3,2,2


In [28]:
test.drop_duplicates().shape[0]

3

Using a finer maximum resolution.

# Ride or die from 1.0 max res

In [148]:
# the max res to use
max_res = '1.0'

# the df to use since we don't need body ids
test = ovi_in_node_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# get a list of module ids
module_ids = test[max_res].unique().tolist()

In [149]:
# create an empty list to store the module ids of the riders
ride_or_die_modules = []

for i in module_ids:
    # grab the rows that have the module id
    test_mod = test[test[max_res]==i]

    # first test whether there is more than one row since singletons won't count
    if test_mod.shape[0] > 1:

        # tests for number of unique rows
        if test_mod.drop_duplicates().shape[0] == 1:
            # if all rows are duplicates of each other, then append the module id to the list
            ride_or_die_modules.append(i)

# retrieve the rows of the original dataframe that have ride or die modules
ride_or_die_df = ovi_in_node_df[ovi_in_node_df[max_res].isin(ride_or_die_modules)]
ride_or_die_df

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
4,1008024276,5,3,2,2,5,5,5,FB5N_R,FB5N,...,Traced,False,Roughly traced,AVM08,472.5,"[19178, 29711, 37312]","{'SNP(L)': {'post': 5, 'upstream': 5, 'mito': ...",SMPCREFB5_4,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
5,1008085163,6,1,1,1,6,6,6,,,...,Traced,True,Leaves,,,,"{'SNP(L)': {'pre': 10, 'post': 9, 'downstream'...",,"['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']","['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']"
6,1008102479,7,2,1,1,7,7,7,(PVM01)_L,,...,Traced,False,Roughly traced,,,,"{'SNP(L)': {'post': 4, 'upstream': 4, 'mito': ...",,"['EPA(L)', 'GOR(L)', 'ICL(L)', 'INP', 'SCL(L)'...","['GC', 'INP', 'SCL(L)', 'SPS(L)', 'SPS(R)', 'V..."
7,1008369339,8,3,2,2,5,5,8,FS1B(FQ9)_C6_R,FS1B,...,Traced,False,Roughly traced,PDM02,301.0,"[22606, 16349, 10032]","{'SNP(L)': {'pre': 43, 'post': 24, 'downstream...",FB25SMPSMP,"['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FBl2',...","['CRE(-RUB)(L)', 'CRE(L)', 'INP', 'SMP(L)', 'S..."
8,1008391068,9,3,2,2,5,5,5,FC2B(FQ7)_C6_L,FC2B,...,Traced,False,Roughly traced,PDM03,325.5,"[29681, 16321, 7328]","{'CX': {'pre': 167, 'post': 2135, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2491,983430664,2492,3,2,2,5,5,20,FS1A(FQ9)_C3_R,FS1A,...,Traced,False,Roughly traced,PDM03,236.0,"[16993, 11130, 15072]","{'SNP(R)': {'pre': 37, 'post': 32, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2495,983767602,2496,3,2,2,5,5,20,FS1B(FQ9)_C2_L,FS1B,...,Traced,False,Roughly traced,PDM01,301.0,"[27031, 16664, 12416]","{'SNP(R)': {'pre': 62, 'post': 30, 'downstream...",FB25SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2503,986098785,2504,2,1,6,427,644,676,AOTU008_a_R,AOTU008_a,...,Traced,False,Roughly traced,ADL08,258.5,"[10014, 33018, 17344]","{'SNP(R)': {'pre': 111, 'post': 179, 'downstre...",aSP11b candidates,"['AOTU(R)', 'ATL(R)', 'CRE(-ROB,-RUB)(R)', 'CR...","['AOTU(R)', 'ATL(R)', 'IB', 'INP', 'MB(+ACA)(R..."
2505,987117151,2506,2,7,7,53,63,68,LAL010_R,LAL010,...,Traced,False,Roughly traced,ADM03,292.5,"[15520, 32234, 10016]","{'SNP(R)': {'pre': 2, 'post': 82, 'downstream'...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(R)', 'INP...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(R)', 'INP..."


Make a Sankey.

In [31]:
# take only the columns with modularity data
ovi_HB_chunk = ride_or_die_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1
ovi_HB_chunk

Unnamed: 0,0.0,0.05,0.1,0.5,0.75,1.0,counts
4,3_r0.0,2_r0.05,2_r0.1,5_r0.5,5_r0.75,5_r1.0,1
5,1_r0.0,1_r0.05,1_r0.1,6_r0.5,6_r0.75,6_r1.0,1
6,2_r0.0,1_r0.05,1_r0.1,7_r0.5,7_r0.75,7_r1.0,1
7,3_r0.0,2_r0.05,2_r0.1,5_r0.5,5_r0.75,8_r1.0,1
8,3_r0.0,2_r0.05,2_r0.1,5_r0.5,5_r0.75,5_r1.0,1
...,...,...,...,...,...,...,...
2491,3_r0.0,2_r0.05,2_r0.1,5_r0.5,5_r0.75,20_r1.0,1
2495,3_r0.0,2_r0.05,2_r0.1,5_r0.5,5_r0.75,20_r1.0,1
2503,2_r0.0,1_r0.05,6_r0.1,427_r0.5,644_r0.75,676_r1.0,1
2505,2_r0.0,7_r0.05,7_r0.1,53_r0.5,63_r0.75,68_r1.0,1


In [32]:
# these are the nodes that will be used in the sankey diagram
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [33]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [34]:
links = pd.concat([df2, df3, df4, df5, df6], axis=0)
#links = pd.concat([df2, df3], axis=0)

In [35]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [36]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [37]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [38]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

In [150]:
ride_or_die_df[ride_or_die_df['0.0']==3]

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
4,1008024276,5,3,2,2,5,5,5,FB5N_R,FB5N,...,Traced,False,Roughly traced,AVM08,472.5,"[19178, 29711, 37312]","{'SNP(L)': {'post': 5, 'upstream': 5, 'mito': ...",SMPCREFB5_4,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
7,1008369339,8,3,2,2,5,5,8,FS1B(FQ9)_C6_R,FS1B,...,Traced,False,Roughly traced,PDM02,301.0,"[22606, 16349, 10032]","{'SNP(L)': {'pre': 43, 'post': 24, 'downstream...",FB25SMPSMP,"['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FBl2',...","['CRE(-RUB)(L)', 'CRE(L)', 'INP', 'SMP(L)', 'S..."
8,1008391068,9,3,2,2,5,5,5,FC2B(FQ7)_C6_L,FC2B,...,Traced,False,Roughly traced,PDM03,325.5,"[29681, 16321, 7328]","{'CX': {'pre': 167, 'post': 2135, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
9,1008693605,10,3,2,2,5,5,9,FC2C(FQ7)_C7_L,FC2C,...,Traced,False,Roughly traced,PDM03,291.0,"[32830, 14210, 12336]","{'CX': {'pre': 91, 'post': 1051, 'downstream':...",FB1d367CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
12,1009371535,13,3,2,2,5,5,10,FC2B(FQ7)_C3_L,FC2B,...,Traced,False,Roughly traced,PDM01,277.5,"[28298, 18341, 7600]","{'CX': {'pre': 130, 'post': 2051, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2489,983202377,2490,3,8,8,446,677,707,,,...,Traced,True,Leaves,,,,"{'SNP(L)': {'pre': 97, 'post': 63, 'downstream...",,"['CRE(-RUB)(L)', 'CRE(L)', 'INP', 'LAL(L)', 'L...","['CRE(-RUB)(L)', 'CRE(L)', 'INP', 'LAL(L)', 'L..."
2490,983426636,2491,3,2,2,5,5,77,FS1A(FQ9)_C6_L,FS1A,...,Traced,False,Roughly traced,PDM03,271.0,"[30775, 16354, 10640]","{'SNP(L)': {'pre': 19, 'post': 14, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2491,983430664,2492,3,2,2,5,5,20,FS1A(FQ9)_C3_R,FS1A,...,Traced,False,Roughly traced,PDM03,236.0,"[16993, 11130, 15072]","{'SNP(R)': {'pre': 37, 'post': 32, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2495,983767602,2496,3,2,2,5,5,20,FS1B(FQ9)_C2_L,FS1B,...,Traced,False,Roughly traced,PDM01,301.0,"[27031, 16664, 12416]","{'SNP(R)': {'pre': 62, 'post': 30, 'downstream...",FB25SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."


Using a higher maximum resolution did retain more nodes. We now have about 30% of the input nodes qualifying as ride or die. But something a little strange is happening. I'm seeing crossover.

I took an example below and found that this crossover is valid. There are fine modules that not only stick together but they break off and recombine with other modules together too. They really are ride or die 4 life.

In [39]:
ride_or_die_df[(ride_or_die_df['0.05'] == 8) & (ride_or_die_df['0.1'] == 20)]

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
1131,579230376,1132,3,8,20,341,512,511,SMP031_R,SMP031,...,Traced,False,Traced,ADL15,246.0,"[3685, 24096, 13480]","{'SNP(R)': {'pre': 128, 'post': 560, 'downstre...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'MB(+AC...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'MB(+AC..."
2185,799586652,2186,3,8,20,341,512,511,MBON05(y4>y1y2)(AVM07)_L,MBON05,...,Traced,False,Roughly traced,,,,"{'SNP(R)': {'pre': 241, 'post': 239, 'downstre...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LAL(-G...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'MB(+AC..."


In [40]:
ovi_in_node_df[ovi_in_node_df['1.0'] == 511]

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
1131,579230376,1132,3,8,20,341,512,511,SMP031_R,SMP031,...,Traced,False,Traced,ADL15,246.0,"[3685, 24096, 13480]","{'SNP(R)': {'pre': 128, 'post': 560, 'downstre...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'MB(+AC...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'MB(+AC..."
2185,799586652,2186,3,8,20,341,512,511,MBON05(y4>y1y2)(AVM07)_L,MBON05,...,Traced,False,Roughly traced,,,,"{'SNP(R)': {'pre': 241, 'post': 239, 'downstre...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LAL(-G...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'MB(+AC..."


How many ride or die neurons are type None? Around 27%. Compare that to 33% for the entire oviINr input connectome. It's more comparable than I would've imagined.

In [41]:
ride_or_die_df[ride_or_die_df['celltype'].isna()]

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
5,1008085163,6,1,1,1,6,6,6,,,...,Traced,True,Leaves,,,,"{'SNP(L)': {'pre': 10, 'post': 9, 'downstream'...",,"['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']","['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']"
6,1008102479,7,2,1,1,7,7,7,(PVM01)_L,,...,Traced,False,Roughly traced,,,,"{'SNP(L)': {'post': 4, 'upstream': 4, 'mito': ...",,"['EPA(L)', 'GOR(L)', 'ICL(L)', 'INP', 'SCL(L)'...","['GC', 'INP', 'SCL(L)', 'SPS(L)', 'SPS(R)', 'V..."
66,1041666949,67,2,3,4,35,39,45,,,...,Traced,False,Roughly traced,,,,"{'SNP(R)': {'pre': 255, 'post': 136, 'downstre...",,"['AVLP(R)', 'CAN(R)', 'CRE(-ROB,-RUB)(R)', 'CR...","['AVLP(R)', 'CAN(R)', 'CRE(-ROB,-RUB)(R)', 'CR..."
86,1049946735,87,2,3,4,16,52,58,(MBDLaxon1),,...,Traced,False,Roughly traced,,,,"{'SNP(R)': {'pre': 217, 'post': 140, 'downstre...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'MB(+AC...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'SMP(R)..."
141,1106944058,142,2,1,1,65,79,89,(PDM12)_L,,...,Traced,False,Roughly traced,,301.0,"[28443, 19496, 9872]","{'INP': {'post': 13, 'upstream': 13, 'mito': 5...",,"['CAN(R)', 'FLA(R)', 'ICL(L)', 'INP', 'PENP', ...","['FLA(R)', 'PENP', 'VES(R)', 'VMNP']"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2334,888577071,2335,3,8,8,366,565,575,,,...,Traced,True,Leaves,,,,"{'SNP(R)': {'pre': 72, 'post': 37, 'downstream...",,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2402,921684387,2403,3,8,8,413,697,736,,,...,Traced,True,Leaves,,,,"{'SNP(R)': {'pre': 29, 'post': 21, 'downstream...",,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2474,978068779,2475,4,15,11,323,708,751,,,...,Traced,True,Leaves,,,,"{'SNP(L)': {'pre': 154, 'post': 137, 'downstre...",,"['ATL(L)', 'ATL(R)', 'IB', 'INP', 'SIP(L)', 'S...","['ATL(L)', 'ATL(R)', 'IB', 'ICL(L)', 'INP', 'S..."
2489,983202377,2490,3,8,8,446,677,707,,,...,Traced,True,Leaves,,,,"{'SNP(L)': {'pre': 97, 'post': 63, 'downstream...",,"['CRE(-RUB)(L)', 'CRE(L)', 'INP', 'LAL(L)', 'L...","['CRE(-RUB)(L)', 'CRE(L)', 'INP', 'LAL(L)', 'L..."


In [42]:
839/2511

0.3341298287534847

In [43]:
193/len(ride_or_die_df)

0.273371104815864

In [44]:
ride_or_die_df[ride_or_die_df['0.1']==75]

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
2263,832646151,2264,2,8,75,499,596,782,SMP214_R,SMP214,...,Traced,False,Roughly traced,PDL06,261.0,"[5943, 19514, 5360]","{'SNP(R)': {'pre': 86, 'post': 134, 'downstrea...",,"['ATL(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'SIP(R)..."
2363,894724798,2364,2,8,75,499,596,782,SMP214_R,SMP214,...,Traced,False,Roughly traced,PDL06,245.0,"[5516, 21403, 4592]","{'SNP(R)': {'pre': 52, 'post': 109, 'downstrea...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'PLP(R)...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'PLP(R)..."


Of course I want to see all the other possible max resolutions.

# Ride or die from 0.75 max res

In [45]:
# the max res to use
max_res = '0.75'

# the df to use since we don't need body ids
test = ovi_in_node_df[['0.0','0.05','0.1','0.5','0.75']].copy()

# get a list of module ids
module_ids = test[max_res].unique().tolist()

In [46]:
# create an empty list to store the module ids of the riders
ride_or_die_modules = []

for i in module_ids:
    # grab the rows that have the module id
    test_mod = test[test[max_res]==i]

    # first test whether there is more than one row since singletons won't count
    if test_mod.shape[0] > 1:

        # tests for number of unique rows
        if test_mod.drop_duplicates().shape[0] == 1:
            # if all rows are duplicates of each other, then append the module id to the list
            ride_or_die_modules.append(i)

# retrieve the rows of the original dataframe that have ride or die modules
ride_or_die_df = ovi_in_node_df[ovi_in_node_df[max_res].isin(ride_or_die_modules)]
ride_or_die_df

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
5,1008085163,6,1,1,1,6,6,6,,,...,Traced,True,Leaves,,,,"{'SNP(L)': {'pre': 10, 'post': 9, 'downstream'...",,"['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']","['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']"
6,1008102479,7,2,1,1,7,7,7,(PVM01)_L,,...,Traced,False,Roughly traced,,,,"{'SNP(L)': {'post': 4, 'upstream': 4, 'mito': ...",,"['EPA(L)', 'GOR(L)', 'ICL(L)', 'INP', 'SCL(L)'...","['GC', 'INP', 'SCL(L)', 'SPS(L)', 'SPS(R)', 'V..."
10,1008966615,11,1,1,1,3,8,3,CL300_R,CL300,...,Traced,False,Roughly traced,PVL13,236.0,"[13492, 9603, 16400]","{'INP': {'pre': 42, 'post': 132, 'downstream':...",,"['IB', 'ICL(R)', 'INP', 'PLP(R)', 'SCL(R)', 'S...","['IB', 'ICL(R)', 'INP', 'PLP(R)', 'VLNP(R)']"
21,1013066710,22,3,3,4,16,16,17,CRE071_R,CRE071,...,Traced,False,Roughly traced,AVM17,396.5,"[11636, 34239, 26536]","{'SNP(R)': {'pre': 12, 'post': 95, 'downstream...",,"['AL(R)', 'AL-DP1l(R)', 'CRE(-ROB,-RUB)(R)', '...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
23,1013416147,24,2,3,4,18,18,18,CRE100_R,CRE100,...,Traced,False,Roughly traced,PDM27,401.0,"[23180, 15645, 12816]","{'SNP(L)': {'pre': 1, 'post': 3, 'downstream':...",CL-VeSp,"['CAN(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'FLA...","['CAN(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'FLA..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2489,983202377,2490,3,8,8,446,677,707,,,...,Traced,True,Leaves,,,,"{'SNP(L)': {'pre': 97, 'post': 63, 'downstream...",,"['CRE(-RUB)(L)', 'CRE(L)', 'INP', 'LAL(L)', 'L...","['CRE(-RUB)(L)', 'CRE(L)', 'INP', 'LAL(L)', 'L..."
2494,983767417,2495,2,7,7,31,752,134,LAL175_R,LAL175,...,Traced,False,Roughly traced,PVM03,335.0,"[17040, 9200, 20880]","{'SNP(R)': {'pre': 1, 'post': 13, 'downstream'...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LAL(-G...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LAL(-G..."
2503,986098785,2504,2,1,6,427,644,676,AOTU008_a_R,AOTU008_a,...,Traced,False,Roughly traced,ADL08,258.5,"[10014, 33018, 17344]","{'SNP(R)': {'pre': 111, 'post': 179, 'downstre...",aSP11b candidates,"['AOTU(R)', 'ATL(R)', 'CRE(-ROB,-RUB)(R)', 'CR...","['AOTU(R)', 'ATL(R)', 'IB', 'INP', 'MB(+ACA)(R..."
2505,987117151,2506,2,7,7,53,63,68,LAL010_R,LAL010,...,Traced,False,Roughly traced,ADM03,292.5,"[15520, 32234, 10016]","{'SNP(R)': {'pre': 2, 'post': 82, 'downstream'...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(R)', 'INP...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(R)', 'INP..."


Make a Sankey.

In [47]:
# take only the columns with modularity data
ovi_HB_chunk = ride_or_die_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1
ovi_HB_chunk

Unnamed: 0,0.0,0.05,0.1,0.5,0.75,1.0,counts
5,1_r0.0,1_r0.05,1_r0.1,6_r0.5,6_r0.75,6_r1.0,1
6,2_r0.0,1_r0.05,1_r0.1,7_r0.5,7_r0.75,7_r1.0,1
10,1_r0.0,1_r0.05,1_r0.1,3_r0.5,8_r0.75,3_r1.0,1
21,3_r0.0,3_r0.05,4_r0.1,16_r0.5,16_r0.75,17_r1.0,1
23,2_r0.0,3_r0.05,4_r0.1,18_r0.5,18_r0.75,18_r1.0,1
...,...,...,...,...,...,...,...
2489,3_r0.0,8_r0.05,8_r0.1,446_r0.5,677_r0.75,707_r1.0,1
2494,2_r0.0,7_r0.05,7_r0.1,31_r0.5,752_r0.75,134_r1.0,1
2503,2_r0.0,1_r0.05,6_r0.1,427_r0.5,644_r0.75,676_r1.0,1
2505,2_r0.0,7_r0.05,7_r0.1,53_r0.5,63_r0.75,68_r1.0,1


In [48]:
# these are the nodes that will be used in the sankey diagram
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [49]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [50]:
links = pd.concat([df2, df3, df4, df5, df6], axis=0)
#links = pd.concat([df2, df3], axis=0)

In [51]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [52]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [53]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [54]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

# Ride or die from 0.5 max res
Seems like the ride or dies are a smaller and smaller set with lower max resolutions.

In [88]:
# the max res to use
max_res = '0.5'

# the df to use since we don't need body ids
test = ovi_in_node_df[['0.0','0.05','0.1','0.5']].copy()

# get a list of module ids
module_ids = test[max_res].unique().tolist()

In [89]:
# create an empty list to store the module ids of the riders
ride_or_die_modules = []

for i in module_ids:
    # grab the rows that have the module id
    test_mod = test[test[max_res]==i]

    # first test whether there is more than one row since singletons won't count
    if test_mod.shape[0] > 1:

        # tests for number of unique rows
        if test_mod.drop_duplicates().shape[0] == 1:
            # if all rows are duplicates of each other, then append the module id to the list
            ride_or_die_modules.append(i)

# retrieve the rows of the original dataframe that have ride or die modules
ride_or_die_df = ovi_in_node_df[ovi_in_node_df[max_res].isin(ride_or_die_modules)]
ride_or_die_df

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
2,1006928515,3,1,1,1,3,3,3,CL300_R,CL300,...,Traced,False,Roughly traced,PVL13,236.0,"[12083, 10523, 16816]","{'INP': {'pre': 79, 'post': 126, 'downstream':...",,"['ATL(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S...","['ATL(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S..."
3,1007260806,4,2,1,1,4,4,4,CL301_R,CL301,...,Traced,False,Roughly traced,PVL13,236.0,"[13524, 10108, 16480]","{'INP': {'pre': 40, 'post': 128, 'downstream':...",,"['GOR(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S...","['IB', 'ICL(R)', 'INP', 'PLP(R)', 'SCL(R)', 'S..."
5,1008085163,6,1,1,1,6,6,6,,,...,Traced,True,Leaves,,,,"{'SNP(L)': {'pre': 10, 'post': 9, 'downstream'...",,"['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']","['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']"
6,1008102479,7,2,1,1,7,7,7,(PVM01)_L,,...,Traced,False,Roughly traced,,,,"{'SNP(L)': {'post': 4, 'upstream': 4, 'mito': ...",,"['EPA(L)', 'GOR(L)', 'ICL(L)', 'INP', 'SCL(L)'...","['GC', 'INP', 'SCL(L)', 'SPS(L)', 'SPS(R)', 'V..."
10,1008966615,11,1,1,1,3,8,3,CL300_R,CL300,...,Traced,False,Roughly traced,PVL13,236.0,"[13492, 9603, 16400]","{'INP': {'pre': 42, 'post': 132, 'downstream':...",,"['IB', 'ICL(R)', 'INP', 'PLP(R)', 'SCL(R)', 'S...","['IB', 'ICL(R)', 'INP', 'PLP(R)', 'VLNP(R)']"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2503,986098785,2504,2,1,6,427,644,676,AOTU008_a_R,AOTU008_a,...,Traced,False,Roughly traced,ADL08,258.5,"[10014, 33018, 17344]","{'SNP(R)': {'pre': 111, 'post': 179, 'downstre...",aSP11b candidates,"['AOTU(R)', 'ATL(R)', 'CRE(-ROB,-RUB)(R)', 'CR...","['AOTU(R)', 'ATL(R)', 'IB', 'INP', 'MB(+ACA)(R..."
2505,987117151,2506,2,7,7,53,63,68,LAL010_R,LAL010,...,Traced,False,Roughly traced,ADM03,292.5,"[15520, 32234, 10016]","{'SNP(R)': {'pre': 2, 'post': 82, 'downstream'...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(R)', 'INP...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(R)', 'INP..."
2506,987273073,2507,3,8,8,409,604,629,(PVL05)_L,,...,Traced,False,Roughly traced,,,,"{'SNP(R)': {'pre': 65, 'post': 52, 'downstream...",,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2509,988909130,2510,2,3,4,389,559,572,FB5V_R,FB5V,...,Traced,False,Roughly traced,AVM10,296.5,"[13226, 32024, 18600]","{'SNP(R)': {'pre': 1, 'post': 28, 'downstream'...",CRELALFB5,"['AB(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX',...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."


Make a Sankey.

In [77]:
# take only the columns with modularity data
ovi_HB_chunk = ride_or_die_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1

In [58]:
# these are the nodes that will be used in the sankey diagram
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [59]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [60]:
links = pd.concat([df2, df3, df4, df5, df6], axis=0)
#links = pd.concat([df2, df3], axis=0)

In [61]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [62]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [63]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [64]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

I was starting to think that the ride or dies from 0.5 max res might be ideal but FS1A isn't in there. Neither is IB017. It feels like this method is still punishing the neurons we probably care about.

In [94]:
ride_or_die_df[ride_or_die_df['celltype']=='FS1A']

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois


In [86]:
ride_or_die_df[ride_or_die_df['0.0']==3]['celltype'].value_counts()

celltype
CRE018       3
CRE056       2
LHPV5g1_a    2
CRE103       2
SIP035       2
            ..
LHPD2c7      1
SMP007       1
SMP541       1
M_spPN4t9    1
SMP023       1
Name: count, Length: 61, dtype: int64

# Ride or die from 0.05 max res
We already did max resolution 0.1 so I'll skip ahead to 0.05. Interestingly, between 0.5 and 0.1 nodes 1, 3, and 5 were eliminated.

In [65]:
# the max res to use
max_res = '0.05'

# the df to use since we don't need body ids
test = ovi_in_node_df[['0.0','0.05']].copy()

# get a list of module ids
module_ids = test[max_res].unique().tolist()

In [66]:
# create an empty list to store the module ids of the riders
ride_or_die_modules = []

for i in module_ids:
    # grab the rows that have the module id
    test_mod = test[test[max_res]==i]

    # first test whether there is more than one row since singletons won't count
    if test_mod.shape[0] > 1:

        # tests for number of unique rows
        if test_mod.drop_duplicates().shape[0] == 1:
            # if all rows are duplicates of each other, then append the module id to the list
            ride_or_die_modules.append(i)

# retrieve the rows of the original dataframe that have ride or die modules
ride_or_die_df = ovi_in_node_df[ovi_in_node_df[max_res].isin(ride_or_die_modules)]
ride_or_die_df

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
546,2254523290,547,2,60,57,227,287,249,,,...,Orphan,,Orphan,,,,"{'VMNP': {'post': 3, 'upstream': 3, 'mito': 2,...",,"['FLA(R)', 'PENP', 'VES(R)', 'VMNP']","['FLA(R)', 'PENP']"
1540,5901230025,1541,2,60,57,227,287,249,,,...,Orphan,,Orphan,,,,"{'VMNP': {'pre': 3, 'downstream': 17}, 'VES(R)...",,"['FLA(R)', 'PENP']","['FLA(R)', 'PENP', 'VES(R)', 'VMNP']"
2015,733100946,2016,1,79,5,468,698,738,,,...,Orphan,,Orphan hotknife,,,,"{'SNP(R)': {'pre': 20, 'post': 19, 'downstream...",,"['SMP(R)', 'SNP(R)']","['SMP(R)', 'SNP(R)']"
2090,763470548,2091,1,79,5,475,709,752,,,...,Orphan,,Orphan hotknife,,,,"{'SNP(R)': {'pre': 6, 'post': 21, 'downstream'...",,"['SMP(R)', 'SNP(R)']","['SMP(R)', 'SNP(R)']"


Make a Sankey.

In [67]:
# take only the columns with modularity data
ovi_HB_chunk = ride_or_die_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1
ovi_HB_chunk

Unnamed: 0,0.0,0.05,0.1,0.5,0.75,1.0,counts
546,2_r0.0,60_r0.05,57_r0.1,227_r0.5,287_r0.75,249_r1.0,1
1540,2_r0.0,60_r0.05,57_r0.1,227_r0.5,287_r0.75,249_r1.0,1
2015,1_r0.0,79_r0.05,5_r0.1,468_r0.5,698_r0.75,738_r1.0,1
2090,1_r0.0,79_r0.05,5_r0.1,475_r0.5,709_r0.75,752_r1.0,1


In [68]:
# these are the nodes that will be used in the sankey diagram
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [69]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [70]:
links = pd.concat([df2, df3, df4, df5, df6], axis=0)
#links = pd.concat([df2, df3], axis=0)

In [71]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [72]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [73]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [74]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

# With pre-conditions
We may want to consider pre-conditioning the dataframe before searching for ride or dies. Perhaps any NaN cell types should be disqualified since I think they might be the most likely to contaminate a nice cohesive module. Also, there is hardly anything we could say about them in terms of making testable hypotheses. Another possible precondition could be that the neuron must be traced but it appears that all non-NaN neurons are traced in this dataset.

In [161]:
# pre-conditions: the df may not have any NaN cell types
pretest = ovi_in_node_df.dropna(subset=['celltype'])

In [165]:
pretest[pretest['status']!='Traced']

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois


Another precondition could be that StatusLabel must be Traced or Roughly traced. There are some 'Leaves' which are probably left instances for the most part. I'll leave these alone for now.

In [167]:
pretest['statusLabel'].value_counts()

statusLabel
Roughly traced    1562
Traced              74
Leaves              36
Name: count, dtype: int64

In [169]:
pretest[pretest['statusLabel']=='Leaves']['instance'].value_counts()

instance
SMP316(PDM06)_L          2
SMP370(PDM08)_L          1
SMP422(PDM10)_L          1
IB017(PDM08)_L           1
AVLP473(PDL18)_L         1
SMP528(PDM24)_L          1
SMP251(PDL18)_L          1
SMP362(PDM07)_L          1
SMP176(PDL05)_L          1
SMP339(PDM07)_L          1
pC1d(PDM09)_L            1
PLP245(SCB011)_L         1
CL029(ADL25)_L           1
SMP359(PDM07)_L          1
SMP201(PDL06)_L          1
SMP040(ADL25)_L          1
SMP311(PDM06)_L          1
CL212(PDM13)_L           1
AVLP562(PVL15)_L         1
SMP175(PDL05)_L          1
mAL1(AVM15)_R            1
SMP184(PDL05)_L          1
SMP553(PDM30)_L          1
SMP551(PDM29)_L          1
SMP548(PDM29)_L          1
SMP549(PDM29)_L          1
SMP255(PDL18)_L          1
SMP550(PDM29)_L          1
MBON33(y2y3)(ADM05)_L    1
SMP333(PDM07)_L          1
SMP271(PDL20)_L          1
SMP249(PDL17)_L          1
SMP237(PDL11)_L          1
aMe24(PDM24)_L           1
SMP510(PDM22)_L          1
Name: count, dtype: int64

Those that are 'Leaves' seem to be the same neurons that have 'cropped'=True.

In [171]:
pretest['cropped'].value_counts()

cropped
False    1636
True       36
Name: count, dtype: int64

In [176]:
pretest[pretest['cropped']==True]['instance'].value_counts()

instance
SMP316(PDM06)_L          2
SMP370(PDM08)_L          1
SMP422(PDM10)_L          1
IB017(PDM08)_L           1
AVLP473(PDL18)_L         1
SMP528(PDM24)_L          1
SMP251(PDL18)_L          1
SMP362(PDM07)_L          1
SMP176(PDL05)_L          1
SMP339(PDM07)_L          1
pC1d(PDM09)_L            1
PLP245(SCB011)_L         1
CL029(ADL25)_L           1
SMP359(PDM07)_L          1
SMP201(PDL06)_L          1
SMP040(ADL25)_L          1
SMP311(PDM06)_L          1
CL212(PDM13)_L           1
AVLP562(PVL15)_L         1
SMP175(PDL05)_L          1
mAL1(AVM15)_R            1
SMP184(PDL05)_L          1
SMP553(PDM30)_L          1
SMP551(PDM29)_L          1
SMP548(PDM29)_L          1
SMP549(PDM29)_L          1
SMP255(PDL18)_L          1
SMP550(PDM29)_L          1
MBON33(y2y3)(ADM05)_L    1
SMP333(PDM07)_L          1
SMP271(PDL20)_L          1
SMP249(PDL17)_L          1
SMP237(PDL11)_L          1
aMe24(PDM24)_L           1
SMP510(PDM22)_L          1
Name: count, dtype: int64

## 0.1
I found that nothing survived at max res 0.05 with these preconditions. 

In [105]:
# the max res to use
max_res = '0.1'

# the df to use since we don't need body ids
test = pretest[['0.0','0.05','0.1']].copy()

# get a list of module ids
module_ids = test[max_res].unique().tolist()

In [106]:
# create an empty list to store the module ids of the riders
ride_or_die_modules = []

for i in module_ids:
    # grab the rows that have the module id
    test_mod = test[test[max_res]==i]

    # first test whether there is more than one row since singletons won't count
    if test_mod.shape[0] > 1:

        # tests for number of unique rows
        if test_mod.drop_duplicates().shape[0] == 1:
            # if all rows are duplicates of each other, then append the module id to the list
            ride_or_die_modules.append(i)

# retrieve the rows of the original dataframe that have ride or die modules
ride_or_die_df = ovi_in_node_df[ovi_in_node_df[max_res].isin(ride_or_die_modules)]
ride_or_die_df

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
51,1036503560,52,2,7,10,31,35,35,LHPV3a1_R,LHPV3a1,...,Traced,False,Roughly traced,PVL10,286.0,"[2035, 16392, 14088]","{'SNP(R)': {'post': 3, 'upstream': 3}, 'SLP(R)...",,"['AVLP(R)', 'EPA(L)', 'EPA(R)', 'GC', 'INP', '...","['AVLP(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EP..."
219,1223388206,220,2,7,10,31,35,35,WEDPN17_a_R,WEDPN17_a,...,Traced,False,Roughly traced,AVL09,,,"{'VLNP(R)': {'pre': 244, 'post': 127, 'downstr...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LAL(-G...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LAL(-G..."
229,1232041867,230,2,7,10,31,124,134,,,...,Traced,True,Leaves,,,,"{'INP': {'pre': 6, 'post': 16, 'downstream': 7...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(L)', 'EPA...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(R)', 'INP..."
242,1283173461,243,2,7,22,102,130,140,PS127_R,PS127,...,Traced,False,Roughly traced,PDM17,301.0,"[25778, 15112, 15296]","{'INP': {'pre': 374, 'post': 368, 'downstream'...",,"['IB', 'ICL(R)', 'INP', 'IPS(R)', 'LAL(-GA)(R)...","['IB', 'ICL(R)', 'INP', 'IPS(R)', 'PLP(R)', 'S..."
256,1313849992,257,2,7,22,102,130,140,PS098_L,PS098,...,Traced,False,Roughly traced,AVM19,334.0,"[25538, 32016, 17080]","{'INP': {'pre': 267, 'post': 144, 'downstream'...",,"['EPA(R)', 'IB', 'ICL(R)', 'INP', 'IPS(R)', 'P...","['EPA(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S..."
307,1448929584,308,2,7,10,31,124,134,,,...,Traced,True,Leaves,,,,"{'INP': {'pre': 10, 'post': 7, 'downstream': 7...",,"['EPA(L)', 'EPA(R)', 'GC', 'ICL(R)', 'INP', 'L...","['EPA(L)', 'EPA(R)', 'GC', 'ICL(R)', 'INP', 'L..."
1259,5813016625,1260,2,7,10,31,124,134,,,...,Traced,True,Leaves,,,,"{'LX(R)': {'pre': 69, 'post': 28, 'downstream'...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(L)', 'EPA...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(L)', 'EPA..."
1442,5813076392,1443,2,7,10,31,124,134,PLP187_R,PLP187,...,Traced,False,Roughly traced,PVL10,286.0,"[1223, 16304, 13128]","{'SNP(R)': {'pre': 8, 'post': 100, 'downstream...",,"['AVLP(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EP...","['AVLP(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EP..."
2065,759681018,2066,2,7,10,31,124,134,SMP016_b_R,SMP016_b,...,Traced,False,Roughly traced,ADL08,292.5,"[12101, 32431, 17560]","{'SNP(R)': {'pre': 22, 'post': 123, 'downstrea...",,"['ATL(L)', 'ATL(R)', 'CRE(-ROB,-RUB)(R)', 'CRE...","['ATL(L)', 'ATL(R)', 'CRE(-ROB,-RUB)(R)', 'CRE..."
2263,832646151,2264,2,8,75,499,596,782,SMP214_R,SMP214,...,Traced,False,Roughly traced,PDL06,261.0,"[5943, 19514, 5360]","{'SNP(R)': {'pre': 86, 'post': 134, 'downstrea...",,"['ATL(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'SIP(R)..."


Make a Sankey.

In [108]:
# take only the columns with modularity data
ovi_HB_chunk = ride_or_die_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1

In [109]:
# these are the nodes that will be used in the sankey diagram
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [110]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [111]:
links = pd.concat([df2, df3, df4, df5, df6], axis=0)
#links = pd.concat([df2, df3], axis=0)

In [112]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [113]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [114]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [115]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

## 0.5

In [151]:
# the max res to use
max_res = '0.5'

# the df to use since we don't need body ids
test = pretest[['0.0','0.05','0.1','0.5']].copy()

# get a list of module ids
module_ids = test[max_res].unique().tolist()

In [152]:
# create an empty list to store the module ids of the riders
ride_or_die_modules = []

for i in module_ids:
    # grab the rows that have the module id
    test_mod = test[test[max_res]==i]

    # first test whether there is more than one row since singletons won't count
    if test_mod.shape[0] > 1:

        # tests for number of unique rows
        if test_mod.drop_duplicates().shape[0] == 1:
            # if all rows are duplicates of each other, then append the module id to the list
            ride_or_die_modules.append(i)

# retrieve the rows of the original dataframe that have ride or die modules
ride_or_die_df = ovi_in_node_df[ovi_in_node_df[max_res].isin(ride_or_die_modules)]
ride_or_die_df

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
2,1006928515,3,1,1,1,3,3,3,CL300_R,CL300,...,Traced,False,Roughly traced,PVL13,236.0,"[12083, 10523, 16816]","{'INP': {'pre': 79, 'post': 126, 'downstream':...",,"['ATL(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S...","['ATL(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S..."
3,1007260806,4,2,1,1,4,4,4,CL301_R,CL301,...,Traced,False,Roughly traced,PVL13,236.0,"[13524, 10108, 16480]","{'INP': {'pre': 40, 'post': 128, 'downstream':...",,"['GOR(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S...","['IB', 'ICL(R)', 'INP', 'PLP(R)', 'SCL(R)', 'S..."
5,1008085163,6,1,1,1,6,6,6,,,...,Traced,True,Leaves,,,,"{'SNP(L)': {'pre': 10, 'post': 9, 'downstream'...",,"['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']","['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']"
10,1008966615,11,1,1,1,3,8,3,CL300_R,CL300,...,Traced,False,Roughly traced,PVL13,236.0,"[13492, 9603, 16400]","{'INP': {'pre': 42, 'post': 132, 'downstream':...",,"['IB', 'ICL(R)', 'INP', 'PLP(R)', 'SCL(R)', 'S...","['IB', 'ICL(R)', 'INP', 'PLP(R)', 'VLNP(R)']"
14,1010044978,15,3,2,3,10,10,12,FS3(FQ11)_C3_R,FS3,...,Traced,False,Roughly traced,PDM03,236.0,"[17547, 9890, 13960]","{'SNP(R)': {'pre': 31, 'post': 18, 'downstream...",FB1d367SMP,"['CX', 'FB', 'FB-column3', 'FBl1', 'FBl2', 'FB...","['CX', 'FB', 'FB-column3', 'FBl1', 'FBl2', 'FB..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2486,983063787,2487,3,8,8,366,404,370,,,...,Traced,True,Leaves,,,,"{'SNP(R)': {'pre': 53, 'post': 19, 'downstream...",,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2503,986098785,2504,2,1,6,427,644,676,AOTU008_a_R,AOTU008_a,...,Traced,False,Roughly traced,ADL08,258.5,"[10014, 33018, 17344]","{'SNP(R)': {'pre': 111, 'post': 179, 'downstre...",aSP11b candidates,"['AOTU(R)', 'ATL(R)', 'CRE(-ROB,-RUB)(R)', 'CR...","['AOTU(R)', 'ATL(R)', 'IB', 'INP', 'MB(+ACA)(R..."
2505,987117151,2506,2,7,7,53,63,68,LAL010_R,LAL010,...,Traced,False,Roughly traced,ADM03,292.5,"[15520, 32234, 10016]","{'SNP(R)': {'pre': 2, 'post': 82, 'downstream'...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(R)', 'INP...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EPA(R)', 'INP..."
2509,988909130,2510,2,3,4,389,559,572,FB5V_R,FB5V,...,Traced,False,Roughly traced,AVM10,296.5,"[13226, 32024, 18600]","{'SNP(R)': {'pre': 1, 'post': 28, 'downstream'...",CRELALFB5,"['AB(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX',...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."


Make a Sankey.

In [118]:
# take only the columns with modularity data
ovi_HB_chunk = ride_or_die_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1

In [119]:
# these are the nodes that will be used in the sankey diagram
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [120]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [121]:
links = pd.concat([df2, df3, df4, df5, df6], axis=0)
#links = pd.concat([df2, df3], axis=0)

In [122]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [123]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [124]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [125]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

In [154]:
ride_or_die_df[ride_or_die_df['celltype']=='FS1A']

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois


## 0.75

In [155]:
# the max res to use
max_res = '0.75'

# the df to use since we don't need body ids
test = pretest[['0.0','0.05','0.1','0.5','0.75']].copy()

# get a list of module ids
module_ids = test[max_res].unique().tolist()

In [156]:
# create an empty list to store the module ids of the riders
ride_or_die_modules = []

for i in module_ids:
    # grab the rows that have the module id
    test_mod = test[test[max_res]==i]

    # first test whether there is more than one row since singletons won't count
    if test_mod.shape[0] > 1:

        # tests for number of unique rows
        if test_mod.drop_duplicates().shape[0] == 1:
            # if all rows are duplicates of each other, then append the module id to the list
            ride_or_die_modules.append(i)

# retrieve the rows of the original dataframe that have ride or die modules
ride_or_die_df = ovi_in_node_df[ovi_in_node_df[max_res].isin(ride_or_die_modules)]
ride_or_die_df

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
5,1008085163,6,1,1,1,6,6,6,,,...,Traced,True,Leaves,,,,"{'SNP(L)': {'pre': 10, 'post': 9, 'downstream'...",,"['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']","['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']"
21,1013066710,22,3,3,4,16,16,17,CRE071_R,CRE071,...,Traced,False,Roughly traced,AVM17,396.5,"[11636, 34239, 26536]","{'SNP(R)': {'pre': 12, 'post': 95, 'downstream...",,"['AL(R)', 'AL-DP1l(R)', 'CRE(-ROB,-RUB)(R)', '...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
23,1013416147,24,2,3,4,18,18,18,CRE100_R,CRE100,...,Traced,False,Roughly traced,PDM27,401.0,"[23180, 15645, 12816]","{'SNP(L)': {'pre': 1, 'post': 3, 'downstream':...",CL-VeSp,"['CAN(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'FLA...","['CAN(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'FLA..."
35,1014802430,36,2,7,7,20,21,25,LAL175_R,LAL175,...,Traced,False,Roughly traced,PVM03,415.5,"[20606, 9708, 21888]","{'SNP(R)': {'pre': 8, 'post': 94, 'downstream'...",,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LAL(-G..."
41,1017880794,42,2,3,4,16,27,30,,,...,Traced,True,Leaves,,,,"{'SNP(R)': {'pre': 28, 'post': 15, 'downstream...",,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2480,979368408,2481,3,2,3,10,70,99,FS3(FQ11)_C4_R,FS3,...,Traced,False,Roughly traced,PDM03,301.0,"[20038, 13800, 11392]","{'SNP(R)': {'pre': 34, 'post': 24, 'downstream...",FB1d367SMP,"['CX', 'FB', 'FB-column3', 'FBl1', 'FBl2', 'FB...","['CX', 'FB', 'FB-column3', 'FBl6', 'FBl7', 'FB..."
2481,980978890,2482,2,3,4,414,619,648,CRE061_R,CRE061,...,Traced,False,Roughly traced,AVM13,361.5,"[12057, 34635, 24688]","{'SNP(R)': {'pre': 24, 'post': 24, 'downstream...",,"['AVLP(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EP...","['AVLP(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'EP..."
2494,983767417,2495,2,7,7,31,752,134,LAL175_R,LAL175,...,Traced,False,Roughly traced,PVM03,335.0,"[17040, 9200, 20880]","{'SNP(R)': {'pre': 1, 'post': 13, 'downstream'...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LAL(-G...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LAL(-G..."
2503,986098785,2504,2,1,6,427,644,676,AOTU008_a_R,AOTU008_a,...,Traced,False,Roughly traced,ADL08,258.5,"[10014, 33018, 17344]","{'SNP(R)': {'pre': 111, 'post': 179, 'downstre...",aSP11b candidates,"['AOTU(R)', 'ATL(R)', 'CRE(-ROB,-RUB)(R)', 'CR...","['AOTU(R)', 'ATL(R)', 'IB', 'INP', 'MB(+ACA)(R..."


In [128]:
# take only the columns with modularity data
ovi_HB_chunk = ride_or_die_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1

In [129]:
# these are the nodes that will be used in the sankey diagram
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [130]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [131]:
links = pd.concat([df2, df3, df4, df5, df6], axis=0)
#links = pd.concat([df2, df3], axis=0)

In [132]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [133]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [134]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [135]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

In [157]:
ride_or_die_df[ride_or_die_df['celltype']=='FS1A']

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois


## 1.0

In [158]:
# the max res to use
max_res = '1.0'

# the df to use since we don't need body ids
test = pretest[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# get a list of module ids
module_ids = test[max_res].unique().tolist()

In [159]:
# create an empty list to store the module ids of the riders
ride_or_die_modules = []

for i in module_ids:
    # grab the rows that have the module id
    test_mod = test[test[max_res]==i]

    # first test whether there is more than one row since singletons won't count
    if test_mod.shape[0] > 1:

        # tests for number of unique rows
        if test_mod.drop_duplicates().shape[0] == 1:
            # if all rows are duplicates of each other, then append the module id to the list
            ride_or_die_modules.append(i)

# retrieve the rows of the original dataframe that have ride or die modules
ride_or_die_df = ovi_in_node_df[ovi_in_node_df[max_res].isin(ride_or_die_modules)]
ride_or_die_df

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
4,1008024276,5,3,2,2,5,5,5,FB5N_R,FB5N,...,Traced,False,Roughly traced,AVM08,472.5,"[19178, 29711, 37312]","{'SNP(L)': {'post': 5, 'upstream': 5, 'mito': ...",SMPCREFB5_4,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
5,1008085163,6,1,1,1,6,6,6,,,...,Traced,True,Leaves,,,,"{'SNP(L)': {'pre': 10, 'post': 9, 'downstream'...",,"['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']","['ATL(L)', 'IB', 'INP', 'SMP(L)', 'SNP(L)']"
7,1008369339,8,3,2,2,5,5,8,FS1B(FQ9)_C6_R,FS1B,...,Traced,False,Roughly traced,PDM02,301.0,"[22606, 16349, 10032]","{'SNP(L)': {'pre': 43, 'post': 24, 'downstream...",FB25SMPSMP,"['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FBl2',...","['CRE(-RUB)(L)', 'CRE(L)', 'INP', 'SMP(L)', 'S..."
8,1008391068,9,3,2,2,5,5,5,FC2B(FQ7)_C6_L,FC2B,...,Traced,False,Roughly traced,PDM03,325.5,"[29681, 16321, 7328]","{'CX': {'pre': 167, 'post': 2135, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
9,1008693605,10,3,2,2,5,5,9,FC2C(FQ7)_C7_L,FC2C,...,Traced,False,Roughly traced,PDM03,291.0,"[32830, 14210, 12336]","{'CX': {'pre': 91, 'post': 1051, 'downstream':...",FB1d367CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2490,983426636,2491,3,2,2,5,5,77,FS1A(FQ9)_C6_L,FS1A,...,Traced,False,Roughly traced,PDM03,271.0,"[30775, 16354, 10640]","{'SNP(L)': {'pre': 19, 'post': 14, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2491,983430664,2492,3,2,2,5,5,20,FS1A(FQ9)_C3_R,FS1A,...,Traced,False,Roughly traced,PDM03,236.0,"[16993, 11130, 15072]","{'SNP(R)': {'pre': 37, 'post': 32, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2495,983767602,2496,3,2,2,5,5,20,FS1B(FQ9)_C2_L,FS1B,...,Traced,False,Roughly traced,PDM01,301.0,"[27031, 16664, 12416]","{'SNP(R)': {'pre': 62, 'post': 30, 'downstream...",FB25SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2503,986098785,2504,2,1,6,427,644,676,AOTU008_a_R,AOTU008_a,...,Traced,False,Roughly traced,ADL08,258.5,"[10014, 33018, 17344]","{'SNP(R)': {'pre': 111, 'post': 179, 'downstre...",aSP11b candidates,"['AOTU(R)', 'ATL(R)', 'CRE(-ROB,-RUB)(R)', 'CR...","['AOTU(R)', 'ATL(R)', 'IB', 'INP', 'MB(+ACA)(R..."


In [138]:
# take only the columns with modularity data
ovi_HB_chunk = ride_or_die_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1

In [139]:
# these are the nodes that will be used in the sankey diagram
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [140]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [141]:
links = pd.concat([df2, df3, df4, df5, df6], axis=0)
#links = pd.concat([df2, df3], axis=0)

In [142]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [143]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [144]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [145]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

In [147]:
ride_or_die_df[ride_or_die_df['0.0']==3]

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
4,1008024276,5,3,2,2,5,5,5,FB5N_R,FB5N,...,Traced,False,Roughly traced,AVM08,472.5,"[19178, 29711, 37312]","{'SNP(L)': {'post': 5, 'upstream': 5, 'mito': ...",SMPCREFB5_4,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
7,1008369339,8,3,2,2,5,5,8,FS1B(FQ9)_C6_R,FS1B,...,Traced,False,Roughly traced,PDM02,301.0,"[22606, 16349, 10032]","{'SNP(L)': {'pre': 43, 'post': 24, 'downstream...",FB25SMPSMP,"['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FBl2',...","['CRE(-RUB)(L)', 'CRE(L)', 'INP', 'SMP(L)', 'S..."
8,1008391068,9,3,2,2,5,5,5,FC2B(FQ7)_C6_L,FC2B,...,Traced,False,Roughly traced,PDM03,325.5,"[29681, 16321, 7328]","{'CX': {'pre': 167, 'post': 2135, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
9,1008693605,10,3,2,2,5,5,9,FC2C(FQ7)_C7_L,FC2C,...,Traced,False,Roughly traced,PDM03,291.0,"[32830, 14210, 12336]","{'CX': {'pre': 91, 'post': 1051, 'downstream':...",FB1d367CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
12,1009371535,13,3,2,2,5,5,10,FC2B(FQ7)_C3_L,FC2B,...,Traced,False,Roughly traced,PDM01,277.5,"[28298, 18341, 7600]","{'CX': {'pre': 130, 'post': 2051, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2487,983067799,2488,3,2,2,5,5,20,FS1A(FQ9)_C3_L,FS1A,...,Traced,False,Roughly traced,PDM01,291.0,"[29498, 16181, 12232]","{'SNP(R)': {'pre': 48, 'post': 37, 'downstream...",FB2-6SMPSMP,"['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FB-col...","['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FB-col..."
2488,983093876,2489,3,2,2,5,5,51,FS1A(FQ9)_C8_L,FS1A,...,Traced,False,Roughly traced,PDM04,261.5,"[32853, 11568, 18616]","{'SNP(R)': {'pre': 54, 'post': 50, 'downstream...",FB2-6SMPSMP,"['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FBl1',...","['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FBl2',..."
2490,983426636,2491,3,2,2,5,5,77,FS1A(FQ9)_C6_L,FS1A,...,Traced,False,Roughly traced,PDM03,271.0,"[30775, 16354, 10640]","{'SNP(L)': {'pre': 19, 'post': 14, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2491,983430664,2492,3,2,2,5,5,20,FS1A(FQ9)_C3_R,FS1A,...,Traced,False,Roughly traced,PDM03,236.0,"[16993, 11130, 15072]","{'SNP(R)': {'pre': 37, 'post': 32, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."


In [160]:
ride_or_die_df[ride_or_die_df['celltype']=='FS1A']

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
26,1014102651,27,3,2,2,5,5,20,FS1A(FQ9)_C2_L,FS1A,...,Traced,False,Roughly traced,PDM01,301.0,"[26807, 16994, 11456]","{'SNP(R)': {'pre': 45, 'post': 34, 'downstream...",FB2-6SMPSMP,"['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FB-col...","['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FB-col..."
28,1014223948,29,3,2,2,5,5,20,FS1A(FQ9)_C2_R,FS1A,...,Traced,False,Roughly traced,PDM04,291.5,"[18950, 10470, 18776]","{'SNP(L)': {'pre': 36, 'post': 43, 'downstream...",FB2-6SMPSMP,"['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FB-col...","['CRE(-RUB)(L)', 'CRE(L)', 'INP', 'SMP(L)', 'S..."
30,1014461486,31,3,2,2,5,5,20,FS1A(FQ9)_C3_L,FS1A,...,Traced,False,Roughly traced,PDM01,291.0,"[28899, 16232, 12088]","{'SNP(R)': {'pre': 37, 'post': 23, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
31,1014465452,32,3,2,2,5,5,10,FS1A(FQ9)_C3_L,FS1A,...,Traced,False,Roughly traced,PDM01,291.0,"[28255, 16569, 12008]","{'SNP(L)': {'pre': 25, 'post': 28, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
32,1014465588,33,3,2,2,5,5,23,FS1A(FQ9)_C9_L,FS1A,...,Traced,False,Roughly traced,PDM04,245.0,"[33446, 11602, 19792]","{'SNP(R)': {'pre': 49, 'post': 31, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
33,1014465666,34,3,2,2,5,5,24,FS1A(FQ9)_C9_L,FS1A,...,Traced,False,Roughly traced,PDM04,300.0,"[33748, 11191, 19440]","{'SNP(R)': {'pre': 45, 'post': 37, 'downstream...",FB2-6SMPSMP,"['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FBl2',...","['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FBl4',..."
34,1014474192,35,3,2,2,5,5,20,FS1A(FQ9)_C2_L,FS1A,...,Traced,False,Roughly traced,PDM01,301.0,"[27300, 17574, 10448]","{'SNP(L)': {'pre': 31, 'post': 18, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
57,1039063931,58,3,2,2,5,5,39,FS1A(FQ9)_C5_R,FS1A,...,Traced,False,Roughly traced,PDM02,301.0,"[19582, 14276, 13168]","{'SNP(R)': {'pre': 28, 'post': 21, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
72,1044800536,73,3,2,2,5,5,50,FS1A(FQ9)_C7_L,FS1A,...,Traced,False,Roughly traced,PDM03,241.0,"[33577, 12183, 17312]","{'SNP(L)': {'pre': 24, 'post': 21, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
74,1044918145,75,3,2,2,5,5,20,FS1A(FQ9)_C7_R,FS1A,...,Traced,False,Roughly traced,PDM01,301.0,"[23421, 17021, 10288]","{'CX': {'pre': 1, 'post': 1086, 'downstream': ...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."


FS1As reappear only at max res 1.0. It seems like traveling with a large group is somewhat of a liability. It is easier to be a ride or die when you travel in pairs but not in groups of 44. This ride or die analysis must be done on the oviINr input connectome by cell types. That way we can identify cell types that ride as pairs or larger groups. 