# Ride or die neurons in the connectome

The goal of this notebook is to find the "ride or die" contingents in oviIN's connectome. These are neurons that stick together throughout all of the modularity resolutions. 
1. Set the maximum resolution to use to assess whether neurons stuck together or not. 
2. Within each module at the maximum resolution, determine whether neurons consistently traveled through the same modules at lower resolutions. If yes, they are a "ride or die" contingent. If no, then the entire module is discarded.
3. Plot a Sankey of the ride or die contingent to check the results.

I expect the results from this analysis to be slightly different from the results of the analysis that Rhessa is working on. There, she is disqualifying modules at every resolution wherever the modules had recombined (crossing partitions). 

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [3]:
ovi_in_node_df = pd.read_csv('ovi_preprocessed/preprocessed_inputs-v1.2.1/preprocessed_nodes.csv')
ovi_in_node_df

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
0,1003215282,1,1,1,1,1,1,1,CL229_R,CL229,...,Traced,False,Roughly traced,PDM19,301.0,"[23044, 14981, 11600]","{'INP': {'pre': 87, 'post': 351, 'downstream':...",,"['EPA(R)', 'GOR(R)', 'IB', 'ICL(R)', 'INP', 'S...","['GOR(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S..."
1,1005952640,2,2,1,1,2,2,2,IB058_R,IB058,...,Traced,False,Roughly traced,PVL20,,,"{'INP': {'pre': 464, 'post': 1327, 'downstream...",,"['ATL(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S...","['ATL(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S..."
2,1006928515,3,1,1,1,3,3,3,CL300_R,CL300,...,Traced,False,Roughly traced,PVL13,236.0,"[12083, 10523, 16816]","{'INP': {'pre': 79, 'post': 126, 'downstream':...",,"['ATL(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S...","['ATL(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S..."
3,1007260806,4,2,1,1,4,4,4,CL301_R,CL301,...,Traced,False,Roughly traced,PVL13,236.0,"[13524, 10108, 16480]","{'INP': {'pre': 40, 'post': 128, 'downstream':...",,"['GOR(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S...","['IB', 'ICL(R)', 'INP', 'PLP(R)', 'SCL(R)', 'S..."
4,1008024276,5,3,2,2,5,5,5,FB5N_R,FB5N,...,Traced,False,Roughly traced,AVM08,472.5,"[19178, 29711, 37312]","{'SNP(L)': {'post': 5, 'upstream': 5, 'mito': ...",SMPCREFB5_4,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,987273073,2507,3,8,8,409,604,629,(PVL05)_L,,...,Traced,False,Roughly traced,,,,"{'SNP(R)': {'pre': 65, 'post': 52, 'downstream...",,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2507,987842109,2508,3,9,23,533,780,815,,,...,Orphan,,Orphan hotknife,,,,"{'SNP(R)': {'pre': 2, 'post': 13, 'downstream'...",,"['SMP(R)', 'SNP(R)']","['SMP(R)', 'SNP(R)']"
2508,988567837,2509,2,3,4,16,58,63,FB4G_R,FB4G,...,Traced,False,Roughly traced,AVM08,,,"{'SNP(R)': {'pre': 6, 'post': 73, 'downstream'...",CRELALFB4_3,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
2509,988909130,2510,2,3,4,389,559,572,FB5V_R,FB5V,...,Traced,False,Roughly traced,AVM10,296.5,"[13226, 32024, 18600]","{'SNP(R)': {'pre': 1, 'post': 28, 'downstream'...",CRELALFB5,"['AB(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX',...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."


In [1]:
# take only the columns with modularity data
ovi_HB_chunk = ovi_in_node_df[['0.0','0.05','0.1']].copy()

NameError: name 'ovi_in_node_df' is not defined

I want to see a Sankey first. I'm going to try this out using resolution 0.1 as the maximum resolution.

In [25]:
import plotly.graph_objects as go
def create_Sankey(df0):
    """Girl, turn all that code into a function! Only pass a dataframe that has resolution columns and no other columns."""

    # make a copy of the dataframe since we will be modifying it in the function
    df = df0.copy()

    # get the columns of the dataframe
    columns = df.columns

    # add a column of ones to ovi_HB_chunk
    df['counts'] = 1

    # these are for the nodes and links that will be used in the sankey diagram
    nodes = []
    links = pd.DataFrame()

    # append _r0.0 to values in column '0.0' and so on
    for col in columns:
        nodes = nodes + df[col].unique().tolist()
        print(col)
        if col != columns[-1]:
            # create the Sankey levels
            df2 = df.groupby([col,columns[columns.get_loc(col)+1]])['counts'].count().reset_index()
            df2.columns = ['source','target','value']
            links = pd.concat([links, df2], axis=0)
        
        # rename columns after doing the above
        #df[col] = df[col].astype(str) + '_r' + col
        
    # this is basically a mapping dictionary of nodes enumerated
    mapping_dict = {k: v for v, k in enumerate(nodes)}

    # replace source and target with enumerated values
    links['source'] = links['source'].map(mapping_dict)
    links['target'] = links['target'].map(mapping_dict)

    # turn this table into a dictionary for making the sankey diagram
    links_dict = links.to_dict(orient='list')

    # plot it
    fig = go.Figure(data=[go.Sankey(
        node = dict(
            pad = 15,
            thickness=20,
            #line=dict(color='blue', width=0.5),
            label = nodes,
            color='green'
        ),
        link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
        )
    )
    ])
    #fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
    fig.show()

    #return nodes, links

In [26]:
ovi_HB_chunk

Unnamed: 0,0.0,0.05,0.1,0.5,0.75,1.0
0,1,1,1,1,1,1
1,2,1,1,2,2,2
2,1,1,1,3,3,3
3,2,1,1,4,4,4
4,3,2,2,5,5,5
...,...,...,...,...,...,...
2506,3,8,8,409,604,629
2507,3,9,23,533,780,815
2508,2,3,4,16,58,63
2509,2,3,4,389,559,572


In [27]:
create_Sankey(ovi_HB_chunk)

0.0
0.05
0.1
0.5
0.75
1.0


In [23]:
mynodes, mylinks = create_Sankey(ovi_HB_chunk)

TypeError: cannot concatenate object of type '<class 'type'>'; only Series and DataFrame objs are valid

In [None]:
mylinks

In [None]:
fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        color='green'
    ),
    link = dict(
    source= links_dict['source'],
    target = links_dict['target'],
    value = links_dict['value']
    )
)
])
#fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

In [22]:
# take only the columns with modularity data
ovi_HB_chunk = ovi_in_node_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

In [4]:
# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1
ovi_HB_chunk

Unnamed: 0,0.0,0.05,0.1,0.5,0.75,1.0,counts
0,1_r0.0,1_r0.05,1_r0.1,1_r0.5,1_r0.75,1_r1.0,1
1,2_r0.0,1_r0.05,1_r0.1,2_r0.5,2_r0.75,2_r1.0,1
2,1_r0.0,1_r0.05,1_r0.1,3_r0.5,3_r0.75,3_r1.0,1
3,2_r0.0,1_r0.05,1_r0.1,4_r0.5,4_r0.75,4_r1.0,1
4,3_r0.0,2_r0.05,2_r0.1,5_r0.5,5_r0.75,5_r1.0,1
...,...,...,...,...,...,...,...
2506,3_r0.0,8_r0.05,8_r0.1,409_r0.5,604_r0.75,629_r1.0,1
2507,3_r0.0,9_r0.05,23_r0.1,533_r0.5,780_r0.75,815_r1.0,1
2508,2_r0.0,3_r0.05,4_r0.1,16_r0.5,58_r0.75,63_r1.0,1
2509,2_r0.0,3_r0.05,4_r0.1,389_r0.5,559_r0.75,572_r1.0,1


In [5]:
# these are the nodes that will be used in the sankey diagram
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [6]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

In [7]:
# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [8]:
#links = pd.concat([df2, df3, df4, df5, df6], axis=0)
links = pd.concat([df2, df3], axis=0)

In [9]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [10]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [11]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [12]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

Looking by eye, there seems to be a lot of stuff that rides together through the resolutions. For the algorithm, we will loop through each module of the 0.1 resolution and test for togetherness in the coarser resolutions.

In [None]:
# the max res to use
max_res = '0.1'

# the df to use since we don't need body ids
test = ovi_in_node_df[['0.0','0.05','0.1']].copy()

# get a list of module ids
module_ids = test[max_res].unique().tolist()


In [None]:
# create an empty list to store the module ids of the riders
ride_or_die_modules = []

for i in module_ids:
    # grab the rows that have the module id
    test_mod = test[test[max_res]==i]

    # first test whether there is more than one row since singletons won't count
    if test_mod.shape[0] > 1:

        # tests for number of unique rows
        if test_mod.drop_duplicates().shape[0] == 1:
            ride_or_die_modules.append(i)

# retrieve the rows of the original dataframe that have ride or die modules
ride_or_die_df = ovi_in_node_df[ovi_in_node_df[max_res].isin(ride_or_die_modules)]
ride_or_die_df


In [None]:
ride_or_die_modules

Make a Sankey.

In [None]:
# take only the columns with modularity data
ovi_HB_chunk = ride_or_die_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1
ovi_HB_chunk

In [None]:
# these are the nodes that will be used in the sankey diagram
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [None]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

In [None]:
# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [None]:
#links = pd.concat([df2, df3, df4, df5, df6], axis=0)
links = pd.concat([df2, df3], axis=0)

In [None]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [None]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [None]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [None]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

Interestingly, only stuff from coarse clusters 2 and 4 survived. I would've expected something from coarse cluster 3 to pull through but looking at cluster 2 from the 0.1 resolution, most of the stuff in there rides together but it only takes a few errant nodes to contaminate the entire module. This makes me wonder whether chosing a finer maximum resolution might actually retain more stuff.

In [None]:
# test
test = ovi_in_node_df[['0.0','0.05','0.1']].copy()
test = test[test[max_res]==2]
test 

In [None]:
test.drop_duplicates().shape[0]

Using a finer maximum resolution.

In [None]:
# the max res to use
max_res = '1.0'

# the df to use since we don't need body ids
test = ovi_in_node_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# get a list of module ids
module_ids = test[max_res].unique().tolist()

In [None]:
# create an empty list to store the module ids of the riders
ride_or_die_modules = []

for i in module_ids:
    # grab the rows that have the module id
    test_mod = test[test[max_res]==i]

    # first test whether there is more than one row since singletons won't count
    if test_mod.shape[0] > 1:

        # tests for number of unique rows
        if test_mod.drop_duplicates().shape[0] == 1:
            ride_or_die_modules.append(i)

# retrieve the rows of the original dataframe that have ride or die modules
ride_or_die_df = ovi_in_node_df[ovi_in_node_df[max_res].isin(ride_or_die_modules)]
ride_or_die_df

Make a Sankey.

In [None]:
# take only the columns with modularity data
ovi_HB_chunk = ride_or_die_df[['0.0','0.05','0.1','0.5','0.75','1.0']].copy()

# append _r0.0 to values in column '0.0' and so on
ovi_HB_chunk['0.0'] = ovi_HB_chunk['0.0'].astype(str) + '_r0.0'
ovi_HB_chunk['0.05'] = ovi_HB_chunk['0.05'].astype(str) + '_r0.05'
ovi_HB_chunk['0.1'] = ovi_HB_chunk['0.1'].astype(str) + '_r0.1'
ovi_HB_chunk['0.5'] = ovi_HB_chunk['0.5'].astype(str) + '_r0.5'
ovi_HB_chunk['0.75'] = ovi_HB_chunk['0.75'].astype(str) + '_r0.75'
ovi_HB_chunk['1.0'] = ovi_HB_chunk['1.0'].astype(str) + '_r1.0'

# add a column of ones to ovi_HB_chunk
ovi_HB_chunk['counts'] = 1
ovi_HB_chunk

In [None]:
# these are the nodes that will be used in the sankey diagram
nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist() + ovi_HB_chunk['0.5'].unique().tolist() + ovi_HB_chunk['0.75'].unique().tolist() + ovi_HB_chunk['1.0'].unique().tolist()
#nodes = ovi_HB_chunk['0.0'].unique().tolist() + ovi_HB_chunk['0.05'].unique().tolist() + ovi_HB_chunk['0.1'].unique().tolist()

In [None]:
# use this for sankeys with only 2 levels
df2 = ovi_HB_chunk.groupby(['0.0','0.05'])['counts'].count().reset_index()
df2.columns = ['source','target','value']

# use this for sankeys with >2 levels
df3 = ovi_HB_chunk[['0.05','0.1','counts']].groupby(['0.05','0.1']).count().reset_index()
df3.columns = ['source','target','value']

df4 = ovi_HB_chunk[['0.1','0.5','counts']].groupby(['0.1','0.5']).count().reset_index()
df4.columns = ['source','target','value']

df5 = ovi_HB_chunk[['0.5','0.75','counts']].groupby(['0.5','0.75']).count().reset_index()
df5.columns = ['source','target','value']

df6 = ovi_HB_chunk[['0.75','1.0','counts']].groupby(['0.75','1.0']).count().reset_index()
df6.columns = ['source','target','value']

In [None]:
links = pd.concat([df2, df3, df4, df5, df6], axis=0)
#links = pd.concat([df2, df3], axis=0)

In [None]:
# this is basically a mapping dictionary of nodes enumerated
mapping_dict = {k: v for v, k in enumerate(nodes)}

In [None]:
# replace source and target with enumerated values
links['source'] = links['source'].map(mapping_dict)
links['target'] = links['target'].map(mapping_dict)

In [None]:
# turn this table into a dictionary for making the sankey diagram
links_dict = links.to_dict(orient='list')

In [None]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness=20,
        #line=dict(color='blue', width=0.5),
        label = nodes,
        #color='green'
    ),
    link = dict(
        source= links_dict['source'],
        target = links_dict['target'],
        value = links_dict['value']
    )
    
)
])
fig.update_layout(title='oviINr inputs modularity data across resolutions', height=1000)
fig.show()

Using a higher maximum resolution did retain more nodes. We now have about 30% of the input nodes qualifying as ride or die. But something a little strange is happening. I'm seeing crossover.

I took an example below and found that this crossover is valid. There are fine modules that not only stick together but they break off and recombine with other modules together too. They really are ride or die.

In [None]:
ride_or_die_df[(ride_or_die_df['0.05'] == 8) & (ride_or_die_df['0.1'] == 20)]

In [None]:
ovi_in_node_df[ovi_in_node_df['1.0'] == 511]

How many ride or die neurons are type None? Around 27%. Compare that to 33% for the entire oviINr input connectome. It's more comparable than I would've imagined.

In [None]:
ride_or_die_df[ride_or_die_df['celltype']=='None']

In [None]:
839/2511

In [None]:
193/len(ride_or_die_df)