# Ride or die neurons in the Celltype connectome
This notebook uses the ride_or_die.ipynb notebook written by Dr. Gabrielle J Gutierrez as a basis. The goal is to find the "ride or die" celltypes in the oviIN connectome of celltypes. These are the celltypes that stick together throughout all the modularity resolutions.

1. Set the maximum resolution to use to assess whether neurons stuck together or not
2. Within each module at the maximum resolution, find which neurons traveled together consistently through the same modules at lower resolutions. \
    a. If yes, they are "ride or die" celltypes. \
    b. If no, the whole module is discarded.
3. Plot a sankey of the ride or die contingent to check results

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import plotly.graph_objects as go

In [2]:
# Locate the preprocessed node table without editing the notebook per machine.
# Set the OVIIN_NODES_CSV environment variable to use a custom location;
# otherwise the first known path that exists on this machine is used.
ovi_nodes_candidates = [
    # Use on personal computer
    '/Users/rhessa/flybrain-clustering/oviIN_celltype/oviIN_combined/full/preprocessed-v1.2.1/preprocessed_nodes.csv',
    # Use on lab computer
    '/Users/rw2822/Documents/GitHub/flybrain-clustering/oviIN_celltype/oviIN_combined/full/preprocessed-v1.2.1/preprocessed_nodes.csv',
]

ovi_nodes_path = os.environ.get('OVIIN_NODES_CSV')
if not ovi_nodes_path:
    # Fall back to the first candidate when none exists so that read_csv
    # still raises FileNotFoundError naming a concrete path.
    ovi_nodes_path = next(
        (path for path in ovi_nodes_candidates if os.path.exists(path)),
        ovi_nodes_candidates[0],
    )

ovi_cell  = pd.read_csv(ovi_nodes_path)

## Sankey function that accepts a dataframe whose columns are the chi values

This is a simple function tested in sankey_sandbox.ipynb!

In [18]:
# Function for creating the sankey diagram
def create_sankey(df,  prominent=False, prom_types=None, width = None, height = None, title=None):
    """Build and display a sankey diagram of how rows flow between modules.

    Each column of ``df`` that is used becomes one layer of the diagram, in
    column order. Within a layer, every distinct value becomes a node labelled
    ``<value>_r<column name>``. A link between two adjacent layers carries the
    number of rows that share that (source, target) pair of values.

    df: dataframe with partition data; one column per layer. When
        ``prominent`` is True it must also contain a 'celltype' column.
    prominent: boolean, whether to keep only the rows whose 'celltype' is
        listed in ``prom_types['type_pre']``. In this mode the 'celltype'
        column is used for filtering only and is not drawn as a layer.
    prom_types: dataframe of prominent types; only its 'type_pre' column is
        read here. Required when ``prominent`` is True.
    width, height: figure size passed to ``fig.update_layout``.
    title: figure title passed to ``fig.update_layout``.

    Returns: None. The figure is displayed with ``fig.show()``.

    Raises: ValueError if ``prominent`` is True and ``prom_types`` is None.
    """

    if prominent:
        if prom_types is None:
            raise ValueError("prom_types must be provided when prominent=True")

        # 'celltype' selects the rows; it must not become a sankey layer
        chis = [col for col in df.columns if col != 'celltype']
        keep_rows = df['celltype'].isin(prom_types['type_pre'])
        ovi_chunk_df = df.loc[keep_rows, chis].copy()
    else:
        chis = list(df.columns)
        # Explicit copy: the relabelling below must never touch the caller's frame
        ovi_chunk_df = df[chis].copy()

    # Append correct _r values so equal ids in different columns stay distinct nodes
    for chi in chis:
        ovi_chunk_df[chi] = ovi_chunk_df[chi].astype(str) + '_r' + str(chi)

    # find all the unique nodes, layer by layer, in order of first appearance
    nodes = []
    for chi in chis:
        nodes.extend(ovi_chunk_df[chi].unique().tolist())

    # create a mapping dictionary from node label to node index
    mapping_dict = {label: position for position, label in enumerate(nodes)}

    # Count the rows flowing between every pair of adjacent layers
    layer_links = []
    for source_col, target_col in zip(chis[:-1], chis[1:]):
        flows = (
            ovi_chunk_df.groupby([source_col, target_col])
            .size()
            .reset_index(name='value')
            .rename(columns={source_col: 'source', target_col: 'target'})
        )
        layer_links.append(flows)

    if layer_links:
        links = pd.concat(layer_links, axis=0, ignore_index=True)
    else:
        # Fewer than two layers: there is nothing to connect
        links = pd.DataFrame(columns=['source', 'target', 'value'])

    # replace source and target labels with their node indices
    links['source'] = links['source'].map(mapping_dict)
    links['target'] = links['target'].map(mapping_dict)

    # turn this table into a dictionary for making the sankey diagram
    links_dict = links.to_dict(orient='list')

    fig = go.Figure(data=[go.Sankey(
        node = dict(
            pad = 15,
            thickness=20,
            line=dict(width=0.5),
            label = nodes,
        ),
        link = dict(
            source= links_dict['source'],
            target = links_dict['target'],
            value = links_dict['value']
        )
        )
    ])
    # apply the requested size and title
    fig.update_layout(height = height, width= width, title=title)
    fig.show()

In [19]:
# Sankey of the module assignments in the 0.0, 0.05 and 0.1 columns
ovi_cell_chunk = ovi_cell.loc[:, ['0.0', '0.05', '0.1']].copy()
create_sankey(ovi_cell_chunk, title='Ovi Cell Types', height=800, width=800)

## Ride or die from 0.1 max res for celltypes

In [20]:
# Column holding the module assignments at the maximum resolution
max_res = '0.1'

# Working copy restricted to the columns compared in the ride-or-die test
cell_test = ovi_cell.loc[:, ["0.0", "0.05", max_res]].copy()

# Distinct module ids at max_res, in order of first appearance
module_ids = cell_test[max_res].drop_duplicates().tolist()

In [21]:
# Module ids (at max_res) whose members rode together through every column
the_homies = []

for module_id in module_ids:
    # Rows assigned to this module at max_res
    members = cell_test.loc[cell_test[max_res] == module_id]

    # Singletons don't count: there is nobody to ride with
    if len(members) <= 1:
        continue

    # All members share one assignment pattern iff a single unique row remains
    if len(members.drop_duplicates()) == 1:
        the_homies.append(module_id)

# Rows of the original dataframe that belong to a ride or die module
ride_or_die = ovi_cell[ovi_cell[max_res].isin(the_homies)]
ride_or_die

Unnamed: 0.1,Unnamed: 0,celltype,key,0.75,0.05,0.1,0.25,0.5,1.0,0.0
661,661,PAM04_a,662,326,3,22,62,254,375,3
662,662,PAM04_b,663,327,3,22,26,255,376,3
719,719,PS098,720,346,4,24,67,268,397,4
724,724,PS127,725,346,4,24,67,268,397,4
750,750,SIFa,751,326,3,22,62,254,375,3
817,817,SLP009,818,375,1,26,74,285,428,1
1258,1258,SMP431,1259,498,1,26,74,335,574,1


In [22]:
# Module ids at max_res with more than one member whose rows in cell_test are all identical
the_homies

[22, 24, 26]