# Participation Score figure


In [1]:
# import important stuff here
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from neuprint import fetch_simple_connections, Client
import networkx as nx
from pandas import DataFrame


auth_token_file = open("flybrain.auth.txt", 'r')
auth_token = next(auth_token_file).strip()
try:
    np_client = Client('neuprint.janelia.org', dataset='hemibrain:' + 'v1.2.1', token=auth_token)
except:
    np_client = None

In [66]:
# Fucntions needed for participation scoring
def K_table(edges: DataFrame,
            u_col="node1", v_col="node2",
            weight_col="total_weight"):
    """Compute each node's weight-degree sum, which is its contribution to the K_C term"""
    Ku = edges.groupby(u_col).agg({weight_col: "sum"})
    Kv = edges.groupby(v_col).agg({weight_col: "sum"})
    # Ktable = Ku.merge(Kv, how="outer", left_index=True, right_index=True, suffixes=suffixes).fillna(0)
    # Ktable["Kv"] = Ktable[weight_col+suffixes[0]] + Ktable[weight_col+suffixes[1]]
    return Ku.add(Kv, fill_value=0)

def participation_score(nodes: DataFrame, edges: DataFrame, col, v,
                        weight_col="total_weight", u_col="node1", v_col="node2", suffixes=["_1","_2"],
                        m_tbl=None, K_tbl=None):
    """Compute the partitcipation score of a node `v` relative to a given partition of a network.
    
    See Guimera & Amaral (2005), https://doi.org/10.1038/nature03288"""
    if m_tbl is None:
        m_tbl = m_table(edges, col, u_col=u_col, v_col=v_col, weight_col=weight_col, suffixes=suffixes)
    if K_tbl is None:
        K_tbl = K_table(edges, u_col=u_col, v_col=v_col, weight_col=weight_col)
    
    return 1 - ((m_tbl[v] / K_tbl.loc[v]) ** 2).sum()


def m_table(edges: DataFrame, col,
            u_col="node1", v_col="node2",
            weight_col="total_weight", agg="sum",
            suffixes=["_1","_2"]):
    """Compute the sum of weights of edges from nodes to clusters.
    Returns a dataframe `mv` where the clusters are the index, the nodes are the columns.
    
    The value at `mv.loc[c, v]` is the sum of edge weights of all edges connecting node `v`
    to a node in cluster `c` (in either direction).
    
    Using `agg='sum'` will compute the weighted degree sum. Using `agg='count'` will """
    u_table = edges.groupby([u_col, col + suffixes[1]]).agg({weight_col: agg})
    u_table.index.names = ["node", "cluster"]
    v_table = edges.groupby([v_col, col + suffixes[0]]).agg({weight_col: agg})
    v_table.index.names = ["node", "cluster"]
    # mv = u_table.merge(v_table, how="outer", left_index=True, right_index=True, suffixes=suffixes).fillna(0)
    # mv["weight"] = mv[weight_col+suffixes[0]] + mv[weight_col+suffixes[1]]
    # return mv.pivot_table(index="cluster", columns="node", values="weight", fill_value=0)
    mv = u_table.add(v_table, fill_value=0)
    return mv.pivot_table(index="cluster", columns="node", values=weight_col, fill_value=0)

def get_participation(mod_df, clus_id, res, celltype):
    mod_ids = mod_df[mod_df[res]==clus_id]['id'].tolist()

    if len(mod_ids)<=1:
        participation = [[]]
        return participation
        #return '1 or fewer neurons in this cluster'
    
    # fetch simple connections among neurons in chosen cluster
    clu_connectome = fetch_simple_connections(mod_ids, mod_ids, min_weight=3)

    # replace None with string 'None' to allow it to be a node in the graph
    clu_connectome = clu_connectome.fillna('None')
    
    # group by celltype and count the number of connections
    clu_list =  clu_connectome[['type_pre','type_post','weight']].groupby(['type_pre','type_post']).sum()
    
    # let's threshold?
    clu_list = clu_list[clu_list['weight']>1]
    
    # reset the index to make the dataframe easier to work with
    clu_edges = clu_list.reset_index()
    
    # Edit weight column names
    clu_edges = clu_edges.rename(columns={'weight': 'weight_1'})
    clu_edges['weight_2'] = clu_edges['weight_1']
    # Make a node list 
    nodes = list(set(clu_edges['bodyId_pre'].tolist() + clu_edges['bodyId_post'].tolist()))
    nodes_df = pd.DataFrame(nodes, columns=['bodyId'])
    # Use the undirected list of nodes and edges to get participation score
    k_t = K_table(clu_edges, u_col="bodyId_pre", v_col="bodyId_post", weight_col="weight_1")
    for n in nodes:
        if n not in k_t.index:
            nodes_df.loc[n] = 0
        nodes_df[n] = participation_score(nodes_df, clu_edges, 'weight', n, K_tbl=k_t, weight_col="weight_1", u_col="bodyId_pre", v_col="bodyId_post")
    return nodes_df[]

def main_participation(celltype, cluster, mod_df, resolutions):
    cluster = mod_df[mod_df['0.0']==cluster]
    cluster = cluster[['id','0.0','0.05','0.1','0.5','0.75','1.0', 'celltype']]
    # get the cluster rows for a neuron that we want to analyze
    cluster_row = cluster[cluster['celltype']==celltype]
    cluster_row_dict = cluster_row[resolutions].mode(axis=0).to_dict('records')[0]
    participation_dict = {}
    for i in resolutions:
        clu_id = cluster_row_dict[i]
        participation_dict[i] = get_participation(cluster, clu_id, i)
    
    return participation_dict


In [67]:
resos = ['0.0','0.05','0.1','0.5','0.75','1.0']

In [68]:
# load the oviINr input connectome
ovi_in = pd.read_csv('oviIN/preprocessed_inputs-v1.2.1/preprocessed_nodes.csv')
ovi_in

Unnamed: 0,id,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
0,1003215282,1,1,1,1,1,1,1,CL229_R,CL229,...,Traced,False,Roughly traced,PDM19,301.0,"[23044, 14981, 11600]","{'INP': {'pre': 87, 'post': 351, 'downstream':...",,"['EPA(R)', 'GOR(R)', 'IB', 'ICL(R)', 'INP', 'S...","['GOR(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S..."
1,1005952640,2,2,1,1,2,2,2,IB058_R,IB058,...,Traced,False,Roughly traced,PVL20,,,"{'INP': {'pre': 464, 'post': 1327, 'downstream...",,"['ATL(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S...","['ATL(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S..."
2,1006928515,3,1,1,1,3,3,3,CL300_R,CL300,...,Traced,False,Roughly traced,PVL13,236.0,"[12083, 10523, 16816]","{'INP': {'pre': 79, 'post': 126, 'downstream':...",,"['ATL(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S...","['ATL(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S..."
3,1007260806,4,2,1,1,4,4,4,CL301_R,CL301,...,Traced,False,Roughly traced,PVL13,236.0,"[13524, 10108, 16480]","{'INP': {'pre': 40, 'post': 128, 'downstream':...",,"['GOR(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S...","['IB', 'ICL(R)', 'INP', 'PLP(R)', 'SCL(R)', 'S..."
4,1008024276,5,3,2,2,5,5,5,FB5N_R,FB5N,...,Traced,False,Roughly traced,AVM08,472.5,"[19178, 29711, 37312]","{'SNP(L)': {'post': 5, 'upstream': 5, 'mito': ...",SMPCREFB5_4,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,987273073,2507,3,8,8,409,604,629,(PVL05)_L,,...,Traced,False,Roughly traced,,,,"{'SNP(R)': {'pre': 65, 'post': 52, 'downstream...",,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'..."
2507,987842109,2508,3,9,23,533,780,815,,,...,Orphan,,Orphan hotknife,,,,"{'SNP(R)': {'pre': 2, 'post': 13, 'downstream'...",,"['SMP(R)', 'SNP(R)']","['SMP(R)', 'SNP(R)']"
2508,988567837,2509,2,3,4,16,58,63,FB4G_R,FB4G,...,Traced,False,Roughly traced,AVM08,,,"{'SNP(R)': {'pre': 6, 'post': 73, 'downstream'...",CRELALFB4_3,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
2509,988909130,2510,2,3,4,389,559,572,FB5V_R,FB5V,...,Traced,False,Roughly traced,AVM10,296.5,"[13226, 32024, 18600]","{'SNP(R)': {'pre': 1, 'post': 28, 'downstream'...",CRELALFB5,"['AB(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX',...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."


In [69]:
# Testing the m_table function

#Filter the oviINr input connectome to only include the neurons in the cluster
ovi_test = ovi_in[ovi_in['0.5']==276]

edges = fetch_simple_connections(ovi_test['id'].tolist(), ovi_test['id'].tolist(), min_weight=3)
edges = edges.fillna('None')
edges = edges[['bodyId_pre','bodyId_post','weight']].groupby(['bodyId_pre','bodyId_post']).sum()
edges = edges.reset_index()
# change weight to weight_1
edges['weight_1'] = edges['weight']
# make a second weight column 
edges['weight_2'] = edges['weight_1']
col_test = ovi_test['0.5']

edges


Unnamed: 0,bodyId_pre,bodyId_post,weight,weight_1,weight_2
0,330678844,485934965,5,5,5
1,330678844,546303917,7,7,7
2,330678844,674882237,5,5,5
3,330678844,674882302,12,12,12
4,330678844,675568335,4,4,4
...,...,...,...,...,...
220,5813077929,643475630,7,7,7
221,5813077929,673509195,7,7,7
222,5813077929,673845326,7,7,7
223,5813077929,674882302,10,10,10


In [70]:
mt = m_table(edges, 'weight', u_col="bodyId_pre", v_col="bodyId_post", weight_col="weight", suffixes=["_1","_2"])
mt

node,330678844,360773196,361445621,393499533,457520516,485249150,485934965,487286529,546303917,579425252,...,673158826,673509195,673845326,674882237,674882302,675568335,675887521,5813041161,5813053712,5813077929
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3,9,9,3,0,0,12,12,0,6,0,...,12,9,0,6,15,9,15,9,0,6
4,4,4,0,0,0,4,4,0,4,12,...,0,8,4,0,4,4,8,4,0,0
5,15,5,5,0,0,0,15,5,5,10,...,5,5,0,5,5,10,10,15,0,0
6,0,0,6,0,0,0,6,0,6,0,...,0,0,0,0,0,0,6,0,0,0
7,14,14,14,0,7,7,0,0,14,0,...,0,21,7,7,14,14,0,14,0,28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129,0,0,129,129,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
142,0,0,0,0,0,0,0,0,0,0,...,0,142,142,0,0,0,0,0,0,0
144,0,0,0,0,0,0,144,0,0,0,...,0,0,0,0,0,0,0,0,0,0
164,0,0,0,0,0,0,164,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [71]:
# Do it on the FS1A celltype
main_participation('FS1A', 3, ovi_in, resos)

{'0.0': 1.0, '0.05': 1.0, '0.1': 1.0, '0.5': 1.0, '0.75': 1.0, '1.0': 1.0}

In [72]:
# IB017 celltype
dict_IB = main_participation('IB017', 2, ovi_in, resos)
dict_IB

{'0.0': 1.0, '0.05': 1.0, '0.1': 1.0, '0.5': 1.0, '0.75': 1.0, '1.0': [[]]}

In [73]:
# SMP052
dict_SMP = main_participation('SMP052', 1, ovi_in, resos)
dict_SMP

{'0.0': 1.0, '0.05': 1.0, '0.1': 1.0, '0.5': 1.0, '0.75': 1.0, '1.0': 1.0}