# Assortivity Analysis of the oviIN connectome
This will need several things:
    
1. assortivity of the wholebrain data
2. assortivity of the oviIN connectome
3. filtering and assortivity analysis for each of the clusters at a higher resolution
4. plotting a line plot that visibily shows where the different clusters fall (resolutions could be plotted by color?)

In [1]:
from util import *
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import networkx as nx
from neuprint import Client, fetch_adjacencies, NeuronCriteria as NC


auth_token_file = open("/Users/rhessa/flybrain-clustering/flybrain.auth.txt", 'r')
auth_token = next(auth_token_file).strip()
try:
    np_client = Client('neuprint.janelia.org', dataset='hemibrain:v1.2.1', token=auth_token)
    print("Connected to neuprint")
except:
    np_client = None

Connected to neuprint


In [2]:
pwd

'/Users/rhessa/flybrain-clustering'

## Pulling wholebrain data for assortivity

In [3]:
# Import file, create dataframe and create networkx graph
hemibrain_version = "v1.2"
direct = "exported-traced-adjacencies-v1.2/"
filename = "traced-roi-connections.csv"

log_msg("Loading node dataframe")
wb = pd.read_csv(os.path.join(direct, filename))
log_msg("Done!")
wb = wb.groupby(['bodyId_pre', 'bodyId_post'], as_index=False)['weight'].sum()
wb

2024 03 25 22:14:03  Loading node dataframe
2024 03 25 22:14:04  Done!


Unnamed: 0,bodyId_pre,bodyId_post,weight
0,200326126,264083994,3
1,200326126,295816140,5
2,200326126,296203440,1
3,200326126,325122109,2
4,200326126,326474963,1
...,...,...,...
3550398,7112622236,328283521,1
3550399,7112622236,357932060,1
3550400,7112622236,357940977,1
3550401,7112622236,358631450,1


In [4]:
wbG = nx.from_pandas_edgelist(wb, 'bodyId_pre', 'bodyId_post', edge_attr='weight', create_using=nx.DiGraph())

In [5]:
assort_wb = nx.degree_assortativity_coefficient(wbG)

# Planning process to do this for each of the clusters
1. Seperate per cluster and isolate bodyIDs \
    a. Pull in partition data\
    b. Filter per cluster\
    c. isolate bodyIds
2. Use bodyIds to pull adjacencies to and from that list of bodyIds
3. Use edgelist to calculate assortivity and add value to dataframe
4. Plot values in line plot


In [6]:
# Pull in full dataframe connectome data
full = pd.read_csv(os.path.join('oviIN_combined/preprocessed-v1.2.1/', 'preprocessed_nodes.csv'), index_col=0)

In [76]:
# Filter by cluster at 0.0 resolution
cluster1 = full[full['0.1'] == 12]
cluster2 = full[full['0.0'] == 2]
cluster3 = full[full['0.0'] == 3]
cluster4 = full[full['0.0'] == 4]
cluster5 = full[full['0.0'] == 5]
cluster6 = full[full['0.0'] == 6]

In [80]:
cluster1_ids = cluster1.index.tolist()
cluster1_ids

[1013425227]

In [78]:
# Fetch the adjacencies for the cluster
log_msg("Fetching adjacencies")
_, edgelist_1 = fetch_adjacencies(NC(bodyId=cluster1_ids))
log_msg("Done!")

2024 03 25 23:08:56  Fetching adjacencies
2024 03 25 23:08:58  Done!


In [79]:
edgelist_1

Unnamed: 0,bodyId_pre,bodyId_post,roi,weight


In [64]:
# Instantiate nx item
cluster1G = nx.from_pandas_edgelist(edgelist_1, 'bodyId_pre', 'bodyId_post', edge_attr='weight', create_using=nx.DiGraph())
assort_c1 = nx.degree_assortativity_coefficient(cluster1G)


invalid value encountered in scalar divide



In [12]:
assort_c1

-0.2133818498437977

In [13]:
# Add the degree assortativity to a dataframe that has row names and a column of assortivity values
assort_df = pd.DataFrame(columns=['assortativity'], index=['whole', '0.0_1', '0.0_2', '0.0_3', '0.0_4', '0.0_5', '0.0_6'])
assort_df.loc['whole'] = assort_wb
assort_df.loc['0.0_1'] = assort_c1

# Print dataframe
assort_df

Unnamed: 0,assortativity
whole,-0.022964
0.0_1,-0.213382
0.0_2,
0.0_3,
0.0_4,
0.0_5,
0.0_6,


In [74]:
# Create a function out of it to make it easier to run on all clusters
def assortativity(cluster, x):
    cluster_ids = cluster.index.tolist()
    _, edgelist = fetch_adjacencies(NC(bodyId=cluster_ids))
    print(x)
    if len(edgelist) == 10:
        return np.nan
    clusterG = nx.from_pandas_edgelist(edgelist, 'bodyId_pre', 'bodyId_post', edge_attr='weight', create_using=nx.DiGraph())
    assort = nx.degree_assortativity_coefficient(clusterG)
    return assort

# Create function that does the whole process of dividing clusters and calculating assortativity
def main_assort(df, resolution, cluster_list):
    assort_df = pd.DataFrame(columns=['assortativity'], index=[f'{resolution}_{x}' for x in cluster_list])
    for i, x in enumerate(cluster_list):
        cluster = df[df[resolution] == x]
        assort = assortativity(cluster, x)
        assort_df.loc[f'{resolution}_{x}'] = assort
    return assort_df

                                       

In [15]:
# Run function for 0.0 resolution and 6 clusters
full_df = main_assort(full, '0.0', 6)

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

In [16]:
full_df

Unnamed: 0,assortativity
whole,-0.022964
0.0_1,-0.213382
0.0_2,-0.157566
0.0_3,-0.181511
0.0_4,-0.246467
0.0_5,-0.192219
0.0_6,-0.176241


In [40]:
# Using this function from jaccard sim to get the right clusters
def get_clusters(df, resolution):
    cluster_list = df[resolution].unique().tolist()
    return cluster_list

In [42]:
# Get cluster numbers for 0.1 
chi = '0.1'
cluster_01 = get_clusters(full, chi)

In [54]:
df_full01 = main_assort(full, chi, cluster_01[:9])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

1


  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

2


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

3


  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

4


  0%|          | 0/3 [00:00<?, ?it/s]

5


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

6


  0%|          | 0/2 [00:00<?, ?it/s]

7
8


  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

9


In [75]:
df_full01_1 = main_assort(full, chi, cluster_01[10:19])

11



invalid value encountered in scalar divide



12



invalid value encountered in scalar divide



  0%|          | 0/2 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [71]:
full_df = pd.concat([full_df, df_full01, df_full01_1])

In [73]:
full_df

Unnamed: 0,assortativity
whole,-0.022964
0.0_1,-0.213382
0.0_2,-0.157566
0.0_3,-0.181511
0.0_4,-0.246467
0.0_5,-0.192219
0.0_6,-0.176241
0.1_1,-0.213161
0.1_2,-0.230055
0.1_3,-0.159883


In [72]:
# Create a numberline plot for the values using plotly
import plotly.graph_objects as go

fig = go.Figure()
# Set layout
fig.update_layout(
    title='Assortativity Numberline Plot',
    yaxis=dict(
        showgrid=False,
        showticklabels=False,
        zeroline=True,
        zerolinewidth=2,
        zerolinecolor='black',
    ),
    xaxis=dict(
        title='Assortativity',
        showgrid=False,
        zeroline=False,
        showticklabels=True,
        tickwidth=10,
        range=[-1.2, 1.2],
    ),
    
)
fig.update_yaxes(range=[-0.2, 0.3])
fig.update_xaxes(ticks='inside')

# Add the values to the plot
for i in range(len(full_df)):
    fig.add_trace(go.Scatter(x=[full_df.iloc[i, 0]], y=[0], mode='markers', marker=dict(size=5), name=full_df.index[i]))

# Show the plot
fig.show()

