# Figure 5B Joint Marginal Analysis that compares the Whole brain modularity data created by Kunin et al. to our oviIN modularity data. 
This figure was created by using a basic joint marginal function written by Kunin et al. The steps for this were:
1. Pull in modularity data for the inputs and the whole brain
2. Define the base modularity (0.0) and the suffixes for each modularity group
3. Use the modularity merge function to compare where each neuron id ends up in each modularity data
4. Each neuron is tagged with the correct suffix and the plots are created based on module breakdown 

The file was written by Rhessa Weber Langstaff 


In [1]:
import os
import time
import pandas as pd
import numpy as np
import networkx as nx
import collections
from scipy import sparse as sp
from scipy.stats import rankdata

import itertools
from itertools import combinations, combinations_with_replacement, cycle
from functools import reduce

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from util import *

import colorcet as cc

import bokeh
from bokeh.io import output_notebook, output_file, show, save
from bokeh.plotting import figure
from bokeh.models import (Rect, MultiLine, Circle, Span, Label,
                          GraphRenderer, StaticLayoutProvider,
                          NodesAndLinkedEdges,
                          HoverTool, TapTool, ColumnDataSource,
                          LinearColorMapper, LogColorMapper, CategoricalColorMapper,
                          CategoricalMarkerMapper,
                          BoxSelectTool,
                          ColorBar, BasicTicker, BoxZoomTool, FactorRange,
                          Range1d)
from bokeh.models import CategoricalTicker, FixedTicker, BoxAnnotation
from bokeh.models import Arrow, NormalHead, OpenHead, VeeHead, LabelSet

from bokeh.transform import transform, factor_cmap, linear_cmap, log_cmap
from bokeh.layouts import row, column, gridplot
output_notebook()

In [2]:
def joint_marginal(df, c1, c2, include_fraction=False):
    """Given a dataframe and two columns, return a dataframe with the joint and marginal counts."""
    j = df.value_counts([c1, c2])
    j.name = "joint_count"
    j = j.reset_index()

    m1 = df.value_counts(c1)
    m1.name = f"{c1}_count"
    j = j.merge(m1, left_on=c1, right_index=True)

    m2 = df.value_counts(c2)
    m2.name = f"{c2}_count"
    j = j.merge(m2, left_on=c2, right_index=True)

    if include_fraction:
        j["joint_fraction"] = j["joint_count"] / j["joint_count"].sum()
        j[f"{c1}_fraction"] = j["joint_count"] / j[f"{c1}_count"]
        j[f"{c2}_fraction"] = j["joint_count"] / j[f"{c2}_count"]
    return j

In [3]:
hemibrain_version = "v1.2.1"
log_msg("Hemibrain data set being used:", hemibrain_version)

2024 09 11 12:57:14  Hemibrain data set being used: v1.2.1


In [4]:
log_msg("Set up directory info and useful lists")
from neuprint import Client
from neuprint import fetch_roi_hierarchy, fetch_neurons, NeuronCriteria as NC

# Inputs
hemibrain_version = "v1.2.1"
log_msg("Hemibrain data set being used:", hemibrain_version)

preproc_dir = "oviIN_sep10/preprocessed_inputs-" + hemibrain_version
preproc_nodes = "preprocessed_nodes.csv"
preproc_centroids = "x"
preproc_edges = "preprocessed_undirected_edges.csv"

#hemibrain_dir = "oviIN/clustering_inputs_" + hemibrain_version
hemibrain_nodes = "key_inputr_removed_ids.txt"
hemibrain_edges = "original_inputr_removed_ids.txt"

figure_dir = os.path.join("figures","paper")
movie_dir = os.path.join("movies")
analysis_dir = os.path.join("analysis",hemibrain_version)
obj_dir = os.path.join("obj",hemibrain_version)  # 3d objects from, e.g. fetch_roi_mesh
skel_dir = os.path.join("skeleton", hemibrain_version)  # skeleta of neurons in .csv format


for d in [figure_dir, analysis_dir, obj_dir, movie_dir]:
    if not os.path.isdir(d):
        log_msg("Creating directory", d)
        os.makedirs(d)

reneel_params = list(sorted(['0.05','0.1','0.5'], key=float))
type_params = ['celltype','instance']
list_of_params = reneel_params + type_params

log_msg("Set up directory info and useful lists")

auth_token_file = open("flybrain.auth.txt", 'r')
auth_token = next(auth_token_file).strip()
try:
    np_client = Client('neuprint.janelia.org', dataset='hemibrain:' + hemibrain_version, token=auth_token)
    log_msg("neuprint Client set up as `np_Client`, version", np_client.fetch_version())
except:
    np_client = None
    log_msg("neuprint Client set up failed!")

log_msg("Loading node dataframe")
if os.path.isfile(os.path.join(preproc_dir, preproc_centroids)):
    log_msg("  (with centroids)")
    HB_node_df = pd.read_csv(os.path.join(preproc_dir, preproc_centroids), index_col=0)
else:
    log_msg("  (without centroids)")
    HB_node_df = pd.read_csv(os.path.join(preproc_dir, preproc_nodes), index_col=0)
log_msg("Adding 'type group'")
HB_node_df["type_group"] = HB_node_df["celltype"]
log_msg("Done!")

log_msg("Loading directed edges from csv")
HB_edge_df = pd.read_csv(hemibrain_edges, delimiter=',', header=None).rename(columns={0: "pre", 1:"post"})
log_msg("Done!")

log_msg("Merging in cell info to edge df")
HB_edge_df = HB_edge_df.merge(HB_node_df[list_of_params + ['type_group']], left_on='pre', right_index=True)
HB_edge_df = HB_edge_df.merge(HB_node_df[list_of_params + ['type_group']], left_on='post', right_index=True, suffixes=['pre', 'post'])
log_msg("Done!")

2024 09 11 12:57:14  Set up directory info and useful lists
2024 09 11 12:57:14  Hemibrain data set being used: v1.2.1
2024 09 11 12:57:14  Set up directory info and useful lists
2024 09 11 12:57:15  neuprint Client set up as `np_Client`, version 0.1.0
2024 09 11 12:57:15  Loading node dataframe
2024 09 11 12:57:15    (without centroids)
2024 09 11 12:57:15  Adding 'type group'
2024 09 11 12:57:15  Done!
2024 09 11 12:57:15  Loading directed edges from csv
2024 09 11 12:57:15  Done!
2024 09 11 12:57:15  Merging in cell info to edge df
2024 09 11 12:57:15  Done!


In [5]:
# Pulled from Prof G's code on github (https://github.com/Gutierrez-lab/oviIN-analyses-gabrielle/blob/main/modular_sandbox.ipynb)
def modularity_merge(df1,df2,suf1,suf2):
    """Given two modularity dataframes, merge them along shared body IDs. Pass in suffixes for the columns as strings."""
    merged_mod_df = df1.merge(df2, left_on='id', right_on='id', suffixes=[suf1, suf2])
    #merged_mod_df = df1.merge(df2, left_on='id', right_on='id', suffixes=['_oviHB', '_wholeHB'])
    return merged_mod_df

# Defining path to whole brain data
preproc_dir = "hemibrain/preprocessed-" + "v1.2"
preproc_nodes = "preprocessed_nodes.csv"

# Creating dataframe
WB_node_df = pd.read_csv(os.path.join(preproc_dir, preproc_nodes), index_col=0)
WB_node_df["type_group"] = HB_node_df["celltype"]

# Defining base res and suffixes
res = '0.0'
df1_suf = '_oviHB'
df2_suf = '_wholeHB'

# Using merge function from code cell above
mod_merge_df = modularity_merge(HB_node_df[[res]],WB_node_df[[res]],df1_suf,df2_suf)
mod_merge_df
chi1 = res + df1_suf
chi2 = res + df2_suf

In [6]:
# defining colors for oviHB clusters
import bokeh.palettes
cmap = bokeh.palettes.tol['Bright'][5]

#create dictionary
color_dict = dict(zip(mod_merge_df[res + df1_suf].unique(), cmap))
color_dict

# create joint marginal dataframe
jm = joint_marginal(mod_merge_df, chi1, chi2, include_fraction=True)

# map colors into the dataframe
jm['color']=jm['0.0_oviHB'].map(color_dict)

# sort the clusters on the y axis to get a more "diagonal" plot
yrange = jm.sort_values([f"{chi2}_fraction"], ascending=False).groupby(chi2).agg({chi1: "first", f"{chi2}_fraction": "first", "joint_count": "first"}).sort_values([chi1, "joint_count"], ascending=[True, False]).index

# make a bokeh figure
f = figure(title=f"Module at chi2 = {chi2} vs. module at chi1 = {chi1}",
x_range=FactorRange(factors=[str(i + 1) for i in range(jm[chi1].max())]),
y_range=FactorRange(factors=[str(y) for y in yrange]),
width=600, height=700)
jm["x"] = jm[chi1].apply(str)  # bokeh factor range has to have strings, so we have to convert these
jm["y"] = jm[chi2].apply(str)

f.rect(x="x", y="y", width=f"{chi1}_fraction", height=f"{chi2}_fraction", source=jm, fill_color='color', line_color='color')
f.add_tools(HoverTool(tooltips={"Neurons": "@joint_count (@joint_fraction{%%} of Hemibrain)",
                                f"Fraction of {chi2}": f"@{{{chi2}_fraction}}{{2.%%}}",
                                f"Fraction of {chi1}": f"@{{{chi1}_fraction}}{{2.%%}}"}))
f.xaxis.axis_label = 'Module in ' +chi1
f.yaxis.axis_label = 'Module in ' +chi2

show(f)

Unnamed: 0,0.0_oviHB,0.0_wholeHB,joint_count,0.0_oviHB_count,0.0_wholeHB_count,joint_fraction,0.0_oviHB_fraction,0.0_wholeHB_fraction,color,x,y
0,4,3,365,455,1221,0.199236,0.802198,0.298935,#228833,4,3
1,3,3,214,246,1221,0.116812,0.869919,0.175266,#EE6677,3,3
2,6,3,190,210,1221,0.103712,0.904762,0.15561,,6,3
3,1,2,189,347,206,0.103166,0.544669,0.917476,#CCBB44,1,2
4,7,1,177,280,279,0.096616,0.632143,0.634409,,7,1
5,2,3,169,212,1221,0.092249,0.79717,0.138411,#4477AA,2,3
6,1,3,154,347,1221,0.084061,0.443804,0.126126,#CCBB44,1,3
7,4,1,81,455,279,0.044214,0.178022,0.290323,#228833,4,1
8,5,3,71,82,1221,0.038755,0.865854,0.058149,#66CCEE,5,3
9,7,3,58,280,1221,0.031659,0.207143,0.047502,,7,3


In [7]:
# save f as a svg file
from bokeh.io import export_svgs

# option 1
f.output_backend = "svg"

# option 2
export_svgs(f, filename='manuscript_figs/Figure5B_jointmarginal1.svg')
from reportlab.graphics import renderPDF
from svglib.svglib import svg2rlg

# read in svg file
drawing = svg2rlg('manuscript_figs/Figure5B_jointmarginal1.svg')

# save as pdf
renderPDF.drawToFile(drawing, 'manuscript_figs/Figure5B_jointmarginal1.pdf')

Failed to load input file! (Error reading file 'manuscript_figs/Figure5B_jointmarginal1.svg': failed to load external entity "manuscript_figs/Figure5B_jointmarginal1.svg")


AttributeError: 'NoneType' object has no attribute 'renderScale'

Collecting selenium
  Downloading selenium-4.24.0-py3-none-any.whl.metadata (7.1 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.26.2-py3-none-any.whl.metadata (8.6 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting attrs>=23.2.0 (from trio~=0.17->selenium)
  Downloading attrs-24.2.0-py3-none-any.whl.metadata (11 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Collecting h11<1,>=0.9.0 (from wsproto>=0.14->trio-websocket~=0.9->selenium)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading selenium-4.24.0-py3-none-any.whl (9.6 MB)
[2K   [90m━━━━━