In [1]:
import pandas as pd
import os
from util import *
import plotly.graph_objects as go

# neuprint access
from neuprint import Client
from neuprint import fetch_simple_connections


auth_token_file = open("flybrain.auth.txt", 'r')
auth_token = next(auth_token_file).strip()
try:
    np_client = Client('neuprint.janelia.org', dataset='hemibrain:' + 'v1.2.1', token=auth_token)
    log_msg("neuprint Client set up as `np_Client`, version", np_client.fetch_version())
except:
    np_client = None
    log_msg("neuprint Client set up failed!")



2024 06 20 17:48:31  neuprint Client set up as `np_Client`, version 0.1.0


In [11]:
# Function for creating the sankey diagram
def create_sankey(df, chis, prominent=False, prom_types=None, figsize=None, title=None):
    """This function creates a sankey diagram from the data in the dataframe df.
    The columns to be used are in the list chis. If prominent is True, then only 
    the prominent types are used.
    
    df: dataframe with partition data
    chis: list of chi values in modularity data
    prominent: boolean, whether to use only prominent types
    prom_types: dataframe of prominent types and their weights, columns are 'type_pre' and 'weight'
    FigSize: int of figure size

    Returns: Sankey Figure
    """
    # prominent types only option:
    if prominent:
        ovi_chunk_df = df[df['celltype'].isin(prom_types['type_pre'])]

        # Take only the columns that are needed
        ovi_chunk_df = ovi_chunk_df[chis]
    
    else:
        ovi_chunk_df = df[chis]

    # Append correct _r values to the columns
    for i, x in enumerate(chis):
        ovi_chunk_df[x] = ovi_chunk_df[x].astype(str) + '_r' + str(x)

    # add a column of ones to ovi_HB_chunk
    ovi_chunk_df['counts'] = 1
    
    # find all the unique nodes
    nodes = []
    for chi in chis:    
        nodes += ovi_chunk_df[chi].unique().tolist()


    # Finding the links and putting it into a dataframe
    links = pd.DataFrame()
    for i in range(len(chis)-1):
        df = ovi_chunk_df.groupby([chis[i],chis[i+1]])['counts'].count().reset_index().rename(columns={chis[i]:'source',chis[i+1]:'target','counts':'value'})
        links = pd.concat([links, df], axis=0)

    print(links)
    mapping_dict = {k: v for v, k in enumerate(nodes)}

    # replace source and target with mapping dictionary
    links['source'] = links['source'].map(mapping_dict)
    links['target'] = links['target'].map(mapping_dict)

    # turn this table into a dictionary for making the sankey diagram
    links_dict = links.to_dict(orient='list')

    fig = go.Figure(data=[go.Sankey(
        node = dict(
            pad = 15,
            thickness=20,
            line=dict(width=0.5),
            label = nodes,
        ),
        link = dict(
            source= links_dict['source'],
            target = links_dict['target'],
            value = links_dict['value']
        )
        )
    ])
    # make figure larger
    fig.update_layout(height = figsize, width= 1400, title=title)
    fig.show()

In [10]:
# Import the data
ovi_cell  = pd.read_csv('oviIN_celltype/oviIN_combined/full/preprocessed-v1.2.1/preprocessed_nodes.csv')

# Prominent partners now
# body IDs of oviINs from Neuprint
oviINr_bodyID = 423101189
oviINl_bodyID = 485934965

ovi_inputs = fetch_simple_connections(None,[oviINr_bodyID, oviINl_bodyID])

ovi_type_inputs = ovi_inputs[['type_pre','weight']]  

# collapse ovi_inputs by cell type and sort in descending order
ovi_type_inputs = ovi_type_inputs.groupby('type_pre', as_index=False).sum().sort_values(by='weight', ascending=False,ignore_index=True)

# filter out cell types with less than 10 inputs
ovi_type_inputs = ovi_type_inputs[ovi_type_inputs['weight'] >= 100]

ovi_type_inputs = ovi_type_inputs.head(30)

In [13]:
# Run through the function
chi_values = ['0.0', '0.05','0.1', '0.25', '0.5', '0.75', '1.0']
create_sankey(ovi_cell, chi_values, figsize=800, title='OviIN combined full Cell Types')

       source    target  value
0      1_r0.0  14_r0.05      1
1      1_r0.0   1_r0.05    305
2      1_r0.0   7_r0.05      1
3      2_r0.0  10_r0.05      1
4      2_r0.0   2_r0.05    448
..        ...       ...    ...
768  98_r0.75  118_r1.0      1
769  98_r0.75  121_r1.0      2
770  99_r0.75  102_r1.0      1
771  99_r0.75  568_r1.0      2
772   9_r0.75    9_r1.0      2

[2363 rows x 3 columns]




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [20]:
create_sankey(ovi_cell, chi_values, prominent=True, prom_types=ovi_type_inputs, figsize=600, title='OviIN combined full Prominent Cell Types')

         source      target  value
0      1.0_r0.0  13.0_r0.05     15
1      1.0_r0.0   2.0_r0.05      2
2      2.0_r0.0   1.0_r0.05      2
3      2.0_r0.0   3.0_r0.05      2
4      2.0_r0.0   7.0_r0.05      4
..          ...         ...    ...
74  612.0_r0.75  640.0_r1.0      1
75  613.0_r0.75  642.0_r1.0      1
76   64.0_r0.75  479.0_r1.0      1
77   64.0_r0.75   69.0_r1.0      1
78   80.0_r0.75  582.0_r1.0      1

[191 rows x 3 columns]


In [59]:
# checking what celltypes are in which modules of interest
ovi_check = ovi_cell.merge(ovi_type_inputs,how='left', left_on='celltype', right_on='type_pre')

# Drop all unnecessary columns
ovi_test = ovi_check[['type_pre', '0.0','0.25', 'weight']].dropna()
ovi_test

Unnamed: 0,type_pre,0.0,0.25,weight
272,CRE075,5,9,225.0
274,CRE077,3,36,210.0
407,FC2B,6,44,435.0
408,FC2C,6,45,287.0
409,FS1A,6,37,1589.0
410,FS1B,6,11,179.0
419,IB017,5,37,562.0
468,LAL022,4,37,201.0
517,LAL134,2,37,358.0
883,SMP007,4,78,172.0


In [17]:
# pull in inputs only
ovi_inputs = pd.read_csv('oviIN/preprocessed_inputs-v1.2.1/preprocessed_nodes.csv', index_col=0)
# only keep columns we need
ovi_inputs = ovi_inputs[['celltype','0.0', '0.05','0.1',  '0.5', '0.75', '1.0']]
# get rid of any celltype that are NaN or None
ovi_inputs = ovi_inputs.dropna(subset=['celltype'])

# get rid of any celltype that are None
ovi_inputs = ovi_inputs[ovi_inputs['celltype'] != 'None']

ovi_inputs




Unnamed: 0_level_0,celltype,0.0,0.05,0.1,0.5,0.75,1.0
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1.003215e+09,CL229,1.0,1.0,1.0,1.0,1.0,1.0
1.005953e+09,IB058,2.0,1.0,1.0,2.0,2.0,2.0
1.006929e+09,CL300,1.0,1.0,1.0,3.0,3.0,3.0
1.007261e+09,CL301,2.0,1.0,1.0,4.0,4.0,4.0
1.008024e+09,FB5N,3.0,2.0,2.0,5.0,5.0,5.0
...,...,...,...,...,...,...,...
9.869747e+08,CRE010,3.0,8.0,8.0,93.0,116.0,124.0
9.871172e+08,LAL010,2.0,7.0,7.0,53.0,63.0,68.0
9.885678e+08,FB4G,2.0,3.0,4.0,16.0,58.0,63.0
9.889091e+08,FB5V,2.0,3.0,4.0,389.0,559.0,572.0


In [22]:
# Run through the function
chi_values = ['0.0', '0.05','0.1',  '0.5', '0.75', '1.0']
create_sankey(ovi_inputs, chi_values, figsize=800, title='OviINr input across resolutions')

         source      target  value
0      1.0_r0.0   1.0_r0.05     39
1      1.0_r0.0  13.0_r0.05    333
2      1.0_r0.0   2.0_r0.05     26
3      1.0_r0.0   3.0_r0.05      3
4      1.0_r0.0   5.0_r0.05     13
..          ...         ...    ...
792  97.0_r0.75  113.0_r1.0      2
793  97.0_r0.75  180.0_r1.0      3
794  97.0_r0.75  196.0_r1.0      6
795  97.0_r0.75   64.0_r1.0      1
796  98.0_r0.75  106.0_r1.0      3

[2068 rows x 3 columns]




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [21]:
# prominent types only
create_sankey(ovi_inputs, chi_values, prominent=True, prom_types=ovi_type_inputs, figsize=600, title='OviINr Prominent Inputs')

         source      target  value
0      1.0_r0.0  13.0_r0.05     15
1      1.0_r0.0   2.0_r0.05      2
2      2.0_r0.0   1.0_r0.05      2
3      2.0_r0.0   3.0_r0.05      2
4      2.0_r0.0   7.0_r0.05      4
..          ...         ...    ...
74  612.0_r0.75  640.0_r1.0      1
75  613.0_r0.75  642.0_r1.0      1
76   64.0_r0.75  479.0_r1.0      1
77   64.0_r0.75   69.0_r1.0      1
78   80.0_r0.75  582.0_r1.0      1

[191 rows x 3 columns]
