# FlyWire Connectome Query

By Charles Xu @ Caltech

We will query the FlyWire connectome using CAVE to obtain connectivity matrices by select single cells or cell types in relavent neuropils.

## Setup

At this point starting up the client is unnecessary because tables will be downloaded and processed locally instead of being queried from the remote. Caveclient does not seem to support query by cell ID.

In [1]:
# import caveclient
import pandas as pd
import seaborn as sns
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import colors
from scipy.io import savemat
import pickle
import sympy as sp
from scipy.linalg import schur
import os
import csv

### User defined variables

In [2]:
# User options
savedata = True                                            # Set to True to save data to files
datapath = '/Users/charlesxu/local/data/mcp/flywire' # MBA

plot = True                                                # Set to True to plot
showplots = True                                           # Set to True to show plots
saveplots = False                                           # Set to True to save plots to files
plotpath = '/Users/charlesxu/local/data/mcp/flywire/plots' # MBA

## Query

### Query by NPOI

In [None]:
## Query neurons forming synapses in select neuropils
# Download the table at https://codex.flywire.ai/api/download?data_product=connections&data_version=783

path_connections = os.path.join(datapath, 'connections.csv')
connections_df = pd.read_csv(path_connections)
connections_df

In [None]:
## Get a list of presynaptic neurons for each neuropil
# Query by non-zero output partners

# List of neuropils of interest
roi = ["EB", "PB", "NO", "FB",
        "LAL_R", "BU_R", "CRE_R", "SIP_R", "SLP_R", "SMP_R",
        "IB_R", "SPS_R", "IPS_R", "AOTU_R", "PVLP_R", "WED_R", "SAD", "LH_R", "MB_CA_R", "MB_ML_R", "MB_PED_R", "MB_VL_R", "AL_R"] # fix MB_R

# Get lists of neurons with non-zero output partners in each neuropil
neurons_roi = {}
for neuropil in roi:
    neurons_roi[neuropil] = connections_df[connections_df["neuropil"] == neuropil]["pre_root_id"].values.tolist()

# Print the number of neurons in each neuropil
for neuropil, neurons in neurons_roi.items():
    print(f"{neuropil}: {len(set(neurons))} neurons")

# Unique list of neurons in all neuropils of interest
noi_set = set()
for neuropil in roi:
    noi_set.update(neurons_roi[neuropil])
noi_list = list(noi_set)

if savedata:
    try:
        with open(datapath + 'noi_list.pkl', 'wb') as f:
            pickle.dump(noi_list, f)
    except:
        print("Error saving noi_list.pkl")

print(f"Aggregated number of unique neurons in all neuropils of interest: {len(noi_list)}")

#### Query for synapses

For each neuron of interest, we will query for its synapses formed in the neuropil of interest and assign a binary weight value (`1` or `-1`) based on their neurotransmitter type.

In [None]:
# ## Query neurons forming synapses in select neuropils
# # Download the table at https://codex.flywire.ai/api/download?data_product=connections&data_version=783

# path_connections = datapath + 'connections.csv' # Path to the connections table
# connections_df = pd.read_csv(path_connections)
# connections_df

In [None]:
print(f"Aggregated number of unique neurons in all neuropils of interest: {len(noi_list)}")
print(f"The neurotransmitters involved are: {connections_df['nt_type'].unique()}")

In [None]:
# Manually assign weights to neurotransmitters
# These weights are used to scale the number of synapses between neurons
nt_weights = {
    'GABA': -1, # References?
    'ACH': 1,
    'GLU': -1,
    'DA': 1,
    'SER': 1,
    'OCT': 1
}

In [None]:
# Filter connections to only include neurons in the list
filtered_connections_df = connections_df[
    (connections_df["pre_root_id"].isin(noi_list)) &
    (connections_df["post_root_id"].isin(noi_list)) &
    (connections_df["neuropil"].isin(roi))
]

del connections_df

filtered_connections_df

### Query by cell type

In [3]:
## Query neurons of select cell types
# Download the table at https://codex.flywire.ai/api/download?data_product=classification&data_version=783

path_classification = os.path.join(datapath, 'classification.csv')
classification_df = pd.read_csv(path_classification)
classification_df

Unnamed: 0,root_id,flow,super_class,class,sub_class,cell_type,hemibrain_type,hemilineage,side,nerve
0,720575940640144768,intrinsic,optic,,L1-5,L1,,,right,
1,720575940630759755,intrinsic,central,,,CB1092,,LB3,left,
2,720575940637932009,intrinsic,central,Kenyon_Cell,,,KCg-m,MBp3,left,
3,720575940606131116,intrinsic,optic,,,Tlp4,,,left,
4,720575940633723091,intrinsic,optic,,,Tm4,,,left,
...,...,...,...,...,...,...,...,...,...,...
139250,720575940632239661,afferent,sensory,olfactory,,ORN_VM6l,ORN_VC5,,right,AN
139251,720575940624423312,intrinsic,optic,,,TmY5a,,,right,
139252,720575940625090916,intrinsic,optic,,,,,,left,
139253,720575940631596103,intrinsic,optic,,,T2a,,,left,


In [4]:
# List of cell types of interest
ctoi = ['Delta7', 'EPG']

# List of all cell types (in hemibrain_type)
col_hemibrain_type = classification_df['hemibrain_type'].str.split(',')
all_hemibrain_type = col_hemibrain_type.explode().unique().astype(str).tolist()

# Verify if the cell types of interest are in the list
for cell_type in ctoi:
    print(f"Matching type found for {cell_type}: {cell_type in all_hemibrain_type}")

# Get the root_id of neurons of the cell types of interest
neurons_ctoi = {}
for cell_type in ctoi:
    neurons_ctoi[cell_type] = classification_df[col_hemibrain_type.apply(lambda x: cell_type in x if isinstance(x, list) else False)]["root_id"].values.tolist()

# Print the number of neurons in each cell type
for cell_type, neurons in neurons_ctoi.items():
    print(f"{cell_type}: {len(neurons)} neurons")

# Unique list of neurons in all cell types of interest
noi_set = set()
for cell_type in ctoi:
    noi_set.update(neurons_ctoi[cell_type])
noi_list = list(noi_set)

# Generate a list of cell types corresponding to each root_id in noi_list
ctoi_list = []
for root_id in noi_list:
    is_in_hemibrain = [classification_df[classification_df["root_id"] == root_id]["hemibrain_type"].str.contains(cell_type, na=False).any() for cell_type in ctoi]
    is_in_cell_type = [classification_df[classification_df["root_id"] == root_id]["cell_type"].str.contains(cell_type, na=False).any() for cell_type in ctoi]
    if any(is_in_hemibrain):
        ctoi_list.append(ctoi[is_in_hemibrain.index(True)])
    elif any(is_in_cell_type):
        ctoi_list.append(ctoi[is_in_cell_type.index(True)])
    else:
        ctoi_list.append(None)

print(f"Generated ctoi_list with {len(ctoi_list)} entries")

Matching type found for Delta7: True
Matching type found for EPG: True
Delta7: 40 neurons
EPG: 47 neurons
Generated ctoi_list with 87 entries


In [5]:
## Query neurons forming synapses in select neuropils
# Download the table at https://codex.flywire.ai/api/download?data_product=connections&data_version=783

# path_connections = os.path.join(datapath, 'connections_filtered.csv')
path_connections = os.path.join(datapath, 'ring_extend', 'connections.csv')
connections_df = pd.read_csv(path_connections)
connections_df

Unnamed: 0,From,To,Neuropil,Synapses,Neuro Transmitter
0,720575940631125036,720575940611964882,EB,155,ACH
1,720575940631125036,720575940611964882,GA_L,1,ACH
2,720575940631125036,720575940611964882,NO,49,ACH
3,720575940626137895,720575940629459441,EB,201,ACH
4,720575940604801993,720575940629902206,FB,1,ACH
...,...,...,...,...,...
14645,720575940639088105,720575940632736703,PB,5,GLUT
14646,720575940628733292,720575940613836438,EB,5,GABA
14647,720575940627140551,720575940625528638,EB,5,ACH
14648,720575940633538724,720575940625351354,EB,5,ACH


In [6]:
# Extract rows where 'From' or 'To' matches any entry in noi_list
matched_connections_df = connections_df[
    (connections_df['From'].isin(noi_list)) | (connections_df['To'].isin(noi_list))
]

matched_connections_df

Unnamed: 0,From,To,Neuropil,Synapses,Neuro Transmitter
0,720575940631125036,720575940611964882,EB,155,ACH
1,720575940631125036,720575940611964882,GA_L,1,ACH
2,720575940631125036,720575940611964882,NO,49,ACH
3,720575940626137895,720575940629459441,EB,201,ACH
4,720575940604801993,720575940629902206,FB,1,ACH
...,...,...,...,...,...
14645,720575940639088105,720575940632736703,PB,5,GLUT
14646,720575940628733292,720575940613836438,EB,5,GABA
14647,720575940627140551,720575940625528638,EB,5,ACH
14648,720575940633538724,720575940625351354,EB,5,ACH


In [7]:
# Get all unique entries in From and To, combined
noi_partners_list = pd.unique(
    matched_connections_df[['From', 'To']].values.ravel()
)
print(f"Number of unique neuron IDs: {len(noi_partners_list)}")
noi_partners_list[:10]

Number of unique neuron IDs: 863


array([720575940631125036, 720575940611964882, 720575940626137895,
       720575940629459441, 720575940604801993, 720575940629902206,
       720575940608722371, 720575940631848791, 720575940628843458,
       720575940620027515])

In [8]:
# Generate a list of cell types corresponding to each root_id in noi_partners_list
ctoi_partners_list = []
no_ctoi = []
for i, root_id in enumerate(noi_partners_list):
    row = classification_df[classification_df["root_id"] == root_id]
    if not row.empty:
        # Try to match with ctoi using hemibrain_type or cell_type columns
        hemibrain_types = str(row.iloc[0]["hemibrain_type"]).split(",") if pd.notnull(row.iloc[0]["hemibrain_type"]) else []
        cell_types = str(row.iloc[0]["cell_type"]).split(",") if pd.notnull(row.iloc[0]["cell_type"]) else []
        if hemibrain_types:
            ctoi_partners_list.append(hemibrain_types[0])
        elif cell_types:
            ctoi_partners_list.append(cell_types[0])
        else:
            print(f"Neuron {root_id} not assigned to any cell type.")
            ctoi_partners_list.append(None)
            no_ctoi.append(i)
    else:
        print(f"Neuron {root_id} not found in classification_df.")
        ctoi_partners_list.append(None)
        no_ctoi.append(i)

print(f"Generated ctoi_partners_list with {len(ctoi_partners_list)} entries")
ctoi_partners_list[:10]

Neuron 720575940619574181 not assigned to any cell type.
Neuron 720575940630199759 not assigned to any cell type.
Neuron 720575940654467233 not assigned to any cell type.
Neuron 720575940630021115 not assigned to any cell type.
Neuron 720575940619489088 not assigned to any cell type.
Neuron 720575940615673949 not assigned to any cell type.
Neuron 720575940616040148 not assigned to any cell type.
Neuron 720575940624156391 not assigned to any cell type.
Generated ctoi_partners_list with 863 entries


['EPG', 'EL', 'EPG', 'EL', 'EPG', 'EL', 'EPG', 'EL', 'EPG', 'ExR6']

In [9]:
# Remove entries specified by no_ctoi from noi_partners_list and ctoi_partners_list
noi_partners_list_filtered = np.delete(noi_partners_list, no_ctoi)
ctoi_partners_list_filtered = [ctoi_partners_list[i] for i in range(len(ctoi_partners_list)) if i not in no_ctoi]

print(f"Filtered noi_partners_list length: {len(noi_partners_list_filtered)}")
print(f"Filtered ctoi_partners_list length: {len(ctoi_partners_list_filtered)}")

Filtered noi_partners_list length: 855
Filtered ctoi_partners_list length: 855


In [10]:
noi_list = noi_partners_list_filtered.tolist()
ctoi_list = ctoi_partners_list_filtered

len(noi_list), len(ctoi_list)

(855, 855)

In [11]:
if savedata:
    try:
        with open(os.path.join(datapath, 'noi_list.csv'), 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['Neuron ID'])  # Add a header
            for neuron_id in noi_list:
                writer.writerow([neuron_id])
    except Exception as e:
        print(f"Error saving noi_list.csv: {e}")
        
    try:
        with open(os.path.join(datapath, 'ctoi_list.csv'), 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['Cell Type'])  # Add a header
            for cell_type in ctoi_list:
                writer.writerow([cell_type])
    except Exception as e:
        print(f"Error saving ctoi_list.csv: {e}")

#### Query for synapses

For each neuron of interest, we will query for its synapses formed in the neuropil of interest and assign a binary weight value (`1` or `-1`) based on their neurotransmitter type.

In [12]:
## Query neurons forming synapses in select neuropils
# Download the table at https://codex.flywire.ai/api/download?data_product=connections&data_version=783

path_connections = os.path.join(datapath, 'ring_extend', 'connections.csv')
connections_df = pd.read_csv(path_connections)
connections_df

Unnamed: 0,From,To,Neuropil,Synapses,Neuro Transmitter
0,720575940631125036,720575940611964882,EB,155,ACH
1,720575940631125036,720575940611964882,GA_L,1,ACH
2,720575940631125036,720575940611964882,NO,49,ACH
3,720575940626137895,720575940629459441,EB,201,ACH
4,720575940604801993,720575940629902206,FB,1,ACH
...,...,...,...,...,...
14645,720575940639088105,720575940632736703,PB,5,GLUT
14646,720575940628733292,720575940613836438,EB,5,GABA
14647,720575940627140551,720575940625528638,EB,5,ACH
14648,720575940633538724,720575940625351354,EB,5,ACH


In [13]:
print(f"Aggregated number of unique neurons in all neuropils of interest: {len(noi_list)}")
print(f"The neurotransmitters involved are: {connections_df['Neuro Transmitter'].unique()}")

Aggregated number of unique neurons in all neuropils of interest: 855
The neurotransmitters involved are: ['ACH' 'GLUT' 'GABA' 'SER' 'OCT' 'DA']


In [14]:
# Manually assign weights to neurotransmitters
# These weights are used to scale the number of synapses between neurons
nt_weights = {
    'GABA': -1, # References?
    'ACH': 1,
    'GLUT': -1,
    'DA': 1,
    'SER': 1,
    'OCT': 1
}

In [15]:
# Filter connections to only include neurons in the list
filtered_connections_df = connections_df[
    (connections_df["From"].isin(noi_list)) &
    (connections_df["To"].isin(noi_list))
]

del connections_df

filtered_connections_df

Unnamed: 0,From,To,Neuropil,Synapses,Neuro Transmitter
0,720575940631125036,720575940611964882,EB,155,ACH
1,720575940631125036,720575940611964882,GA_L,1,ACH
2,720575940631125036,720575940611964882,NO,49,ACH
3,720575940626137895,720575940629459441,EB,201,ACH
4,720575940604801993,720575940629902206,FB,1,ACH
...,...,...,...,...,...
14645,720575940639088105,720575940632736703,PB,5,GLUT
14646,720575940628733292,720575940613836438,EB,5,GABA
14647,720575940627140551,720575940625528638,EB,5,ACH
14648,720575940633538724,720575940625351354,EB,5,ACH


## Connectivity matrix

### Single neuron connectivity

In [16]:
## Generate a connectivity matrix

# Calculate the connectivity between each pair of neurons
filtered_connections_df.loc[:, 'connectivity'] = filtered_connections_df['Synapses'] * filtered_connections_df['Neuro Transmitter'].map(nt_weights)

# Group the connections by neuron pair
grouped_connections_df = filtered_connections_df.groupby(['From', 'To']).agg({'connectivity': 'sum'}).reset_index()

# Connectivity matrix
n_neurons = len(noi_list)
connectivity_matrix = np.zeros((n_neurons, n_neurons))
for _, row in grouped_connections_df.iterrows():
    pre_id = row['From']
    post_id = row['To']
    conn = row['connectivity']
    pre_idx = noi_list.index(pre_id)
    post_idx = noi_list.index(post_id)
    connectivity_matrix[pre_idx, post_idx] = conn

del grouped_connections_df

if savedata:
    connectivity_df = pd.DataFrame(connectivity_matrix, index=noi_list, columns=noi_list)
    connectivity_df.to_csv(os.path.join(datapath, 'connectivity_matrix.csv'))

connectivity_matrix

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_connections_df.loc[:, 'connectivity'] = filtered_connections_df['Synapses'] * filtered_connections_df['Neuro Transmitter'].map(nt_weights)


array([[  0., 205.,   0., ...,   0.,   0.,   0.],
       [-35.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       ...,
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.]], shape=(855, 855))

In [19]:
# Function to detect the sign of the first non-zero value in a row
def detect_first_nonzero_sign(row):
    for value in row:
        if value != 0:
            return np.sign(value)
    return 0  # Return 0 if the row is all zeros

# Function to check if the row contains a value of the opposite sign
def contains_opposite_sign(row, first_sign):
    for value in row:
        if value != 0 and np.sign(value) != first_sign:
            return True
    return False

# Initialize an array to store the logical results
logical_results = np.zeros(n_neurons, dtype=bool)

# Iterate through each row in the connectivity matrix
for i, row in enumerate(connectivity_matrix):
    first_sign = detect_first_nonzero_sign(row)
    if first_sign != 0:
        has_opposite_sign = contains_opposite_sign(row, first_sign)
        logical_results[i] = has_opposite_sign
    else:
        logical_results[i] = False

# Print the indices where logical_results is True
true_indices = np.where(logical_results)[0]
print(f"Number of neurons with mixed signs: {len(true_indices)}")
print(f"Indices of neurons with mixed signs: {true_indices}")

Number of neurons with mixed signs: 0
Indices of neurons with mixed signs: []


In [21]:
# Create an empty matrix to store the nt_type
nt_type_matrix = np.empty((n_neurons, n_neurons), dtype=object)

# Fill the matrix with the corresponding nt_type
for _, row in filtered_connections_df.iterrows():
    pre_id = row['From']
    post_id = row['To']
    nt_type = row['Neuro Transmitter']
    pre_idx = noi_list.index(pre_id)
    post_idx = noi_list.index(post_id)
    nt_type_matrix[pre_idx, post_idx] = nt_type

if savedata:
    nt_type_df = pd.DataFrame(nt_type_matrix, index=ctoi_list, columns=ctoi_list)
    nt_type_df.to_csv(os.path.join(datapath, 'nt_type_matrix.csv'))

nt_type_matrix

array([[None, 'ACH', None, ..., None, None, None],
       ['GABA', None, None, ..., None, None, None],
       [None, None, None, ..., None, None, None],
       ...,
       [None, None, None, ..., None, None, None],
       [None, None, None, ..., None, None, None],
       [None, None, None, ..., None, None, None]],
      shape=(855, 855), dtype=object)

In [None]:
## Save the connectivity matrix

# Save to a numpy file
# Read with: connectivity_matrix = np.load('connectivity_matrix.npy')
# if savedata:
#     np.save(datapath + 'connectivity_matrix.npy', connectivity_matrix)

# Save to a CSV file (requires a large memory)
# Read with: connectivity_df = pd.read_csv('connectivity_matrix.csv', index_col=0)
# if savedata:
#     connectivity_df = pd.DataFrame(connectivity_matrix, index=noi_list, columns=noi_list)
#     connectivity_df.to_csv(datapath + 'connectivity_matrix.csv')

# Save to a MATLAB file
# Read with: connectivity_matrix = scipy.io.loadmat('connectivity_matrix.mat')['connectivity_matrix']
# if savedata:
#     try:
#         savemat(datapath + 'connectivity_matrix.mat', {'connectivity_matrix': connectivity_matrix})
#     except:
#         print("Error saving connectivity_matrix.mat")

# Save to a pickle file
# Read with: with open('connectivity_matrix.pkl', 'rb') as f: connectivity_matrix = pickle.load(f)
# if savedata:
#     connectivity_matrix_list = connectivity_matrix.tolist()
#     with open(datapath + 'connectivity_matrix.pkl', 'wb') as f:
#         pickle.dump(connectivity_matrix_list, f)

In [None]:
# Load the connectivity matrix
# connectivity_matrix = np.load(datapath + 'connectivity_matrix.npy')

### Visualisation

In [None]:
## Visualize the connectivity matrix

# Plot the connectivity matrix as a heatmap
if plot:
    plt.figure(figsize=(10, 8))
    norm = colors.TwoSlopeNorm(vcenter=0)
    plt.imshow(connectivity_matrix, cmap='bwr', aspect='auto', norm=norm)
    plt.colorbar()
    plt.title('Connectivity Matrix Heatmap')
    plt.xlabel('Postsynaptic Neuron')
    plt.ylabel('Presynaptic Neuron')
    if showplots:
        plt.show()
    else:
        plt.close()
    if saveplots:
        plt.savefig(plotpath + 'connectivity_matrix_heatmap.png')

In [None]:
# Plot the distribution of connectivity strengths
if plot:
    connectivity_values = connectivity_matrix.flatten()
    nz_connectivity_values = connectivity_values[connectivity_values != 0]
    plt.figure(figsize=(8, 6))
    sns.histplot(nz_connectivity_values, bins=30)
    plt.title('Distribution of Connectivity Strengths')
    plt.xlabel('Connectivity Strength')
    plt.ylabel('Frequency')
    if showplots:
        plt.show()
    else:
        plt.close()
    if saveplots:
        plt.savefig(plotpath + 'connectivity_strengths_distribution.png')