# Matching DNs between the brain and VNC

Specifically, we map DNs between the FAFB and MANC connectomes using data from [Stürner _et al._, 2024](https://doi.org/10.1101/2024.06.04.596633) and the BANC connectome

## Get the DN names for each connectome

In [1]:
import os

import numpy as np
import pandas as pd

import vnc_networks

connections_fafb = vnc_networks.connections.Connections(CR = vnc_networks.connectome_reader.FAFB_v783())
connections_manc = vnc_networks.connections.Connections(CR = vnc_networks.connectome_reader.MANC_v_1_2())

Attribute class_1 not found in the graph. Adding it.
Attribute class_1 not found in the graph. Adding it.


In [2]:
dn_names_fafb = connections_fafb.get_node_attribute(
    connections_fafb.get_neuron_ids({"class_1": "descending"}),
	"name"
)
dn_names_fafb_filtered = [dn for dn in dn_names_fafb if isinstance(dn, str)]
dn_names_fafb_unique = np.unique(dn_names_fafb_filtered)
print(f"FAFB has {len(dn_names_fafb)} DNs, of which {len(dn_names_fafb_filtered)} have been assigned a name. There are {len(dn_names_fafb_unique)} unique names.")

Attribute name not found in the graph. Adding it.
FAFB has 1269 DNs, of which 1170 have been assigned a name. There are 436 unique names.


In [3]:
dn_names_manc = connections_manc.get_node_attribute(
    connections_manc.get_neuron_ids({"class_1": "descending"}), "name"
)
dn_names_manc_filtered = [dn for dn in dn_names_manc if isinstance(dn, str)]
dn_names_manc_unique = np.unique(dn_names_manc_filtered)
print(
    f"MANC has {len(dn_names_manc)} DNs, of which {len(dn_names_fafb_filtered)} have been assigned a name. There are {len(dn_names_manc_unique)} unique names."
)

Attribute name not found in the graph. Adding it.
MANC has 1328 DNs, of which 1170 have been assigned a name. There are 490 unique names.


## Stürner _et al._ matching table

In [4]:
!wget https://www.biorxiv.org/content/biorxiv/early/2024/06/28/2024.06.04.596633/DC2/embed/media-2.xlsx

--2025-04-02 18:24:01--  https://www.biorxiv.org/content/biorxiv/early/2024/06/28/2024.06.04.596633/DC2/embed/media-2.xlsx
Resolving www.biorxiv.org (www.biorxiv.org)... 2606:4700:4400::6812:2253, 2606:4700:4400::ac40:99ad, 104.18.34.83, ...
Connecting to www.biorxiv.org (www.biorxiv.org)|2606:4700:4400::6812:2253|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/vnd.openxmlformats-officedocument.spreadsheetml.sheet]
Saving to: ‘media-2.xlsx.1’

media-2.xlsx.1          [  <=>               ]   1.03M  2.77MB/s    in 0.4s    

2025-04-02 18:24:03 (2.77 MB/s) - ‘media-2.xlsx.1’ saved [1084988]



In [4]:
dn_match_data_stuerner = pd.read_excel(
    "media-2.xlsx",
    sheet_name="MANC_DNs",
)
dn_match_data_stuerner

Unnamed: 0,bodyid,group,type,systematic_type,root_side,long_tract,predicted_nt,predicted_nt_prob,neuropil,synonyms,type_with_new,soma_type,FANC_svID,FANC_match,FANC_group,confidence_fanc_match_1_5
0,15424,15424,oviDN,DNad001,RHS,MDA,ACH,0.943492,ad,,oviDN,P,,,,
1,15827,15424,oviDN,DNad001,LHS,MDA,ACH,0.968009,ad,,oviDN,P,,,,
2,13020,12574,DNg66,DNad002,RHS,MDA,unk,0.570516,ad,,DNg66,G,7.339707e+16,6.485183e+17,73397074884148993,5.0
3,12574,12574,DNg66,DNad002,LHS,MDA,unk,0.564406,ad,,DNg66,G,7.339707e+16,6.485183e+17,73397074884148993,5.0
4,27771,14171,DNp58,DNad003,LHS,CVL,ACH,0.826446,ad,,DNp58,P,7.325634e+16,6.485183e+17,NO MATCH,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1323,44166,44166,DNxn189,DNxn189,RHS,VTV,unk,0.425634,xn,,DNxn189,,,,,
1324,49688,49688,DNxn190,DNxn190,RHS,none,unk,0.495362,xn,,DNxn190,,,,,
1325,24311,24311,DNxn191,DNxn191,LHS,MDA,ACH,0.931584,xn,,DNxn191,,,,,
1326,49317,49317,DNxn192,DNxn192,LHS,none,GLUT,0.887326,xn,,DNxn192,,,,,


Check that: 
* `systematic_type` are the MANC names
* `type` are FAFB names if a match was found, otherwise MANC names still

The FAFB names might not match those we have above becomes synonyms are used in the table - like oviDN instead of DNpe038

In [5]:
assert dn_match_data_stuerner.systematic_type.isin(dn_names_manc_unique).sum() == len(dn_match_data_stuerner), "Not all the systematic_type values are MANC DN names"

In [6]:
n_matches = (dn_match_data_stuerner.systematic_type != dn_match_data_stuerner.type).sum()
print(f"{n_matches}/{len(dn_match_data_stuerner)} ({n_matches / len(dn_match_data_stuerner) * 100:.1f}%) MANC neurons are matched to FAFB names")

613/1328 (46.2%) MANC neurons are matched to FAFB names


Let's build a MANC -> FAFB map

In [7]:
manc_to_fafb_map_stuerner = {}
for manc_dn in dn_names_manc_unique:
    manc_to_fafb_map_stuerner[manc_dn] = [
        dn
        for dn in dn_match_data_stuerner[
            dn_match_data_stuerner.systematic_type == manc_dn
        ].type.unique()
        if dn != manc_dn
    ]


In [8]:
print(
    f"{sum([len(matches) > 0 for matches in manc_to_fafb_map_stuerner.values()])}/{len(dn_names_manc_unique)} of the unique MANC names have FAFB matches"
)
print(
    f"Of these, {sum([len([m for m in matches if m in dn_names_fafb_unique]) > 0 for matches in manc_to_fafb_map_stuerner.values()])} have matches to the proper FAFB DN names"
)

225/490 of the unique MANC names have FAFB matches
Of these, 150 have matches to the proper FAFB DN names


## BANC matching

Ask Jasper where to get the latest `cell_info.parquet` file from the BANC connectome which includes all annotations.

In [9]:
banc_cell_info = pd.read_parquet(os.path.expanduser("~/Downloads/banc_v514/cell_info.parquet"))
banc_cell_info

Unnamed: 0,id,created,superceded_id,valid,tag,tag2,user_id,pt_supervoxel_id,pt_root_id,pt_position
0,12874,2024-05-30 02:05:23.483876+00:00,,t,glia,primary class,4147,76983819387277319,720575941450508103,"[144967, 13874, 3795]"
1,81,2023-11-16 13:12:49.437296+00:00,,t,descending,anterior-posterior projection pattern,2660,75930968082991019,720575941530295013,"[114431, 92500, 2996]"
2,10264,2024-05-08 01:08:13.146235+00:00,,t,glia,primary class,5039,76632044654585125,720575941539930460,"[134672, 14576, 4828]"
3,324,2023-12-08 09:43:28.706682+00:00,,t,sensory neuron,primary class,8,76001336759980256,720575941399010595,"[116268, 92500, 2719]"
4,92,2023-11-19 11:27:05.528941+00:00,,t,descending,anterior-posterior projection pattern,2660,76001336827176493,720575941554728868,"[115643, 92500, 2999]"
...,...,...,...,...,...,...,...,...,...,...
293142,297757,2025-03-05 22:50:51.738400+00:00,,t,soma in brain,soma region,2660,79236305455825832,720575941493186736,"[209792, 33632, 181]"
293143,297758,2025-03-05 22:50:51.739005+00:00,,t,soma in brain,soma region,2660,79236305455818584,720575941463571213,"[209488, 34416, 177]"
293144,297759,2025-03-05 22:50:51.739636+00:00,,t,soma in brain,soma region,2660,79166074150693633,720575941574389768,"[207232, 37696, 224]"
293145,297740,2025-03-05 22:50:51.727567+00:00,,t,soma in brain,soma region,2660,79306674200023841,720575941504470210,"[211744, 32928, 214]"


For all the rows, `tag2` is the type of tag and `tag` is the corresponding value, all applied to the specified `pt_root_id`

In [10]:
banc_dn_ids = set(banc_cell_info[banc_cell_info.tag == "descending"].pt_root_id)

In [11]:
banc_dn_annotations = banc_cell_info[
    banc_cell_info.pt_root_id.isin(banc_dn_ids)
    & (banc_cell_info.tag2 == "neuron identity")
]
banc_dn_annotations

Unnamed: 0,id,created,superceded_id,valid,tag,tag2,user_id,pt_supervoxel_id,pt_root_id,pt_position
237,338,2023-12-13 22:33:56.479369+00:00,,t,giant fiber,neuron identity,2660,76142074315554489,720575941540215501,"[120421, 92500, 3002]"
1288,337,2023-12-13 22:30:26.067496+00:00,,t,giant fiber,neuron identity,2660,76071705571426918,720575941459097503,"[117976, 92500, 3015]"
2119,74996,2024-11-24 22:49:57.484901+00:00,,t,DNxl021,neuron identity,355,76071705570901483,720575941588971076,"[117503, 92500, 2830]"
2393,74571,2024-11-24 22:49:57.223431+00:00,,t,DNxl106,neuron identity,355,76282811736337897,720575941507243589,"[124387, 92500, 2655]"
2577,74818,2024-11-24 22:49:57.375055+00:00,,t,DNxl113,neuron identity,355,76001336759769357,720575941519502040,"[114745, 92500, 2653]"
...,...,...,...,...,...,...,...,...,...,...
230949,18966,2024-07-08 12:40:31.327311+00:00,,t,DNpe028,neuron identity,847,76071705504214539,720575941471139191,"[117916, 92500, 2736]"
232621,129416,2025-02-21 23:43:58.529831+00:00,,t,DNxn187,neuron identity,355,76142074248164002,720575941459053471,"[120652, 92500, 2661]"
232622,124221,2025-02-21 23:43:55.358110+00:00,,t,DNpe031,neuron identity,355,76142074248164002,720575941459053471,"[120652, 92500, 2661]"
233642,18223,2024-07-01 18:36:49.088507+00:00,,t,DNpe053,neuron identity,847,76142074315169489,720575941508499234,"[119588, 92500, 2870]"


In [12]:
print(
    f"Of these {len(banc_dn_annotations)} annotations, {banc_dn_annotations.tag.isin(dn_names_manc_unique).sum()} are MANC names and {banc_dn_annotations.tag.isin(dn_names_fafb_unique).sum()} and FAFB names."
)

Of these 2097 annotations, 596 are MANC names and 1164 and FAFB names.


Let's build a MANC -> FAFB map

In [13]:
manc_to_fafb_map_banc = {}
for manc_dn in dn_names_manc_unique:
    manc_to_fafb_map_banc[manc_dn] = [
        dn
        for dn in banc_dn_annotations[
            banc_dn_annotations.pt_root_id.isin(
                banc_dn_annotations[banc_dn_annotations.tag == manc_dn].pt_root_id
            )
        ].tag.unique()
        if dn != manc_dn
    ]


In [14]:
print(
    f"{sum([len(matches) > 0 for matches in manc_to_fafb_map_banc.values()])}/{len(dn_names_manc_unique)} of the unique MANC names have FAFB matches from the BANC"
)
print(
    f"Of these, {sum([len([m for m in matches if m in dn_names_fafb_unique]) > 0 for matches in manc_to_fafb_map_banc.values()])} have matches to the proper FAFB DN names"
)

219/490 of the unique MANC names have FAFB matches from the BANC
Of these, 209 have matches to the proper FAFB DN names


## Combining the two maps

In [15]:
manc_to_fafb_map_combined = {
    manc_dn: manc_to_fafb_map_stuerner[manc_dn] + manc_to_fafb_map_banc[manc_dn]
    for manc_dn in dn_names_manc_unique
}

In [16]:
print(
    f"{sum([len(matches) > 0 for matches in manc_to_fafb_map_combined.values()])}/{len(dn_names_manc_unique)} of the unique MANC names have FAFB matches from the both sources"
)
print(
    f"Of these, {sum([len([m for m in matches if m in dn_names_fafb_unique]) > 0 for matches in manc_to_fafb_map_combined.values()])} have matches to the proper FAFB DN names"
)

442/490 of the unique MANC names have FAFB matches from the both sources
Of these, 359 have matches to the proper FAFB DN names


We can now map ~90% of MANC DNs to FAFB. Let's export this as a nice table

In [17]:
rows = []
for manc_dn in dn_names_manc_unique:
    for match in manc_to_fafb_map_stuerner[manc_dn]:
        rows.append((manc_dn, match, "Stuerner et al."))
    for match in manc_to_fafb_map_banc[manc_dn]:
        rows.append((manc_dn, match, "BANC v514"))

    if len(manc_to_fafb_map_stuerner[manc_dn]) == 0 and len(manc_to_fafb_map_banc[manc_dn]) == 0:
        rows.append((manc_dn, None, None))

manc_to_fafb_df = pd.DataFrame(rows, columns=["MANC DN name", "FAFB DN name", "source"])
manc_to_fafb_df.to_csv("manc_to_fafb.csv", index=False)
manc_to_fafb_df

Unnamed: 0,MANC DN name,FAFB DN name,source
0,DNad001,oviDN,Stuerner et al.
1,DNad002,DNg66,Stuerner et al.
2,DNad003,DNp58,Stuerner et al.
3,DNad004,DNge005,BANC v514
4,DNad005,DNge151,BANC v514
...,...,...,...
594,DNxn189,DNpe052,BANC v514
595,DNxn190,,
596,DNxn191,,
597,DNxn192,,


If we check the matches that aren't proper FAFB names, we can see that most of them look OK. So we don't try to filter out the outliers.

In [18]:
manc_to_fafb_df[~manc_to_fafb_df["FAFB DN name"].isin(dn_names_fafb_unique)][
    "FAFB DN name"
].unique()

array(['oviDN', 'DNg12', 'DNg52', None, 'DNxl067',
       'synapseless descending', 'XD', 'DNge061', 'PS225?', 'DNa03',
       'DNp02', 'DNp04', 'DNp06', 'DNg01', 'DNnt008', 'DNnt007', 'DNp16',
       'DNa07', 'DNb02', 'DNb03', 'DNg02', 'DNg05', 'DNg06', 'DNg08',
       'DNg18', 'DNg36', 'DNp03', 'DNp15', 'DNp31', 'DNg92', 'DNg110',
       'DNx02', 'PS225', 'DNge175', 'DNa08', 'DNa02', 'DNb06', 'DNp09',
       'DNp10', 'DNp32', 'DNp34', 'DNa13', 'DNb08', 'DNg74', 'DNpe029',
       'DNfl021', 'LN-DN2', 'DNge0128', 'VES028', 'DNge074', 'DNa09',
       'DNa10', 'aSP22', 'DNb01', 'DNxn009', 'DNxn010', 'DNb05', 'DNg30',
       'DNp05', 'DNp07', 'DNp11', 'DNp13', 'DNp18', 'DNp19', 'DNp20',
       'DNp21', 'DNp23', 'DNp24', 'DNp25', 'DNp26', 'DNp27', 'DNp28',
       'pMP2', 'pIP9', 'pIP10', 'MDN', 'DNx01', 'DNp42', 'DNp44', 'DNp49',
       'DNb04', 'AVLP499', 'PS166', 'PS211', 'DNg102'], dtype=object)

## Matching FAFB to MANC

In [19]:
fafb_to_manc_map_stuerner = {}
for fafb_dn in dn_names_fafb_unique:
    fafb_to_manc_map_stuerner[fafb_dn] = [
        dn
        for dn in dn_match_data_stuerner[
            dn_match_data_stuerner.type == fafb_dn
        ].systematic_type.unique()
        if dn != fafb_dn
    ]

print(
    f"{sum([len(matches) > 0 for matches in fafb_to_manc_map_stuerner.values()])}/{len(dn_names_fafb_unique)} of the unique FAFB names have MANC matches from Stürner et al."
)
print(
    f"Of these, {sum([len([m for m in matches if m in dn_names_manc_unique]) > 0 for matches in fafb_to_manc_map_stuerner.values()])} have matches to the proper MANC DN names"
)

147/436 of the unique FAFB names have MANC matches from Stürner et al.
Of these, 147 have matches to the proper MANC DN names


This time we enforce that the matches be proper MANC names, otherwise we tend to get mostly brain based synonyms for the DNs

In [20]:
fafb_to_manc_map_banc = {}
for fafb_dn in dn_names_fafb_unique:
    fafb_to_manc_map_banc[fafb_dn] = [
        dn
        for dn in banc_dn_annotations[
            banc_dn_annotations.pt_root_id.isin(
                banc_dn_annotations[banc_dn_annotations.tag == fafb_dn].pt_root_id
            )
        ].tag.unique()
        if dn in dn_names_manc_unique
    ]

print(
    f"{sum([len(matches) > 0 for matches in fafb_to_manc_map_banc.values()])}/{len(dn_names_fafb_unique)} of the unique FAFB names have MANC matches from the BANC."
)
print(
    f"Of these, {sum([len([m for m in matches if m in dn_names_manc_unique]) > 0 for matches in fafb_to_manc_map_banc.values()])} have matches to the proper MANC DN names"
)

222/436 of the unique FAFB names have MANC matches from the BANC.
Of these, 222 have matches to the proper MANC DN names


In [21]:
fafb_to_manc_map_combined = {
    fafb_dn: fafb_to_manc_map_stuerner[fafb_dn] + fafb_to_manc_map_banc[fafb_dn]
    for fafb_dn in dn_names_fafb_unique
}
print(
    f"{sum([len(matches) > 0 for matches in fafb_to_manc_map_combined.values()])}/{len(dn_names_fafb_unique)} of the unique FAFB names have MANC matches from the BANC."
)
print(
    f"Of these, {sum([len([m for m in matches if m in dn_names_manc_unique]) > 0 for matches in fafb_to_manc_map_combined.values()])} have matches to the proper MANC DN names"
)

353/436 of the unique FAFB names have MANC matches from the BANC.
Of these, 353 have matches to the proper MANC DN names


We get ~80% matches

In [22]:
rows = []
for fafb_dn in dn_names_fafb_unique:
    for match in fafb_to_manc_map_stuerner[fafb_dn]:
        rows.append((fafb_dn, match, "Stuerner et al."))
    for match in fafb_to_manc_map_banc[fafb_dn]:
        rows.append((fafb_dn, match, "BANC v514"))

    if (
        len(fafb_to_manc_map_stuerner[fafb_dn]) == 0
        and len(fafb_to_manc_map_banc[fafb_dn]) == 0
    ):
        rows.append((fafb_dn, None, None))

fafb_to_manc_df = pd.DataFrame(rows, columns=["FAFB DN name", "MANC DN name", "source"])
fafb_to_manc_df.to_csv("fafb_to_manc.csv", index=False)
fafb_to_manc_df


Unnamed: 0,FAFB DN name,MANC DN name,source
0,DNa01,DNxl001,Stuerner et al.
1,DNa04,DNut001,Stuerner et al.
2,DNa05,DNut002,Stuerner et al.
3,DNa05,DNut037,BANC v514
4,DNa06,DNxn001,Stuerner et al.
...,...,...,...
524,DNpe054,DNut042,BANC v514
525,DNpe055,DNxn102,BANC v514
526,DNpe056,DNxn168,BANC v514
527,oviDNa_a,,


## How many individual DNs do we have matches for?

We match 93% of MANC DNs to FAFB, and 82% of FAFB DNs to MANC

In [23]:
sum(
    [len(manc_to_fafb_map_combined[dn_manc]) > 0 for dn_manc in dn_names_manc_filtered]
) / len(dn_names_manc_filtered)


0.9269578313253012

In [24]:
sum(
    [len(fafb_to_manc_map_combined[dn_fafb]) > 0 for dn_fafb in dn_names_fafb_filtered]
) / len(dn_names_fafb_filtered)


0.8247863247863247

## What if we care about which side the DN is on?

### Stürner _et al._

In [25]:
dn_match_data_stuerner

Unnamed: 0,bodyid,group,type,systematic_type,root_side,long_tract,predicted_nt,predicted_nt_prob,neuropil,synonyms,type_with_new,soma_type,FANC_svID,FANC_match,FANC_group,confidence_fanc_match_1_5
0,15424,15424,oviDN,DNad001,RHS,MDA,ACH,0.943492,ad,,oviDN,P,,,,
1,15827,15424,oviDN,DNad001,LHS,MDA,ACH,0.968009,ad,,oviDN,P,,,,
2,13020,12574,DNg66,DNad002,RHS,MDA,unk,0.570516,ad,,DNg66,G,7.339707e+16,6.485183e+17,73397074884148993,5.0
3,12574,12574,DNg66,DNad002,LHS,MDA,unk,0.564406,ad,,DNg66,G,7.339707e+16,6.485183e+17,73397074884148993,5.0
4,27771,14171,DNp58,DNad003,LHS,CVL,ACH,0.826446,ad,,DNp58,P,7.325634e+16,6.485183e+17,NO MATCH,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1323,44166,44166,DNxn189,DNxn189,RHS,VTV,unk,0.425634,xn,,DNxn189,,,,,
1324,49688,49688,DNxn190,DNxn190,RHS,none,unk,0.495362,xn,,DNxn190,,,,,
1325,24311,24311,DNxn191,DNxn191,LHS,MDA,ACH,0.931584,xn,,DNxn191,,,,,
1326,49317,49317,DNxn192,DNxn192,LHS,none,GLUT,0.887326,xn,,DNxn192,,,,,


In [None]:
import collections

manc_to_fafb_map_stuerner_sides = collections.defaultdict(lambda: collections.defaultdict(list))
for _, row in dn_match_data_stuerner.iterrows():
    if row.type != row.systematic_type:
        # then we have a match
        manc_to_fafb_map_stuerner_sides[row.systematic_type][row.root_side].append(row.type)
for neuron, matches in manc_to_fafb_map_stuerner_sides.items():
    for side, neurons in matches.items():
        manc_to_fafb_map_stuerner_sides[neuron][side] = list(set(neurons))

print(
    f"{sum([len(matches) > 0 for matches in manc_to_fafb_map_stuerner_sides.values()])}/{len(dn_names_manc_unique)} of the unique MANC names have FAFB matches"
)
# print(
#     f"Of these, {sum([len([m for mm in matches.values() for m in mm.values() if m in dn_names_fafb_unique]) > 0 for matches in manc_to_fafb_map_stuerner_sides.values()])} have matches to the proper FAFB DN names"
# )

225/490 of the unique MANC names have FAFB matches


### BANC

In [27]:
banc_dn_info = pd.read_parquet(
    os.path.expanduser("~/Downloads/banc_v514/neck_connective_y92500.parquet")
)
banc_dn_info["pt_root_id"] = banc_dn_info["pt_root_id_v514"]
banc_dn_info = banc_dn_info[banc_dn_info.pt_root_id.isin(banc_dn_ids)]
banc_dn_info["tag_side"] = banc_dn_info["tag"]
banc_dn_info

Unnamed: 0,id,created,superceded_id,valid,tag,pt_supervoxel_id,pt_root_id_v520,pt_position,pt_root_id_v514,pt_root_id,tag_side
0,1533,2023-11-26 13:56:35.021693+00:00,,t,neck connective (left),76282811803942976,720575941651815445,"[124081, 92500, 3018]",720575941651815445,720575941651815445,neck connective (left)
1,979,2023-10-16 00:07:36.462043+00:00,,t,neck connective (right),76071705504309150,720575941558189171,"[116974, 92500, 2774]",720575941558189171,720575941558189171,neck connective (right)
3,1514,2023-11-26 13:56:35.010613+00:00,,t,neck connective (left),76282811870475120,720575941591151500,"[123535, 92500, 3075]",720575941591151500,720575941591151500,neck connective (left)
6,938,2023-10-16 00:07:36.438476+00:00,,t,neck connective (right),76001336760188835,720575941518951091,"[116355, 92500, 2791]",720575941518951091,720575941518951091,neck connective (right)
8,763,2023-10-16 00:07:36.336310+00:00,,t,neck connective (right),76071705504208646,720575941598469276,"[117746, 92500, 2733]",720575941598469276,720575941598469276,neck connective (right)
...,...,...,...,...,...,...,...,...,...,...,...
3634,3569,2023-11-26 13:56:36.186713+00:00,,t,neck connective,76142074248486514,720575941560434035,"[119453, 92500, 2777]",720575941560434035,720575941560434035,neck connective
3635,84,2023-10-16 00:07:35.929910+00:00,,t,neck connective (right),75930968083167187,720575941441643867,"[114579, 92500, 3059]",720575941441643867,720575941441643867,neck connective (right)
3638,3489,2023-11-26 13:56:36.140920+00:00,,t,neck connective,76071705503729338,720575941560435059,"[118724, 92500, 2565]",720575941560435059,720575941560435059,neck connective
3640,3493,2023-11-26 13:56:36.143112+00:00,,t,neck connective,76142074180719370,720575941620160895,"[118814, 92500, 2546]",720575941432295383,720575941432295383,neck connective


In [28]:
banc_dn_annotations_sides = banc_dn_annotations.merge(
    banc_dn_info[["pt_root_id", "tag_side"]],
    on="pt_root_id",
)
banc_dn_annotations_sides

Unnamed: 0,id,created,superceded_id,valid,tag,tag2,user_id,pt_supervoxel_id,pt_root_id,pt_position,tag_side
0,338,2023-12-13 22:33:56.479369+00:00,,t,giant fiber,neuron identity,2660,76142074315554489,720575941540215501,"[120421, 92500, 3002]",neck connective (left)
1,337,2023-12-13 22:30:26.067496+00:00,,t,giant fiber,neuron identity,2660,76071705571426918,720575941459097503,"[117976, 92500, 3015]",neck connective (right)
2,74996,2024-11-24 22:49:57.484901+00:00,,t,DNxl021,neuron identity,355,76071705570901483,720575941588971076,"[117503, 92500, 2830]",neck connective (right)
3,74571,2024-11-24 22:49:57.223431+00:00,,t,DNxl106,neuron identity,355,76282811736337897,720575941507243589,"[124387, 92500, 2655]",neck connective (left)
4,74818,2024-11-24 22:49:57.375055+00:00,,t,DNxl113,neuron identity,355,76001336759769357,720575941519502040,"[114745, 92500, 2653]",neck connective (right)
...,...,...,...,...,...,...,...,...,...,...,...
2114,18966,2024-07-08 12:40:31.327311+00:00,,t,DNpe028,neuron identity,847,76071705504214539,720575941471139191,"[117916, 92500, 2736]",neck connective (right)
2115,129416,2025-02-21 23:43:58.529831+00:00,,t,DNxn187,neuron identity,355,76142074248164002,720575941459053471,"[120652, 92500, 2661]",neck connective (left)
2116,124221,2025-02-21 23:43:55.358110+00:00,,t,DNpe031,neuron identity,355,76142074248164002,720575941459053471,"[120652, 92500, 2661]",neck connective (left)
2117,18223,2024-07-01 18:36:49.088507+00:00,,t,DNpe053,neuron identity,847,76142074315169489,720575941508499234,"[119588, 92500, 2870]",neck connective


In [29]:
manc_to_fafb_map_banc_sides = collections.defaultdict(lambda: collections.defaultdict(list))
for manc_dn in dn_names_manc_unique:
    left = [dn for dn in banc_dn_annotations_sides[
        banc_dn_annotations_sides.pt_root_id.isin(
            banc_dn_annotations_sides[banc_dn_annotations_sides.tag == manc_dn].pt_root_id
        )
        & banc_dn_annotations_sides.tag_side.str.contains("left", regex=False)
    ].tag.unique() if dn != manc_dn]
    right = [dn for dn in banc_dn_annotations_sides[
        banc_dn_annotations_sides.pt_root_id.isin(
            banc_dn_annotations_sides[banc_dn_annotations_sides.tag == manc_dn].pt_root_id
        )
        & banc_dn_annotations_sides.tag_side.str.contains("right", regex=False)
    ].tag.unique() if dn != manc_dn]
    both = [
        dn
        for dn in banc_dn_annotations_sides[
            banc_dn_annotations_sides.pt_root_id.isin(
                banc_dn_annotations_sides[banc_dn_annotations_sides.tag == manc_dn].pt_root_id
            )
            & ~banc_dn_annotations_sides.tag_side.str.contains("(", regex=False)
        ].tag.unique()
        if dn != manc_dn
    ]

    if len(both) > 0 or len(left) > 0:
        manc_to_fafb_map_banc_sides[manc_dn]["LHS"] = list(set(left).union(both))
    if len(both) > 0 or len(right) > 0:
        manc_to_fafb_map_banc_sides[manc_dn]["RHS"] = list(set(right).union(both))
print(
    f"{sum([len(matches) > 0 for matches in manc_to_fafb_map_banc_sides.values()])}/{len(dn_names_manc_unique)} of the unique MANC names have FAFB matches"
)

219/490 of the unique MANC names have FAFB matches


### Combining

In [30]:
import copy

manc_to_fafb_map_combined_sides = copy.deepcopy(manc_to_fafb_map_stuerner_sides)
print(manc_to_fafb_map_combined_sides["DNfl007"])
for dn, matches in manc_to_fafb_map_banc_sides.items():
    for side, neurons in matches.items():
        manc_to_fafb_map_combined_sides[dn][side] += neurons
print(manc_to_fafb_map_combined_sides["DNfl007"])

for dn, matches in manc_to_fafb_map_combined_sides.items():
    for side, neurons in matches.items():
        manc_to_fafb_map_combined_sides[dn][side] = list(set(neurons))

# manc_to_fafb_map_combined_sides = {
#     manc_dn: {
#         "LHS": list(set(manc_to_fafb_map_stuerner_sides[manc_dn]["LHS"]
#         + manc_to_fafb_map_banc_sides[manc_dn]["LHS"])),
#         "RHS": list(set(manc_to_fafb_map_stuerner_sides[manc_dn]["RHS"]
#         + manc_to_fafb_map_banc_sides[manc_dn]["RHS"])),
#     }
#     for manc_dn in dn_names_manc_unique
# }

defaultdict(<class 'list'>, {'RHS': ['DNg61']})
defaultdict(<class 'list'>, {'RHS': ['DNg61']})


In [None]:
print(
    f"{sum([len(matches['LHS'])+len(matches['RHS']) > 0 for matches in manc_to_fafb_map_combined_sides.values()])}/{len(dn_names_manc_unique)} of the unique MANC names have FAFB matches"
)

442/490 of the unique MANC names have FAFB matches


In [32]:
rows = []
for manc_dn in dn_names_manc_unique:
    for side in ["LHS","RHS"]:
        for match in manc_to_fafb_map_stuerner_sides[manc_dn][side]:
            rows.append((manc_dn, side, match, "Stuerner et al."))
        for match in manc_to_fafb_map_banc_sides[manc_dn][side]:
            rows.append((manc_dn, side, match, "BANC v514"))

        # if (
        #     len(manc_to_fafb_map_stuerner_sides[manc_dn][side]) == 0
        #     and len(manc_to_fafb_map_banc_sides[manc_dn][side]) == 0
        # ):
        #     rows.append((manc_dn, side, None, None))

manc_to_fafb_df = pd.DataFrame(rows, columns=["MANC DN name", "side", "FAFB DN name", "source"])
manc_to_fafb_df.to_csv("manc_to_fafb_sides.csv", index=False)
manc_to_fafb_df


Unnamed: 0,MANC DN name,side,FAFB DN name,source
0,DNad001,LHS,oviDN,Stuerner et al.
1,DNad001,RHS,oviDN,Stuerner et al.
2,DNad002,LHS,DNg66,Stuerner et al.
3,DNad002,RHS,DNg66,Stuerner et al.
4,DNad003,LHS,DNp58,Stuerner et al.
...,...,...,...,...
975,DNxn185,RHS,DNge140,BANC v514
976,DNxn187,LHS,DNpe031,BANC v514
977,DNxn187,RHS,DNpe031,BANC v514
978,DNxn189,LHS,DNpe052,BANC v514
