In [1]:
import os
import numpy as np
import pandas as pd
from scipy.stats import gaussian_kde
import plotly.graph_objects as go


Select taxonomy level (supertype, subclass)

In [2]:
# Taxonomy level to order. `tx_level` is the short name used in output file
# names; `tx_label` is the matching column in the isodepth table. The label is
# derived from the level so the two settings cannot get out of sync.
TX_LABEL_BY_LEVEL = {
    'subclass': 'SEAAD_Subclass_name',
    'ttype': 'SEAAD_Supertype_name',
}

tx_level = 'ttype'  # one of the TX_LABEL_BY_LEVEL keys: 'subclass' or 'ttype'
tx_label = TX_LABEL_BY_LEVEL[tx_level]  # KeyError on an unsupported level

Load data

In [3]:
# Read the full isodepth table (file provided by Emily).
isodepth_file = r'\\allen\programs\celltypes\workgroups\mousecelltypes\SarahWB\datasets\human_exc\data\isodepth\20250624\20250624_isodepths.csv'
isodepth_df = pd.read_csv(isodepth_file)

# Keep only glutamatergic rows, turn the taxonomy column into an ordered
# categorical, and sort the rows by it.
is_glut = isodepth_df['SEAAD_Class_name'].eq('Neuronal: Glutamatergic')
glut_isodepth_df = (
    isodepth_df.loc[is_glut]
    .assign(**{tx_label: lambda df: pd.Categorical(df[tx_label], ordered=True)})
    .sort_values(tx_label)
)

# Row counts before and after the class filter.
print(len(isodepth_df))
print(len(glut_isodepth_df))


21646
8607


20 bin histogram ordering (Emily's method) 

In [4]:
# Histogram-peak ordering: for each type, bin its isodepths into N_HISTO_BINS
# equal-width bins (spanning that type's own min..max) and record the left
# edge of the most populated bin as the type's peak isodepth.
N_HISTO_BINS = 20

peak_isodepth_dict = {}
for tx_type, group_df in glut_isodepth_df.groupby(tx_label, observed=True):

    # Drop NaNs first: np.histogram cannot derive a finite bin range from NaN
    # input and raises. The mean and KDE orderings also ignore NaNs.
    values = group_df['scaled_gaston_isodepth'].dropna().to_numpy()
    if values.size < 1:  # nothing to bin (original note here: "remove ME type")
        continue

    counts, bin_edges = np.histogram(values, bins=N_HISTO_BINS, density=False)

    # Select the edge by position of the largest count. Looking the peak up
    # through a {count: edge} dict collapses bins that share a count, so the
    # chosen bin would depend on dict overwrite order. np.argmax returns the
    # first maximum, so tied bins resolve to the lowest-isodepth one.
    peak_isodepth_dict[tx_type] = bin_edges[np.argmax(counts)]

peak_isodepth_df_histo = pd.DataFrame({tx_label: list(peak_isodepth_dict.keys()),
                                       'scaled_gaston_isodepth': list(peak_isodepth_dict.values())})

# Sort types by their peak isodepth (ascending).
peak_isodepth_df_histo = peak_isodepth_df_histo.sort_values('scaled_gaston_isodepth').reset_index(drop=True)
isodepth_order_histo = peak_isodepth_df_histo[tx_label].tolist()

isodepth_order_histo


['L2/3 IT_7',
 'L2/3 IT_1',
 'L2/3 IT_6',
 'L2/3 IT_12',
 'L2/3 IT_10',
 'L2/3 IT_8',
 'L4 IT_4',
 'L2/3 IT_5',
 'L2/3 IT_13',
 'L2/3 IT_3',
 'L2/3 IT_2',
 'L4 IT_3',
 'L4 IT_2',
 'L4 IT_1',
 'L6 IT Car3_1',
 'L5 IT_6',
 'L5 IT_2',
 'L5 ET_1',
 'L5 IT_5',
 'L5 ET_2',
 'L5/6 NP_3',
 'L5/6 NP_6',
 'L5/6 NP_4',
 'L5/6 NP_2',
 'L5/6 NP_1',
 'L5 IT_1',
 'L6b_6',
 'L6 IT Car3_2',
 'L5 IT_3',
 'L5 IT_7',
 'L6 CT_3',
 'L6 CT_4',
 'L6 CT_2',
 'L6 IT Car3_3',
 'L6 IT_2',
 'L6 IT_1',
 'L6b_3',
 'L6b_5',
 'L6b_4',
 'L6b_2',
 'L6 CT_1',
 'L6b_1']

Mean isodepth ordering 

In [5]:
# Mean isodepth per type; types whose mean is NaN are dropped, the rest are
# sorted by ascending mean.
mean_isodepth_by_type = glut_isodepth_df.groupby(tx_label)['scaled_gaston_isodepth'].mean()
peak_isodepth_df_mean = (
    mean_isodepth_by_type
    .dropna()
    .reset_index()
    .sort_values('scaled_gaston_isodepth')
)
isodepth_order_mean = peak_isodepth_df_mean[tx_label].to_list()

isodepth_order_mean

['L2/3 IT_6',
 'L2/3 IT_1',
 'L2/3 IT_7',
 'L2/3 IT_8',
 'L2/3 IT_12',
 'L2/3 IT_10',
 'L2/3 IT_13',
 'L2/3 IT_5',
 'L2/3 IT_3',
 'L4 IT_3',
 'L4 IT_4',
 'L4 IT_2',
 'L4 IT_1',
 'L2/3 IT_2',
 'L5 IT_6',
 'L5 IT_5',
 'L5 IT_2',
 'L5 ET_1',
 'L5/6 NP_2',
 'L6 IT Car3_1',
 'L5 IT_1',
 'L5/6 NP_6',
 'L5 IT_7',
 'L5/6 NP_1',
 'L5 IT_3',
 'L5 ET_2',
 'L5/6 NP_3',
 'L5/6 NP_4',
 'L6 IT Car3_3',
 'L6 IT Car3_2',
 'L6 IT_2',
 'L6 CT_3',
 'L6b_6',
 'L6 IT_1',
 'L6 CT_2',
 'L6b_3',
 'L6 CT_4',
 'L6b_5',
 'L6b_1',
 'L6b_4',
 'L6b_2',
 'L6 CT_1']

KDE peak density ordering 

In [6]:
def _kde_peak(values, n_grid=500):
    """Return the position of the highest Gaussian-KDE density of `values`.

    The density is evaluated on `n_grid` evenly spaced points between the
    minimum and maximum of `values`, and the grid point with the largest
    density is returned.

    Returns None when no KDE can be fitted: fewer than two points, or all
    points identical (zero spread makes the data covariance singular, which
    gaussian_kde rejects).
    """
    if len(values) < 2 or np.ptp(values) == 0:
        return None
    kde = gaussian_kde(values)
    x_grid = np.linspace(values.min(), values.max(), n_grid)
    return x_grid[np.argmax(kde(x_grid))]


# Accumulate one KDE peak per type; types without a usable KDE are skipped.
peak_isodepth_kde_list = []
for tx_type, group_df in glut_isodepth_df.groupby(tx_label, observed=True):
    peak_x = _kde_peak(group_df['scaled_gaston_isodepth'].dropna().values)
    if peak_x is None:
        continue
    peak_isodepth_kde_list.append({tx_label: tx_type, 'scaled_gaston_isodepth': peak_x})

# Passing `columns` keeps the sort below valid even if every type was skipped.
peak_isodepth_df_kde = (
    pd.DataFrame(peak_isodepth_kde_list, columns=[tx_label, 'scaled_gaston_isodepth'])
    .sort_values(by='scaled_gaston_isodepth')
    .reset_index(drop=True)
)
isodepth_order_kde = peak_isodepth_df_kde[tx_label].tolist()

isodepth_order_kde

['L2/3 IT_6',
 'L2/3 IT_1',
 'L2/3 IT_7',
 'L2/3 IT_8',
 'L2/3 IT_12',
 'L2/3 IT_10',
 'L2/3 IT_5',
 'L2/3 IT_13',
 'L2/3 IT_3',
 'L4 IT_3',
 'L4 IT_4',
 'L4 IT_2',
 'L4 IT_1',
 'L5 IT_2',
 'L5 IT_6',
 'L5 IT_5',
 'L5 ET_1',
 'L5 IT_1',
 'L5 ET_2',
 'L5/6 NP_6',
 'L5/6 NP_2',
 'L5/6 NP_1',
 'L5/6 NP_3',
 'L5/6 NP_4',
 'L6 IT Car3_1',
 'L5 IT_3',
 'L5 IT_7',
 'L6 IT Car3_2',
 'L6 CT_3',
 'L6 IT Car3_3',
 'L6b_6',
 'L6 CT_2',
 'L2/3 IT_2',
 'L6 IT_2',
 'L6b_3',
 'L6 IT_1',
 'L6b_5',
 'L6b_1',
 'L6b_4',
 'L6 CT_4',
 'L6b_2',
 'L6 CT_1']

In [7]:
# Number of types in each ordering (histogram, mean, KDE), one per line.
for ordering in (isodepth_order_histo, isodepth_order_mean, isodepth_order_kde):
    print(len(ordering))


42
42
42


Save

In [8]:
# Each ordering table is written to its own CSV; `{}` in the template is
# filled with the taxonomy level.
ordering_outputs = [
    (peak_isodepth_df_histo, r'\\allen\programs\celltypes\workgroups\mousecelltypes\SarahWB\datasets\human_exc\data\isodepth\20250624\isodepth_{}_ordering_histo.csv'),
    (peak_isodepth_df_mean, r'\\allen\programs\celltypes\workgroups\mousecelltypes\SarahWB\datasets\human_exc\data\isodepth\20250624\isodepth_{}_ordering_mean.csv'),
    (peak_isodepth_df_kde, r'\\allen\programs\celltypes\workgroups\mousecelltypes\SarahWB\datasets\human_exc\data\isodepth\20250624\isodepth_{}_ordering_kde.csv'),
]
for ordering_df, path_template in ordering_outputs:
    ordering_df.to_csv(path_template.format(tx_level), index=False)


Save the mean-based ordering as a text file (one type name per line)

In [9]:
# The 'ttype' level is written to a "_raw" file; any other level is written
# to the plain "<level>_order.txt" file.
order_txt_template = (
    r'\\allen\programs\celltypes\workgroups\mousecelltypes\SarahWB\datasets\human_exc\data\formatting_dicts\{}_order_raw.txt'
    if tx_level == 'ttype'
    else r'\\allen\programs\celltypes\workgroups\mousecelltypes\SarahWB\datasets\human_exc\data\formatting_dicts\{}_order.txt'
)
peak_isodepth_df_mean[tx_label].to_csv(order_txt_template.format(tx_level), index=False, header=False)


Build the final supertype order: sort by subclass order first, then by supertype order within each subclass

In [10]:
def _read_lines(path):
    """Return the contents of the text file at `path` as a list of lines."""
    with open(path, 'r') as handle:
        return handle.read().splitlines()


# Previously saved orderings, one name per line.
subclass_order = _read_lines(r'\\allen\programs\celltypes\workgroups\mousecelltypes\SarahWB\datasets\human_exc\data\formatting_dicts\subclass_order.txt')
ttype_order = _read_lines(r'\\allen\programs\celltypes\workgroups\mousecelltypes\SarahWB\datasets\human_exc\data\formatting_dicts\ttype_order_raw.txt')



In [11]:
# One row per glutamatergic supertype, paired with its subclass (the first
# occurrence of each supertype is kept).
glut_rows = isodepth_df['SEAAD_Class_name'] == 'Neuronal: Glutamatergic'
tx_df = (
    isodepth_df.loc[glut_rows, ['SEAAD_Subclass_name', 'SEAAD_Supertype_name']]
    .drop_duplicates(subset='SEAAD_Supertype_name')
)
tx_df

Unnamed: 0,SEAAD_Subclass_name,SEAAD_Supertype_name
18,L2/3 IT,L2/3 IT_6
19,L6 CT,L6 CT_2
21,L6 IT Car3,L6 IT Car3_1
54,L2/3 IT,L2/3 IT_8
76,L2/3 IT,L2/3 IT_5
95,L2/3 IT,L2/3 IT_12
120,L2/3 IT,L2/3 IT_1
219,L2/3 IT,L2/3 IT_13
238,L6 IT Car3,L6 IT Car3_2
290,L2/3 IT,L2/3 IT_7


In [12]:
# pd.Categorical maps any value absent from `categories` to NaN without
# warning, and NaN rows sort to the end. Check the names up front so a
# mismatch with the saved orders fails loudly instead of yielding a silently
# incomplete ordering.
for column, saved_order in (('SEAAD_Subclass_name', subclass_order),
                            ('SEAAD_Supertype_name', ttype_order)):
    unknown_names = set(tx_df[column].dropna().astype(str)) - set(saved_order)
    if unknown_names:
        raise ValueError('{} values missing from the saved order: {}'.format(column, sorted(unknown_names)))

# Encode both columns as ordered categoricals whose category order is the
# saved ordering, so sorting follows that ordering rather than the alphabet.
tx_df['SEAAD_Subclass_name'] = pd.Categorical(tx_df['SEAAD_Subclass_name'], categories=subclass_order, ordered=True)
tx_df['SEAAD_Supertype_name'] = pd.Categorical(tx_df['SEAAD_Supertype_name'], categories=ttype_order, ordered=True)

# Sort by subclass first, then by supertype within each subclass.
tx_df_sorted = tx_df.sort_values(by=['SEAAD_Subclass_name', 'SEAAD_Supertype_name'])
tx_df_sorted


Unnamed: 0,SEAAD_Subclass_name,SEAAD_Supertype_name
18,L2/3 IT,L2/3 IT_6
120,L2/3 IT,L2/3 IT_1
290,L2/3 IT,L2/3 IT_7
54,L2/3 IT,L2/3 IT_8
95,L2/3 IT,L2/3 IT_12
357,L2/3 IT,L2/3 IT_10
219,L2/3 IT,L2/3 IT_13
76,L2/3 IT,L2/3 IT_5
950,L2/3 IT,L2/3 IT_3
11549,L2/3 IT,L2/3 IT_2


In [13]:
# Write the final supertype order, one name per line. The target file name is
# fixed: the path has no `{}` placeholder, so the former `.format(tx_level)`
# call had no effect and has been removed.
tx_df_sorted['SEAAD_Supertype_name'].to_csv(r'\\allen\programs\celltypes\workgroups\mousecelltypes\SarahWB\datasets\human_exc\data\formatting_dicts\ttype_order.txt', index=False, header=False)
