In [49]:
import pandas as pd
import numpy as np
import glob
from sklearn.metrics.cluster import adjusted_rand_score, normalized_mutual_info_score, homogeneity_score, completeness_score
import seaborn as sns

In [51]:
def get_dataname(data_dir):
    dname = data_dir.split('/')[-1]
    if 'Maynard' in dname:
        return(dname[7:-4])
    if 'Dataosmfish' in dname:
        return ('osmfish')
    if 'BaristaSeq' in dname:
        return(dname[10:-12])
    if 'BZ' in dname:
        return(dname[7:-4])
    if '1k' in dname:
        return ('STARmap_20180505_BY3_1k')
    if 'global' in dname:
        return(dname[7:-4])
    if 'MERFISH' in dname:
        return (dname[7:-4])
    if 'MsBrainAgingSpatialDonor' in dname:
        return (dname[:-4])

In [4]:
test_dir = '/data/hoan/spatial_transcriptomics/GraphBGM/output/MsBrainAgingSpatialDonor_10Slice0.csv'
dname = test_dir.split('/')[-1]
print(dname, get_dataname(test_dir))

MsBrainAgingSpatialDonor_10Slice0.csv MsBrainAgingSpatialDonor_10Slice0


# Analysis of MAYNARD dataset

In [53]:
data_dir_full = []
for data in glob.glob("/Users/melancholy/Desktop/results section b and c/Maynard_csv/May*.csv", recursive=True):
    data_dir_full.append(data)

In [55]:
data_vec = []
method_vec = []
ARI_vec = []
NMI_vec = []
HOM_vec = []
COM_vec = []

for data_dir in data_dir_full:
    # print(data_dir)
    df = pd.read_csv(data_dir)
    
    # GraphBGMv3a2full
    data_vec.append(get_dataname(data_dir))
    method_vec.append('GraphBG')
    ARI_vec.append(adjusted_rand_score(df['GraphBGMv3a2tied'], df['ground_truth']))
    NMI_vec.append(normalized_mutual_info_score(df['GraphBGMv3a2tied'], df['ground_truth']))
    HOM_vec.append(homogeneity_score(df['GraphBGMv3a2tied'], df['ground_truth']))
    COM_vec.append(completeness_score(df['GraphBGMv3a2tied'], df['ground_truth']))
    
    # GraphST
    data_vec.append(get_dataname(data_dir))
    method_vec.append('GraphST')
    ARI_vec.append(adjusted_rand_score(df['GraphST'], df['ground_truth']))
    NMI_vec.append(normalized_mutual_info_score(df['GraphST'], df['ground_truth']))
    HOM_vec.append(homogeneity_score(df['GraphST'], df['ground_truth']))
    COM_vec.append(completeness_score(df['GraphST'], df['ground_truth']))
    
df_full = pd.DataFrame({'DataName': data_vec, 'Method': method_vec, 'ARI': ARI_vec, 'NMI': NMI_vec, 'HOM': HOM_vec, 'COM': COM_vec})

In [22]:
df_full.head(1)

Unnamed: 0,DataName,Method,ARI,NMI,HOM,COM
0,151669,GraphBG,0.678347,0.675974,0.740235,0.621979


In [24]:
df_full.groupby(['Method']).mean(numeric_only=True)

Unnamed: 0_level_0,ARI,NMI,HOM,COM
Method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
GraphBG,0.596318,0.691604,0.710501,0.675815
GraphST,0.550711,0.672585,0.656605,0.691589


In [26]:
df_full[df_full.Method=='GraphBG']

Unnamed: 0,DataName,Method,ARI,NMI,HOM,COM
0,151669,GraphBG,0.678347,0.675974,0.740235,0.621979
2,151508,GraphBG,0.498936,0.637682,0.637907,0.637457
4,151509,GraphBG,0.55796,0.691697,0.714134,0.670628
6,151674,GraphBG,0.643128,0.740043,0.756627,0.724169
8,151675,GraphBG,0.588264,0.664788,0.693036,0.638753
10,151676,GraphBG,0.522729,0.668763,0.7445,0.607013
12,151510,GraphBG,0.505033,0.650876,0.640913,0.661153
14,151672,GraphBG,0.660752,0.720752,0.721575,0.719932
16,151673,GraphBG,0.652503,0.740522,0.758026,0.723808
18,151507,GraphBG,0.511932,0.69097,0.713357,0.669946


In [38]:
df_origin = pd.read_csv('/Users/melancholy/Desktop/results section b and c/41592_2024_2215_MOESM6_ESM.csv')
df_bm = df_origin[['DataName', 'Method', 'NMI', 'HOM', 'COM']]
df_bm = df_bm.loc[
    df_bm.DataName.isin(np.unique(data_vec)) & 
    ~df_bm.Method.isin(["leiden", "SpaGCN_with", "DeepST", "SpaGCN_without", "SEDR", "conST_nopre", "BASS", "SCAN-IT", "stLearn", "CCST", "STAGATE", "DeepST"])
]

In [40]:
df_bm

Unnamed: 0,DataName,Method,NMI,HOM,COM
2035,151509,SpaceFlow,0.518174,0.655729,0.428323
2036,151509,SpaceFlow,0.549755,0.697503,0.453659
2037,151509,SpaceFlow,0.517360,0.641371,0.433535
2038,151509,SpaceFlow,0.495927,0.622217,0.412253
2039,151509,SpaceFlow,0.600304,0.720121,0.514671
...,...,...,...,...,...
2750,151673,BayesSpace,0.687777,0.701786,0.674317
2751,151673,BayesSpace,0.687777,0.701786,0.674317
2752,151673,BayesSpace,0.688109,0.702144,0.674625
2753,151673,BayesSpace,0.687777,0.701786,0.674317


In [42]:
df_partial = df_full[['DataName', 'Method', 'NMI', 'HOM', 'COM']]
df_concat = pd.concat([df_partial, df_bm])
# print("Remove BayesSpace")
# df_concat = df_concat[df_concat.Method != 'BayesSpace']
df_concat.to_csv("result_metrics_10xVisium.csv", index=False)

In [72]:
df_bm

Unnamed: 0,DataName,Method,NMI,HOM,COM
2035,151509,SpaceFlow,0.518174,0.655729,0.428323
2036,151509,SpaceFlow,0.549755,0.697503,0.453659
2037,151509,SpaceFlow,0.517360,0.641371,0.433535
2038,151509,SpaceFlow,0.495927,0.622217,0.412253
2039,151509,SpaceFlow,0.600304,0.720121,0.514671
...,...,...,...,...,...
2750,151673,BayesSpace,0.687777,0.701786,0.674317
2751,151673,BayesSpace,0.687777,0.701786,0.674317
2752,151673,BayesSpace,0.688109,0.702144,0.674625
2753,151673,BayesSpace,0.687777,0.701786,0.674317


In [16]:
print(np.unique(df_origin[df_origin.Method=='BayesSpace']['DataName']))

['151507' '151508' '151509' '151510' '151669' '151670' '151671' '151672'
 '151673' '151674' '151675' '151676']


In [17]:
print(np.unique(df_concat[df_concat.Method=='BASS']['DataName']))

['151507' '151508' '151509' '151510' '151669' '151670' '151671' '151672'
 '151673' '151674' '151675' '151676']
