In [None]:
import pickle
import seaborn as sns
import imageio as io
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import tifffile
from tqdm.notebook import tqdm
import pathlib
import json
import glob
import PIL
import scanpy as sc

In [None]:
# Load the AnnData object used throughout this notebook.
# NOTE(review): the path is written with Windows backslashes, so it only resolves on Windows as-is.
adata_del2=sc.read(r'data\240719AnalysisDAM_TERM\adata_del2.h5ad')

In [None]:
# Broad cell class -> list of 'ddnres5' cluster IDs (stored as strings).
# NOTE(review): cluster '45' appears under both 'Glutamatergic' and 'GABAergic'.
# NOTE(review): this dict is redefined (with 'ExN'/'InN' keys) in the "Manually label"
# cell further down before any visible code reads it.
cell_dict2={
'Glutamatergic':['66','11','30','53','17','27','15','34','31','54','13','28','10','50','12','44','80','45','29','79','68','70'],
'OPC': ['40','22'],
'Oligo': ['76','58','2','60','6','4','5','36','35','47','49','39','74','69'],
'Macro': ['72'],
'VLMC':['43','32','61'],
'Endo': ['9','0','19','59','18','38','77','64'],
'Astro': ['7','26','55','48','16','8','24','20','56'],
'Microglia':['67','3','42','83','75'],
'SMC_Peri':['51','84','41','82','78'],
'GABAergic':['52','65','63','25','1','71','37','85','57','33','21','45','23','46','62','73','14','81']
}

In [None]:
# Curated marker panel, listed in the order the genes should appear in plots.
cell2023_markers = [
    'Slc17a7', 'Gad2', 'Cux2', 'Rspo1', 'Scube1', 'Fezf2', 'Ndst4', 'Nxph4', 'Hs3st4',
    'Tshz2', 'Chat', 'Ptpru', 'Sst', 'Pvalb', 'Syt6', 'Cpne7', 'Lamp5', 'Lhx6', 'Vip',
    'Adarb2', 'Calb2', 'Otof', 'Drd1', 'Adora2a', 'Pdgfra', 'Olig1', 'Rorb', 'Aqp4',
    'Foxj1', 'Cspg4', 'Vtn', 'Cldn5', 'F13a1', 'Cd3e', 'Ctss',
]
# Markers that are actually measured in this dataset (np.intersect1d returns them sorted).
cell2023_inlib = list(
    np.intersect1d(cell2023_markers, adata_del2.var.index.tolist())
)
print(
    len(cell2023_markers),
    len(cell2023_inlib),
    cell2023_inlib,
)

In [None]:
# Walk the curated panel in its original order and keep only the measured genes,
# so plots follow the panel order rather than the sorted order of cell2023_inlib.
cell2023_inlib_ordered = list(
    filter(lambda marker: marker in cell2023_inlib, cell2023_markers)
)

n_panel = len(cell2023_markers)
n_found = len(cell2023_inlib_ordered)
print(f"Total genes in cell2023_markers: {n_panel}")
print(f"Genes found in the library: {n_found}")
print("Genes in order:", cell2023_inlib_ordered)

In [None]:
# Marker dot plot per obs['class'] group, with groups ordered by a dendrogram.
# NOTE(review): relies on obs['class'] already being present in the loaded file — confirm.
sc.pl.dotplot(adata_del2, cell2023_inlib_ordered, groupby='class',dendrogram=True)

In [None]:
# Marker heatmap per 'ddnres5' cluster.
# Note: set_figure_params changes the global default figure size for later plots too.
sc.set_figure_params(figsize=(40,8))
sc.pl.heatmap(
    adata_del2, 
    cell2023_inlib_ordered, 
    groupby='ddnres5', 
    swap_axes=True,  # Swap x and y axes
    dendrogram=True, 
    show=False
)


In [None]:
# Subset to cells whose obs['class'] is 'Glutamatergic' and display the result.
# NOTE(review): the "Manually label" cell below renames this class to 'ExN'; once it has run,
# this filter selects nothing.
ExN=adata_del2[adata_del2.obs['class']=='Glutamatergic']
ExN

In [None]:
# Marker dot plot restricted to the ExN subset, grouped by 'ddnres5' cluster.
#sc.tl.dendrogram(ExN,  groupby='ddnres5')
sc.pl.dotplot(ExN, cell2023_inlib_ordered, groupby='ddnres5',dendrogram=True)

# Manually label

In [None]:
#unsure:'55_cd3e,cd3g''
# Manual annotation: broad cell class -> list of 'ddnres5' cluster IDs (as strings).
cell_dict2={
'ExN':['66','11','30','53','17','27','15','34','31','54','13','28','10','50','12','44','80','45','29','79','68','70'],
'OPC': ['40','22'],
'Oligo': ['76','58','2','60','6','4','5','36','35','47','49','39','74','69'],
'Macro': ['72'],
'VLMC':['43','32','61'],
'Endo': ['9','0','19','59','18','38','77','64'],
'Astro': ['7','26','55','48','16','8','24','20','56'],
'Microglia':['67','3','42','83','75'],
'SMC_Peri':['51','84','41','82','78'],
'InN':['52','65','63','25','1','71','37','85','57','33','21','45','23','46','62','73','14','81']
}

# Invert to cluster -> class. A cluster listed under several classes keeps the LAST class
# (the same outcome as assigning class by class in dict order), but is now reported
# instead of being overwritten silently.
cluster_to_class = {}
for label, cluster_ids in cell_dict2.items():
    for cluster_id in cluster_ids:
        previous = cluster_to_class.get(cluster_id)
        if previous is not None and previous != label:
            print(f"Warning: cluster {cluster_id} is listed under both {previous} and {label}; using {label}.")
        cluster_to_class[cluster_id] = label

# Build the column in one pass. This avoids writing strings into a NaN (float) column and
# avoids fillna(inplace=True) on a column selection, which is chained assignment in pandas.
adata_del2.obs['class'] = (
    adata_del2.obs['ddnres5']
    .astype(str)
    .map(cluster_to_class)
    .fillna('Notclustered')
    .astype('category')
)

In [None]:

# Fixed colour per annotated class so that figures stay comparable across plots.
custom_colors = {
    'OPC': '#f58231',  # Orange
    'Oligo': '#3cb44b',  # Green
    'Macro': '#ff1493',  # Deep pink
    'Astro': '#4363d8',  # Blue
    'Microglia': '#ffe119',  # Yellow
    'ExN': '#e6194B',  # Bright Red
    'InN': '#bfef45',  # Lime
    'VLMC': '#42d4f4',  # Sky Blue
    'Endo': '#fabed4',  # Light Pink
    'SMC_Peri': '#808000'  # Olive (was '#3cb44b', i.e. indistinguishable from 'Oligo')
}

# scanpy reads '<key>_colors' in category order; classes without an entry fall back to black.
unique_classes = adata_del2.obs['class'].cat.categories
adata_del2.uns['class_colors'] = [custom_colors.get(cls, '#000000') for cls in unique_classes]

sc.set_figure_params(figsize=(5, 5))
sc.pl.umap(
    adata_del2,
    color='class',
    add_outline=True,
    legend_loc='on data',
    legend_fontsize=10,
    legend_fontoutline=2,
    frameon=False,
    size=1,
    show=False,
    use_raw=False
)

output_folder=r'data\240719AnalysisDAM_TERM\fig_0904'
# Create the target folder if needed so savefig does not fail on a fresh checkout.
os.makedirs(output_folder, exist_ok=True)
output_path = os.path.join(output_folder, 'global_umap_class_legendondata.tif')
plt.savefig(output_path, bbox_inches='tight', dpi=300)

# Optionally show the plot
plt.show()


In [None]:
# One AnnData view per sample, selected by obs['batch'].
# NOTE(review): `adata_whole` is not defined in any cell visible above — confirm it is loaded
# before this cell runs on a fresh kernel.
adata_app_wh=adata_whole[adata_whole.obs['batch']=='APP_1']
adata_te4_wh=adata_whole[adata_whole.obs['batch']=='TE4_1']
adata_wt_wh=adata_whole[adata_whole.obs['batch']=='WT_1']

In [None]:
# Spatial plot of a hand-picked set of 'ddnres5' clusters.
# NOTE(review): `plot_cluster_scdata` and `cmap` are not defined in any cell visible above —
# confirm they are defined before this cell runs.
fig=plt.figure(figsize=(12, 8), facecolor="white")
#bad 15
#cortical layers[8,10,12,14] 1,3,6,0,9
# `fig` is rebound here to whatever plot_cluster_scdata returns.
fig = plot_cluster_scdata(adata_del2,cmap,clusters=[5,10,11,12,13,15,17,27,28,30,31,32,34,37,43,53,54,61,78],transpose=1,flipx=1,flipy=-1,tag='ddnres5', key = 'X_multi_spatial')

In [None]:
# Use celltypist

In [None]:
# celltypist is imported here rather than in the top import cell.
import celltypist
from celltypist import models
# force_update = True asks celltypist to refresh its models on every run (needs network access).
models.download_models(force_update = True)

In [None]:
models.models_path

In [None]:
models.models_description()

In [None]:
model = models.Model.load(model = 'Mouse_Whole_Brain.pkl')

In [None]:
model.cell_types

In [None]:
# Annotate every cell with the Mouse_Whole_Brain model, with majority voting enabled,
# then display the per-cell label table.
predictions = celltypist.annotate(adata_del2, model = 'Mouse_Whole_Brain.pkl', majority_voting = True)
predictions.predicted_labels

In [None]:
# Rebind adata_del2 to the AnnData returned by celltypist; later cells read the
# 'majority_voting' and 'over_clustering' obs columns from it.
adata_del2 = predictions.to_adata()

In [None]:
# Compare the manual classes with the celltypist predictions on the UMAP.
# NOTE(review): 'predicted_label_2' is not created by any visible cell — presumably it was
# already stored in the loaded file; confirm.
sc.set_figure_params(figsize=(15,15))
sc.pl.umap(adata_del2, color = ['class', 'predicted_label_2', 'majority_voting'], legend_loc = 'on data')

In [None]:
# Give the whole-brain model's majority-voting result its own column name. This has to run
# BEFORE 'majority_voting_wb' is selected below; the guard keeps the cell safe to re-run
# (a second bare pop() would raise KeyError).
if 'majority_voting' in adata_del2.obs.columns:
    adata_del2.obs['majority_voting_wb'] = adata_del2.obs.pop('majority_voting')

# Compact side-by-side view of manual and predicted annotations.
obs_df = adata_del2.obs[['class', 'predicted_label_2', 'majority_voting_wb','over_clustering']]

In [None]:
# Yao2023 mapping

In [None]:
supertype=pd.read_excel('Yao2023_41586_2023_6812_MOESM8_ESM.xlsx')

In [None]:
supertype

In [None]:
# Index the Yao 2023 table by subclass label so it can be used as a lookup.
# Guarded so that re-running the cell does not raise KeyError once the column is the index.
if supertype.index.name != 'subclass_id_label':
    supertype = supertype.set_index('subclass_id_label')

# Class-level annotation columns that get carried over to the cells.
relevant_columns = ['class_id', 'class_label', 'class_id_label', 'neighborhood', 'anatomical_annotation']
supertype_relevant = supertype[relevant_columns]

In [None]:
# Boolean mask marking every row whose index label occurs more than once.
duplicates = supertype_relevant.index.duplicated(keep=False)

if not duplicates.any():
    print("No duplicates found.")
else:
    print("Duplicated subclass_id_labels found:")
    print(supertype_relevant[duplicates])

In [None]:
# Keep the first row for each index label so the table can serve as a one-to-one lookup.
supertype_relevant_unique = supertype_relevant[~supertype_relevant.index.duplicated(keep='first')]
supertype_relevant_unique

In [None]:
adata=adata_del2.copy()

In [None]:
# Lookup dicts: subclass label -> class-level annotation.
mapping_class_id = supertype_relevant_unique['class_id'].to_dict()
mapping_class_label = supertype_relevant_unique['class_label'].to_dict()
mapping_class_id_label = supertype_relevant_unique['class_id_label'].to_dict()
mapping_neighborhood = supertype_relevant_unique['neighborhood'].to_dict()
mapping_anatomical_annotation = supertype_relevant_unique['anatomical_annotation'].to_dict()

# Copy each annotation onto the cells via their celltypist subclass label.
yao_mappings = {
    'class_id': mapping_class_id,
    'class_label': mapping_class_label,
    'class_id_label': mapping_class_id_label,
    'neighborhood': mapping_neighborhood,
    'anatomical_annotation': mapping_anatomical_annotation,
}
for obs_column, mapping in yao_mappings.items():
    adata.obs[obs_column] = adata.obs['majority_voting_wb'].map(mapping)

# Labels that are missing from the table map to NaN; report them instead of failing silently.
n_unmapped = int(adata.obs['class_label'].isna().sum())
print(f"Cells without a class_label after mapping: {n_unmapped}")


In [None]:
print(adata.obs[['majority_voting_wb', 'class_id', 'class_label', 'class_id_label', 'neighborhood', 'anatomical_annotation']].head())


In [None]:
supertype_relevant_unique.loc['221 LDT-PCG Vsx2 Lhx4 Glut']

In [None]:
adata.obs['class_id'] = adata.obs['class_id'].astype('category')

In [None]:
sc.set_figure_params(figsize=(6,6))
sc.pl.umap(adata, color = ['class', 'class_id', 'majority_voting_wb','class_label','anatomical_annotation','ddnres5'], legend_loc = 'on data')

In [None]:
# Rows: ddnres5 clusters; columns: class_label. Each row is normalised to sum to 1.
ct_table = pd.crosstab(adata.obs['ddnres5'], adata.obs['class_label'])
ct_table_normalized = ct_table.div(ct_table.sum(axis=1), axis=0)
plt.figure(figsize=(16, 26))  # Adjust the figure size as needed
sns.heatmap(ct_table_normalized, annot=True, cmap="coolwarm", fmt=".2f", linewidths=.5)
plt.title('Heatmap of Cell Types per Leiden Cluster')
# crosstab(index, columns): clusters run along y, predicted types along x.
plt.xlabel('Predicted Cell Types')
plt.ylabel('Leiden Clusters')
#ct_table_normalized.to_csv('f13_crosstab_ABAZeng23_class_label_ddnres5_norm.csv', index=True)
#ct_table.to_csv('f13_crosstab_ABAZeng23_class_label_ddnres5.csv', index=True)
plt.show()

In [None]:
# Rows: ddnres5 clusters; columns: majority_voting_wb labels. Raw counts are plotted;
# the row-normalised table is only used by the (commented-out) export below.
ct_table = pd.crosstab(adata.obs['ddnres5'], adata.obs['majority_voting_wb'])
ct_table_normalized = ct_table.div(ct_table.sum(axis=1), axis=0)
plt.figure(figsize=(26, 8))  # Adjust the figure size as needed
sns.heatmap(ct_table, annot=True, cmap="coolwarm", fmt="g", linewidths=.5)
plt.title('Heatmap of Cell Types per Leiden Cluster')
# crosstab(index, columns): clusters run along y, predicted types along x.
plt.xlabel('Predicted Cell Types')
plt.ylabel('Leiden Clusters')
#ct_table_normalized.to_csv('f13_crosstab_ABAZeng23_ddnres5_norm.csv', index=True)
#ct_table.to_csv('f13_crosstab_ABAZeng23_ddnres5.csv', index=True)
plt.show()

In [None]:
# For every row of ct_table, collect the (up to) three column labels with the highest
# non-zero counts, then export one line per row.
data = []

for row_label, row_counts in ct_table.iterrows():
    present = row_counts[row_counts > 0]
    data.append({
        'ddnres5': row_label,
        'Top_celltype_voting': present.nlargest(3).index.tolist(),
    })

df_top_clusters = pd.DataFrame(data)
df_top_clusters.to_csv('f13_crosstab_ABAZeng23_ddnres5_class_lable_top3celltype.csv', index=True)

In [None]:
df_top_clusters

In [None]:
from scipy.cluster.hierarchy import linkage, leaves_list

# Reorder rows and columns of the current ct_table by single-linkage clustering so that
# similar count profiles sit next to each other.
row_linkage = linkage(ct_table, method='single')
col_linkage = linkage(ct_table.T, method='single')

ordered_rows = leaves_list(row_linkage)
ordered_cols = leaves_list(col_linkage)

ct_table_reordered = ct_table.iloc[ordered_rows, ordered_cols]

# Normalise each row to fractions after reordering.
ct_table_normalized_reordered = ct_table_reordered.div(ct_table_reordered.sum(axis=1), axis=0)

plt.figure(figsize=(20, 20))
sns.heatmap(ct_table_normalized_reordered, annot=False, cmap="coolwarm", linewidths=.5)

plt.title('Heatmap of Cell Types per Leiden Cluster (Reordered)')
# Label the axes from the crosstab itself (it carries the names of the obs columns it was
# built from); the previous hard-coded labels had x and y swapped.
plt.xlabel(ct_table_normalized_reordered.columns.name or '')
plt.ylabel(ct_table_normalized_reordered.index.name or '')

plt.show()


In [None]:
# Rows: class_label; columns: majority_voting_wb. No Leiden clusters are involved in this
# table, so the title and axis labels name the two annotations actually being compared.
ct_table = pd.crosstab(adata.obs['class_label'], adata.obs['majority_voting_wb'])
ct_table_normalized = ct_table.div(ct_table.sum(axis=1), axis=0)
plt.figure(figsize=(26, 8))  # Adjust the figure size as needed
sns.heatmap(ct_table, annot=True, cmap="coolwarm", fmt="g", linewidths=.5)
plt.title('Cell Counts: class_label vs. majority_voting_wb')
plt.xlabel('majority_voting_wb')
plt.ylabel('class_label')
#ct_table_normalized.to_csv('f2_crosstab_cell_types_res3.csv', index=True)
#ct_table.to_csv('f13_crosstab_ddnres5_majority_voting_wb_cts.csv', index=True)
plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, leaves_list
from scipy.spatial.distance import pdist, squareform

# Average-linkage clustering on Euclidean distances between count profiles, applied to
# both the rows and the columns of the current ct_table.
row_distances = pdist(ct_table, metric='euclidean')
col_distances = pdist(ct_table.T, metric='euclidean')

row_linkage = linkage(row_distances, method='average')
col_linkage = linkage(col_distances, method='average')

ordered_rows = leaves_list(row_linkage)
ordered_cols = leaves_list(col_linkage)

ct_table_reordered = ct_table.iloc[ordered_rows, ordered_cols]
ct_table_normalized_reordered = ct_table_reordered.div(ct_table_reordered.sum(axis=1), axis=0)

plt.figure(figsize=(20, 32))
sns.heatmap(ct_table_normalized_reordered, annot=False, cmap="coolwarm", linewidths=1, linecolor='black')

# Take the labels from the crosstab's own axis names so they always describe whichever
# ct_table was built last; the previous hard-coded labels did not match the table.
row_name = ct_table_normalized_reordered.index.name or ''
col_name = ct_table_normalized_reordered.columns.name or ''
plt.title(f'{row_name} vs. {col_name} (Reordered by Association)')
plt.xlabel(col_name)
plt.ylabel(row_name)

plt.show()


In [None]:
# Rows: ddnres5 clusters; columns: class_name.
# NOTE(review): no visible cell above adds 'class_name' to adata.obs — it is joined in from
# the MapMyCells table further down. Confirm the intended execution order.
ct_table = pd.crosstab(adata.obs['ddnres5'], adata.obs['class_name'])
ct_table_normalized = ct_table.div(ct_table.sum(axis=1), axis=0)

# For each row of ct_table (despite the name, `cell_type` iterates the ddnres5 row labels),
# keep up to ten column labels with the highest non-zero counts, joined into one string.
data = []
for cell_type in ct_table.index:
    counts = ct_table.loc[cell_type]
    non_zero_counts = counts[counts > 0]
    top_clusters = non_zero_counts.nlargest(10).index.tolist()
    data.append({'CellType': cell_type, 'TopClusters': ', '.join(map(str, top_clusters))})
# Overwrites the df_top_clusters built by the earlier top-3 cell.
df_top_clusters = pd.DataFrame(data)


In [None]:
ct_table.loc[cell_type]

In [None]:
# Show the top-labels table built above (`df_top_cell_types` is not defined anywhere visible).
df_top_clusters

In [None]:
# List, for every ddnres5 cluster, all class_name values observed among its cells.
# The frame must contain 'class_name' (it previously selected 'subclass_name', so the
# groupby below raised KeyError).
df = adata.obs[['ddnres5', 'class_name']]

cluster_celltypes = df.groupby('ddnres5')['class_name'].apply(lambda x: x.unique().tolist())
cluster_celltypes_df = cluster_celltypes.reset_index()
cluster_celltypes_df.to_csv('f13_ddnres5_class_name.csv', index=True)
print(cluster_celltypes_df)

In [None]:
# Inhibitory-neuron subset. The manual annotation cell labels this class 'InN';
# 'GABAergic' is still accepted for objects annotated with the older naming.
adata_inh=adata[adata.obs['class'].isin(['GABAergic', 'InN'])]

In [None]:
np.unique(adata_inh.obs['ddnres5'].tolist())

In [None]:
adata.obs['class'].unique()

In [None]:
# Spatial plot of ddnres5 cluster 41.
# NOTE(review): `plot_cluster_scdata` and `cmap` are not defined in any cell visible above.
fig=plt.figure(figsize=(12, 8), facecolor="white")

fig = plot_cluster_scdata(adata_del2,cmap,clusters=[41],transpose=1,flipx=1,flipy=-1,tag='ddnres5', key = 'X_multi_spatial')

In [None]:
df_scdata=pd.read_csv('f9_adata_del2_ddnres5_rgg.csv')


In [None]:
df_scdata['41_n'].head(20)

In [None]:
import matplotlib.cm as cm
def plot_gene_scdata(scdata2, gene='SOX9', nmax=20, sz_min=5, sz_max=30, transpose=1, flipx=1, flipy=1, tag='X_spatial', recompute_raw=False, scale_bar_length=50):
    """Scatter cells at their spatial coordinates, sized and coloured by one gene's counts.

    Parameters
    ----------
    scdata2 : AnnData-like
        Must provide ``obsm[tag]`` (two coordinate columns), ``var.index`` (gene names),
        ``X`` and ``obs['total_counts']``. ``obsm['X_raw']`` is written by this function
        when it is missing or when ``recompute_raw`` is true.
    gene : str
        Gene to plot; must be present in ``scdata2.var.index``.
    nmax : number
        Count that maps to the top of the colour/size scale; larger counts are clipped.
    sz_min, sz_max : number
        Marker sizes used for 0 counts and for ``nmax`` (or more) counts.
    transpose : int
        Step of the column slice applied to the coordinates (1 keeps the column order,
        -1 reverses it).
    flipx, flipy : number
        Multipliers applied to the two coordinate columns after ``transpose``.
    tag : str
        Key of the coordinate array in ``scdata2.obsm``.
    recompute_raw : bool
        Rebuild ``obsm['X_raw']`` even if it already exists.
    scale_bar_length : number
        Accepted for interface compatibility; not used by the current implementation.

    Returns
    -------
    matplotlib.figure.Figure
        The newly created figure.

    Raises
    ------
    ValueError
        If ``gene`` is not in ``scdata2.var.index``.
    """
    gene_names = list(scdata2.var.index)
    if gene not in gene_names:
        raise ValueError(f"Gene {gene!r} is not in scdata2.var.index")
    gene_idx = gene_names.index(gene)

    Xcells = np.array(scdata2.obsm[tag])[:, ::transpose] * [flipx, flipy]

    if 'X_raw' not in scdata2.obsm or recompute_raw:
        X = scdata2.X
        if hasattr(X, 'toarray'):
            # Sparse matrices are densified so the element-wise maths below works.
            X = X.toarray()
        # X is treated as log1p-transformed: undo the log, then rescale every cell so that
        # its values sum to obs['total_counts'].
        Xnorm = np.expm1(np.asarray(X))
        ncts = np.sum(Xnorm, axis=1)[:, np.newaxis]
        totals = np.array(scdata2.obs['total_counts'])[:, np.newaxis]
        # Cells with a zero sum give NaN here; they are zeroed further down.
        with np.errstate(divide='ignore', invalid='ignore'):
            scdata2.obsm['X_raw'] = np.round(Xnorm / ncts * totals)

    cts = np.array(scdata2.obsm['X_raw'][:, gene_idx])
    cts[np.isnan(cts)] = 0
    ncts = np.clip(cts / nmax, 0, 1)
    size = sz_min + ncts * (sz_max - sz_min)
    cols = cm.coolwarm(ncts)

    # Draw low-count cells first so high-count cells end up on top; a stable sort keeps the
    # draw order of tied cells reproducible.
    draw_order = np.argsort(cts, kind='stable')
    XC = -Xcells[draw_order, ::-1]

    fig, ax = plt.subplots(facecolor='white')
    ax.set_title(gene + ' - N max ' + str(nmax))
    ax.scatter(XC[:, 0], XC[:, 1], c=cols[draw_order], s=size[draw_order])

    ax.grid(False)
    ax.axis('equal')
    ax.set_xticks([])
    ax.set_yticks([])

    return fig

output_folder = r'data\240719AnalysisDAM_TERM\fig_0904'
# fig = plt.figure(figsize=(6, 4), facecolor="white")

In [None]:
plt.style.use('default')
# plot_gene_scdata creates its own figure, so a figure opened beforehand would stay empty
# and its size would be ignored. Resize the returned figure instead.
# NOTE(review): sz_max=3 is smaller than sz_min=5, so markers shrink with expression — confirm.
fig = plot_gene_scdata(adata, gene='Hs3st4', nmax=10, sz_min=5, sz_max=3, transpose=-1, flipx=-1, flipy=1, tag='X_multi_spatial', recompute_raw=True)
fig.set_size_inches(24, 20)
plt.show()

In [None]:
# Allen Brain Map_my_cell

In [None]:
mapmycell=pd.read_csv(r'data\240719AnalysisDAM_TERM\adata_mock_10xWholeMouseBrain.csv')

In [None]:
mapmycell

In [None]:
# Index the MapMyCells table by cell_id so it can be aligned with adata.obs.
# Guarded so that re-running the cell does not raise KeyError once cell_id is the index.
if 'cell_id' in mapmycell.columns:
    mapmycell = mapmycell.set_index('cell_id')
mapmycell

In [None]:
# Drop the *_label columns; errors='ignore' keeps the cell re-runnable after they are gone.
mapmycell = mapmycell.drop(columns=['class_label','subclass_label','supertype_label'], errors='ignore')

In [None]:
# Distribution of the class-level correlation coefficient reported in the MapMyCells table.
plt.hist(mapmycell['class_correlation_coefficient'])

In [None]:
# Distribution of the subclass-level correlation coefficient reported in the MapMyCells table.
plt.hist(mapmycell['subclass_correlation_coefficient'])

In [None]:
adata_co=adata.copy()

In [None]:

# Index.equals compares length, order and values; an element-wise `==` raises when the two
# indices differ in length.
if mapmycell.index.equals(adata_co.obs.index):
    print("Indices match. Proceeding with assignment.")
else:
    print("Warning: Indices do not match. Please check the indices before proceeding.")
    n_only_adata = len(adata_co.obs.index.difference(mapmycell.index))
    n_only_mapmycell = len(mapmycell.index.difference(adata_co.obs.index))
    print(f"Cells only in adata_co: {n_only_adata}; cells only in mapmycell: {n_only_mapmycell}")


In [None]:
# Assign mapmycell to adata_co.obs
# Merge mapmycell into adata_co.obs
# Left-join on the cell index. Only columns that are not yet in obs are added, so re-running
# the cell no longer raises on overlapping column names.
new_columns = mapmycell.columns.difference(adata_co.obs.columns, sort=False)
skipped_columns = mapmycell.columns.difference(new_columns, sort=False)
if len(skipped_columns) > 0:
    print("Columns already in adata_co.obs, not joined again:", list(skipped_columns))
adata_co.obs = adata_co.obs.join(mapmycell[new_columns], how='left')



In [None]:
adata_co.obs

In [None]:
adata_co.obs['class_name'] = adata_co.obs['class_name'].astype('category')


In [None]:
# Mean expression profile per group for two groupings of the same cells.
# NOTE(review): neither 'class1' nor 'class_name' is added to adata.obs by any visible cell
# above (both appear in later cells) — confirm the intended execution order.
class_means = adata.to_df().groupby(adata.obs['class1']).mean()
class_name_means = adata.to_df().groupby(adata.obs['class_name']).mean()
# Stack both sets of profiles; the `keys` become the outer index level.
combined_means = pd.concat([class_means, class_name_means], keys=['class1', 'class_name'])
# Transposing puts one group per column, so .corr() gives group-vs-group Pearson correlations
# (within and across the two groupings).
correlation_matrix = combined_means.T.corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, cmap='coolwarm', annot=False)
plt.title("Pairwise Pearson Correlation Between Clusters")
plt.show()

In [None]:
# Mean expression profile per class_name group and per ddnres5 cluster.
expression = adata_co.to_df()
profiles_by_class_name = expression.groupby(adata_co.obs['class_name']).mean()
profiles_by_cluster = expression.groupby(adata_co.obs['ddnres5']).mean()

# Pearson correlation of every cluster profile (rows) with every class_name profile (columns).
correlation_matrix = profiles_by_cluster.apply(
    lambda cluster_profile: profiles_by_class_name.T.corrwith(cluster_profile),
    axis=1,
)

# Undefined correlations are shown as 0.
correlation_matrix = correlation_matrix.astype(float).fillna(0)

plt.figure(figsize=(20, 40))
sns.heatmap(correlation_matrix, cmap='coolwarm', annot=True, fmt=".2f")
# Columns are class_name groups and rows are ddnres5 clusters; the previous labels named
# 'class' and 'class_name' instead.
plt.xlabel('class_name')
plt.ylabel('ddnres5')
plt.title("Pairwise Pearson Correlation Between 'ddnres5' Clusters and 'class_name' Groups")
plt.show()


In [None]:
correlation_matrix

In [None]:
sc.set_figure_params(figsize=(9,9))
sc.pl.umap(adata_co, color = ['class', 'class_name', 'ddnres5'], legend_loc = 'on data')

In [None]:
# Spatial plot of ddnres5 cluster 25.
# NOTE(review): `plot_cluster_scdata` and `cmap` are not defined in any cell visible above.
fig=plt.figure(figsize=(12, 8), facecolor="white")
#bad 15
#CORTEX:11,13,15,17,30,
#CA3:10, CA1:34,DG: 12
#['10', '11', '12', '13', '15', '17', '27', '28', '29', '30', '31','34', '44', '50', '53', '54', '66', '68', '70', '79', '80']

fig = plot_cluster_scdata(adata_del2,cmap,clusters=[25],transpose=1,flipx=1,flipy=-1,tag='ddnres5', key = 'X_multi_spatial')

In [None]:
adata=adata_co.copy()

In [None]:
# Inhibitory-neuron subset. The manual annotation cell labels this class 'InN';
# 'GABAergic' is still accepted for objects annotated with the older naming.
adata_in=adata[adata.obs['class'].isin(['GABAergic', 'InN'])]

In [None]:
sc.pl.umap(adata_in, color = ['class_name'])

In [None]:
sc.pl.umap(adata_in, color = [ 'ddnres5','Lamp5','Sncg'], legend_loc='on data')

In [None]:
# Mean expression profile per ddnres5 cluster and per subclass_name group.
ddnres5_means = adata.to_df().groupby(adata.obs['ddnres5']).mean()
subclass_name_means = adata.to_df().groupby(adata.obs['subclass_name']).mean()

common_genes = ddnres5_means.columns.intersection(subclass_name_means.columns)
ddnres5_means, subclass_name_means = ddnres5_means[common_genes], subclass_name_means[common_genes]

# Pearson correlation: rows are subclass_name groups, columns are ddnres5 clusters.
correlation_matrix = subclass_name_means.apply(
    lambda subclass_profile: ddnres5_means.T.corrwith(subclass_profile),
    axis=1,
).astype(float)

# Long-format table: the three best-correlated subclass_name groups for each cluster.
results = [
    {
        'ddnres5_cluster': ddnres_cluster,
        'subclass_name_cluster': subclass_cluster,
        'correlation': corr_value
    }
    for ddnres_cluster in correlation_matrix.columns
    for subclass_cluster, corr_value in (
        correlation_matrix[ddnres_cluster].sort_values(ascending=False).head(3).items()
    )
]

top_correlations_df = pd.DataFrame(results)

In [None]:
top_correlations_df

In [None]:
import pandas as pd

# Mean expression profile per ddnres5 cluster and per subclass_name group.
ddnres5_means = adata.to_df().groupby(adata.obs['ddnres5']).mean()
subclass_name_means = adata.to_df().groupby(adata.obs['subclass_name']).mean()

common_genes = ddnres5_means.columns.intersection(subclass_name_means.columns)
ddnres5_means, subclass_name_means = ddnres5_means[common_genes], subclass_name_means[common_genes]

# Pearson correlation: rows are subclass_name groups, columns are ddnres5 clusters.
correlation_matrix = subclass_name_means.apply(
    lambda subclass_profile: ddnres5_means.T.corrwith(subclass_profile),
    axis=1,
).astype(float)

# Wide-format table: one row per cluster with its three best-matching subclass_name groups.
rank_columns = [
    ('subclass_name_rank_1', 'correlation_rank_1'),
    ('subclass_name_rank_2', 'correlation_rank_2'),
    ('subclass_name_rank_3', 'correlation_rank_3'),
]

reformatted_results = []
for ddnres_cluster in correlation_matrix.columns:
    top3 = correlation_matrix[ddnres_cluster].sort_values(ascending=False).head(3)
    row = {'ddnres5_cluster': ddnres_cluster}
    for position, (name_key, corr_key) in enumerate(rank_columns):
        # Fewer than three groups available -> leave the remaining ranks empty.
        available = position < len(top3)
        row[name_key] = top3.index[position] if available else None
        row[corr_key] = top3.iloc[position] if available else None
    reformatted_results.append(row)

reformatted_df = pd.DataFrame(reformatted_results)
reformatted_df.to_csv('f13_ddnres5_subclass_name.csv', index=True)
print(reformatted_df)


In [None]:
# Curated names per ddnres5 cluster, indexed by integer cluster ID.
names=pd.read_excel('f13_names_class1_class_name1_subclass_name1.xlsx')
names = names.set_index('ddnres5')
names.index=names.index.astype(int)

# Use a temporary integer key for the lookup instead of converting obs['ddnres5'] in place,
# so the cluster column keeps its original dtype for plotting and grouping.
cluster_ids = adata.obs['ddnres5'].astype(str).astype(int)

# obs column -> spreadsheet column (the sheet spells 'Class_name1' with a capital C).
for obs_column, sheet_column in [
    ('class1', 'class1'),
    ('class_name1', 'Class_name1'),
    ('subclass_name1', 'subclass_name1'),
]:
    adata.obs[obs_column] = cluster_ids.map(names[sheet_column])

# Clusters that are absent from the sheet end up as NaN; report them.
unmapped_clusters = sorted(set(cluster_ids) - set(names.index))
if unmapped_clusters:
    print("Clusters without an entry in the names sheet:", unmapped_clusters)
adata.obs

In [None]:
print(adata.obs[['ddnres5', 'class1','class_name1','subclass_name1']].head(20))

In [None]:
sc.pl.umap(adata, color=['class','class1','class_name1','subclass_name1'])

In [None]:
# Reset the stored log base before ranking (only when the 'log1p' entry exists).
if 'log1p' in adata.uns:
    adata.uns['log1p']["base"] = None
sc.tl.rank_genes_groups(adata, 'class1', method='t-test')
result = adata.uns['rank_genes_groups']
groups = result['names'].dtype.names

# Column suffix per result field. Deriving the suffix from key[:1] gave 'pvals' and
# 'pvals_adj' the same suffix 'p', so the raw p-values were overwritten by the adjusted ones.
# '<group>_p' keeps holding the adjusted p-values (what it effectively contained before);
# the raw p-values are now available as '<group>_praw'.
rgg_suffixes = {'names': 'n', 'logfoldchanges': 'l', 'pvals_adj': 'p', 'pvals': 'praw'}
df_scdata_class1 = pd.DataFrame({
    group + '_' + suffix: result[key][group]
    for group in groups
    for key, suffix in rgg_suffixes.items()
})
#df_scdata.to_csv('f9_adata_del2_ddnres5_rgg.csv')
sc.pl.rank_genes_groups_dotplot(adata, n_genes=4)

In [None]:
sc.pl.rank_genes_groups_dotplot(adata, n_genes=5)

In [None]:
sc.tl.dendrogram(adata, 'subclass_name1')
# Reset the stored log base before ranking (only when the 'log1p' entry exists).
if 'log1p' in adata.uns:
    adata.uns['log1p']["base"] = None
sc.tl.rank_genes_groups(adata, 'subclass_name1', method='t-test')
result = adata.uns['rank_genes_groups']
groups = result['names'].dtype.names

# Column suffix per result field. Deriving the suffix from key[:1] gave 'pvals' and
# 'pvals_adj' the same suffix 'p', so the raw p-values were overwritten by the adjusted ones.
# '<group>_p' keeps holding the adjusted p-values (what it effectively contained before);
# the raw p-values are now available as '<group>_praw'.
rgg_suffixes = {'names': 'n', 'logfoldchanges': 'l', 'pvals_adj': 'p', 'pvals': 'praw'}
df_scdata_subclass_name1 = pd.DataFrame({
    group + '_' + suffix: result[key][group]
    for group in groups
    for key, suffix in rgg_suffixes.items()
})
#df_scdata.to_csv('f9_adata_del2_ddnres5_rgg.csv')
#sc.pl.rank_genes_groups_dotplot(adata, n_genes=4)

In [None]:
import scanpy as sc
from collections import OrderedDict

# Keep the z-scored values in their own layer and leave adata.X untouched. Scaling adata.X in
# place changed the input of every later cell that reads adata.X / adata.to_df(), and
# re-running the cell scaled already-scaled data.
adata.layers['scaled'] = sc.pp.scale(adata, zero_center=True, max_value=None, copy=True).X

# Genes to leave out of the figure even when they rank among the top markers.
genes_to_exclude = {
    'App', 'Syp', 'Zfp36l2', 'Psap', 'Prkar1b', 'Gaa', 'Nefl', 'Cd47', 'Ctsb',
    'Vip', 'Dvl1', 'Gfap', 'Sqstm1', 'Cd3e', 'Cd8a', 'Clu', 'Hp', 'Pten', 'Cxcl1', 'Clta',
    'Gas7','Grin2b','Ppp3cb','Tubb3','Ppfia2','Lyst'
}

key = 'rank_genes_groups'
n_genes = 4
groups = adata.uns[key]['names'].dtype.names

# Top n_genes per group from the most recent rank_genes_groups run, in group order.
genes_list = []
for group in groups:
    genes_list.extend(adata.uns[key]['names'][group][:n_genes])

# Drop repeats while keeping first-seen order, then remove the excluded genes.
unique_genes_ordered = list(OrderedDict.fromkeys(genes_list))
adjusted_genes = [gene for gene in unique_genes_ordered if gene not in genes_to_exclude]

sc.pl.rank_genes_groups_dotplot(
    adata,
    var_names=adjusted_genes,
    groupby='subclass_name1',
    layer='scaled',
    vmax=1,
    vmin=-1,
    cmap='coolwarm',
    show=True
)



In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Mean expression profile per subclass_name group and per curated subclass_name1 group.
# NOTE(review): if an earlier cell z-scored adata.X in place, these are means of scaled
# values rather than of the log-normalised expression — confirm which is intended.
expression = adata.to_df()
class_means = expression.groupby(adata.obs['subclass_name']).mean()
class_name1_means = expression.groupby(adata.obs['subclass_name1']).mean()

common_genes = class_means.columns.intersection(class_name1_means.columns)
class_means = class_means[common_genes]
class_name1_means = class_name1_means[common_genes]

# Pearson correlation: rows are subclass_name1 groups, columns are subclass_name groups.
correlation_matrix = class_name1_means.apply(
    lambda profile: class_means.T.corrwith(profile),
    axis=1,
).astype(float)

# For every subclass_name1 row remember its three best-correlated subclass_name columns.
top3_subclass_clusters = {
    row_label: row.sort_values(ascending=False).head(3).index.tolist()
    for row_label, row in correlation_matrix.iterrows()
}

# Keep only the top-3 cells of each row (everything else becomes NaN) and only the columns
# that are in some row's top 3. Column order is derived from correlation_matrix, not from a
# set, so the layout is reproducible between runs.
keep = pd.DataFrame(False, index=correlation_matrix.index, columns=correlation_matrix.columns)
for row_label, top_columns in top3_subclass_clusters.items():
    keep.loc[row_label, top_columns] = True
unique_subclass_clusters = keep.columns[keep.any(axis=0)].tolist()
heatmap_data = correlation_matrix.where(keep)[unique_subclass_clusters].astype(float)

# Columns sorted by their mean retained correlation, highest first (stable for ties).
avg_correlations = heatmap_data.mean(axis=0)
sorted_subclass_clusters = avg_correlations.sort_values(ascending=False, kind='stable').index
heatmap_data = heatmap_data[sorted_subclass_clusters]

plt.figure(figsize=(30, 20))

vmin = heatmap_data.min().min()
vmax = heatmap_data.max().max()

sns.heatmap(
    heatmap_data,
    annot=False,
    cmap="coolwarm",
    linewidths=0.5,
    linecolor='gray',
    cbar_kws={'label': 'Pearson Correlation'},
    vmin=vmin,
    vmax=vmax
)

plt.xlabel('Subclass Clusters')
plt.ylabel('Subclass_name1 Clusters')
plt.title('Top 3 Pearson Correlations between subclass_name1 and subclass_name Clusters')
plt.tight_layout()
plt.show()