In [None]:
import pickle
import seaborn as sns
import imageio as io
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import tifffile
from tqdm.notebook import tqdm
import pathlib
import json
import glob
import PIL
import scanpy as sc

In [None]:
# Load the two AnnData files used throughout the notebook: `adata_mg`
# (presumably the microglia subset -- TODO confirm) and `adata_whole`.
# The paths are assembled with pathlib so they resolve on any OS; the
# backslash-separated literals used before only worked on Windows.
analysis_dir = pathlib.Path('data') / '240719AnalysisDAM_TERM'
adata_mg = sc.read(analysis_dir / 'adata_mg.h5ad')
adata_whole = sc.read(analysis_dir / 'adata_whole.h5ad')

In [None]:
# Directory that receives the figures written with plt.savefig in this notebook.
# Assembled with pathlib for portability but kept as a plain string, because
# later cells interpolate it into f-strings and pass it to os.path.join.
output_folder = str(pathlib.Path('data', '240719AnalysisDAM_TERM', 'fig_0910'))
# Create it up front so the first savefig call cannot fail on a missing folder.
os.makedirs(output_folder, exist_ok=True)

In [None]:
# Work on an independent copy so that `adata_mg` as loaded stays untouched
# by the changes made to `adata` (e.g. to its `uns` and `obs`) further down.
adata = adata_mg.copy()

# Fig1b

In [None]:
# Colour assigned to each microglia cluster label (used for the UMAP below).
custom_colors = {
    'Mg_0': 'skyblue',
    'Mg_2': '#f58231',  # orange
    'Mg_1': '#ffe119',  # yellow
    'Mg_6': '#e6194B',  # red
    'Mg_5': '#f032e6',  # magenta
    'Mg_7': '#fabed4',  # pink
    'Mg_3': '#3cb44b',  # green
    'Mg_4': '#4363d8',  # blue
}

# Store the palette under 'celltype_5_colors' in adata.uns, one colour per
# category of obs['celltype_5'] and in category order.
unique_classes = adata.obs['celltype_5'].cat.categories
adata.uns['celltype_5_colors'] = [
    custom_colors.get(cls, '#000000')  # clusters without an entry are drawn black
    for cls in unique_classes
]

sc.set_figure_params(figsize=(5, 5))

# show=False keeps the figure open so that it can be saved with plt.savefig below.
umap_options = dict(
    add_outline=True,
    legend_loc='on data',
    legend_fontsize=16,
    legend_fontoutline=2,
    frameon=False,
    size=20,
    show=False,
    use_raw=False,
)
sc.pl.umap(adata, color='celltype_5', **umap_options)

output_path = f'{output_folder}/MG_umap_unicolor_update2.pdf'
plt.savefig(output_path, bbox_inches='tight', dpi=300)
plt.show()


# Fig1c

In [None]:
# Genes shown in the dot plot, in display order.
mark1 = [
    'Tmem119', 'Sall1', 'Axl', 'Cd47', 'Atp6v1a', 'Clec7a',
    'Trem2', 'Tyrobp', 'Lpl', 'Spp1', 'Gpnmb', 'H2-Aa',
]

# NOTE(review): `save=` hands the file writing to scanpy, which presumably uses
# its own figure directory rather than `output_folder` -- confirm this is intended.
sc.pl.dotplot(
    adata,
    mark1,
    groupby='celltype_5',
    dendrogram=False,
    save='Mg_dotplot2.pdf',
)

# Extended Fig2a

In [None]:

# Compute the UMAP embedding density separately for each `geno_type` group and
# store it under the key 'geno_type_density'.
# NOTE(review): obs['geno_type'] is read here but only assigned further down in
# this notebook (from obs['batch']); presumably it is already present in the
# loaded file -- confirm with Restart & Run All.
sc.tl.embedding_density(
    adata,
    basis='umap',
    groupby='geno_type',
    key_added='geno_type_density',
)

sc.set_figure_params(figsize=(3, 3))

density_plot_options = dict(
    basis='umap',
    key='geno_type_density',
    color_map='coolwarm',
    vmax=1,
    save='genotype_density_2.pdf',
)
sc.pl.embedding_density(adata, **density_plot_options)



# Fig1d

In [None]:
# Colour assigned to each microglia cluster label (used for the stacked bars below).
custom_colors = {
    'Mg_0': 'skyblue',
    'Mg_2': '#f58231',  # orange
    'Mg_1': '#ffe119',  # yellow
    'Mg_6': '#e6194B',  # red
    'Mg_5': '#f032e6',  # magenta
    'Mg_7': '#fabed4',  # pink
    'Mg_3': '#3cb44b',  # green
    'Mg_4': '#4363d8',  # blue
}
# Stacked bar chart: share of each microglia cluster within every genotype.

# Cells per (genotype, cluster). observed=False is spelled out so that every
# category keeps its row/column regardless of the pandas version's default.
df = adata.obs[['geno_type', 'celltype_5']]
count_df = (
    df.groupby(['geno_type', 'celltype_5'], observed=False)
    .size()
    .unstack(fill_value=0)
)

# Fix the left-to-right order of the bars.
x_order = ['WT', 'E4', 'APP', 'TE4']
count_df = count_df.reindex(x_order)

# Normalise each genotype (row) so that its clusters sum to 1.
proportion_df = count_df.div(count_df.sum(axis=1), axis=0)

# Same fallback as the UMAP cell: a cluster without a palette entry is drawn
# black instead of raising KeyError.
colors = [custom_colors.get(col, '#000000') for col in proportion_df.columns]
ax = proportion_df.plot(kind='bar', stacked=True, figsize=(5, 4), color=colors)

ax.set_title('Proportion of Microglia clusters')
ax.set_xlabel('Genotypes')
ax.set_ylabel('Proportion')

# Legend outside the axes so it does not cover the bars.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()

# os.path.join instead of a hard-coded '\\' so the file lands inside
# output_folder on every operating system.
plt.savefig(
    os.path.join(output_folder, 'Mg_genotype_proportion_2.pdf'),
    format='pdf',
    dpi=300,
    bbox_inches='tight',
)
plt.show()


In [None]:
# Transfer the cluster labels from `adata` to `adata_whole` by cell name.
# Cells of `adata_whole` that have no entry in `adata` come back as NaN.
cluster_mapping = adata.obs['celltype_5'].to_dict()
adata_whole.obs['celltype_5'] = adata_whole.obs_names.map(cluster_mapping)

# Assign the filled column back instead of calling fillna(..., inplace=True) on
# a column selection: that form is chained assignment, which pandas warns about
# and which leaves the frame unchanged once Copy-on-Write is active.
# NOTE(review): 'unknwon' looks like a typo for 'unknown'; the literal is kept
# because other code may match on this exact label.
adata_whole.obs['celltype_5'] = adata_whole.obs['celltype_5'].fillna('unknwon')

In [None]:
# Build obs['celltype_5_outline']: the cluster label of every cell as a string,
# except that cells whose obs['outline'] value is 'outline' are labelled
# 'outline' instead. The result is stored as a categorical column.
adata_whole.obs['outline'] = adata_whole.obs['outline'].astype(str)
is_outline = adata_whole.obs['outline'] == 'outline'

adata_whole.obs['celltype_5_outline'] = (
    adata_whole.obs['celltype_5']
    .astype(str)
    .mask(is_outline, 'outline')
    .astype('category')
)

In [None]:
# Display the combined label column as a quick check of the step above.
adata_whole.obs['celltype_5_outline']

In [None]:
# One batch per genotype is selected for the spatial plots: APP_1, TE4_3, WT_1
# and E4_2. The same four batches are sliced from the whole data set (`*_wh`)
# and from the microglia object `adata` (`*_mg`).
adata_app_wh, adata_te4_wh, adata_wt_wh, adata_e4_wh = (
    adata_whole[adata_whole.obs['batch'] == batch_id]
    for batch_id in ('APP_1', 'TE4_3', 'WT_1', 'E4_2')
)

adata_app_mg, adata_te4_mg, adata_wt_mg, adata_e4_mg = (
    adata[adata.obs['batch'] == batch_id]
    for batch_id in ('APP_1', 'TE4_3', 'WT_1', 'E4_2')
)

In [None]:
# Colour assigned to each microglia cluster label (used for the spatial plots below).
custom_colors = {
    'Mg_0': 'skyblue',
    'Mg_2': '#f58231',  # orange
    'Mg_1': '#ffe119',  # yellow
    'Mg_6': '#e6194B',  # red
    'Mg_5': '#f032e6',  # magenta
    'Mg_7': '#fabed4',  # pink
    'Mg_3': '#3cb44b',  # green
    'Mg_4': '#4363d8',  # blue
}

def plot_cluster_scdata_str_diff(scdata, custom_colors, clusters=['DAM_1','DAM_2'], transpose=1, flipx=1, flipy=1, tag='cluster', key='X_spatial'):
    """Scatter every cell in light grey and overlay selected clusters in colour.

    Everything is drawn on the current matplotlib figure/axes.

    Parameters
    ----------
    scdata
        Object providing ``obs[tag]`` (one label per cell) and ``obsm[key]``
        (two coordinate columns per cell).
    custom_colors
        Mapping from label to colour; labels without an entry are drawn grey.
    clusters
        Labels to highlight. Only labels that occur in ``obs[tag]`` are
        drawn, in the sorted order returned by ``np.unique``. The default
        list is only read, never modified.
    transpose
        Step used to slice the coordinate columns: ``1`` keeps the column
        order, ``-1`` swaps the two columns.
    flipx, flipy
        Factors multiplied onto the first and second coordinate column
        (before the optional swap); pass ``-1`` to mirror an axis.
    tag
        Name of the ``obs`` column holding the labels.
    key
        Name of the ``obsm`` entry holding the coordinates.

    Returns
    -------
    The current figure, as returned by ``plt.gcf()``.
    """
    labels = scdata.obs[tag]
    highlighted = []
    for candidate in np.unique(labels):
        if candidate in clusters:
            highlighted.append(candidate)

    coords = np.array(scdata.obsm[key]) * [flipx, flipy]
    x, y = coords[:, ::transpose].T

    # Background layer: all cells as small grey dots.
    plt.scatter(x, y, c='#E0E0E0', s=2, marker='.')

    for candidate in highlighted:
        name = str(candidate)
        mask = labels == name
        # 'outline' cells stay small; every other highlighted cluster is drawn large.
        dot_size = 3 if name == 'outline' else 200
        plt.scatter(
            x[mask],
            y[mask],
            c=custom_colors.get(name, 'grey'),
            s=dot_size,
            marker='.',
            label=name,
        )

    plt.grid(False)
    for axis_mode in ("off", "equal"):
        plt.axis(axis_mode)
    plt.tight_layout()
    return plt.gcf()


In [None]:
# TE4 batch (adata_te4_wh): highlight Mg_0/3/6/7 and the 'outline' cells over
# the grey background; the first coordinate is mirrored (flipx=-1).
fig = plt.figure(figsize=(6, 6), facecolor="white")
fig = plot_cluster_scdata_str_diff(
    adata_te4_wh,
    custom_colors,
    clusters=['outline', 'Mg_0', 'Mg_3', 'Mg_6', 'Mg_7'],
    transpose=1,
    flipx=-1,
    flipy=1,
    tag='celltype_5_outline',
    key='X_spatial',
)
# os.path.join instead of a hard-coded '\\' so the file lands inside
# output_folder on every operating system.
plt.savefig(
    os.path.join(output_folder, 'mgcluster_TE4_mg0367_outline_2.tif'),
    format='tif',
    dpi=300,
    bbox_inches='tight',
)
plt.show()

In [None]:
# APP batch (adata_app_wh): highlight Mg_0/3/6/7 and the 'outline' cells over
# the grey background; the second coordinate is mirrored (flipy=-1).
fig = plt.figure(figsize=(6, 6), facecolor="white")
fig = plot_cluster_scdata_str_diff(
    adata_app_wh,
    custom_colors,
    clusters=['outline', 'Mg_0', 'Mg_3', 'Mg_6', 'Mg_7'],
    transpose=1,
    flipx=1,
    flipy=-1,
    tag='celltype_5_outline',
    key='X_spatial',
)
# os.path.join instead of a hard-coded '\\' so the file lands inside
# output_folder on every operating system.
plt.savefig(
    os.path.join(output_folder, 'mgcluster_APP_mg0367_outline_2.tif'),
    format='tif',
    dpi=300,
    bbox_inches='tight',
)
plt.show()

In [None]:
# APP batch (adata_app_wh): highlight Mg_1/2/4/5 and the 'outline' cells over
# the grey background; the second coordinate is mirrored (flipy=-1).
fig = plt.figure(figsize=(6, 6), facecolor="white")
fig = plot_cluster_scdata_str_diff(
    adata_app_wh,
    custom_colors,
    clusters=['outline', 'Mg_2', 'Mg_1', 'Mg_4', 'Mg_5'],
    transpose=1,
    flipx=1,
    flipy=-1,
    tag='celltype_5_outline',
    key='X_spatial',
)
# os.path.join instead of a hard-coded '\\' so the file lands inside
# output_folder on every operating system.
plt.savefig(
    os.path.join(output_folder, 'mgcluster_APP_mg1245_outline_2.tif'),
    format='tif',
    dpi=300,
    bbox_inches='tight',
)
plt.show()

In [None]:
# WT batch (adata_wt_wh): highlight Mg_0/3/6/7 and the 'outline' cells over
# the grey background; the first coordinate is mirrored (flipx=-1).
fig = plt.figure(figsize=(6, 6), facecolor="white")
fig = plot_cluster_scdata_str_diff(
    adata_wt_wh,
    custom_colors,
    clusters=['outline', 'Mg_0', 'Mg_3', 'Mg_6', 'Mg_7'],
    transpose=1,
    flipx=-1,
    flipy=1,
    tag='celltype_5_outline',
    key='X_spatial',
)
# os.path.join instead of a hard-coded '\\' so the file lands inside
# output_folder on every operating system.
plt.savefig(
    os.path.join(output_folder, 'mgcluster_WT_mg0367_outline_2.tif'),
    format='tif',
    dpi=300,
    bbox_inches='tight',
)
plt.show()

In [None]:
# WT batch (adata_wt_wh): highlight Mg_1/2/4/5 and the 'outline' cells over
# the grey background; the first coordinate is mirrored (flipx=-1).
fig = plt.figure(figsize=(6, 6), facecolor="white")
fig = plot_cluster_scdata_str_diff(
    adata_wt_wh,
    custom_colors,
    clusters=['outline', 'Mg_2', 'Mg_1', 'Mg_4', 'Mg_5'],
    transpose=1,
    flipx=-1,
    flipy=1,
    tag='celltype_5_outline',
    key='X_spatial',
)
# os.path.join instead of a hard-coded '\\' so the file lands inside
# output_folder on every operating system.
plt.savefig(
    os.path.join(output_folder, 'mgcluster_WT_mg1245_outline_2.tif'),
    format='tif',
    dpi=300,
    bbox_inches='tight',
)
plt.show()

In [None]:
# E4 batch (adata_e4_wh): highlight Mg_0/3/6/7 and the 'outline' cells over
# the grey background; no axis is mirrored.
fig = plt.figure(figsize=(6, 6), facecolor="white")
fig = plot_cluster_scdata_str_diff(
    adata_e4_wh,
    custom_colors,
    clusters=['outline', 'Mg_0', 'Mg_3', 'Mg_6', 'Mg_7'],
    transpose=1,
    flipx=1,
    flipy=1,
    tag='celltype_5_outline',
    key='X_spatial',
)
# os.path.join instead of a hard-coded '\\' so the file lands inside
# output_folder on every operating system.
plt.savefig(
    os.path.join(output_folder, 'mgcluster_E4_mg0367_outline_2.tif'),
    format='tif',
    dpi=300,
    bbox_inches='tight',
)
plt.show()

In [None]:
# E4 batch (adata_e4_wh): highlight Mg_1/2/4/5 and the 'outline' cells over
# the grey background; no axis is mirrored.
fig = plt.figure(figsize=(6, 6), facecolor="white")
fig = plot_cluster_scdata_str_diff(
    adata_e4_wh,
    custom_colors,
    clusters=['outline', 'Mg_2', 'Mg_1', 'Mg_4', 'Mg_5'],
    transpose=1,
    flipx=1,
    flipy=1,
    tag='celltype_5_outline',
    key='X_spatial',
)
# os.path.join instead of a hard-coded '\\' so the file lands inside
# output_folder on every operating system.
plt.savefig(
    os.path.join(output_folder, 'mgcluster_E4_mg1245_outline_2.tif'),
    format='tif',
    dpi=300,
    bbox_inches='tight',
)
plt.show()

# Extended Fig2b

In [None]:
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np

# Grouped bar chart: for every cluster, the mean (+/- std across batches) of
# the percentage of a batch's cells that belong to that cluster, per genotype.

# Batch id -> genotype; several batches share one genotype.
batch_to_genotype = {
    'APP_1': 'APP', 'APP_2': 'APP', 'APP_3': 'APP', 'APP_4': 'APP',
    'E4_1': 'E4', 'E4_2': 'E4',
    'TE4_1': 'TE4', 'TE4_2': 'TE4', 'TE4_3': 'TE4', 'TE4_4': 'TE4',
    'WT_1': 'WT', 'WT_2': 'WT'
}

adata.obs['geno_type'] = adata.obs['batch'].map(batch_to_genotype)

# Cells per batch over all clusters. This does not depend on the cluster, so
# it is computed once rather than in every loop iteration.
total_counts_batch = adata.obs['batch'].value_counts()

all_data = []
for cell_class in adata.obs['celltype_5'].unique():
    # Cells of this cluster per batch, read straight from `obs`. Slicing the
    # AnnData object and writing a column to the slice is not needed for a count.
    in_class = adata.obs['celltype_5'] == cell_class
    temp_counts_batch = adata.obs.loc[in_class, 'batch'].value_counts()

    # Batches without any cell of this cluster get a count of 0.
    counts_df_batch = pd.DataFrame({
        'Total': total_counts_batch,
        'Temp': temp_counts_batch
    }).fillna(0)

    counts_df_batch['Percentage'] = (counts_df_batch['Temp'] / counts_df_batch['Total']) * 100
    counts_df_batch['geno_type'] = counts_df_batch.index.map(batch_to_genotype)

    # Summarise the per-batch percentages within each genotype.
    genotype_stats = counts_df_batch.groupby('geno_type').agg(
        Mean_Percentage=('Percentage', 'mean'),
        Std_Percentage=('Percentage', 'std')
    )

    genotype_stats['celltype_5'] = cell_class
    all_data.append(genotype_stats)

all_data_df = pd.concat(all_data).reset_index()

# Order rows by cluster, then by genotype in the fixed display order.
custom_order = ['WT', 'E4', 'APP', 'TE4']
all_data_df['geno_type'] = pd.Categorical(all_data_df['geno_type'], categories=custom_order, ordered=True)
all_data_df = all_data_df.sort_values(by=['celltype_5', 'geno_type'])

color_map = {
    'WT': 'skyblue',
    'E4': '#bfef45',
    'APP': '#fabed4',
    'TE4': '#ec008c'
}
all_data_df['color'] = all_data_df['geno_type'].map(color_map)

class_names = all_data_df['celltype_5'].unique()
num_genotypes = len(custom_order)

spacing = 1.5   # gap between two neighbouring cluster groups, in bar units
bar_width = 0.8

# One group of adjacent bars per cluster. The tick of a group is the centre of
# the bars actually placed for it, so the labels stay aligned even when a
# cluster has fewer than `num_genotypes` rows.
x_positions = []
class_ticks = []
for i, class_name in enumerate(class_names):
    class_data_len = len(all_data_df[all_data_df['celltype_5'] == class_name])
    start_pos = i * (num_genotypes + spacing)
    class_positions = np.arange(start_pos, start_pos + class_data_len)
    x_positions.extend(class_positions)
    class_ticks.append(np.mean(class_positions))

plt.figure(figsize=(8, 6))
plt.bar(x_positions, all_data_df['Mean_Percentage'],
        yerr=all_data_df['Std_Percentage'], color=all_data_df['color'], capsize=4, width=bar_width)

plt.title('Total Percentage of Each Class by Genotype')
plt.xlabel('Class')
plt.ylabel('Percentage (%)')

plt.xticks(class_ticks, class_names, rotation=0)

# The bars carry no labels, so the legend is built from one proxy line per genotype.
legend_labels = [plt.Line2D([0], [0], color=color_map[geno], lw=4) for geno in custom_order]
plt.legend(legend_labels, custom_order, title="Genotype", loc='upper right')

plt.tight_layout()

output_path = os.path.join(output_folder, 'combined_class_genotype_percentage_colored_plot_use_5.pdf')
plt.savefig(output_path, format='pdf', dpi=300, bbox_inches='tight')
plt.show()
