Pancreas: Run64_Pankreas_ROI6, Run64_Pankreas_ROI8, Run64_Pankreas_ROI10, Run64_Pankreas_ROI12, Run64_Pankreas_ROI14, Run64_Pankreas_ROI15, Run64_Pankreas_ROI19, Run64_Pankreas_ROI22

In [None]:
from anndata import AnnData
import anndata as ad
import scanpy as sc
import squidpy as sq
import pandas as pd
import numpy as np
import seaborn as sns
from numpy.random import default_rng
import matplotlib.pyplot as plt
from scipy.stats import zscore
#from matplotlib.pyplot import figure

sc.logging.print_header()
print(f"squidpy=={sq.__version__}")

# Settings

In [None]:
# ROI to analyse; used below to build the input file name and every output path
sample = 'ROI15'

In [None]:
# save results in:
import os
# Create the per-sample output directory. exist_ok=True replaces the former
# check-then-create sequence, so re-running the cell never fails on an
# already existing directory.
os.makedirs('results_subcluster_' + sample, exist_ok=True)

In [None]:
# figure & saving settings
# Notebook-wide scanpy plotting defaults (several later cells override them again)
sc.settings.set_figure_params(dpi=220, fontsize=12, dpi_save=700, figsize=(10,10))
# Figures written through scanpy's `save=` argument go to the per-sample results directory
sc.settings.figdir = 'results_subcluster_' + sample

# Read in data

In [None]:
# Load the AnnData object of the selected ROI from data/<sample>.h5ad.
# NOTE(review): later cells read adata.obs['leiden'] and plot a UMAP without
# computing either, so the file presumably already contains the clustering
# and the embedding - confirm against the upstream notebook.
adata = sc.read_h5ad('data/' + sample + '.h5ad')
adata

# Assign immune / non-immune cells

In [None]:
# Larger font for the two marker panels below
sc.settings.set_figure_params(dpi=220, fontsize=20, dpi_save=700, figsize=(10,10))
# UMAP coloured by CD45 and Cytokeratin (values from adata.X, not .raw);
# the diverging colour map is centred on 0
sc.pl.umap(adata, color=['CD45', 'Cytokeratin'], s=20, use_raw=False,  legend_fontsize=12, color_map='coolwarm', vcenter=0)

In [None]:
# Coarse label for every original Leiden cluster (keys are the cluster numbers
# as strings). Clusters mapped to their own number ('0', '1', '24', '35'-'37')
# keep that number as label - presumably not yet decided; confirm.
immune_type= {
'0' : '0',
'1' : '1',
'2' : 'Tumour cells',
'3' : 'Tumour cells',
'4' :  'Tumour cells',
'5' :  'Tumour cells',
'6' :  'Tumour cells',
'7' : 'Tumour cells',
'8' :  'Tumour cells',
'9' :  'Tumour cells',
'10' :  'Tumour cells',
'11' :  'Tumour cells',
'12' :  'Tumour cells',
'13' :  'Tumour cells',
'14' :  'Tumour cells',
'15' :  'Tumour cells',
'16' :  'Mix',
'17' :  'Tumour cells',
'18' :  'Immune cells',
'19' :  'Immune cells',
'20' :  'Tumour cells',
'21' :  'Immune cells',
'22' :  'Tumour cells',
'23' :  'Tumour cells',
'24' :  '24',
'25' :  'Tumour cells',
'26' :  'Mix',
'27' :  'Tumour cells',
'28' :  'Tumour cells',
'29' :  'Tumour cells',
'30' :  'Tumour cells',
'31' :  'Tumour cells',
'32' :  'Tumour cells',
'33' :  'Tumour cells',
'34' :  'Tumour cells',
'35' :  '35',
'36' :  '36',
'37' :  '37',
}
# Translate each cell's Leiden label; labels that are not a key of the
# dictionary end up as missing values in the new column.
adata.obs['immune_annotation'] = (adata.obs['leiden'].map(immune_type).astype('category'))

In [None]:
# Visual check of the coarse annotation: annotation (labels drawn on the
# embedding) next to CD45, then next to Cytokeratin
sc.pl.umap(adata, color=['immune_annotation', 'CD45'], s=20, use_raw=False,  legend_fontsize=10, color_map='coolwarm', vcenter=0,  legend_loc='on data')
sc.pl.umap(adata, color=['immune_annotation', 'Cytokeratin'], s=20, use_raw=False,  legend_fontsize=10, color_map='coolwarm', vcenter=0,  legend_loc='on data')

# First annotation of original Leiden

In [None]:
# First-pass cell-type label for every original Leiden cluster (keys are the
# cluster numbers as strings). Only clusters '0'-'34' are listed.
# NOTE(review): immune_type above also lists '35'-'37'; cells of clusters
# missing here get a missing value in 'first_annotation' - confirm intended.
first_annotation = {
'0' : 'Granulocyte/ CAF',
'1' : 'Granulocyte/ CAF',
'2' : 'Endothel?',
'3' : 'CD4+ T cells',
'4' :  'CD4+ T cells',
'5' :  'Tumour cells',
'6' :  'Collagen ',
'7' : 'M2 Macrophages',
'8' :  'Dendritic cells/ Mast cells',
'9' :  'Dendritic cells/ Mast cells',
'10' : 'Dendritic cells', 
'11' :  'M2 Macrophages',
'12' :  '?',
'13' :  'CD8+ T cells',
'14' :  'Granulocyte',
'15' :  'M1 Macrophages',
'16' :  'Tumour cells',
'17' :  'B cells',
'18' :  'Collagen ',
'19' :  'Dendritic cells', 
'20' :  'Dendritic cells/ Mast cells',
'21' :  'CD8+ T cells',
'22' :  'Endothel?',
'23' :  'M2 Macrophages',
'24' :  'Collagen / CAF',
'25' :  'Tumour cells',
'26' :  'NK cells',
'27' :  'Granulocyte',
'28' :  'Plasma cells',
'29' :  'Fibroblasts',
'30' :  'Collagen ',
'31' :  'Unknown',
'32' :  'Plasma cells',
'33' :  'Unknown',
'34' :  'Fibroblasts',

}
# Translate each cell's Leiden label into its first-pass cell type
adata.obs['first_annotation'] = (adata.obs['leiden'].map(first_annotation).astype('category'))

In [None]:
# Small base font; the legend sizes are set per plot below
sc.settings.set_figure_params(dpi=220, fontsize=5, dpi_save=700, figsize=(10,10))
# First-pass annotation with the legend beside the plot ...
sc.pl.umap(adata, color=['first_annotation'], s=20, use_raw=False,  legend_fontsize=12, title = sample, color_map='coolwarm', vcenter=0)
# ... and with the labels drawn on the embedding, next to CD45 expression
sc.pl.umap(adata, color=['first_annotation', 'CD45'], s=20, use_raw=False, legend_fontsize=7,  title = sample, legend_loc='on data', color_map='coolwarm', vcenter=0)

# Choose cluster

In [None]:
# get IDs for every Cluster
def getCluster(numCluster):
    """Return the expression table of one original Leiden cluster.

    Parameters
    ----------
    numCluster : int or str
        Cluster number; it is compared as ``str(numCluster)`` with the
        labels in ``adata.obs['leiden']``.

    Returns
    -------
    pandas.DataFrame
        The rows of ``adata.to_df()`` whose Leiden label equals the
        requested cluster; empty when no cell carries that label.
    """
    wanted_label = str(numCluster)
    leiden_labels = adata.obs['leiden']
    # Collect the IDs of the matching cells, then select them by label
    cell_ids = leiden_labels.index[leiden_labels == wanted_label].tolist()
    return adata.to_df().loc[cell_ids]

In [None]:
# Expression table of every original Leiden cluster, one variable per cluster
# number (0-40). getCluster returns an empty table for a number that no cell
# carries.
# NOTE(review): the annotation dictionaries above list clusters up to '37'
# ('34' for first_annotation), so the highest numbers are probably empty for
# this ROI - confirm the intended range.
cluster0 = getCluster(0)
cluster1 = getCluster(1)
cluster2 = getCluster(2)
cluster3 = getCluster(3)
cluster4 = getCluster(4)
cluster5 = getCluster(5)
cluster6 = getCluster(6)
cluster7 = getCluster(7)
cluster8 = getCluster(8)
cluster9 = getCluster(9)
cluster10 = getCluster(10)
cluster11 = getCluster(11)
cluster12 = getCluster(12)
cluster13 = getCluster(13)
cluster14 = getCluster(14)
cluster15 = getCluster(15)
cluster16 = getCluster(16)
cluster17 = getCluster(17)
cluster18 = getCluster(18)
cluster19 = getCluster(19)
cluster20 = getCluster(20)
cluster21 = getCluster(21)
cluster22 = getCluster(22)
cluster23 = getCluster(23)
cluster24 = getCluster(24)
cluster25 = getCluster(25)
cluster26 = getCluster(26)
cluster27 = getCluster(27)
cluster28 = getCluster(28)
cluster29 = getCluster(29)
cluster30 = getCluster(30)
cluster31 = getCluster(31)
cluster32 = getCluster(32)
cluster33 = getCluster(33)
cluster34 = getCluster(34)
cluster35 = getCluster(35)
cluster36 = getCluster(36)
cluster37 = getCluster(37)
cluster38 = getCluster(38)
cluster39 = getCluster(39)
cluster40 = getCluster(40)

In [None]:
# Generate an AnnData object for every cluster so that scanpy can be run on
# each cluster separately. Every clusterN variable is rebound from the
# expression DataFrame to an AnnData built from that DataFrame alone; the
# annotations stored in `adata` are not part of the new objects.
cluster0 = AnnData(cluster0)
cluster1 = AnnData(cluster1)
cluster2 = AnnData(cluster2)
cluster3 = AnnData(cluster3)
cluster4 = AnnData(cluster4)
cluster5 = AnnData(cluster5)
cluster6 = AnnData(cluster6)
cluster7 = AnnData(cluster7)
cluster8 = AnnData(cluster8)
cluster9 = AnnData(cluster9)
cluster10 = AnnData(cluster10)
cluster11 = AnnData(cluster11)
cluster12 = AnnData(cluster12)
cluster13 = AnnData(cluster13)
cluster14 = AnnData(cluster14)
cluster15 = AnnData(cluster15)
cluster16 = AnnData(cluster16)
cluster17 = AnnData(cluster17)
cluster18 = AnnData(cluster18)
cluster19 = AnnData(cluster19)
cluster20 = AnnData(cluster20)
cluster21 = AnnData(cluster21)
cluster22 = AnnData(cluster22)
cluster23 = AnnData(cluster23)
cluster24 = AnnData(cluster24)
cluster25 = AnnData(cluster25)
cluster26 = AnnData(cluster26)
cluster27 = AnnData(cluster27)
cluster28 = AnnData(cluster28)
cluster29 = AnnData(cluster29)
cluster30 = AnnData(cluster30)
cluster31 = AnnData(cluster31)
cluster32 = AnnData(cluster32)
cluster33 = AnnData(cluster33)
cluster34 = AnnData(cluster34)
cluster35 = AnnData(cluster35)
cluster36 = AnnData(cluster36)
cluster37 = AnnData(cluster37)
cluster38 = AnnData(cluster38)
cluster39 = AnnData(cluster39)
cluster40 = AnnData(cluster40)

## PCA

In [None]:
# compute PCs for every Subcluster
def pc(data1):
    """Run PCA on one cluster and plot the variance ratio per component.

    Parameters
    ----------
    data1 : AnnData
        Cluster object; ``sc.tl.pca`` is called on it directly.

    The plot covers up to 50 components on a linear scale. As a side effect
    the global scanpy figure parameters are reset to a small 8x8 figure.
    """
    sc.settings.set_figure_params(dpi=50, fontsize=10,figsize=(8,8))
    sc.tl.pca(data1, svd_solver='arpack')
    sc.pl.pca_variance_ratio(data1, log=False, n_pcs=50)

In [None]:
# PCA per cluster. Only the un-commented calls run; the other lines are
# switched off and kept so that a cluster can be re-enabled quickly.
#pc(cluster0) 
#pc(cluster1) 
#pc(cluster2) 
#pc(cluster3)
#pc(cluster4)
#pc(cluster5) 
#pc(cluster6)
#pc(cluster7)
pc(cluster8)
#pc(cluster9)
#pc(cluster10) 
#pc(cluster11)
#pc(cluster12) 
#pc(cluster13)
#pc(cluster14) 
#pc(cluster15)
#pc(cluster16)
#pc(cluster17)
#pc(cluster18)
#pc(cluster20)
#pc(cluster21)
#pc(cluster22) 
#pc(cluster23) 
#pc(cluster24)
#pc(cluster25)
#pc(cluster28)
#pc(cluster30)
#pc(cluster31)
#pc(cluster32)

In [None]:
# Number of principal components to use per cluster (pcN belongs to clusterN);
# the values are passed as `n_pcs` when umap() is called below.
# NOTE(review): presumably chosen by eye from the variance-ratio plots of
# pc() - confirm. There is no entry for clusters 26, 27 and 29.
pc0 = 5
pc1 = 4
pc2 = 8
pc3 = 7
pc4 = 5
pc5 = 5
pc6 = 3
pc7 = 6
pc8 = 6
pc9 = 5
pc10 = 3
pc11 = 3
pc12 = 6
pc13 = 6
pc14 = 5
pc15 = 5
pc16 = 4
pc17 = 4
pc18 = 5
pc19 = 6
pc20 = 7
pc21 = 6
pc22= 6
pc23 = 5
pc24 = 5
pc25 = 5
pc28 = 4
pc30 = 5
pc31 = 5
pc32 = 4

## Generate UMAPs for Subcluster

In [None]:
def umap(data1, pc1, subcluster, res):
    """Sub-cluster one original cluster and plot the result as a UMAP.

    Parameters
    ----------
    data1 : AnnData
        Cluster object; neighbours, UMAP and Leiden are computed on it
        directly. NOTE(review): `n_pcs` is passed to the neighbour search,
        so PCA is presumably expected on `data1` beforehand (see pc()) -
        confirm what scanpy does when it is missing.
    pc1 : int
        Number of principal components for the neighbour graph. Inside the
        function this name shadows the module-level variable `pc1`.
    subcluster : str
        Number of the original cluster; it is concatenated into the plot
        title and the file name, so it must be a string.
    res : float
        Leiden resolution.

    The sub-cluster labels are stored under the key "leiden" of `data1`.
    The figure is saved through scanpy's `save=` argument as
    '<sample>_<subcluster>.pdf' (scanpy decides the final name and folder).
    """
    sc.settings.set_figure_params(dpi=100, fontsize=12,figsize=(10,10))
    # Computing the neighborhood graph
    sc.pp.neighbors(data1, n_neighbors=15, n_pcs=pc1)
    # Fixed random_state so the embedding is the same on every run
    sc.tl.umap(data1, random_state=0, min_dist=0.3, spread=0.4) 
    sc.tl.leiden(data1,resolution=res, key_added = "leiden")
    
    
    # plot the embedding coloured by sub-cluster and save it
    sc.pl.umap(data1, color ='leiden', s=60, use_raw=False, title = sample + ' Cluster ' + subcluster, save=  sample + '_' + subcluster + '.pdf')

In [None]:
# Sub-cluster selected clusters: umap(cluster, number of PCs, cluster number
# as string, Leiden resolution). Only the un-commented calls run (clusters 2,
# 3, 8 and 15); the other lines keep the parameters used for other clusters.
#umap(cluster0, pc0, '0', 0.8)
#umap(cluster1, pc1, '1', 0.5)
umap(cluster2, pc2, '2', 0.5)
umap(cluster3, pc3, '3', 0.5)
#umap(cluster4, pc4, '4', 0.7)
#umap(cluster5, pc5, '5', 0.5)
#umap(cluster6, pc6, '6', 0.9)
#umap(cluster7, pc7, '7', 0.7)
umap(cluster8, pc8, '8', 0.5)
#umap(cluster9, pc9, '9', 0.5)
#umap(cluster10, pc10, '10', 1.4)
#umap(cluster11, pc11, '11', 0.5)
#umap(cluster12, pc12, '12', 0.6)
#umap(cluster13, pc13, '13', 0.5)
#umap(cluster14, pc14, '14', 0.5)
umap(cluster15, pc15, '15', 0.7)
#umap(cluster16, pc16, '16', 0.9)
#umap(cluster17, pc17, '17', 0.5)
#umap(cluster18, pc18, '18', 0.5)
#umap(cluster19, pc19, '19', 0.5)
#umap(cluster20, pc20, '20', 0.5)
#umap(cluster21, pc21, '21', 0.5)
#umap(cluster22, pc22, '22', 0.5)
#umap(cluster23, pc23, '23', 0.5)
#umap(cluster24, pc24, '24', 0.5)
#umap(cluster25, pc25, '25', 0.5)
#umap(cluster30, pc30, '30', 0.5)
#umap(cluster31, pc31, '31', 0.5)
#umap(cluster32, pc32, '32', 0.5)

## Generate Heatmaps for Subcluster

In [None]:
# median per cluster for heatmap
def median_cluster(data, numOfCluster, cluster):
    """Compute the per-marker median of every cluster.

    Parameters
    ----------
    data : pandas.DataFrame
        Expression table, one row per cell and one column per marker.
    numOfCluster : int
        Number of clusters to summarise; the labels ``'0'`` up to
        ``str(numOfCluster - 1)`` are looked up.
    cluster : pandas.Series or mapping
        Cluster label (a string) of every cell, keyed by the row labels
        of ``data``. The order does not have to match ``data``.

    Returns
    -------
    pandas.DataFrame
        One row per cluster number (index ``0 .. numOfCluster - 1``) and
        the columns of ``data``. A label that no cell carries yields a row
        of NaN.
    """
    labels = pd.Series(cluster)
    median_rows = []
    for cluster_count in range(numOfCluster):
        # IDs of all cells of this cluster; rows are then selected by label
        id_of_cluster = labels.index[labels == str(cluster_count)]
        median_rows.append(data.loc[id_of_cluster].median())

    # Build the result once from the collected rows. Growing a frame by
    # concatenating onto an empty (object-dtype) DataFrame is quadratic and
    # depends on deprecated dtype handling of empty frames in pd.concat.
    cluster_df = pd.DataFrame(median_rows, columns=data.columns)
    return cluster_df.reset_index(drop=True)

In [None]:
# Per-marker medians of every sub-cluster. For the active lines the number of
# sub-clusters is read from the Leiden result of that cluster instead of being
# hard-coded, so the table stays complete when a resolution changes.
# The commented lines keep the counts that were used for the other clusters.
#cluster0_data = median_cluster(cluster0.to_df(), 7, cluster0.obs['leiden'])
#cluster1_data  = median_cluster(cluster1.to_df(), 4, cluster1.obs['leiden'])
cluster2_data  = median_cluster(cluster2.to_df(), cluster2.obs['leiden'].nunique(), cluster2.obs['leiden'])
cluster3_data  = median_cluster(cluster3.to_df(), cluster3.obs['leiden'].nunique(), cluster3.obs['leiden'])
#cluster4_data  = median_cluster(cluster4.to_df(), 8, cluster4.obs['leiden'])
#cluster5_data  = median_cluster(cluster5.to_df(), 7, cluster5.obs['leiden'])
#cluster6_data  = median_cluster(cluster6.to_df(), 8, cluster6.obs['leiden'])
#cluster7_data  = median_cluster(cluster7.to_df(), 6, cluster7.obs['leiden'])
cluster8_data  = median_cluster(cluster8.to_df(), cluster8.obs['leiden'].nunique(), cluster8.obs['leiden'])
#cluster9_data  = median_cluster(cluster9.to_df(), 6, cluster9.obs['leiden'])
#cluster10_data  = median_cluster(cluster10.to_df(), 14, cluster10.obs['leiden'])
#cluster11_data  = median_cluster(cluster11.to_df(), 4, cluster11.obs['leiden'])
#cluster12_data  = median_cluster(cluster12.to_df(), 7, cluster12.obs['leiden'])
#cluster13_data  = median_cluster(cluster13.to_df(), 5, cluster13.obs['leiden'])
#cluster14_data  = median_cluster(cluster14.to_df(), 7, cluster14.obs['leiden'])
cluster15_data  = median_cluster(cluster15.to_df(), cluster15.obs['leiden'].nunique(), cluster15.obs['leiden'])
#cluster16_data  = median_cluster(cluster16.to_df(), 6, cluster16.obs['leiden'])
#cluster17_data  = median_cluster(cluster17.to_df(), 6, cluster17.obs['leiden'])
#cluster18_data  = median_cluster(cluster18.to_df(), 4, cluster18.obs['leiden'])
#cluster19_data  = median_cluster(cluster19.to_df(), 3, cluster19.obs['leiden'])
#cluster20_data  = median_cluster(cluster20.to_df(), 3, cluster20.obs['leiden'])
#cluster21_data  = median_cluster(cluster21.to_df(), 6, cluster21.obs['leiden'])
#cluster22_data  = median_cluster(cluster22.to_df(), 5, cluster22.obs['leiden'])
#cluster23_data  = median_cluster(cluster23.to_df(), 6, cluster23.obs['leiden'])
#cluster24_data  = median_cluster(cluster24.to_df(), 3, cluster24.obs['leiden'])
#cluster25_data  = median_cluster(cluster25.to_df(), 3, cluster25.obs['leiden'])
#cluster30_data  = median_cluster(cluster30.to_df(), 4, cluster30.obs['leiden'])
#cluster31_data  = median_cluster(cluster31.to_df(), 6, cluster31.obs['leiden'])
#cluster32_data  = median_cluster(cluster32.to_df(), 6, cluster32.obs['leiden'])

In [None]:
sns.set(font_scale=0.8)
def plotHeatmap(data, subcluster):
    """Draw and save the marker heatmap of one sub-clustered cluster.

    Parameters
    ----------
    data : pandas.DataFrame
        Median table with one row per sub-cluster and one column per marker;
        it is transposed for plotting, so markers become the heatmap rows.
    subcluster : int or str
        Number of the original cluster, used in the title and the file name.

    The figure is written to
    'results_subcluster_<sample>/matrixplot<sample>_<subcluster>.png'.
    """
    clustermap_options = dict(
        standard_scale=1,  # Either 0 (rows) or 1 (columns).
        cmap='Reds',
        yticklabels=True,
        xticklabels=True,
        figsize=(10, 10),
        row_cluster=False,  # keep the marker order
        col_cluster=True,   # order the sub-clusters by similarity
    )
    grid = sns.clustermap(data.T, **clustermap_options)
    grid.fig.suptitle(sample + ' Cluster ' + str(subcluster))
    out_file = 'results_subcluster_' + sample + '/matrixplot' + sample + '_' + str(subcluster) + '.png'
    # plt.savefig writes the current figure, i.e. the clustermap created above
    plt.savefig(out_file, dpi=150)

In [None]:
# Heatmap per sub-clustered cluster. Only the un-commented calls run; each one
# needs the matching clusterN_data table from the median cell above.
#plotHeatmap(cluster0_data, 0 )
#plotHeatmap(cluster1_data, 1)
plotHeatmap(cluster2_data, 2)
plotHeatmap(cluster3_data, 3)
#plotHeatmap(cluster4_data, 4)
#plotHeatmap(cluster5_data, 5)
#plotHeatmap(cluster6_data, 6)
#plotHeatmap(cluster7_data, 7)
plotHeatmap(cluster8_data, 8)
#plotHeatmap(cluster9_data, 9)
#plotHeatmap(cluster10_data, 10)
#plotHeatmap(cluster11_data, 11)
#plotHeatmap(cluster12_data, 12)
#plotHeatmap(cluster13_data, 13)
#plotHeatmap(cluster14_data, 14)
plotHeatmap(cluster15_data, 15)
#plotHeatmap(cluster16_data, 16)
#plotHeatmap(cluster17_data, 17)
#plotHeatmap(cluster19_data, 19)
#plotHeatmap(cluster18_data, 18)
#plotHeatmap(cluster20_data, 20)
#plotHeatmap(cluster21_data, 21)
#plotHeatmap(cluster22_data, 22)
#plotHeatmap(cluster23_data, 23)
#plotHeatmap(cluster24_data, 24)
#plotHeatmap(cluster25_data, 25)
#plotHeatmap(cluster30_data, 30)
#plotHeatmap(cluster31_data, 31)
#plotHeatmap(cluster32_data, 32)

In [None]:
def countsPerCluster(data):
    """Count the cells of every Leiden cluster.

    Parameters
    ----------
    data : object with an ``obs`` table
        ``data.obs['leiden']`` holds the cluster label of every cell.

    Returns
    -------
    pandas.DataFrame
        Single column 'cell counts', indexed by cluster label, in the order
        produced by ``value_counts`` (largest cluster first).
    """
    return data.obs['leiden'].value_counts().rename('cell counts').to_frame()

In [None]:
# Number of cells per sub-cluster of original cluster 15
cellCounts = countsPerCluster(cluster15)

In [None]:
cellCounts['cell counts'].to_list()

# Assign new subclusters to ROI

In [None]:
def listOfSubcluster(current_subcluster, clusterNum, index):
    """Build combined '<cluster>.<subcluster>' labels for the cells of a cluster.

    Parameters
    ----------
    current_subcluster : list of str
        Sub-cluster label of every cell.
    clusterNum : str
        Number of the original cluster, used as prefix.
    index : sequence
        Cell IDs, in the same order as ``current_subcluster``.

    Returns
    -------
    pandas.DataFrame
        One column (named 0) with the combined labels, indexed by ``index``.
    """
    combined = [clusterNum + '.' + sub_label for sub_label in current_subcluster]
    return pd.DataFrame(data=combined, index=index)

In [None]:
# New sub-cluster labels ('<cluster>.<subcluster>') per cell. Only clusters 2
# and 15 are active, i.e. only these two are replaced by their sub-clusters in
# the merged labelling; df2 and df15 must therefore NOT be overwritten in the
# oldCluster cell below.
#df0 = listOfSubcluster(cluster0.obs['leiden'].to_list(), '0', cluster0.obs['leiden'].index)
#df1 = listOfSubcluster(cluster1.obs['leiden'].to_list(), '1', cluster1.obs['leiden'].index)
df2 = listOfSubcluster(cluster2.obs['leiden'].to_list(), '2', cluster2.obs['leiden'].index)
#df3 = listOfSubcluster(cluster3.obs['leiden'].to_list(), '3', cluster3.obs['leiden'].index)
#df4 = listOfSubcluster(cluster4.obs['leiden'].to_list(), '4', cluster4.obs['leiden'].index)
#df5 = listOfSubcluster(cluster5.obs['leiden'].to_list(), '5', cluster5.obs['leiden'].index)
#df6 = listOfSubcluster(cluster6.obs['leiden'].to_list(), '6', cluster6.obs['leiden'].index)
#df7 = listOfSubcluster(cluster7.obs['leiden'].to_list(), '7', cluster7.obs['leiden'].index)
#df9 = listOfSubcluster(cluster9.obs['leiden'].to_list(), '9', cluster9.obs['leiden'].index)
#df10 = listOfSubcluster(cluster10.obs['leiden'].to_list(), '10', cluster10.obs['leiden'].index)
#df11 = listOfSubcluster(cluster11.obs['leiden'].to_list(), '11', cluster11.obs['leiden'].index)
#df12 = listOfSubcluster(cluster12.obs['leiden'].to_list(), '12', cluster12.obs['leiden'].index)
#df14 = listOfSubcluster(cluster14.obs['leiden'].to_list(), '14', cluster14.obs['leiden'].index)
df15 = listOfSubcluster(cluster15.obs['leiden'].to_list(), '15', cluster15.obs['leiden'].index)
#df18 = listOfSubcluster(cluster18.obs['leiden'].to_list(), '18', cluster18.obs['leiden'].index)
#df21 = listOfSubcluster(cluster21.obs['leiden'].to_list(), '21', cluster21.obs['leiden'].index)
#df22 = listOfSubcluster(cluster22.obs['leiden'].to_list(), '22', cluster22.obs['leiden'].index)
#df23 = listOfSubcluster(cluster23.obs['leiden'].to_list(), '23', cluster23.obs['leiden'].index)
#df31 = listOfSubcluster(cluster31.obs['leiden'].to_list(), '31', cluster31.obs['leiden'].index)
#df32 = listOfSubcluster(cluster32.obs['leiden'].to_list(), '32', cluster32.obs['leiden'].index)

In [None]:
def oldCluster(clusterNum):
    """Build the label frame of a cluster that keeps its original number.

    Parameters
    ----------
    clusterNum : int or str
        Cluster number; it is compared as ``str(clusterNum)`` with the
        labels in ``adata.obs['leiden']``.

    Returns
    -------
    pandas.DataFrame
        One column (named 0) holding ``str(clusterNum)`` for every cell of
        the cluster, indexed by cell ID; empty when the cluster has no cells.
    """
    label = str(clusterNum)
    leiden_labels = adata.obs['leiden']
    cell_ids = leiden_labels.index[leiden_labels == label].tolist()
    return pd.DataFrame(data=[label] * len(cell_ids), index=cell_ids)

In [None]:
# Label frames for all clusters that keep their original Leiden number (0-43).
# The lines for 2 and 15 stay commented out: df2 and df15 already hold the
# sub-cluster labels created in the cell above and must not be overwritten.
# oldCluster returns an empty frame for a number that no cell carries.
df0 = oldCluster(0)
df1 = oldCluster(1)
#df2 = oldCluster(2)
df3 = oldCluster(3)
df4 = oldCluster(4)
df5 = oldCluster(5)
df6 = oldCluster(6)
df7 = oldCluster(7)
df8 = oldCluster(8)
df9 = oldCluster(9)
df10 = oldCluster(10)
df11 = oldCluster(11)
df12 = oldCluster(12)
df13 = oldCluster(13)
df14 = oldCluster(14)
#df15 = oldCluster(15)
df16 = oldCluster(16)
df17 = oldCluster(17)
df18 = oldCluster(18)
df19 = oldCluster(19)
df20 = oldCluster(20)
df21 = oldCluster(21)
df22 = oldCluster(22)
df23 = oldCluster(23)
df24 = oldCluster(24)
df25 = oldCluster(25)
df26 = oldCluster(26)
df27 = oldCluster(27)
df28 = oldCluster(28)
df29 = oldCluster(29)
df30 = oldCluster(30)
df31 = oldCluster(31)
df32 = oldCluster(32)
df33 = oldCluster(33)
df34 = oldCluster(34)
df35 = oldCluster(35)
df36 = oldCluster(36)
df37 = oldCluster(37)
df38 = oldCluster(38)
df39 = oldCluster(39)
df40 = oldCluster(40)
df41 = oldCluster(41)
df42 = oldCluster(42)
df43 = oldCluster(43)

In [None]:
# Stack all label frames into one frame indexed by cell ID: original numbers
# for untouched clusters, '<cluster>.<subcluster>' labels for clusters 2 and 15
newCLuster = pd.concat([df0, df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13, df14, df15, df16, df17, df18, df19, df20, df21, df22,
                       df23, df24, df25, df26, df27, df28, df29, df30, df31, df32, df33, df34, df35, df36, df37, df38, df39, df40, df41, df42, df43])

In [None]:
# Store the merged labels per cell; column 0 is the only column of newCLuster
# and is aligned with adata.obs on the cell IDs
adata.obs['subLeiden'] = newCLuster[0]

# Heatmap with new subcluster

In [None]:
# median per cluster for heatmap
def median_clusterAll(data, numOfCluster, cluster):
    """Compute the per-marker median of every cluster of the whole ROI.

    Parameters
    ----------
    data : pandas.DataFrame
        Expression table, one row per cell and one column per marker.
    numOfCluster : int
        Number of clusters to summarise; the labels ``'0'`` up to
        ``str(numOfCluster - 1)`` are looked up.
    cluster : pandas.Series or mapping
        Cluster label (a string) of every cell, keyed by the row labels
        of ``data``. Labels outside the looked-up range (for example
        '2.0') are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per cluster number (index ``0 .. numOfCluster - 1``) and
        the columns of ``data``. A label that no cell carries yields a row
        of NaN.
    """
    labels = pd.Series(cluster)
    median_rows = []
    for cluster_count in range(numOfCluster):
        # IDs of all cells of this cluster; rows are then selected by label
        id_of_cluster = labels.index[labels == str(cluster_count)]
        median_rows.append(data.loc[id_of_cluster].median())

    # Build the result once from the collected rows. Growing a frame by
    # concatenating onto an empty (object-dtype) DataFrame is quadratic and
    # depends on deprecated dtype handling of empty frames in pd.concat.
    cluster_df = pd.DataFrame(median_rows, columns=data.columns)
    return cluster_df.reset_index(drop=True)

In [None]:
# Medians for the labels '0' ... '34' of the merged labelling. The count 35 is
# hard-coded. NOTE(review): clusters with a higher number are not summarised -
# confirm that none of them contains cells for this ROI.
cluster_df = median_clusterAll(adata.to_df(), 35, adata.obs['subLeiden']) # compute median per subcluster
# Clusters 2 and 15 now carry '2.x' / '15.x' labels, so no cell is labelled
# '2' or '15' any more; their rows are removed here and the sub-cluster rows
# are added further below
cluster_df= cluster_df.drop([2, 15], axis=0) # drop old/original cluster

In [None]:
# Sorted unique labels found in df0.
# NOTE(review): this module-level idx0 is not read by any visible cell
# (newIdx below uses its own local idx0) - looks like a leftover check.
idx0 = np.unique(df0[0].to_list())

In [None]:
def newIdx(df, data):
    """Label the rows of a sub-cluster median table with their sub-cluster IDs.

    Parameters
    ----------
    df : pandas.DataFrame
        Label frame whose column 0 holds the '<cluster>.<subcluster>' label
        of every cell.
    data : pandas.DataFrame
        Median table with one row per sub-cluster, in numeric sub-cluster
        order (row 0 is sub-cluster 0, row 1 is sub-cluster 1, ...).

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` whose index (named 'idx') holds the unique labels
        of ``df``. ``data`` itself is left unchanged.

    Raises
    ------
    ValueError
        If the number of unique labels differs from the number of rows.
    """
    def _numeric_order(label):
        # Compare the dot-separated parts as integers so that '2.10' sorts
        # after '2.9'; plain string sorting would put it before '2.2' and
        # attach the labels to the wrong rows.
        return tuple(
            (0, int(part)) if part.isdigit() else (1, part)
            for part in str(label).split('.')
        )

    labels = sorted(set(df[0].to_list()), key=_numeric_order)
    if len(labels) != len(data):
        raise ValueError(
            'Number of sub-cluster labels (%d) does not match the number of '
            'rows in data (%d)' % (len(labels), len(data))
        )
    # set_index returns a new frame, so the caller's table is not modified
    return data.set_index(pd.Index(labels, name='idx'))

In [None]:
# reindex the data, so that the subcluster have the correct index
# From here on df2 and df15 no longer hold the per-cell label frames: they are
# rebound to the sub-cluster median tables indexed by '<cluster>.<subcluster>'.
# Re-running this cell without re-creating the label frames therefore fails.
#df0 = newIdx(df0, cluster0_data)
df2 = newIdx(df2, cluster2_data)
df15 = newIdx(df15, cluster15_data)

In [None]:
# Add the sub-cluster medians of clusters 2 and 15 below the medians of the
# untouched clusters. pd.concat replaces DataFrame.append, which was removed
# in pandas 2.0; rows and row order are the same.
dataAll = pd.concat([cluster_df, df2, df15])
dataAll.shape

In [None]:
sns.set(font_scale=1)
def plotHeatmapAll(data, name):
    """Draw and save the marker heatmap of all (sub)clusters of the ROI.

    Parameters
    ----------
    data : pandas.DataFrame
        Median table with one row per cluster or cell type and one column
        per marker; it is transposed for plotting.
    name : str
        Text appended to the title 'Cell Type Annotation '. It also selects
        the output file: 'Newmatrixplot<sample>.png' when ``name`` equals
        the global ``sample``, otherwise 'Newmatrixplot<sample>_<name>.png'.

    The file is written to the 'results_subcluster_<sample>' directory.
    Calls that pass the same ``name`` still overwrite each other.
    """
    plot = sns.clustermap(data.T,
                          standard_scale=1,  # Either 0 (rows) or 1 (columns).
                          cmap='Reds',
                          yticklabels=True, xticklabels=True,
                          figsize=(10, 10),
                          row_cluster=True,
                          col_cluster=True,
                          dendrogram_ratio=0.1)

    plt.setp(plot.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(plot.ax_heatmap.xaxis.get_majorticklabels(), rotation=35, ha='right', rotation_mode='anchor')
    plt.subplots_adjust(top=0.98) # space between title and plot
    plot.fig.suptitle('Cell Type Annotation ' + name,  y=1, fontsize=12)
    # Make room on the right and move the colour bar there
    plot.fig.subplots_adjust(right=0.7)
    plot.ax_cbar.set_position((0.89, .28, .015, .4))

    # The file name used to ignore `name`, so the 'Annotated' heatmap silently
    # replaced the per-sample one. The original name is kept when
    # name == sample so that existing output paths do not change.
    suffix = '' if name == sample else '_' + str(name)
    out_file = 'results_subcluster_' + sample + '/Newmatrixplot' + sample + suffix + '.png'
    # Save through the figure handle instead of relying on the current figure
    plot.fig.savefig(out_file, dpi=250, bbox_inches='tight')

In [None]:
# Heatmap of all clusters and sub-clusters, still labelled by cluster number
sc.settings.set_figure_params(dpi=220, fontsize=5, dpi_save=700, figsize=(15,15))
plotHeatmapAll(dataAll, sample)

# Annotate

In [None]:
# Cell type of every row of dataAll, in row order: first the untouched
# clusters 0-34 (without 2 and 15), then the sub-clusters of cluster 2, then
# the sub-clusters of cluster 15. The list must have exactly one entry per row
# of dataAll; it mirrors the old_to_new dictionary used for the UMAP below.
cellTypes=[
'Granulocyte',
'Granulocyte',
# cluster 2 is replaced by its sub-clusters (listed below)
'Effector T cells ',
'Tumour cells',
'Tumour cells',
'Collagen ',
'Tumour cells',
'Dendritic cells', 
'Dendritic cells', 
'CD8+ T cells',
'Tumour cells',
'Tumour cells',
'CD8+ T cells',
'Granulocyte',
# cluster 15 is replaced by its sub-clusters (listed below)
'Tumour cells',
'B cells',
'Collagen ',
'Dendritic cells', 
'Mast cells',
'CD8+ T cells',
'CD4+ T cells',
'M2 Macrophages',
'Collagen ',
'Tumour cells',
'NK cells',
'Granulocyte',
'Unknown',
'Fibroblasts',
'Collagen ',
'Unknown',
'Plasma cells',
'Unknown',
'Tumour cells',
  
# sub-clusters of cluster 2 (2.0 - 2.3)
'CD4+ T cells',
'Effector T cells ',
'CD4+ T cells',
'CD4+ T cells',

# sub-clusters of cluster 15 (15.0 - 15.5)
'M1 Macrophages',
'CAF',
'M2 Macrophages',
'M1 Macrophages',
'M2 Macrophages',
'M1 Macrophages',
 
]

In [None]:
# Replace the cluster labels by the cell-type names (positional: entry i of
# cellTypes names row i of dataAll, so the lengths must match)
dataAll.index = cellTypes

In [None]:
plotHeatmapAll(dataAll,  'Annotated')

# Merge

In [None]:
# Merge all rows that share a cell-type name by averaging their per-cluster
# medians, marker by marker
annotated_merged_data = dataAll.groupby(level=0).mean()

In [None]:
# Heatmap with one row per merged cell type
sc.settings.set_figure_params(dpi=220, fontsize=12, dpi_save=700, figsize=(10,10))
plotHeatmapAll(annotated_merged_data, sample)

# Annotated UMAP

In [None]:
# Final cell type per label of adata.obs['subLeiden']: original cluster numbers
# for untouched clusters and '<cluster>.<subcluster>' labels for clusters 2 and
# 15. Labels that are not a key here end up as missing values in 'annotation'.
old_to_new = {
'0' : 'Granulocyte',
'1' : 'Granulocyte',
# '2' is replaced by the entries '2.0' - '2.3' below
'3' : 'Effector T cells ',
'4' :  'Tumour cells',
'5' :  'Tumour cells',
'6' :  'Collagen ',
'7' : 'Tumour cells',
'8' :  'Dendritic cells', 
'9' :  'Dendritic cells', 
'10' :  'CD8+ T cells',
'11' :  'Tumour cells',
'12' :  'Tumour cells',
'13' :  'CD8+ T cells',
'14' :  'Granulocyte',
# '15' is replaced by the entries '15.0' - '15.5' below
'16' :  'Tumour cells',
'17' :  'B cells',
'18' :  'Collagen ',
'19' :  'Dendritic cells', 
'20' :  'Mast cells',
'21' :  'CD8+ T cells',
'22' :  'CD4+ T cells',
'23' :  'M2 Macrophages',
'24' :  'Collagen ',
'25' :  'Tumour cells',
'26' :  'NK cells',
'27' :  'Granulocyte',
'28' :  'Unknown',
'29' :  'Fibroblasts',
'30' :  'Collagen ',
'31' :  'Unknown',
'32' :  'Plasma cells',
'33' :  'Unknown',
'34' :  'Tumour cells',
    
'2.0' : 'CD4+ T cells',
'2.1' : 'Effector T cells ',
'2.2' : 'CD4+ T cells',
'2.3' : 'CD4+ T cells',
    
'15.0' : 'M1 Macrophages',
'15.1' : 'CAF',
'15.2' : 'M2 Macrophages',
'15.3' : 'M1 Macrophages',
'15.4' : 'M2 Macrophages',
'15.5' : 'M1 Macrophages',
 
}
# Translate every cell's merged label into its final cell type
adata.obs['annotation'] = (adata.obs['subLeiden'].map(old_to_new).astype('category'))

In [None]:
# Final annotation on the UMAP, saved twice through scanpy's `save=` argument:
# once with the legend beside the plot, once with the labels on the embedding
sc.settings.set_figure_params(dpi=160, fontsize=10, dpi_save=300, figsize=(7,7))
sc.pl.umap(adata, color=['annotation'], s=10, use_raw=False,  legend_fontsize=10, title = sample, save = sample + '.png', color_map='coolwarm', vcenter=0)
sc.pl.umap(adata, color=['annotation'], s=10, use_raw=False, legend_fontsize=10,  title = sample, legend_loc='on data', save = sample + '_logOnData.png', color_map='coolwarm', vcenter=0)

In [None]:
# Hand-picked list of marker names. Commented entries (CD2, PD-L1) are
# excluded from the list.
# NOTE(review): the list is not referenced by any visible cell and the name
# looks like a typo of 'selected_marker'; it is left unchanged in case other
# code relies on it.
selced_marker = [
'CD45',  
'CD45RO',   
'CD3',
# CD2,
'CD4',
'CD8',    
'CD25-1',     
'FoxP3',
'CD152 CTLA4',   
#PD-L1
'CD19',     
'CD20 Cytoplasmic',
'CD38',     
'CD138', 
'CD69',    
'CD56',    
'CD314 NKG2D',     
'CD61',
'CD68', 
'CD206',     
'CD163',# IN ROI19 on Tumour cells       
'CD11c', 
'CD123',
'CD15',    
'CD33',  
'CD117',   
'HLA-DR',
'Ki-67',     
'HNF-4-alpha',  
'Cytokeratin',    
'CD99',
'CD31',    
'CD105',    
'Podoplanin',
'SM Actin',
'Myosin SM',
'Collagen III',
'Collagen IV',
'Fibronectin',
'Galectin9',    
]

In [None]:
# CD45 and Cytokeratin expression on the UMAP, saved for the report
sc.settings.set_figure_params(dpi=160, fontsize=16, dpi_save=300, figsize=(7,7))
sc.pl.umap(adata, color=['CD45', 'Cytokeratin'], s=10, use_raw=False,  legend_fontsize=16, color_map='coolwarm', vcenter=0, save = sample + '_Expression.png',)

In [None]:
# Final annotation plotted at the spatial position of every cell.
# NOTE(review): assumes adata carries spatial coordinates for sc.pl.spatial -
# they are not created in this notebook; confirm they come with the input file.
sc.settings.set_figure_params(dpi=160, fontsize=10, dpi_save=300, figsize=(7,7))
sc.pl.spatial(adata, color="annotation", spot_size=40, scale_factor=1, legend_fontsize=10, save = '_Spatial' + sample + '.png', title = sample)