# EndoCardium
## Xiaonan Wang
## 19July2024

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
# from matplotlib_venn import venn2
# from matplotlib_venn import venn3
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt
from functools import reduce
import seaborn as sns

# Colour map for expression plots: runs from light grey through thistle and red to dark red.
# expPlotFun and several plotting cells read this module-level name.
cmap = LinearSegmentedColormap.from_list(name='gene_cmap', colors=['lightgrey', 'thistle', 'red', 'darkred']) 

sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
# Default scanpy figure parameters: 80 dpi, viridis as the default colour map.
sc.settings.set_figure_params(dpi=80, color_map='viridis')
# Log the package versions in use.
sc.logging.print_header()

In [None]:
def expPlotFun(adata, group = 'Study', color='louvain_rd10', vmin=None, vmax=None, size=100):
    """Draw a UMAP of all cells followed by one UMAP per category of ``group``.

    All panels share axes and colour limits so they can be compared directly.

    Parameters
    ----------
    adata
        AnnData object with a computed UMAP embedding.
    group
        Column of ``adata.obs`` used to split the cells into panels.
    color
        Gene name or ``adata.obs`` column used to colour the cells.
    vmin, vmax
        Colour limits. ``vmin`` defaults to 0. ``vmax`` defaults to the maximum
        expression of ``color`` when it is a gene name, and is otherwise left
        as ``None``.
    size
        Point size passed to ``sc.pl.umap``.

    Returns
    -------
    None. The figure is drawn on the current matplotlib backend.
    """
    if vmax is None:
        # Look the gene up in .raw when it exists, otherwise in adata itself,
        # so the function also works on objects without a .raw attribute.
        expr_source = adata.raw if adata.raw is not None else adata
        if color in expr_source.var_names:
            vmax = np.max(expr_source[:, color].X)
    if vmin is None:
        vmin = 0

    # Casting is a no-op for categorical columns and lets plain string
    # columns be used as the grouping variable too.
    categories = adata.obs[group].astype('category').cat.categories

    n_panels = len(categories) + 1
    # squeeze=False keeps a 2-D axes array even when there is only one panel.
    fig, axes = plt.subplots(1, n_panels, figsize=(5*n_panels, 4.5),
                             sharex=True, sharey=True, squeeze=False)
    axes = axes[0]

    shared_kwargs = dict(color=color, legend_loc="on data", show=False, size=size,
                         color_map=cmap, vmin=vmin, vmax=vmax, legend_fontsize=25)

    # First panel: every cell.
    sc.pl.umap(adata, ax=axes[0], **shared_kwargs)
    # Remaining panels: one per category, titled with the category name.
    for panel_ax, category in zip(axes[1:], categories):
        sc.pl.umap(adata[adata.obs[group] == category, :], title=category,
                   ax=panel_ax, **shared_kwargs)
    fig.tight_layout()

In [None]:
def barplot_fun(adata, con1, con2, plot=True):
    """Cross-tabulate two ``adata.obs`` columns and show the composition of each ``con1`` level.

    Counts are first rescaled so that every ``con2`` level sums to 10,000
    cells, which removes differences in group size. Each ``con1`` row is then
    converted to percentages.

    Parameters
    ----------
    adata
        Object exposing a pandas DataFrame as ``.obs``.
    con1
        ``obs`` column placed on the rows (x axis of the bar plots).
    con2
        ``obs`` column placed on the columns (bar colours).
    plot
        When truthy, draw the normalised counts and the stacked percentages
        side by side.

    Returns
    -------
    pandas.DataFrame
        Percentage of each ``con2`` level within each ``con1`` level, computed
        from the normalised counts. Levels without any cells are reported as 0.
    """
    counts = pd.crosstab(adata.obs[con1], adata.obs[con2], dropna=False)

    # dropna=False keeps unobserved categories, whose totals are 0; the
    # resulting 0/0 is NaN, which is reported as 0 rather than propagated.
    t1 = counts.div(counts.sum(axis=0), axis=1).fillna(0) * 10000
    t1_per = t1.div(t1.sum(axis=1), axis=0).fillna(0) * 100

    if plot:
        fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))

        # Left panel: size-normalised cell numbers.
        t1.plot.bar(edgecolor="black", ax=ax[0])
        ax[0].legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
        ax[0].set_ylabel('normalised # of cells')
        ax[0].grid()

        # Right panel: stacked percentages per con1 level.
        t1_per.plot.bar(stacked=True, edgecolor="black", ax=ax[1])
        ax[1].legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
        ax[1].set_ylabel('% in each cluster')
        ax[1].grid()
        fig.tight_layout()
    return t1_per

In [None]:
# Default size (width, height) for matplotlib figures created from here on.
plt.rcParams["figure.figsize"] = (5,4.5)

In [None]:
# Load the checkpoint that the clustering section writes with adata.write.
# NOTE(review): the next code cell reassigns `adata` from a different file, so this
# load is discarded when the notebook runs top to bottom; presumably a shortcut
# for resuming a session. Confirm before relying on it.
adata = sc.read('./write/EndoCells_v1.h5ad')

## Read in Data

In [None]:
# Load the h5ad file produced by the earlier all-cells analysis.
adata = sc.read('../New_Batch_Analysis_v2/write/Final_allcells_raw_afterQC.h5ad')

In [None]:
# List the per-cell annotation columns available in adata.obs.
print(adata.obs_keys())

In [None]:
cls = ['0', '1', '2', '3', '4', '6']
idx = np.in1d(adata.obs['louvain_rd10'] ,cls)

In [None]:
adata = adata[idx, :]

In [None]:
adata.shape

In [None]:
import anndata
adata = anndata.AnnData(X=np.exp(adata.raw.X.toarray())-1, var=adata.raw.var, obs=adata.obs)

In [None]:
# Make sure the cluster column is categorical so that it has .cat.categories.
cluster_labels = adata.obs['louvain_rd10'].astype('category')
adata.obs['louvain_rd10'] = cluster_labels

# Full colour list; the integer value of a cluster label is its index into this list.
louvain_rd10_colors = [
    '#0B5345', '#15AB0F', '#63D95E', '#1D8348', '#4BB80C', '#52BE80', '#1EF861',
    '#D4E95E', '#8CA11C', '#7B8003', '#444604',
    '#5175FB',
    '#58DDF5', '#2190A4',
    '#A42DE9', '#7015EB', '#ABB7FF',
    '#F22BA2', '#EC7DBF',
    '#CC3D6E', '#FCC176', '#FFADAB', '#A93226', '#FD1901',
    '#FF5733', '#ff9900',
    '#C0C0C0',
]

# Pick one colour per category that is still present, in category order,
# and store it under the key scanpy reads for this column.
cluster_ids = [int(label) for label in adata.obs['louvain_rd10'].cat.categories]
palette = np.array(louvain_rd10_colors)[cluster_ids]
adata.uns['louvain_rd10_colors'] = palette

## Quality Control

In [None]:
# Drop cells with fewer than 100 detected genes, then genes with no counts at all.
sc.pp.filter_cells(adata, min_genes=100)
sc.pp.filter_genes(adata, min_counts=1)

In [None]:
# Cells x genes remaining after filtering.
print(adata.shape)

## Find highly variable genes

In [None]:
# Scale every cell to a total of 10,000 counts.
sc.pp.normalize_per_cell(adata, counts_per_cell_after=10000)

In [None]:
# Keep a log1p-transformed copy of the normalised matrix (all genes) in .raw;
# expPlotFun looks gene expression up there.
adata.raw = sc.pp.log1p(adata, copy=True)

In [None]:
# Select genes by mean and dispersion thresholds; adata.X is normalised but not yet log-transformed here.
filter_result = sc.pp.filter_genes_dispersion(
    adata.X, min_mean=0.0125, max_mean=4, min_disp=0.5)
# Number of genes that pass the thresholds.
print(sum(filter_result.gene_subset))
sc.pl.filter_genes_dispersion(filter_result)

In [None]:
# Restrict the working matrix to the selected genes.
adata = adata[:, filter_result.gene_subset]

In [None]:
# Log-transform the working matrix in place.
sc.pp.log1p(adata)

In [None]:
# Cast the covariate to float32 before it is used in the regression below.
adata.obs['percent_mito'] = adata.obs['percent_mito'].astype('float32')

In [None]:
# Regress the 'n_counts' and 'percent_mito' covariates out of the expression matrix.
sc.pp.regress_out(adata, ['n_counts', 'percent_mito'])

In [None]:
# Scale each gene (scanpy defaults).
sc.pp.scale(adata)

## Dimensionality reduction

In [None]:
# PCA with scanpy defaults; the result is stored in adata.obsm['X_pca'].
sc.tl.pca(adata)

In [None]:
# Variance explained per component, on a log scale.
sc.pl.pca_variance_ratio(adata, log=True)

In [None]:
# Neighbourhood graph from the first 50 PCs with 10 neighbours per cell.
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=50)

In [None]:
# UMAP embedding with a fixed seed for reproducibility.
sc.tl.umap(adata, random_state=1)

In [None]:
# Colour the embedding by the earlier clustering and by study.
sc.pl.umap(adata, color=['louvain_rd10', 'Study'], legend_loc='on data', legend_fontsize=15)

In [None]:
# Force-directed layout, currently disabled.
#sc.tl.draw_graph(adata, maxiter=100, random_state=1)

## Clustering

In [None]:
# Louvain clustering of the uncorrected graph; labels go to adata.obs['louvain_Endo'].
sc.tl.louvain(adata, resolution = 0.7,key_added='louvain_Endo', random_state=5)
sc.pl.umap(adata, color=['Study', 'louvain_Endo'], legend_loc='on data', legend_fontsize=15, color_map=cmap)

In [None]:
# Same clustering shown for all cells and then separately per study.
expPlotFun(adata, group='Study', color='louvain_Endo')

In [None]:
# Checkpoint before batch correction.
adata.write('./write/EndoCells_v1.h5ad')

## Batch correction

In [None]:
def _pca_frame_for(study):
    """Return the PCA coordinates of the cells of one ``Study`` level, indexed by cell name."""
    cells = adata[adata.obs['Study'] == study, :]
    return pd.DataFrame(data=cells.obsm['X_pca'], index=cells.obs_names)


# One frame per condition; the %%R cell below imports them under these names.
B0 = _pca_frame_for('WT')

B1 = _pca_frame_for('Injured')

B2 = _pca_frame_for('Injured_Runx1_KO')

In [None]:
# Load (or reload) the rpy2 IPython extension that provides the %%R cell magic.
%reload_ext rpy2.ipython

In [None]:
%%R -i B0 -i B1 -i B2 -o PC_BC 
# -i passes the per-study PCA data frames from Python into R; -o returns PC_BC to Python.

suppressMessages(library(scran))

# Record the start time so the duration of the correction can be printed.
t1 = Sys.time()
# Correct the three batches in the order B0, B1, B2. pc.input=TRUE is set because
# the inputs are PCA coordinates rather than expression values.
# NOTE(review): MulticoreParam and detectCores are not attached explicitly here;
# presumably they are available after loading scran. Confirm in the target R environment.
mnncount = fastMNN(data.matrix(B0), data.matrix(B1), data.matrix(B2),
                      BPPARAM=MulticoreParam(detectCores()), pc.input=TRUE)
t2 = Sys.time()
print(t2-t1)

# Corrected coordinates as a data frame, exported to Python through -o.
PC_BC = data.frame(mnncount$corrected)

In [None]:
CN = np.array([])
CN = np.concatenate(np.append(CN, [B0.index, B1.index,B2.index]))

In [None]:
adata_mnn = adata.copy()
adata_mnn = adata_mnn[CN,:]
adata_mnn.obsm['X_pca'] = PC_BC.values

In [None]:
# Neighbourhood graph on the batch-corrected coordinates stored in X_pca (15 neighbours, 50 components).
sc.pp.neighbors(adata_mnn, n_neighbors=15, n_pcs=50)

In [None]:
# UMAP of the corrected data with a fixed seed.
sc.tl.umap(adata_mnn, random_state=1)

In [None]:
sc.pl.umap(adata, color=['louvain_Endo', 'Study', 'louvain_rd10'], legend_loc='on data', legend_fontsize=15)
sc.pl.umap(adata_mnn, color=['louvain_BC', 'Study', 'louvain_rd10'], legend_loc='on data', legend_fontsize=15, save='_EndoCells_Louvain_Study.pdf')

In [None]:
sc.tl.louvain(adata_mnn, resolution = 1.1,key_added='louvain_BC', random_state=3)
expPlotFun(adata_mnn, group='Study', color='louvain_BC')

In [None]:
# Uncorrected embedding coloured by study, with the legend in the margin.
sc.pl.umap(adata, color=['Study'], legend_loc='right margin', legend_fontsize=15)

In [None]:
# Composition of each batch-corrected cluster by study (normalised counts and percentages).
barplot_fun(adata_mnn,  'louvain_BC','Study', plot=True)

In [None]:
# Dimensions of the all-gene matrix kept in .raw.
adata_mnn.raw.shape

In [None]:
sc.tl.rank_genes_groups(adata_mnn, 'louvain_BC', n_genes=adata_mnn.raw.X.shape[1], key_added='DE_cluster')

In [None]:
DElist = pd.DataFrame(adata.uns['DE_cluster']['names'])
DElist.to_csv('Endo_DEcluster_cutoff100.csv')

In [None]:
DElist.head(10)

In [None]:
pd.DataFrame(adata_mnn.uns['DE_cluster']['names']).head(50).to_csv('EndoCells_top50_MarkerGenes.csv')

In [None]:
# Batch-corrected clusters on the corrected embedding.
sc.pl.umap(adata_mnn, color=['louvain_BC'], legend_loc='on data', legend_fontsize=15, color_map=cmap)

In [None]:
# Row 0 of the ranked-names table, i.e. the first-ranked gene of every cluster, one panel per gene.
sc.pl.umap(adata_mnn, color=adata_mnn.uns['DE_cluster']['names'][0], legend_loc='on data', legend_fontsize=15, color_map=cmap)

In [None]:
# PAGA graph with the batch-corrected clusters as nodes.
sc.tl.paga(adata_mnn, groups='louvain_BC')

In [None]:
# Remove the stored cluster colours.
# NOTE(review): presumably done so that the following PAGA plot assigns a fresh palette; confirm.
del adata_mnn.uns['louvain_BC_colors']

In [None]:
# PAGA graph shown next to the UMAP embedding.
sc.pl.paga_compare(adata_mnn, basis='umap')
# Alternative call kept for reference. Both lines must stay commented: the
# second line is the continuation of the first and is not valid on its own.
#sc.pl.paga_compare(adata, basis='umap', threshold=.2, arrowsize=10, edge_width_scale=.5,
#                   dashed_edges='connectivities')

In [None]:
# Checkpoints: the uncorrected object and the batch-corrected object.
adata.write('./write/EndoCells_v2.h5ad')
adata_mnn.write('./write/EndoCells_v2_BC.h5ad')

## Test some genes

In [None]:
# From here on `adata` is the batch-corrected object reloaded from disk.
adata = sc.read('./write/EndoCells_v2_BC.h5ad')

In [None]:
# Expression of selected genes on the corrected embedding, three panels per row.
sc.pl.umap(adata, color=['snai1b', 'twist1b', 'snai1a', 'snai2', 'zeb2b'], ncols=3, wspace=0.3, legend_loc='right margin', legend_fontsize=15, color_map=cmap)

In [None]:
# NOTE(review): uses adata_mnn, which only exists if the batch-correction section ran in this session.
len(adata_mnn.var_names)

In [None]:
# Root cell for diffusion pseudotime, given as a positional cell index.
# NOTE(review): 1971 is hard-coded and only valid for this exact cell ordering; confirm before reuse.
adata.uns['iroot'] = 1971
sc.tl.dpt(adata)

In [None]:
# NOTE(review): this rebinds the module-level `cmap`, so expPlotFun and every later
# cell that passes color_map=cmap use this black-to-yellow map from here on; confirm that is intended.
cmap = LinearSegmentedColormap.from_list(name='gene_cmap', colors=['black', 'purple', 'red', 'orange', 'yellow']) 
sc.pl.umap(adata, color=['dpt_pseudotime'], ncols=3, wspace=0.3, legend_loc='right margin', legend_fontsize=15, color_map=cmap)

In [None]:
# Number of cells per study.
adata.obs['Study'].value_counts()

In [None]:
sc.pl.umap(adata, color=['myh11a', 'tagln'], color_map=cmap)

In [None]:
# Colour scale capped at 2.5 for all three panels.
sc.pl.umap(adata, color=['pcna', 'il11ra','il13ra1'], color_map=cmap, vmax=2.5)

In [None]:
sc.pl.umap(adata, color='louvain_rd10')

In [None]:
# NOTE(review): 'DBpos' is not created anywhere in this notebook; presumably it comes from the upstream analysis. Verify it exists.
sc.pl.umap(adata, color='DBpos')

# Cell cycling analysis

In [None]:
# Start the cell-cycle analysis from the saved batch-corrected object.
adata = sc.read('./write/EndoCells_v2_BC.h5ad')

In [None]:
import anndata

In [None]:
# New AnnData built from the all-gene .raw matrix, keeping the cell annotations.
adata = anndata.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)

In [None]:
print(adata.shape)

In [None]:
# Read one gene symbol per line and normalise it to an upper-case first letter
# followed by lower case. The file is closed by the context manager, and
# blank lines are skipped instead of raising IndexError.
with open('/home/xw251/rds/rds-bg200-hphi-gottgens/users/xw251/Files/regev_lab_cell_cycle_genes.txt') as gene_file:
    cell_cycle_genes = [
        name[:1].upper() + name[1:].lower()
        for name in (line.strip() for line in gene_file)
        if name
    ]
print(len(cell_cycle_genes))

In [None]:
# The first 43 entries of the list are used as S-phase genes and the rest as G2/M genes.
# NOTE(review): the split position is hard-coded; it must match the layout of the gene list file.
s_genes = cell_cycle_genes[:43]
g2m_genes = cell_cycle_genes[43:]
print(len(s_genes))
print(len(g2m_genes))

In [None]:
# Tab-separated gene table. The columns used below are 'external_gene_name' and
# 'mmusculus_homolog_associated_gene_name'.
# NOTE(review): going by the file name this is a zebrafish-to-mouse orthologue table; confirm.
GN = pd.read_csv('./drerio_mmusculus.txt', delimiter='\t', index_col=0)

In [None]:
# Mouse homolog symbol of every row of the gene table, as strings (missing
# values become 'nan'). Computed once and reused for all four lookups.
mouse_names = np.array([str(x) for x in GN['mmusculus_homolog_associated_gene_name']])

# S-phase genes that occur in the table, and the table rows that carry them.
# np.isin replaces np.in1d, which is deprecated in recent NumPy releases.
SGo = np.intersect1d(s_genes, mouse_names)
Stab = GN[np.isin(mouse_names, SGo)]
# Same for the G2/M genes.
G2Mo = np.intersect1d(g2m_genes, mouse_names)
G2Mtab = GN[np.isin(mouse_names, G2Mo)]

In [None]:
# Unique 'external_gene_name' values of the matched rows, for each phase.
sg_drerio = np.unique([str(x) for x in Stab['external_gene_name']])
g2mg_drerio = np.unique([str(x) for x in G2Mtab['external_gene_name']])
print(len(sg_drerio))
print(len(g2mg_drerio))

In [None]:
# Subset of those genes that is present in this dataset.
sg_drerio_ol = np.intersect1d(sg_drerio, adata.var_names)
g2mg_drerio_ol = np.intersect1d(g2mg_drerio, adata.var_names)
print(len(sg_drerio_ol))
print(len(g2mg_drerio_ol))

In [None]:
# Scale the genes, then score S and G2/M phase on adata.X (use_raw=False).
# NOTE(review): the unfiltered lists (sg_drerio, g2mg_drerio) are passed here, not the
# *_ol overlaps computed above. Check whether the overlaps were meant to be used,
# since genes missing from adata.var_names cannot contribute to the scores.
sc.pp.scale(adata)
sc.tl.score_genes_cell_cycle(adata, s_genes=sg_drerio, g2m_genes=g2mg_drerio, use_raw=False)

In [None]:
# Reload the batch-corrected object, which has the UMAP embedding, for plotting.
adata1 = sc.read('./write/EndoCells_v2_BC.h5ad')

In [None]:
# Copy the cell-cycle results from the scored object into the plotting object.
adata1.obs['phase'] = adata.obs['phase']
adata1.obs['G2M_score'] = adata.obs['G2M_score']
adata1.obs['S_score'] = adata.obs['S_score']

In [None]:
# Embedding coloured by assigned phase; the figure is also saved to disk.
sc.pl.umap(adata1, color='phase', save="_Endo_phase.pdf")

In [None]:
# Same embedding coloured by the earlier clustering, saved for side-by-side comparison.
sc.pl.umap(adata1, color='louvain_rd10', save="_Endo_louvain_rd10.pdf")