In [None]:
import scanpy as sc
import muon as mu
import numpy as np
import pandas as pd
import scanpy.external as sce
# Use 300 dpi both for figures rendered in the notebook and for saved figures.
sc.set_figure_params(dpi=300, dpi_save=300)

In [None]:
# Load the cell-typed multiome GEX AnnData from disk.
adata = sc.read(
    '/home/carsten/alvira_bioinformatics/postnatal_lung_multiome/data/single_cell_files/multiome_gex_processed_cell_typed_raw.gz.h5ad'
)
# Normalise total counts per cell (scanpy defaults), then log1p-transform
# with base 10. Return values are discarded, so both calls are relied on to
# update `adata` in place.
sc.pp.normalize_total(adata)
sc.pp.log1p(adata, base=10)

In [None]:
# Number of cells per treatment group.
adata.obs["treatment"].value_counts()

In [None]:
# List the category labels of the `celltype` annotation.
adata.obs["celltype"].cat.categories

In [None]:
# Subset `adata` to cells whose `celltype` equals 'nor-2' and count them.
# NOTE(review): 'nor-2' looks like a mouse/sample identifier rather than a
# cell-type label -- confirm whether the filter was meant to use
# adata.obs['mouse'] instead of adata.obs['celltype'].
adata[adata.obs['celltype']=='nor-2'].obs['celltype'].value_counts()

In [None]:
# Lift the pandas row-display cap so the full table is shown.
pd.set_option("display.max_rows", None)
# Cell-type counts within each treatment group.
(
    adata.obs
    .groupby("treatment")["celltype"]
    .value_counts()
)

In [None]:
# Re-cluster the 'Proliferating EC' cells on their own. The subset is copied
# so the steps below operate on an independent AnnData.
prolif_ec = adata[adata.obs['celltype'] == 'Proliferating EC'].copy()

# Highly variable genes selected with `mouse` as the batch key, then PCA.
sc.pp.highly_variable_genes(prolif_ec, batch_key='mouse')
sc.pp.pca(prolif_ec)

# Harmony integration across mice (capped at 20 Harmony iterations); the
# neighbour graph is then built on the `X_pca_harmony` representation.
sce.pp.harmony_integrate(prolif_ec, key='mouse', max_iter_harmony=20)
sc.pp.neighbors(prolif_ec, use_rep='X_pca_harmony')

# Embed, cluster, and plot marker genes next to cluster and mouse labels.
sc.tl.umap(prolif_ec)
sc.tl.leiden(prolif_ec)
sc.pl.umap(
    prolif_ec,
    color=['Kit', 'Car4', 'Gja5', 'Car8', 'Slc6a2', 'Ccl21a', 'leiden', 'mouse'],
)

In [None]:
# Per-mouse cell counts inside each Leiden cluster of the proliferating ECs.
(
    prolif_ec.obs
    .groupby("leiden")["mouse"]
    .value_counts()
)

In [None]:
# Wilcoxon rank-sum marker genes for each Leiden cluster of `prolif_ec`.
sc.tl.rank_genes_groups(prolif_ec, groupby="leiden", method="wilcoxon")

# Dot plot of the top markers; a budget of 50 genes is divided evenly
# (rounded down) across the clusters.
sc.pl.rank_genes_groups_dotplot(
    prolif_ec,
    groupby="leiden",
    dendrogram=False,
    n_genes=50 // len(prolif_ec.obs["leiden"].unique()),
)

# Expression of Car8 and Slc6a2 per cluster.
sc.pl.dotplot(prolif_ec, var_names=['Car8', 'Slc6a2'], groupby='leiden')

# Cluster sizes (displayed as the cell output).
prolif_ec.obs['leiden'].value_counts()

In [None]:
# Re-cluster the 'Venous EC' cells on their own. The subset is copied so the
# steps below operate on an independent AnnData.
vec = adata[adata.obs['celltype'] == 'Venous EC'].copy()

# Highly variable genes selected with `mouse` as the batch key, then PCA.
sc.pp.highly_variable_genes(vec, batch_key='mouse')
sc.pp.pca(vec)

# Harmony integration across mice; the neighbour graph is then built on the
# `X_pca_harmony` representation.
sce.pp.harmony_integrate(vec, key='mouse')
sc.pp.neighbors(vec, use_rep='X_pca_harmony')

# Embed, cluster, and plot marker genes next to cluster and mouse labels.
sc.tl.umap(vec)
sc.tl.leiden(vec)
sc.pl.umap(vec, color=['Mki67', 'Car8', 'Slc6a2', 'leiden', 'mouse'])

# Wilcoxon rank-sum marker genes per Leiden cluster; a budget of 50 genes is
# divided evenly (rounded down) across the clusters in the dot plot.
sc.tl.rank_genes_groups(vec, groupby="leiden", method="wilcoxon")
sc.pl.rank_genes_groups_dotplot(
    vec,
    groupby="leiden",
    dendrogram=False,
    n_genes=50 // len(vec.obs["leiden"].unique()),
)

# Expression of selected genes per cluster.
sc.pl.dotplot(vec, var_names=['Car8', 'Slc6a2', 'Mki67'], groupby='leiden')

# Cluster sizes (displayed as the cell output).
vec.obs['leiden'].value_counts()

In [None]:
# Mesenchymal lineage: batch-aware HVG selection, PCA, Harmony integration
# across mice, then UMAPs coloured by cell type and by treatment.
#
# `.copy()` materialises the subset as an independent AnnData. The bare slice
# is only a view of `adata`, and the scanpy calls below write their results
# into the object they are given; copying explicitly avoids relying on an
# implicit view-to-copy conversion and matches how `prolif_ec` and `vec`
# are created.
mes = adata[adata.obs['lineage'] == 'mesenchymal'].copy()
sc.pp.highly_variable_genes(mes, batch_key='mouse')
sc.pp.pca(mes)
sce.pp.harmony_integrate(mes, key='mouse')
# Build the neighbour graph on the Harmony-adjusted representation.
sc.pp.neighbors(mes, use_rep='X_pca_harmony')
sc.tl.umap(mes)
sc.pl.umap(mes, color='celltype')
sc.pl.umap(mes, color='treatment')


In [None]:
# Endothelial lineage: batch-aware HVG selection, PCA, Harmony integration
# across mice, then UMAPs coloured by cell type and by treatment.
#
# `.copy()` materialises the subset as an independent AnnData. The bare slice
# is only a view of `adata`, and the scanpy calls below write their results
# into the object they are given; copying explicitly avoids relying on an
# implicit view-to-copy conversion and matches how `prolif_ec` and `vec`
# are created.
end = adata[adata.obs['lineage'] == 'endothelial'].copy()
sc.pp.highly_variable_genes(end, batch_key='mouse')
sc.pp.pca(end)
sce.pp.harmony_integrate(end, key='mouse')
# Build the neighbour graph on the Harmony-adjusted representation.
sc.pp.neighbors(end, use_rep='X_pca_harmony')
sc.tl.umap(end)
sc.pl.umap(end, color='celltype')
sc.pl.umap(end, color='treatment')

In [None]:
# Car8 and Mki67 expression on the endothelial UMAP.
sc.pl.umap(end, color=['Car8', 'Mki67'])

In [None]:
# Epithelial lineage: batch-aware HVG selection, PCA, Harmony integration
# across mice, then UMAPs coloured by cell type and by treatment.
#
# `.copy()` materialises the subset as an independent AnnData. The bare slice
# is only a view of `adata`, and the scanpy calls below write their results
# into the object they are given; copying explicitly avoids relying on an
# implicit view-to-copy conversion and matches how `prolif_ec` and `vec`
# are created.
epi = adata[adata.obs['lineage'] == 'epithelial'].copy()
sc.pp.highly_variable_genes(epi, batch_key='mouse')
sc.pp.pca(epi)
sce.pp.harmony_integrate(epi, key='mouse')
# Build the neighbour graph on the Harmony-adjusted representation.
sc.pp.neighbors(epi, use_rep='X_pca_harmony')
sc.tl.umap(epi)
sc.pl.umap(epi, color='celltype')
sc.pl.umap(epi, color='treatment')

In [None]:
# Fraction of each lineage within every mouse.
(
    adata.obs
    .groupby("mouse")["lineage"]
    .value_counts(normalize=True)
)

In [None]:
# Immune lineage: batch-aware HVG selection, PCA, Harmony integration across
# mice (capped at 20 Harmony iterations), Leiden clustering, then UMAPs
# coloured by cell type and by treatment.
#
# `.copy()` materialises the subset as an independent AnnData. The bare slice
# is only a view of `adata`, and the scanpy calls below write their results
# into the object they are given; copying explicitly avoids relying on an
# implicit view-to-copy conversion and matches how `prolif_ec` and `vec`
# are created.
imm = adata[adata.obs['lineage'] == 'immune'].copy()
sc.pp.highly_variable_genes(imm, batch_key='mouse')
sc.pp.pca(imm)
sce.pp.harmony_integrate(imm, key='mouse', max_iter_harmony=20)
# Build the neighbour graph on the Harmony-adjusted representation.
sc.pp.neighbors(imm, use_rep='X_pca_harmony')
sc.tl.umap(imm)
sc.tl.leiden(imm)
sc.pl.umap(imm, color='celltype')
sc.pl.umap(imm, color='treatment')

In [None]:
# Leiden cluster assignments on the immune UMAP.
sc.pl.umap(imm, color="leiden")

In [None]:
# Wilcoxon rank-sum marker genes for each Leiden cluster of `imm`.
sc.tl.rank_genes_groups(imm, "leiden", method="wilcoxon")
sc.pl.rank_genes_groups_dotplot(
    imm,
    groupby="leiden",
    dendrogram=False,
    # Divide a budget of 50 genes evenly across the clusters of `imm` itself.
    # The count must come from the object being plotted, not from another
    # subset such as `vec`. Floor at 1 so that more than 50 clusters cannot
    # produce n_genes=0.
    n_genes=max(1, 50 // len(imm.obs["leiden"].unique())),
)
# Cluster sizes (displayed as the cell output).
imm.obs['leiden'].value_counts()

In [None]:
# Plot endothelial cells on the stored `X_umap_endothelial` embedding,
# coloured by Mki67, Car8 and the cell-type annotation.
sc.pl.embedding(
    adata[adata.obs['lineage'] == 'endothelial'],
    basis='X_umap_endothelial',
    color=['Mki67', 'Car8', 'celltype'],
)

In [None]:
# Acta1 expression in alveolar fibroblasts, grouped by cell type and treatment.
sc.pl.dotplot(
    adata[adata.obs['celltype'] == 'Alveolar fibroblast'],
    var_names=['Acta1'],
    groupby=['celltype', 'treatment'],
)

In [None]:
# Lift the pandas row-display cap so the full table is shown.
pd.set_option("display.max_rows", None)

# Per-mouse cell counts within each cell type.
(
    adata.obs
    .groupby("celltype")["mouse"]
    .value_counts()
)

In [None]:
# Whole-dataset UMAP coloured by mouse.
sc.pl.umap(adata, color='mouse')

In [None]:
# Whole-dataset UMAP coloured by the stored `doublet_score` column.
sc.pl.umap(adata, color='doublet_score')

In [None]:
# Whole-dataset UMAP coloured by Epcam, Col1a1, Cdh5 and Ptprc expression.
# NOTE(review): presumably chosen as epithelial / mesenchymal / endothelial /
# immune lineage markers -- confirm.
sc.pl.umap(adata, color=['Epcam', 'Col1a1', 'Cdh5', 'Ptprc'])

In [None]:
# Cast the `predicted_doublet` column to strings before plotting.
# NOTE(review): presumably so the UMAP colours it as discrete labels instead
# of a continuous scale -- confirm.
adata.obs['predicted_doublet'] = (
    adata.obs['predicted_doublet'].astype('str')
)
sc.pl.umap(adata, color='predicted_doublet')

In [None]:
# Fraction of cells carrying each `predicted_doublet` value.
adata.obs["predicted_doublet"].value_counts(normalize=True)

In [None]:
# Fraction of all cells contributed by each mouse.
adata.obs["mouse"].value_counts(normalize=True)