In [None]:
'''
Goal:Compare this dataset to 
A single-cell atlas of the myometrium in human parturition

Author:Carsten Knutsen
Date:231114
conda_env:uterus_sc
Notes: compare bulk DEGs in single cell
'''

In [None]:
import scanpy as sc

# Only needed for processing
import numpy as np
import pandas as pd
import os 
# Let pandas render up to 500 rows before it truncates a displayed table.
pd.options.display.max_rows = 500

# Directory that receives this notebook's saved figures; it is created on
# first run and registered as scanpy's figure directory.
output = '/home/carsten/alvira_bioinformatics/uterus/data/pilot/231114_pique2022_comparison'
os.makedirs(name=output, exist_ok=True)
sc.settings.figdir = output

In [None]:
# Load the single-cell AnnData object used throughout this notebook
# (judging by the file name: the processed, cell-typed uterus dataset).
adata = sc.read(
    '/home/carsten/alvira_bioinformatics/uterus/data/single_cell_files/scanpy_files/uterus_processed_celltyped.gz.h5ad'
)

### Figure 1

In [None]:
# Marker genes grouped by cell population, used for the Figure 1 comparison.
fig1_genes = {
    'SMC': ['LMOD1', 'MYH11', 'MYLK', 'PDE5A'],
    'Stromal': ['CYP4B1', 'SMOC2', 'HSD11B1', 'SFRP2'],
    'Endo': ['CCL14', 'MALL', 'RBP5', 'SELP'],
    'LED': ['ADGRG3', 'FLT4', 'RELN', 'TBX1'],
    'Macro': ['GPR34', 'SIGLEC1', 'FCGBP', 'MMP9'],
    'Lymphoid': ['CD27', 'CD8A', 'IFNG', 'SAMD3'],
}

# Dot plot of all marker groups across cell subtypes; standard_scale='var'
# rescales each gene independently. The styling methods all act on the same
# plot object, so it is built once and then configured.
fig1_dotplot = sc.pl.DotPlot(adata, fig1_genes, groupby='Cell Subtype', standard_scale='var')
fig1_dotplot.style(cmap='Reds').legend(width=2).add_totals().show()

# One UMAP per metadata column.
for obs_column in ('Cell Subtype', 'GroupContract', 'Patient'):
    sc.pl.umap(adata, color=obs_column)

# One set of UMAP panels per marker group.
for key, gene_ls in fig1_genes.items():
    sc.pl.umap(adata, color=gene_ls, cmap='viridis')

In [None]:
# Rank genes for every 'Cell Subtype' group with the Wilcoxon method, then
# save a dot plot showing the top three ranked genes per group.
sc.tl.rank_genes_groups(
    adata,
    method='wilcoxon',
    groupby='Cell Subtype',
)
sc.pl.rank_genes_groups_dotplot(
    adata,
    save='wilcoxon_markers.png',
    n_genes=3,
)

In [None]:
# Show the category labels available in the 'Cell Subtype' annotation.
adata.obs.loc[:, 'Cell Subtype'].cat.categories

In [None]:
# Build two per-cell lineage labels from the 'Lineage' and 'Cell Subtype'
# annotations:
#   lineage_adjusted  - capitalised lineage, endometrial cells left as None
#   lineage_adjusted2 - capitalised lineage, endometrial cells labelled 'Endometrial'
lineage_without_endometrial = []
lineage_with_endometrial = []
for lineage, subtype in zip(adata.obs['Lineage'].values, adata.obs['Cell Subtype'].values):
    if subtype == 'Endometrial cell':
        lineage_without_endometrial.append(None)
        lineage_with_endometrial.append('Endometrial')
    else:
        lineage_without_endometrial.append(lineage.capitalize())
        lineage_with_endometrial.append(lineage.capitalize())

adata.obs['lineage_adjusted'] = lineage_without_endometrial
adata.obs['lineage_adjusted2'] = lineage_with_endometrial

# Fixed palettes for the two new annotations.
# NOTE(review): colours are matched to categories by position, so this assumes
# three lineage categories (four with 'Endometrial') in scanpy's category
# order - confirm against the data.
adata.uns['lineage_adjusted_colors'] = ['#d62728', '#2ca02c', '#1f77b4']
adata.uns['lineage_adjusted2_colors'] = ['#ff7f0e', '#d62728', '#2ca02c', '#1f77b4']

# Lineage UMAP without the endometrial cells in the legend, followed by a UMAP
# that highlights only the 'Endometrial' group.
sc.pl.umap(
    adata,
    color=['lineage_adjusted'],
    title='Lineage',
    na_in_legend=False,
    save='lineage_adjusted.png',
)
sc.pl.umap(
    adata,
    color=['lineage_adjusted2'],
    groups=['Endometrial'],
    title='Endometrial tissue',
    na_in_legend=False,
    save='lineage_adjusted2.png',
)


In [None]:
# sc.tl.rank_genes_groups(adata,groupby='lineage_adjusted2',key_added='lineage_markers', method='wilcoxon')
# sc.pl.rank_genes_groups_dotplot(adata, n_genes=3, key='lineage_markers', save='lineage_wilcoxon_markers.png')

In [None]:
# Display the per-cell 'Cell Subtype' annotation.
adata.obs.loc[:, 'Cell Subtype']

In [None]:
# Restrict to cells whose 'Lineage' is 'Mesenchymal' and draw them on the
# 'X_umap_Mesenchymal' embedding: first coloured by cell subtype, then by
# expression of three genes.
mesenchymal_cells = adata[adata.obs['Lineage'] == 'Mesenchymal']
for colour_by in (['Cell Subtype'], ['OXTR', 'ELANE', 'IFNG']):
    sc.pl.embedding(
        mesenchymal_cells,
        basis='X_umap_Mesenchymal',
        color=colour_by,
    )

In [None]:
### SMC subtype markers
# Both dot plots use only the mesenchymal-lineage cells, grouped by subtype.
mesenchymal_cells = adata[adata.obs['Lineage'] == 'Mesenchymal']

sc.pl.dotplot(
    mesenchymal_cells,
    ['OXTR', 'ELANE', 'IFNG'],
    groupby='Cell Subtype',
)

# Genes grouped under the SMC-1 / SMC-2 / SMC-3 labels.
smc_subtype_markers = {
    'SMC-1': ['PGM5-AS1'],
    'SMC-2': ['CSH1', 'TAC3'],
    'SMC-3': ['LYSMD2', 'KLF13', 'DOCK10', 'CMTM7'],
}
sc.pl.dotplot(
    mesenchymal_cells,
    smc_subtype_markers,
    groupby='Cell Subtype',
)

In [None]:
### Their top myofibroblast markers
# Dot plot of the three genes across subtypes of the mesenchymal-lineage cells.
myofibroblast_markers = ['ACTG2', 'DES', 'PCP4']
sc.pl.dotplot(
    adata[adata.obs['Lineage'] == 'Mesenchymal'],
    myofibroblast_markers,
    groupby='Cell Subtype',
)

In [None]:
# Share of cells in each 'Cell Subtype', expressed as a percentage.
adata.obs['Cell Subtype'].value_counts(normalize=True).mul(100)

In [None]:
# Dot plot of MYH11, ACTA2 and TUBB across subtypes of the mesenchymal-lineage cells.
mesenchymal_cells = adata[adata.obs['Lineage'] == 'Mesenchymal']
sc.pl.dotplot(
    mesenchymal_cells,
    ['MYH11', 'ACTA2', 'TUBB'],
    groupby='Cell Subtype',
)

In [None]:
# Same three genes, shown on the mesenchymal-specific embedding.
mesenchymal_cells = adata[adata.obs['Lineage'] == 'Mesenchymal']
sc.pl.embedding(
    mesenchymal_cells,
    basis='X_umap_Mesenchymal',
    color=['MYH11', 'ACTA2', 'TUBB'],
)

### Tabula Sapiens uterus

In [None]:
# Load the external AnnData file used for the Tabula Sapiens uterus comparison.
ts_adata = sc.read(
    '/home/carsten/alvira_bioinformatics/uterus/data/single_cell_files/external_datasets/TS_Uterus.h5ad'
)

In [None]:
# Use the 'decontXcounts' layer as the expression matrix before normalising.
# AnnData's matrix attribute is the upper-case `.X`; assigning to lower-case
# `.x` only attaches an unrelated Python attribute and leaves the matrix that
# scanpy reads untouched, so the normalisation below would have run on the
# file's original `.X` instead of the decontX counts.
ts_adata.X = ts_adata.layers['decontXcounts'].copy()

# Scale every cell to a total of 10,000 counts, then apply a base-10 log1p.
sc.pp.normalize_total(ts_adata, target_sum=1e4)
sc.pp.log1p(ts_adata, base=10)

In [None]:
# UMAP of the Tabula Sapiens cells coloured by the 'free_annotation' column.
sc.pl.umap(
    ts_adata,
    color='free_annotation',
)

In [None]:
# PAEP expression on the UMAP, limited to cells annotated as 'Myometrium';
# use_raw=False plots values from .X rather than .raw.
myometrium_cells = ts_adata[ts_adata.obs['anatomical_information'] == 'Myometrium']
sc.pl.umap(
    myometrium_cells,
    color='PAEP',
    use_raw=False,
)

In [None]:
# Expression of PAEP, RMST and ZPLD1 across all Tabula Sapiens cells (values from .X).
sc.pl.umap(
    ts_adata,
    color=['PAEP', 'RMST', 'ZPLD1'],
    use_raw=False,
)

In [None]:
# Expression of PAEP, KRT8, CDH1 and PTPRR across all Tabula Sapiens cells (values from .X).
sc.pl.umap(
    ts_adata,
    color=['PAEP', 'KRT8', 'CDH1', 'PTPRR'],
    use_raw=False,
)

In [None]:
# Number of cells per 'anatomical_information' value.
ts_adata.obs.loc[:, 'anatomical_information'].value_counts()

In [None]:
# Number of cells contributed by each donor.
ts_adata.obs.loc[:, 'donor'].value_counts()

In [None]:
# Keep only the cells annotated as 'Endometrium' (this is a view of ts_adata, not a copy).
is_endometrium = ts_adata.obs['anatomical_information'] == 'Endometrium'
endo_ts = ts_adata[is_endometrium]