In [None]:
import os
import warnings

import scanpy as sc

# Only needed for processing
import anndata
import liana as li
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

pd.set_option('display.max_rows', 500)

# Scanpy figure defaults: 300 dpi both on screen and when saving, SVG files,
# 10 pt font.
sc.set_figure_params(
    dpi=300,
    dpi_save=300,
    format="svg",
    fontsize=10,
)

# Seaborn 'white' style, with tick marks re-enabled on the bottom and left axes.
tick_overrides = {
    'xtick.bottom': True,
    'ytick.left': True,
}
sns.set_style('white', rc=tick_overrides)
plt.rcParams["font.family"] = "Arial"

# One folder receives both the CSV exports and the figures scanpy saves
# (scanpy writes figures into `sc.settings.figdir`).
output_fol = '/home/carsten/alvira_bioinformatics/uterus/data/pilot/241117_tabula_sapiens'
os.makedirs(output_fol, exist_ok=True)
sc.settings.figdir = output_fol


In [None]:
# Load the processed, cell-typed uterus dataset.
uterus_adata = sc.read('/home/carsten/alvira_bioinformatics/uterus/data/single_cell_files/scanpy_files/uterus_processed_celltyped.gz.h5ad')

# Explicit category order applied to the 'Cell Subtype' annotation below.
ct_order=['Ciliated', 'Epithelial', 'Glandular', 'Lumenal',
         'Capillary','Lymphatic EC', 'Macrovascular',
          'Basophil', 'Dendritic', 'Myeloid','Proliferative myeloid','NK cell',  'T cell',
          'Matrix fibroblast','Vessel fibroblast','Trophoblast','Uterine smooth muscle', 'Vascular smooth muscle',
         ]

# pd.Categorical replaces every value that is not listed in `categories` with
# NaN without raising. A renamed or misspelled subtype would therefore vanish
# from later `== '<subtype>'` subsets unnoticed, so report such labels before
# the coercion happens. Re-running this cell stays silent, because by then the
# column only contains labels from ct_order (or NaN).
observed_subtypes = set(uterus_adata.obs['Cell Subtype'].dropna().unique())
unlisted_subtypes = sorted(observed_subtypes.difference(ct_order), key=str)
if unlisted_subtypes:
    warnings.warn(
        "'Cell Subtype' labels missing from ct_order will be set to NaN: "
        f"{unlisted_subtypes}"
    )

uterus_adata.obs['Cell Subtype'] = pd.Categorical(uterus_adata.obs['Cell Subtype'], categories=ct_order)

# Constant 'organ_tissue' value for every cell of this dataset; the combined
# VSM dot plot in a later cell groups by this column.
uterus_adata.obs['organ_tissue'] = 'Myometrium'


In [None]:
# Load the Tabula Sapiens stromal object used as the external reference.
ts_adata = sc.read('/home/carsten/alvira_bioinformatics/uterus/data/outside_data/TS_stromal.h5ad')

# Keep only the cells whose 'organ_tissue' annotation is 'Uterus'.
from_uterus = ts_adata.obs['organ_tissue'] == 'Uterus'
ts_uterus_adata = ts_adata[from_uterus]

In [None]:
# Show the available 'cell_ontology_class' labels; the subsetting cells below
# select from these by exact name.
ontology_labels = ts_adata.obs['cell_ontology_class']
ontology_labels.cat.categories

In [None]:
# --- Vascular smooth muscle: Tabula Sapiens vs. this study ---

# Select the vascular smooth muscle cells from each dataset.
ts_vsm_mask = ts_adata.obs['cell_ontology_class'].isin(['vascular associated smooth muscle cell'])
ts_vsm = ts_adata[ts_vsm_mask]
uterus_vsm_mask = uterus_adata.obs['Cell Subtype'] == 'Vascular smooth muscle'
uterus_vsm = uterus_adata[uterus_vsm_mask]

# Stack both subsets; the 'Dataset' column labels each cell 'TS' or 'Ansari2024'.
# NOTE(review): AnnData.concatenate is deprecated in favour of anndata.concat,
# which is not a drop-in replacement - left unchanged here.
vsm_adata = ts_vsm.concatenate(
    uterus_vsm,
    batch_key='Dataset',
    batch_categories=['TS', 'Ansari2024'],
)

# Wilcoxon rank-sum test between the two datasets; pts=True additionally stores
# the per-gene expression fractions that become the pct_nz_* columns below.
sc.tl.rank_genes_groups(vsm_adata, groupby='Dataset', method='wilcoxon', pts=True)
sc.pl.rank_genes_groups_dotplot(
    vsm_adata,
    title='VSM comparison',
    save='dataset_vsm_comparison.svg',
)

# Dot plot of a fixed gene panel, grouped by tissue of origin (not saved).
vsm_panel = ['NOTCH3', 'EEF1A1', 'FAU', 'FTX', 'CHSY3']
sc.pl.dotplot(vsm_adata, var_names=vsm_panel, groupby='organ_tissue')

# Results for the 'Ansari2024' group, with the expression-fraction columns
# renamed per dataset and their difference added as a final column.
vsm_column_labels = {
    'pct_nz_group': 'Ansari2024_%expressed',
    'pct_nz_reference': 'TabulaSapiens_%expressed',
}
df = (
    sc.get.rank_genes_groups_df(vsm_adata, group="Ansari2024")
    .rename(columns=vsm_column_labels)
    .assign(**{
        '%expressed_difference': lambda tbl: tbl['Ansari2024_%expressed'] - tbl['TabulaSapiens_%expressed'],
    })
)

# Export sorted by the difference, ascending; `df` itself stays unsorted.
vsm_csv_path = f'{output_fol}/dataset_degs_vsm_wilcoxon.csv'
df.sort_values(by='%expressed_difference').to_csv(vsm_csv_path)

In [None]:
# --- Uterine smooth muscle: Tabula Sapiens vs. this study ---

# Select the Tabula Sapiens 'myometrial cell' class and this study's
# 'Uterine smooth muscle' subtype.
ts_usm_mask = ts_adata.obs['cell_ontology_class'].isin(['myometrial cell'])
ts_usm = ts_adata[ts_usm_mask]
uterus_usm_mask = uterus_adata.obs['Cell Subtype'] == 'Uterine smooth muscle'
uterus_usm = uterus_adata[uterus_usm_mask]

# Stack both subsets; the 'Dataset' column labels each cell 'TS' or 'Ansari2024'.
# NOTE(review): AnnData.concatenate is deprecated in favour of anndata.concat,
# which is not a drop-in replacement - left unchanged here.
usm_adata = ts_usm.concatenate(
    uterus_usm,
    batch_key='Dataset',
    batch_categories=['TS', 'Ansari2024'],
)

# Wilcoxon rank-sum test between the two datasets; pts=True additionally stores
# the per-gene expression fractions that become the pct_nz_* columns below.
sc.tl.rank_genes_groups(usm_adata, groupby='Dataset', method='wilcoxon', pts=True)
sc.pl.rank_genes_groups_dotplot(
    usm_adata,
    title='usm comparison',
    save='dataset_usm_comparison.svg',
)

# Results for the 'Ansari2024' group, with the expression-fraction columns
# renamed per dataset and their difference added as a final column.
usm_column_labels = {
    'pct_nz_group': 'Ansari2024_%expressed',
    'pct_nz_reference': 'TabulaSapiens_%expressed',
}
df = (
    sc.get.rank_genes_groups_df(usm_adata, group="Ansari2024")
    .rename(columns=usm_column_labels)
    .assign(**{
        '%expressed_difference': lambda tbl: tbl['Ansari2024_%expressed'] - tbl['TabulaSapiens_%expressed'],
    })
)

# Export sorted by the difference, ascending; `df` itself stays unsorted.
usm_csv_path = f'{output_fol}/dataset_degs_usm_wilcoxon.csv'
df.sort_values(by='%expressed_difference').to_csv(usm_csv_path)

In [None]:
# Dot plot of a marker panel across every Tabula Sapiens cell class, read from
# .X rather than .raw (use_raw=False) and saved to the figure directory.
# NOTE(review): the file name suggests these are uterine smooth muscle markers.
ts_marker_panel = ['ACTA2', 'TAGLN', 'OXTR', 'DPP6', 'LINGO2']
sc.pl.dotplot(
    ts_adata,
    var_names=ts_marker_panel,
    groupby='cell_ontology_class',
    use_raw=False,
    save='TS_usm_markers.svg',
)