In [None]:
'''
Goal: Create figures for initial rnascope experiments

Author: Carsten Knutsen
Date: 240117
conda_env: uterus_sc
Notes: Create figures for initial rnascope experiments
'''

In [None]:
import scanpy as sc

# Only needed for processing
import numpy as np
import pandas as pd
import os 
import scanpy.external as sce
pd.set_option('display.max_rows', 500)
import liana as li

# Root folder that receives every figure written by this notebook.
output = '/home/carsten/alvira_bioinformatics/uterus/data/pilot/240117_rnascope'

# Configure scanpy figures (300 dpi, PNG), make sure the folder exists, and
# point scanpy's save directory at it.
sc.set_figure_params(dpi=300, format="png")
os.makedirs(output, exist_ok=True)
sc.settings.figdir = output


In [None]:
# Read the processed, cell-typed uterus dataset from disk.
adata_path = '/home/carsten/alvira_bioinformatics/uterus/data/single_cell_files/scanpy_files/uterus_processed_celltyped.gz.h5ad'
adata = sc.read(adata_path)

# Snapshot the loaded object into `.raw`.
# NOTE(review): presumably so plotting/ranking use these values by default — confirm.
adata.raw = adata


In [None]:
def produce_rnascope_graphs(adata,title,obs,groups,genes,output):
    """Save the UMAP and dot plots used to pick RNAscope probes.

    Three figures are written into ``output``:

    * a UMAP of all cells coloured by each entry of ``genes``,
    * a dot plot of ``genes`` grouped by ``obs`` over all cells,
    * the same dot plot restricted to cells whose ``obs`` value is in ``groups``.

    Parameters
    ----------
    adata
        AnnData object to plot.
    title
        Label embedded in the saved file names.
    obs
        Column of ``adata.obs`` used for grouping and for the subset.
    groups
        List of values of ``adata.obs[obs]`` kept for the restricted dot plot.
    genes
        Genes to plot.
    output
        Directory for the figures; created if missing.

    Notes
    -----
    ``sc.settings.figdir`` is set to ``output`` and stays that way after the
    call. Values in ``groups`` that do not occur in ``adata.obs[obs]`` produce
    a warning instead of being dropped silently, and the restricted dot plot
    is skipped (with a warning) when no cell matches.
    """
    import warnings

    os.makedirs(output, exist_ok=True)
    sc.settings.figdir = output

    # A misspelled label would otherwise just vanish from the subset plot.
    present = set(adata.obs[obs].unique())
    missing = [group for group in groups if group not in present]
    if missing:
        warnings.warn(
            f"{title}: {missing} not found in adata.obs[{obs!r}]; "
            "these groups will be absent from the subset dot plot",
            stacklevel=2,
        )

    # Figures over the full dataset.
    sc.pl.umap(adata,color=genes,save=f'_{title}_all')
    sc.pl.dotplot(adata,genes,groupby=[obs],save=f'_{title}_all.png')

    # Figure restricted to the requested groups.
    adata_cts = adata[adata.obs[obs].isin(groups)]
    if adata_cts.n_obs == 0:
        warnings.warn(
            f"{title}: no cells match {list(groups)} in adata.obs[{obs!r}]; "
            "skipping the subset dot plot",
            stacklevel=2,
        )
        return
    # 'specifc' (sic) is kept so file names match figures already produced.
    sc.pl.dotplot(adata_cts,genes,groupby=[obs],save=f'_{title}_specifc_celltypes.png')

In [None]:
# RNAscope panels: for each experiment, the 'Cell Subtype' labels to focus on
# and the candidate probe genes.
experiments = {
    'Smooth muscle': {
        'celltypes': ['Uterine smooth muscle', 'Vascular smooth muscle'],
        'genes': ['ACTA2', 'DPP6', 'NOTCH3'],
    },
    'Endothelial': {
        # Spelling must match the 'Cell Subtype' labels exactly; the previous
        # 'Macrovasular' matched no cells and dropped the group from the plot.
        'celltypes': ['Macrovascular', 'Capillary', 'Lymphatic EC'],
        'genes': ['NOSTRIN', 'GJA5', 'PROX1'],
    },
    'Fibroblast': {
        'celltypes': ['Matrix fibroblast', 'Vessel fibroblast'],
        'genes': ['PDGFRA', 'RORB', 'CCDC80'],
    },
}


In [None]:
# Produce one figure set per experiment, each saved in a sub-folder of
# `output` named after the experiment.
for experiment, spec in experiments.items():
    produce_rnascope_graphs(
        adata,
        experiment,
        'Cell Subtype',
        spec['celltypes'],
        spec['genes'],
        f'{output}/{experiment}',
    )

In [None]:
# Macrovascular subclustering: re-embed and re-cluster only the macrovascular
# cells, then save marker and UMAP figures into a dedicated sub-folder.
output = '/home/carsten/alvira_bioinformatics/uterus/data/pilot/240117_rnascope/Macrovascular_subcluster'
os.makedirs(output, exist_ok=True)
sc.set_figure_params(dpi=300, format="png")
sc.settings.figdir = output

# Take an explicit copy: the steps below write results into the object, and
# doing that on a view of `adata` forces an implicit copy and emits warnings.
lin_adata = adata[adata.obs['Cell Subtype']=='Macrovascular'].copy()

# Highly variable genes, selected with Patient as the batch key.
sc.pp.highly_variable_genes(lin_adata, batch_key="Patient")

# PCA on the variable genes, Harmony correction across patients, then the
# neighbour graph on the corrected components.
sc.pp.pca(lin_adata, use_highly_variable=True)
sce.pp.harmony_integrate(lin_adata, key="Patient", max_iter_harmony=50)
sc.pp.neighbors(lin_adata, use_rep='X_pca_harmony')

sc.tl.leiden(lin_adata, key_added="leiden_Macrovascular")
sc.tl.umap(lin_adata, min_dist=0.1)
sc.tl.rank_genes_groups(lin_adata, "leiden_Macrovascular", method="wilcoxon")
print(lin_adata.obs["leiden_Macrovascular"].cat.categories)

# Spread a budget of about 150 genes over the clusters, never fewer than one
# gene per cluster.
n_clusters = len(lin_adata.obs["leiden_Macrovascular"].unique())
sc.pl.rank_genes_groups_dotplot(
    lin_adata,
    groupby="leiden_Macrovascular",
    n_genes=max(1, 150 // n_clusters),
    show=False,
    save="Macrovascular_leiden_markers.png",
)

# One saved UMAP per gene / annotation; the colour key becomes the file suffix.
for color in ['GJA5','VEGFC','SLC6A2','EBF1','COL8A1','leiden_Macrovascular','Patient','GroupContract','Cell Subtype']:
    sc.pl.umap(lin_adata, color = color, save=color)

In [None]:
# Export the ranked-gene table of every macrovascular Leiden cluster to its
# own worksheet of a single workbook.
workbook_path = f"{output}/art_leiden_markers.xlsx"
with pd.ExcelWriter(workbook_path, engine="xlsxwriter") as writer:
    for ct in lin_adata.obs["leiden_Macrovascular"].unique():
        markers = sc.get.rank_genes_groups_df(lin_adata, key="rank_genes_groups", group=ct)
        # Sheet names are truncated to 31 characters.
        sheet = f"{ct} v rest"[:31]
        markers.to_excel(writer, sheet_name=sheet)

In [None]:
# Show the summary of the full dataset object as the cell output.
adata

In [None]:
# Uterine smooth muscle subclustering: re-embed and re-cluster only those
# cells, then save marker and UMAP figures into a dedicated sub-folder.
output = '/home/carsten/alvira_bioinformatics/uterus/data/pilot/240117_rnascope/usm_subcluster'
os.makedirs(output, exist_ok=True)
sc.set_figure_params(dpi=300, format="png")
sc.settings.figdir = output

# Take an explicit copy: the steps below write results into the object, and
# doing that on a view of `adata` forces an implicit copy and emits warnings.
lin_adata = adata[adata.obs['Cell Subtype']=='Uterine smooth muscle'].copy()

# Highly variable genes, selected with Patient as the batch key.
sc.pp.highly_variable_genes(lin_adata, batch_key="Patient")

# PCA on the variable genes, Harmony correction across patients, then the
# neighbour graph on the corrected components.
sc.pp.pca(lin_adata, use_highly_variable=True)
sce.pp.harmony_integrate(lin_adata, key="Patient", max_iter_harmony=50)
sc.pp.neighbors(lin_adata, use_rep='X_pca_harmony')

sc.tl.leiden(lin_adata, key_added="leiden_usm")
sc.tl.umap(lin_adata, min_dist=0.1)
sc.tl.rank_genes_groups(lin_adata, "leiden_usm", method="wilcoxon")
print(lin_adata.obs["leiden_usm"].cat.categories)

# Spread a budget of about 50 genes over the clusters, never fewer than one
# gene per cluster.
n_clusters = len(lin_adata.obs["leiden_usm"].unique())
sc.pl.rank_genes_groups_dotplot(
    lin_adata,
    groupby="leiden_usm",
    n_genes=max(1, 50 // n_clusters),
    show=False,
    save="usm_leiden_markers.png",
)

# One saved UMAP per gene / annotation; the colour key becomes the file suffix.
for color in ['MKI67','leiden_usm','Patient','GroupContract','Cell Subtype']:
    sc.pl.umap(lin_adata, color = color, save=color)

In [None]:
# Number of cells assigned to each `leiden_usm` cluster.
lin_adata.obs['leiden_usm'].value_counts()

In [None]:
# Cell counts per patient within each `leiden_usm` cluster, to see how the
# patients are distributed across clusters.
lin_adata.obs.groupby('leiden_usm')['Patient'].value_counts()

In [None]:
# UMAP of the current `lin_adata` subset coloured by ATAD2; shown inline only
# (no `save` argument). Which subset this is depends on the last subclustering
# cell that was run.
sc.pl.umap(lin_adata, color ='ATAD2')

In [None]:
# UMAP of the current `lin_adata` subset coloured by MKI67; shown inline only
# (no `save` argument).
sc.pl.umap(lin_adata, color ='MKI67')

In [None]:
# Export the ranked-gene table of every `leiden_usm` cluster to its own
# worksheet of a single workbook.
workbook_path = f"{output}/usm_leiden_markers.xlsx"
with pd.ExcelWriter(workbook_path, engine="xlsxwriter") as writer:
    for ct in lin_adata.obs["leiden_usm"].unique():
        markers = sc.get.rank_genes_groups_df(lin_adata, key="rank_genes_groups", group=ct)
        # Sheet names are truncated to 31 characters.
        sheet = f"{ct} v rest"[:31]
        markers.to_excel(writer, sheet_name=sheet)

In [None]:
# Vascular smooth muscle subclustering: re-embed and re-cluster only those
# cells, then save marker and UMAP figures into a dedicated sub-folder.
output = '/home/carsten/alvira_bioinformatics/uterus/data/pilot/240117_rnascope/vsm_subcluster'
os.makedirs(output, exist_ok=True)
sc.set_figure_params(dpi=300, format="png")
sc.settings.figdir = output

# Take an explicit copy: the steps below write results into the object, and
# doing that on a view of `adata` forces an implicit copy and emits warnings.
lin_adata = adata[adata.obs['Cell Subtype']=='Vascular smooth muscle'].copy()

# Highly variable genes, selected with Patient as the batch key.
sc.pp.highly_variable_genes(lin_adata, batch_key="Patient")

# PCA on the variable genes, Harmony correction across patients, then the
# neighbour graph on the corrected components.
sc.pp.pca(lin_adata, use_highly_variable=True)
sce.pp.harmony_integrate(lin_adata, key="Patient", max_iter_harmony=50)
sc.pp.neighbors(lin_adata, use_rep='X_pca_harmony')

sc.tl.leiden(lin_adata, key_added="leiden_vsm")
sc.tl.umap(lin_adata, min_dist=0.1)
sc.tl.rank_genes_groups(lin_adata, "leiden_vsm", method="wilcoxon")
print(lin_adata.obs["leiden_vsm"].cat.categories)

# Spread a budget of about 50 genes over the clusters, never fewer than one
# gene per cluster.
n_clusters = len(lin_adata.obs["leiden_vsm"].unique())
sc.pl.rank_genes_groups_dotplot(
    lin_adata,
    groupby="leiden_vsm",
    n_genes=max(1, 50 // n_clusters),
    show=False,
    save="vsm_leiden_markers.png",
)

# One saved UMAP per gene / annotation; the colour key becomes the file suffix.
# RGS6 was listed twice, which rendered and overwrote the same figure; it is
# now plotted once.
for color in ['MKI67','RGS6','ANGPT4','RYR2','leiden_vsm','Patient','GroupContract','Cell Subtype']:
    sc.pl.umap(lin_adata, color = color, save=color)

In [None]:
# Export the ranked-gene table of every `leiden_vsm` cluster to its own
# worksheet of a single workbook.
workbook_path = f"{output}/vsm_leiden_markers.xlsx"
with pd.ExcelWriter(workbook_path, engine="xlsxwriter") as writer:
    for ct in lin_adata.obs["leiden_vsm"].unique():
        markers = sc.get.rank_genes_groups_df(lin_adata, key="rank_genes_groups", group=ct)
        # Sheet names are truncated to 31 characters.
        sheet = f"{ct} v rest"[:31]
        markers.to_excel(writer, sheet_name=sheet)

In [None]:
# RGS6 on the mesenchymal-lineage embedding (cells with Lineage == 'Mesenchymal').
mesenchymal = adata[adata.obs['Lineage']=='Mesenchymal']
sc.pl.embedding(mesenchymal, basis='X_umap_Mesenchymal', color='RGS6')

In [None]:
# MKI67 on the mesenchymal-lineage embedding (cells with Lineage == 'Mesenchymal').
mesenchymal = adata[adata.obs['Lineage']=='Mesenchymal']
sc.pl.embedding(mesenchymal, basis='X_umap_Mesenchymal', color='MKI67')

In [None]:
# FRMD3 and GRIK2, one panel each, on the mesenchymal-lineage embedding.
mesenchymal = adata[adata.obs['Lineage']=='Mesenchymal']
panel = ['FRMD3','GRIK2']
sc.pl.embedding(mesenchymal, basis='X_umap_Mesenchymal', color=panel)

In [None]:
# Gene panel to inspect on the mesenchymal-lineage embedding, one panel per gene.
genes = [
    'RGS6',
    'ZFHX3',
    'AKAP6',
    'FRMD4A',
    'RCAN2',
    'LTBP1',
    'RYR2',
    'ANGPT4',
]

mesenchymal = adata[adata.obs['Lineage']=='Mesenchymal']
sc.pl.embedding(mesenchymal, basis='X_umap_Mesenchymal', color=genes)

In [None]:
# ANGPT4 alone on the mesenchymal-lineage embedding.
mesenchymal = adata[adata.obs['Lineage']=='Mesenchymal']
sc.pl.embedding(mesenchymal, basis='X_umap_Mesenchymal', color=['ANGPT4'])