In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
import infercnvpy as cnv
import os
from anndata import AnnData
import matplotlib.pyplot as plt

In [None]:
# Dataset layout: every value keeps its trailing '/' because later cells build
# file paths by plain string concatenation.
cancer = 'ovarian/'
root = '/cluster/dataset/boeva/scRNAdata/'
annotationsPath = f'{root}annotations/{cancer}'
samplesPath = f'{root}preprocessed/{cancer}2022-01-03_10-58-47/samples/'

In [None]:
sampleIds=['OMAJORO','ONWLE']
sampleId=sampleIds[1]
# Annotation files to score. os.listdir returns entries in arbitrary order, so
# the list is sorted to give a reproducible panel order. Filtering (rather than
# list.remove) also avoids a ValueError when 'cc_genes_2.csv' is not present.
annotations=sorted(
    fileName for fileName in os.listdir(annotationsPath)
    if fileName!='cc_genes_2.csv'
)

In [None]:
# Load the AnnData file of the selected sample.
adata = sc.read_h5ad(f'{samplesPath}{sampleId}_adata.h5ad')

In [None]:
def embeddings_counts(adata: AnnData) -> None:
    """Run the scanpy embedding chain on ``adata``: PCA, neighbours, UMAP.

    The PCA requests at most 200 components, and never more than one less
    than the smaller dimension of ``adata``. The return values of the scanpy
    calls are discarded; nothing is returned.
    """
    smallest_dim = np.min(adata.shape)
    component_count = np.min([200, smallest_dim - 1])

    sc.tl.pca(adata, n_comps=component_count)
    sc.pp.neighbors(adata)
    sc.tl.umap(adata)

In [None]:
def embeddings_cnv(adata: AnnData) -> None:
    """Run the infercnvpy embedding chain on ``adata``: PCA, neighbours, UMAP, Leiden.

    ``cnv.tl.pca`` decomposes the CNV matrix stored in ``adata.obsm['X_cnv']``
    (its default ``use_rep='cnv'``), not ``adata.X``. The number of components
    is therefore capped by the shape of that matrix: at most 200, and never
    more than one less than its smaller dimension.

    The return values of the infercnvpy calls are discarded; nothing is
    returned.

    Raises
    ------
    KeyError
        If ``adata.obsm`` has no ``'X_cnv'`` entry.
    """
    # Bound n_comps by the matrix that is actually decomposed; the gene
    # dimension of adata generally differs from the number of CNV columns.
    cnv_shape = adata.obsm['X_cnv'].shape
    n_comps = min(200, min(cnv_shape) - 1)

    cnv.tl.pca(adata, n_comps=n_comps)
    cnv.pp.neighbors(adata)
    cnv.tl.umap(adata)
    cnv.tl.leiden(adata)

In [None]:
def get_scores(adata, annotaions):
    """Score every annotation gene set on ``adata`` with ``sc.tl.score_genes``.

    For each file name, the CSV at ``annotationsPath + name`` is read and rows
    containing missing values are dropped. The column named after the file
    stem (the text before the first '.') is used as the gene list, and the
    same stem is passed as ``score_name``.

    Parameters
    ----------
    adata
        Object handed to ``sc.tl.score_genes``.
    annotaions
        Iterable of annotation file names. The misspelled parameter name is
        kept so existing callers keep working; the function now iterates over
        this argument instead of the global ``annotations`` list.
    """
    for file_name in annotaions:
        name = file_name.split('.')[0]
        if name == 'cc_genes_2':
            # Decide before reading, so the excluded file is never loaded.
            continue
        gene_list = pd.read_csv(annotationsPath + file_name).dropna()
        sc.tl.score_genes(adata, np.array(gene_list[name]), score_name=name)

In [None]:
def plot_scores_cnv(adata, annotations):
    """Draw one ``cnv.pl.umap`` panel per annotation, coloured by its score.

    Each entry of ``annotations`` is a file name; its stem (the text before
    the first '.') is used as the ``color`` key, and the panel title is the
    stem followed by ``'_cnv'``.

    The grid has four columns and as many rows as needed, so any number of
    annotations can be plotted; twelve annotations give the 3x4, 30x21 inch
    figure. Unused axes in the last row are hidden. Nothing is drawn when
    ``annotations`` is empty.
    """
    score_names = [file_name.split('.')[0] for file_name in annotations]
    if not score_names:
        return

    ncol = 4
    nrow = -(-len(score_names) // ncol)  # ceiling division
    # squeeze=False keeps axs two-dimensional even when there is a single row.
    _, axs = plt.subplots(
        nrows=nrow,
        ncols=ncol,
        figsize=(7.5 * ncol, 7 * nrow),
        squeeze=False,
        gridspec_kw=dict(wspace=0.4),
        dpi=200,
    )

    panels = axs.reshape(-1)
    for ax, score in zip(panels, score_names):
        cnv.pl.umap(adata, color=score, ax=ax, show=False, title=score + '_cnv')
    # Hide the panels that have no annotation to show.
    for ax in panels[len(score_names):]:
        ax.set_visible(False)

In [None]:
def plot_scores_(adata, annotations):
    """Draw one ``sc.pl.umap`` panel per annotation, coloured by its score.

    Each entry of ``annotations`` is a file name; its stem (the text before
    the first '.') is used as the ``color`` key.

    The grid has four columns and as many rows as needed, so any number of
    annotations can be plotted; twelve annotations give the 3x4, 30x21 inch
    figure. Unused axes in the last row are hidden. Nothing is drawn when
    ``annotations`` is empty.
    """
    score_names = [file_name.split('.')[0] for file_name in annotations]
    if not score_names:
        return

    ncol = 4
    nrow = -(-len(score_names) // ncol)  # ceiling division
    # squeeze=False keeps axs two-dimensional even when there is a single row.
    _, axs = plt.subplots(
        nrows=nrow,
        ncols=ncol,
        figsize=(7.5 * ncol, 7 * nrow),
        squeeze=False,
        gridspec_kw=dict(wspace=0.4),
        dpi=200,
    )

    panels = axs.reshape(-1)
    for ax, score in zip(panels, score_names):
        sc.pl.umap(adata, color=score, ax=ax, show=False)
    # Hide the panels that have no annotation to show.
    for ax in panels[len(score_names):]:
        ax.set_visible(False)

In [None]:
# Snapshot the current state of adata in adata.raw before the transformations below.
adata.raw=adata
# Per-cell normalisation (counts_per_cell_after=1e4), then log1p, then scaling.
# NOTE(review): the results are not assigned, so these calls rely on modifying
# adata in place; re-running this cell would apply them again to the already
# transformed data. Restart and run all to get a clean state.
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
sc.pp.log1p(adata)
sc.pp.scale(adata)

In [None]:
# Compute both embeddings on the same object: the infercnvpy (CNV) chain first,
# then the scanpy (expression) chain.
embeddings_cnv(adata)
embeddings_counts(adata)

In [None]:
# Compute one gene-set score per annotation file listed in `annotations`.
get_scores(adata, annotations)

In [None]:
# Expression-based UMAP (no CNV embedding): cell types first, then the score panels.

fig, ax = plt.subplots(figsize=(4.5, 5), dpi=200, gridspec_kw={'wspace': 0.4})
sc.pl.umap(adata, color='celltype', show=False, ax=ax)
plot_scores_(adata, annotations)

In [None]:
# CNV-based UMAP: cell types first, then the score panels.

fig, ax = plt.subplots(figsize=(4.5, 5), dpi=200, gridspec_kw={'wspace': 0.4})
cnv.pl.umap(adata, color='celltype', show=False, ax=ax)
plot_scores_cnv(adata, annotations)