In [None]:
import pandas as pd
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
from maayanlab_bioinformatics.normalization import zscore_normalize, log2_normalize
from scipy.stats import zscore
import qnorm

In [None]:
def make_umap(rna_df, n_neighbors=15, min_dist=0.01, resolution=1, n_top_genes=5000):
    """Draw a Leiden-coloured UMAP of the samples in an expression matrix.

    The matrix is quantile-normalized, log2-normalized, restricted to its
    highest-variance genes, reduced with PCA, embedded with UMAP and clustered
    with Leiden. One scatter series per cluster is drawn on the current
    matplotlib axes (``plt.gca()``), together with a legend.

    Parameters
    ----------
    rna_df : pandas.DataFrame
        Expression values with genes as rows (index) and samples as columns.
    n_neighbors : int
        Neighbourhood size forwarded to ``sc.pp.neighbors``.
    min_dist : float
        ``min_dist`` forwarded to ``sc.tl.umap``.
    resolution : float
        ``resolution`` forwarded to ``sc.tl.leiden``.
    n_top_genes : int
        Number of most-variable genes kept before PCA.

    Returns
    -------
    None
        The function only draws; nothing is returned.
    """
    normalized = log2_normalize(qnorm.quantile_normalize(rna_df))
    # Cast the array up front instead of relying on AnnData's deprecated
    # ``dtype=`` constructor argument.
    adata = sc.AnnData(normalized.T.values.astype(float))
    adata.var['gene_names'] = rna_df.index.values
    adata.obs['samples'] = rna_df.columns.values

    # Rank genes by variance (0 = most variable). A single argsort() only
    # gives the ordering (the indices that would sort the genes); applying
    # argsort() to that ordering turns it into each gene's own rank.
    gene_variance = np.var(adata.X, axis=0, dtype="float")
    adata.var['var_rank'] = (-gene_variance).argsort().argsort()
    keep = (adata.var['var_rank'] < n_top_genes).values
    # Copy so that PCA and the later steps write into a real object, not a view.
    adata = adata[:, keep].copy()

    # UMAP
    sc.pp.pca(adata)
    sc.pp.neighbors(adata, n_neighbors=n_neighbors)  # create neighborhood graph
    sc.tl.umap(adata, min_dist=min_dist, alpha=0.1)  # embed umap based on neighborhood graph
    sc.tl.leiden(adata, resolution=resolution)  # clustering

    embedding = adata.obsm['X_umap']
    labels = adata.obs['leiden']
    # Leiden labels are the strings '0', '1', ...; sort them numerically so
    # that 'Cluster 10' follows 'Cluster 9' instead of 'Cluster 1'.
    clusters = sorted(labels.unique(), key=int)
    # plt.cm.get_cmap is gone from recent matplotlib; plt.get_cmap is the
    # equivalent call that still accepts the number of colours.
    cmap = plt.get_cmap('tab20b', len(clusters))

    ax = plt.gca()
    for i, cluster in enumerate(clusters):
        mask = (labels == cluster).values
        ax.scatter(embedding[mask, 0], embedding[mask, 1],
                   color=cmap(i), label=f'Cluster {cluster}', s=10)
    # Pin the legend's upper-left corner just right of the axes so that its
    # position does not depend on where the points happen to fall.
    ax.legend(title='Leiden clusters', bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')

In [None]:
# BR cohort: read out/BR_tumor_counts.tsv and plot its UMAP.
ct = 'BR'
rna_df = pd.read_csv(
    f'out/{ct}_tumor_counts.tsv',
    sep='\t',
    index_col=0,
)
make_umap(rna_df, n_neighbors=10, min_dist=0.01, resolution=0.8)

In [None]:
# CCRCC cohort: read out/CCRCC_tumor_counts.tsv and plot its UMAP.
ct = 'CCRCC'
rna_df = pd.read_csv(
    f'out/{ct}_tumor_counts.tsv',
    sep='\t',
    index_col=0,
)
make_umap(rna_df, n_neighbors=5, min_dist=0.01, resolution=0.5)

In [None]:
# CO cohort: read out/CO_tumor_counts.tsv and plot its UMAP.
ct = 'CO'
rna_df = pd.read_csv(
    f'out/{ct}_tumor_counts.tsv',
    sep='\t',
    index_col=0,
)
make_umap(rna_df, n_neighbors=3, min_dist=0.01, resolution=0.6)

In [None]:
# GBM cohort: read out/GBM_tumor_counts.tsv and plot its UMAP.
ct = 'GBM'
rna_df = pd.read_csv(
    f'out/{ct}_tumor_counts.tsv',
    sep='\t',
    index_col=0,
)
make_umap(rna_df, n_neighbors=3, min_dist=0.01, resolution=0.7)

In [None]:
# HNSCC cohort: read out/HNSCC_tumor_counts.tsv and plot its UMAP.
ct = 'HNSCC'
rna_df = pd.read_csv(
    f'out/{ct}_tumor_counts.tsv',
    sep='\t',
    index_col=0,
)
make_umap(rna_df, n_neighbors=3, min_dist=0.01, resolution=0.5)

In [None]:
# LSCC cohort: read out/LSCC_tumor_counts.tsv and plot its UMAP.
ct = 'LSCC'
rna_df = pd.read_csv(
    f'out/{ct}_tumor_counts.tsv',
    sep='\t',
    index_col=0,
)
make_umap(rna_df, n_neighbors=3, min_dist=0.01, resolution=0.5)

In [None]:
# LUAD cohort: read out/LUAD_tumor_counts.tsv and plot its UMAP.
ct = 'LUAD'
rna_df = pd.read_csv(
    f'out/{ct}_tumor_counts.tsv',
    sep='\t',
    index_col=0,
)
make_umap(rna_df, n_neighbors=3, min_dist=0.01, resolution=0.5)

In [None]:
# OV cohort: read out/OV_tumor_counts.tsv and plot its UMAP.
ct = 'OV'
rna_df = pd.read_csv(
    f'out/{ct}_tumor_counts.tsv',
    sep='\t',
    index_col=0,
)
make_umap(rna_df, n_neighbors=3, min_dist=0.01, resolution=0.8)

In [None]:
# PDAC cohort: read out/PDAC_tumor_counts.tsv and plot its UMAP.
ct = 'PDAC'
rna_df = pd.read_csv(
    f'out/{ct}_tumor_counts.tsv',
    sep='\t',
    index_col=0,
)
make_umap(rna_df, n_neighbors=3, min_dist=0.01, resolution=0.5)

In [None]:
# UCEC cohort: read out/UCEC_tumor_counts.tsv and plot its UMAP.
ct = 'UCEC'
rna_df = pd.read_csv(
    f'out/{ct}_tumor_counts.tsv',
    sep='\t',
    index_col=0,
)
make_umap(rna_df, n_neighbors=3, min_dist=0.01, resolution=0.5)