# 05 - Downstream Integration

In [1]:
from libraries import *
from parameters import *

In [2]:
# Load the rpy2 IPython extension (registers the %R / %%R cell magics).
%load_ext rpy2.ipython

In [3]:
# NOTE(review): the return value of os.getcwd() is discarded (it is not the
# last expression of the cell), so this call has no visible effect.
os.getcwd()
# Make projectDir the working directory for the rest of the notebook.
# projectDir is not defined here; presumably it comes from the star-imports
# above — verify.
os.chdir(projectDir)

In [None]:
# Load the AnnData object stored at par_save_filename_3.
adata = sc.read(par_save_filename_3)

## Normalization and log transform

In [None]:
# Keep a copy of the current matrix in a layer before it is transformed
# (assumes adata.X still holds raw counts at this point — TODO confirm).
adata.layers['counts'] = adata.X.copy()

# Normalise every cell to par_preprocessing_target_sum total counts, then log1p.
sc.pp.normalize_total(adata, target_sum=par_preprocessing_target_sum)
sc.pp.log1p(adata)
# Snapshot the log-normalised data in .raw before adata.X is scaled later on.
adata.raw = adata

In [None]:
# Flag the par_downstream_n_top_genes most variable genes in adata.var.
sc.pp.highly_variable_genes(adata, n_top_genes=par_downstream_n_top_genes)

In [None]:
# Scale each gene in adata.X, clipping scaled values above 10.
sc.pp.scale(adata, max_value=10)

In [None]:
# Cap the requested number of principal components so the ARPACK solver gets
# a valid value (it needs n_comps strictly below both matrix dimensions).
# sc.pp.pca works on the highly variable genes when that flag is present in
# adata.var, so the gene-side bound is the HVG count rather than adata.n_vars.
if 'highly_variable' in adata.var:
    n_pca_features = int(adata.var['highly_variable'].sum())
else:
    n_pca_features = adata.n_vars
n_pcs = min(adata.n_obs - 1, n_pca_features - 1, par_downstream_n_pcs)
n_pcs

## PCA, UMAP embedding and Leiden clustering

In [None]:
# PCA with n_pcs components, using the ARPACK SVD solver.
sc.pp.pca(adata, n_comps=n_pcs, svd_solver='arpack')

In [None]:
# Build the neighbourhood graph; neighbourhood size and distance metric come
# from the par_downstream_* settings.
sc.pp.neighbors(adata, n_neighbors=par_downstream_n_neighbors, metric=par_downstream_neighbor_metric)

In [None]:
# Compute the UMAP embedding from the neighbourhood graph.
sc.tl.umap(adata)

In [None]:
# Leiden clustering at the configured resolution; the plots below read the
# resulting labels from adata.obs['leiden'].
sc.tl.leiden(adata, resolution=par_leiden_clustering_resolution)

In [None]:
# Compute a diffusion-map embedding.
# NOTE(review): it is not used by any later cell visible in this notebook.
sc.tl.diffmap(adata)

In [None]:
# Point size that shrinks with the number of cells, but never drops below 2.
# NOTE(review): no visible cell reads this variable; the cluster UMAP below
# hard-codes size=0.3 instead.
umap_point_size = np.maximum(120000/adata.n_obs, 2)

In [None]:
# UMAP coloured by Leiden cluster, drawn on a dedicated 4x4 inch axis with the
# cluster labels placed on the data. show=False keeps the figure open so it is
# rendered as the cell output; the trailing ';' suppresses the text repr.
f, ax = plt.subplots(figsize=(4, 4))
sc.pl.umap(adata, color='leiden', 
           legend_loc='on data', 
           legend_fontoutline=3, 
           legend_fontsize=14, 
           legend_fontweight='normal', 
           title='Clusters', 
           ax=ax, 
           show=False, 
           size=0.3);

In [None]:
# Hierarchical clustering of the Leiden clusters; the result is reused by the
# plots below that pass dendrogram=True.
sc.tl.dendrogram(adata, groupby='leiden')

In [None]:
# Plot the dendrogram of the Leiden clusters.
sc.pl.dendrogram(adata, groupby='leiden')

## Identify and plot cluster marker genes

In [None]:
# Rank marker genes per Leiden cluster (t-test with overestimated variance),
# keeping the top 2000 genes for each cluster in adata.uns['rank_genes_groups'].
sc.tl.rank_genes_groups(adata, groupby="leiden", n_genes=2000, method="t-test_overestim_var")

In [None]:
# Union of the 10 top-ranked genes of every cluster, de-duplicated and sorted
# (np.unique returns the sorted unique values).
markerGenes = np.unique(
    pd.DataFrame(adata.uns['rank_genes_groups']['names'])
    .iloc[:10]
    .to_numpy()
    .ravel()
)

In [None]:
# Mean scaled expression (adata.X, not .raw) of the marker genes per cluster,
# genes on rows, colour range clipped to [-3, 3].
sc.pl.matrixplot(
    adata,
    markerGenes,
    groupby='leiden',
    dendrogram=True,
    use_raw=False,
    vmin=-3,
    vmax=3,
    cmap='bwr',
    swap_axes=True,
    figsize=(10, 14),
)

In [None]:
# Per-cell heatmap of the marker genes from the scaled matrix (use_raw=False),
# cells grouped by Leiden cluster and ordered by the dendrogram; colour range
# clipped to [-3, 3].
sc.pl.heatmap(adata, markerGenes, 
              groupby='leiden', 
              figsize=(20, 10),
              use_raw=False,
              vmin=-3, 
              vmax=3,
              cmap='bwr', 
              show_gene_labels=True, 
              dendrogram=True, 
              swap_axes=False)

In [None]:
# Same marker genes with default colour settings and genes on rows.
# use_raw is not passed here, unlike the heatmap above — presumably so that
# scanpy falls back to adata.raw (log-normalised values); verify.
sc.pl.heatmap(adata,markerGenes, 
              groupby='leiden', 
              show_gene_labels=True, 
              swap_axes=True)

In [None]:
# Dot plot of the marker genes per Leiden cluster.
sc.pl.dotplot(adata, markerGenes, groupby='leiden')

In [None]:
# Matrix plot of the 10 top-ranked genes per cluster taken from the stored
# rank_genes_groups result; standard_scale='var' rescales each gene.
sc.pl.rank_genes_groups_matrixplot(adata, 
                                   n_genes=10, 
                                   standard_scale='var', 
                                   cmap='Blues')

## Scoring cell cycle signatures

In [None]:
# Cell-cycle gene list hosted in the theislab/scanpy_usage repository.
gene_list_url = 'https://raw.githubusercontent.com/theislab/scanpy_usage/master/180209_cell_cycle/data/regev_lab_cell_cycle_genes.txt'

# Read one gene symbol per line. The context manager closes the HTTP response
# as soon as the list is built. capitalize() rewrites e.g. "MCM5" as "Mcm5";
# presumably to match the symbol style in adata.var_names — verify.
with urlopen(gene_list_url) as response:
    cell_cycle_genes = [
        line.strip().decode('utf-8').capitalize() for line in response
    ]

# Split as in the scanpy cell-cycle example: the first 43 entries are used as
# S-phase genes, the remainder as G2/M genes.
s_genes = cell_cycle_genes[:43]
g2m_genes = cell_cycle_genes[43:]

# Adds S_score, G2M_score and phase to adata.obs (plotted in the next cells).
sc.tl.score_genes_cell_cycle(adata, s_genes=s_genes, g2m_genes=g2m_genes)

In [None]:
# UMAP coloured by S-phase score; colour scale capped at 0.5.
sc.pl.umap(adata, color='S_score', vmax=0.5, cmap='Reds')

In [None]:
# UMAP coloured by G2/M-phase score; colour scale capped at 0.5.
sc.pl.umap(adata, color='G2M_score', vmax=0.5, cmap='Reds')

In [None]:
# UMAP coloured by assigned cell-cycle phase. 'phase' is a categorical label,
# so the continuous-colour options used for the score plots (vmax, cmap) have
# no effect here and are left out.
sc.pl.umap(adata, color='phase')

## Percent mitochondrial UMIs

In [None]:
# Four QC panels side by side.
f, ax = plt.subplots(1, 4, figsize=(20, 4))

# Panel 1: genes vs UMIs per cell (log-log), coloured by mitochondrial fraction.
sc.pl.scatter(adata,
              x='n_umis',
              y='n_genes',
              color='mt_frac',
              ax=ax[0],
              show=False,
              right_margin=2.85,
              title='Percent mitochondrial UMIs')
ax[0].set_xscale('log')
ax[0].set_yscale('log')

# Panel 2: mitochondrial fraction vs UMIs per cell (log x-axis).
sc.pl.scatter(adata, x='n_umis', y='mt_frac', ax=ax[1], show=False)
ax[1].set_xscale('log')
plt.subplots_adjust(wspace=0.5)

# Panel 3: per-sample distribution of log10 UMI counts.
sc.pl.violin(adata, keys='log10_n_umis', groupby='sample_name', rotation=90, ax=ax[2], show=False)
# Panel 4: per-sample distribution of the mitochondrial fraction. This panel
# used to repeat the log10_n_umis violin of panel 3 verbatim.
# NOTE(review): 'mt_frac' was chosen to match this section's topic — confirm
# it is the intended key.
sc.pl.violin(adata, keys='mt_frac', groupby='sample_name', rotation=90, ax=ax[3], show=False)

In [None]:
# Save the processed AnnData object to par_save_filename_4.
adata.write(par_save_filename_4)

## Save the single and multiple KO cells as two separate anndata objects

In [5]:
# Feature-barcode columns of adata.obs (column names are listed in
# adata.uns['feature_barcode_names']), reduced to presence/absence.
# .copy() makes fBarMat an independent frame, so the in-place edit below
# cannot raise pandas' SettingWithCopyWarning and does not depend on whether
# the column selection is a view or a copy of adata.obs.
fBarMat = adata.obs[adata.uns['feature_barcode_names']].copy()
fBarMat[fBarMat > 0] = 1

In [10]:
# KONo: number of distinct feature barcodes detected in each cell.
adata.obs = adata.obs.assign(KONo=fBarMat.sum(axis=1).to_numpy())
# KOType: "MultipleKO" when more than one barcode is present, else "SingleKO".
# NOTE(review): cells with KONo == 0 are also labelled "SingleKO" — confirm
# that such cells cannot occur at this stage.
adata.obs = adata.obs.assign(
    KOType=np.where(adata.obs['KONo'] > 1, "MultipleKO", "SingleKO")
)

In [11]:
# Number of cells in each KOType category.
adata.obs["KOType"].value_counts()

SingleKO      341664
MultipleKO    177871
Name: KOType, dtype: int64

In [12]:
# Binarise the feature-barcode columns directly in adata.obs: every positive
# value becomes 1, all other values are left untouched.
for barcode in fBarMat.columns:
    is_detected = adata.obs[barcode] > 0
    adata.obs.loc[is_detected, barcode] = 1

In [13]:
# Split the cells by KOType. Indexing an AnnData object with a boolean mask
# selects observations and keeps all variables.
adataSingle = adata[adata.obs['KOType'] == "SingleKO"]
adataMultiple = adata[adata.obs['KOType'] == "MultipleKO"]

In [None]:
# Save the single-KO subset to par_save_filename_5.
adataSingle.write(par_save_filename_5)

In [None]:
# Save the multiple-KO subset to par_save_filename_6.
adataMultiple.write(par_save_filename_6)