# Single-cell RNA-seq analysis using Python

Adapted from:  
Single-cell best practices  
https://www.sc-best-practices.org/preamble.html

## Dimensionality reduction  
Needs conda env `sc_py_training`. 

In [1]:
import scanpy as sc

# Verbosity 0 — presumably scanpy's quietest logging level; confirm in the scanpy settings docs.
sc.settings.verbosity = 0
# Figure parameters used for the plots in this notebook: 80 dpi, white face colour, no frame.
sc.settings.set_figure_params(
    dpi=80,
    facecolor="white",
    frameon=False,
)

In [None]:
# Load the data from the previous exercise (01)

In [2]:
# Read the h5ad file saved by the previous exercise into `adata`.
adata = sc.read("E-MTAB-6945_feature_selection.h5ad")

In [3]:
# Display the AnnData summary (dimensions plus the obs and var columns).
adata

AnnData object with n_obs × n_vars = 5218 × 35682
    obs: 'age', 'cell_type', 'developmental_stage', 'disease', 'genotype', 'individual', 'organism_part', 'organism', 'sex', 'strain', 'genotype.1', 'age_ontology', 'cell_type_ontology', 'developmental_stage_ontology', 'disease_ontology', 'genotype_ontology', 'individual_ontology', 'organism_part_ontology', 'organism_ontology', 'sex_ontology', 'strain_ontology', 'genotype_ontology.1'
    var: 'gene_symbols', 'chromosome', 'start', 'end', 'width', 'source', 'type', 'score', 'phase', 'gene_version', 'gene_name', 'gene_source', 'gene_biotype', 'mito'

In [None]:
# Which layer are we going to use for dimensionality reduction (PCA)?

In [None]:
# Exercise placeholder: replace "INSERT_LAYER" with the name of the layer you
# decided to use, then run the cell to make that layer the main matrix X.
adata.X = adata.layers["INSERT_LAYER"]

#### 5.1 PCA

In [None]:
# sc.pp.pca's `use_highly_variable=True` looks for the column
# adata.var["highly_variable"], so expose the "highly_deviant" selection under
# that name before running PCA.
# NOTE(review): this needs adata.var["highly_deviant"] to exist — presumably it
# was created in the feature-selection exercise; verify on the loaded object.
adata.var["highly_variable"] = adata.var["highly_deviant"]
# NOTE(review): newer scanpy releases deprecate `use_highly_variable` in favour
# of `mask_var="highly_variable"` — confirm against the scanpy version pinned in
# the `sc_py_training` environment before changing the call.
sc.pp.pca(adata, svd_solver="arpack", use_highly_variable=True)

In [None]:
# Visualise the PCA

In [None]:
# Scatter plot of the PCA embedding, coloured by the "total_counts" key.
sc.pl.pca_scatter(adata, color="total_counts")

#### 5.2 t-SNE

In [None]:
# Compute t-SNE on the PCA representation ("X_pca") rather than on X itself.
sc.tl.tsne(adata, use_rep="X_pca")

In [None]:
# Plot the t-SNE embedding, coloured by the "total_counts" key.
sc.pl.tsne(adata, color="total_counts")

#### 5.3 UMAP

In [None]:
# Build the neighbourhood graph with default parameters, then compute the UMAP embedding.
sc.pp.neighbors(adata)
sc.tl.umap(adata)

In [None]:
# Plot the UMAP embedding, coloured by the "total_counts" key.
sc.pl.umap(adata, color="total_counts")

#### 5.4 Inspecting quality control metrics 

In [None]:
# One UMAP panel per quality-control key: library size, mitochondrial
# percentage, and the scDblFinder score and class.
qc_keys = ["total_counts", "pct_counts_mito", "scDblFinder_score", "scDblFinder_class"]
sc.pl.umap(adata, color=qc_keys)

In [None]:
# adata.write("E-MTAB-6945_dimensionality_reduction.h5ad")

In [None]:
# Display the AnnData summary to see what the steps above added to the object.
adata

In [None]:
# Do you observe cells on the UMAP plot that should be removed?

### 6. Clustering

In [None]:
import scanpy as sc

# Same scanpy configuration as at the start of the notebook, repeated so the
# clustering section can be run on its own.
sc.settings.verbosity = 0
sc.settings.set_figure_params(
    dpi=80,
    facecolor="white",
    frameon=False,
)

In [None]:
# del adata
# (The `del` above is left commented out — presumably for dropping the object
# from the previous section before reloading; confirm whether it is needed.)
# Load E-MTAB-6945_log1p_normalization.h5ad for the clustering section and
# display its summary.
adata = sc.read("E-MTAB-6945_log1p_normalization.h5ad")
adata

In [None]:
# Keep the current contents of X accessible under the "counts_norm" layer
# before X is replaced below.
adata.layers["counts_norm"] = adata.X

# Use the scran_normalization layer as the new main data layer, X
adata.X = adata.layers["scran_normalization"]
# Display the updated object summary.
adata

In [None]:
# Build the neighbourhood graph using 30 principal components (n_pcs=30), then
# compute the UMAP embedding.
# NOTE(review): no sc.pp.pca call appears in this section after the reload —
# confirm that the loaded file already holds a PCA computed on the intended
# layer, or that relying on scanpy's automatic fallback is intended.
sc.pp.neighbors(adata, n_pcs=30)
sc.tl.umap(adata)

In [None]:
# Leiden clustering with scanpy's default parameters.
sc.tl.leiden(adata)

In [None]:
# Run Leiden clustering at three resolutions, storing each result under its own key.
for obs_key, res in (
    ("leiden_res0_25", 0.25),
    ("leiden_res0_5", 0.5),
    ("leiden_res1", 1.0),
):
    sc.tl.leiden(adata, key_added=obs_key, resolution=res)
# Display the object summary with the new clustering keys.
adata

In [None]:
# One UMAP panel per Leiden clustering; legend_loc="on data" draws the cluster
# labels on the embedding instead of in a separate legend.
sc.pl.umap(
    adata,
    color=["leiden_res0_25", "leiden_res0_5", "leiden_res1"],
    legend_loc="on data",
)

#### Question:  
1. Which Leiden resolution parameter gives a clustering that coincides best with the UMAP projections?