# Notebook to check the quality of the data after filtration

**Created by :** Srivalli Kolla

**Created on :** 22 April, 2025

**Modified on :** 22 April, 2025

**University of Würzburg**

Env : scanpy (Python 3.12.2)

# Importing Packages

In [None]:
import bbknn
import anndata
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import datetime
import os
from pywaffle import Waffle
import matplotlib.pyplot as plt

In [None]:
# Raise scanpy's logging verbosity to level 3 and print the versions of the
# loaded packages so the run can be reproduced.
sc.settings.verbosity = 3
sc.logging.print_versions()

# Use 300 dpi both for figures shown inline and for figures written to disk.
plt.rcParams.update({'figure.dpi': 300, 'savefig.dpi': 300})

# Date stamp of the current run in day_month_year form (two digits each).
timestamp = f"{datetime.datetime.now():%d_%m_%y}"

# Import Data

In [None]:
# Load the AnnData object from the h5ad file (path is relative to the
# notebook's working directory).
# NOTE(review): the file name suggests this is the output of a basic-QC
# filtering step dated 22_04_25 — confirm it is the intended input.
adata = sc.read_h5ad('../data/acm_raw_basic_qc_filtered_22_04_25.h5ad')
# Bare expression: shows the AnnData summary as the cell output.
adata

## Check whether the data is raw or normalized

In [4]:
def X_is_raw(adata):
    """Return True when every value stored in ``adata.X`` is a whole number.

    Raw count matrices contain only integers (possibly stored with a float
    dtype), whereas normalised or log-transformed matrices contain fractional
    values. The check is done on the individual entries rather than on column
    sums, because fractional entries can add up to a whole number
    (e.g. 0.5 + 0.5) and would then be mistaken for raw counts.

    Parameters
    ----------
    adata
        Any object exposing a ``.X`` attribute that is either a NumPy array
        (or array-like) or a SciPy sparse matrix.

    Returns
    -------
    bool
        ``True`` if all entries are integral (an empty matrix counts as raw),
        ``False`` otherwise, including when the matrix contains NaN.
    """
    # Local import: scipy is not imported at the top of this notebook.
    from scipy.sparse import issparse

    X = adata.X
    if issparse(X):
        # Implicit zeros are integral by definition, so only the explicitly
        # stored entries need to be inspected. CSR/CSC/COO expose them as a
        # flat ``.data`` array; other formats are converted to COO first.
        if X.format not in ("csr", "csc", "coo"):
            X = X.tocoo()
        values = X.data
    else:
        values = np.asarray(X)

    # A value is integral exactly when its remainder modulo 1 is zero.
    return bool(np.all(np.mod(values, 1) == 0))

In [5]:
# Reset X to the stored raw counts. A copy is assigned on purpose: without it
# X and layers['raw_counts'] would be the same object, and the normalisation /
# log1p steps that follow operate on X and may modify it in place, which would
# silently overwrite the raw-count layer as well.
adata.X = adata.layers['raw_counts'].copy()

In [None]:
# Print the result of the raw-count check on the current X.
# Presumably True at this point, since X was just set from the 'raw_counts'
# layer — verify in the cell output.
print(X_is_raw(adata))

# Data Normalization

In [None]:
# Scale every cell to a total of 1e6 counts (counts per million), then apply
# log1p to the scaled values. Both calls update adata.X.
sc.pp.normalize_total(adata, target_sum=1e6)
sc.pp.log1p(adata)

In [None]:
# Re-run the raw-count check after normalisation; it should now print False
# if X holds non-integer values — verify in the cell output.
print(X_is_raw(adata))

In [9]:
# Keep an independent copy of the current X as a named layer.
# NOTE: at this point X has been through normalize_total AND log1p, so this
# layer holds log1p-transformed CPM values, not plain CPM.
adata.layers['cpm_normalization'] = adata.X.copy()

In [None]:
# Bare expression: shows the AnnData summary (including the newly added layer)
# as the cell output.
adata

# Data visualization

In [None]:
# Reduce X to 50 principal components with the ARPACK solver.
sc.pp.pca(
    adata,
    n_comps=50,
    svd_solver='arpack',
)

# Build the neighbourhood graph on the PCA representation
# (150 neighbours per cell, Minkowski distance).
sc.pp.neighbors(
    adata,
    use_rep="X_pca",
    n_neighbors=150,
    metric='minkowski',
)

In [None]:
# Compute the UMAP embedding from the neighbourhood graph.
sc.tl.umap(adata)

# One panel per key; the keys are presumably columns of adata.obs
# (sample metadata and QC metrics) — confirm against the loaded object.
sc.pl.umap(
    adata,
    color=[
        'sample',
        'Sample_ID',
        'Sex',
        'Genotype',
        'Treatment',
        'Condition',
        'total_counts',
        'n_genes_by_counts',
        'doublet_scores',
        'predicted_doublets',
        'pct_counts_mt',
        'pct_counts_ribo',
        'percent_chrY',
        'XIST-percentage',
        'gender_check_cov',
        'phase',
    ],
    layer='cpm_normalization',
    frameon=False,
    cmap='RdYlBu_r',
)

In [None]:
# UMAP coloured by the expression of selected genes, with values taken from the
# 'cpm_normalization' layer.
# NOTE(review): these look like cell-type marker genes — confirm the intended panel.
sc.pl.umap(
    adata,
    color=[
        'Ttn',
        'Myh6',
        'Dcn',
        'Col1a1',
        'Pecam1',
        'Cdh5',
        'Myh11',
        'Acta2',
    ],
    frameon=False,
    cmap='RdYlBu_r',
    layer='cpm_normalization',
)

# Batch correction - Sample level

In [None]:
# Run BBKNN with 'sample' as the batch key. copy=True returns a new object, so
# `adata` itself keeps its original neighbourhood graph.
bbknn_donor = bbknn.bbknn(
    adata,
    batch_key='sample',
    neighbors_within_batch=4,
    approx=True,
    copy=True,
)
# Bare expression: shows the summary of the corrected object as the cell output.
bbknn_donor

## Data visualization after batch correction

In [None]:
# Recompute the UMAP embedding on the sample-level BBKNN object.
sc.tl.umap(bbknn_donor)

# Same panel of keys as plotted before batch correction; presumably columns of
# .obs (sample metadata and QC metrics) — confirm against the loaded object.
sc.pl.umap(
    bbknn_donor,
    color=[
        'sample',
        'Sample_ID',
        'Sex',
        'Genotype',
        'Treatment',
        'Condition',
        'total_counts',
        'n_genes_by_counts',
        'doublet_scores',
        'predicted_doublets',
        'pct_counts_mt',
        'pct_counts_ribo',
        'percent_chrY',
        'XIST-percentage',
        'gender_check_cov',
        'phase',
    ],
    layer='cpm_normalization',
    frameon=False,
    cmap='RdYlBu_r',
)

In [None]:
# Gene-expression UMAP on the sample-level BBKNN object, with values taken from
# the 'cpm_normalization' layer.
sc.pl.umap(
    bbknn_donor,
    color=[
        'Ttn',
        'Myh6',
        'Dcn',
        'Col1a1',
        'Pecam1',
        'Cdh5',
        'Myh11',
        'Acta2',
    ],
    frameon=False,
    cmap='RdYlBu_r',
    layer='cpm_normalization',
)

# Batch correction - Treatment level

In [None]:
# Run BBKNN again on the uncorrected `adata`, this time with 'Treatment' as the
# batch key; copy=True returns a new object.
# NOTE(review): if 'Treatment' is a biological variable of interest, balancing
# neighbours across it may remove real signal — confirm this is intended.
bbknn_donor2 = bbknn.bbknn(
    adata,
    batch_key='Treatment',
    neighbors_within_batch=4,
    approx=True,
    copy=True,
)
# Bare expression: shows the summary of the corrected object as the cell output.
bbknn_donor2

## Data visualization after batch correction

In [None]:
# Recompute the UMAP embedding on the Treatment-level BBKNN object.
sc.tl.umap(bbknn_donor2)

# Same panel of keys as plotted before batch correction; presumably columns of
# .obs (sample metadata and QC metrics) — confirm against the loaded object.
sc.pl.umap(
    bbknn_donor2,
    color=[
        'sample',
        'Sample_ID',
        'Sex',
        'Genotype',
        'Treatment',
        'Condition',
        'total_counts',
        'n_genes_by_counts',
        'doublet_scores',
        'predicted_doublets',
        'pct_counts_mt',
        'pct_counts_ribo',
        'percent_chrY',
        'XIST-percentage',
        'gender_check_cov',
        'phase',
    ],
    layer='cpm_normalization',
    frameon=False,
    cmap='RdYlBu_r',
)

In [None]:
# Gene-expression UMAP on the Treatment-level BBKNN object, with values taken
# from the 'cpm_normalization' layer.
sc.pl.umap(
    bbknn_donor2,
    color=[
        'Ttn',
        'Myh6',
        'Dcn',
        'Col1a1',
        'Pecam1',
        'Cdh5',
        'Myh11',
        'Acta2',
    ],
    frameon=False,
    cmap='RdYlBu_r',
    layer='cpm_normalization',
)