In [None]:
import scanpy as sc
import rpy2
import anndata2ri
import logging
import os

import rpy2.rinterface_lib.callbacks as rcb
import rpy2.robjects as ro
# Raise the level of rpy2's callback logger to ERROR so lower-level messages are not shown.
rcb.logger.setLevel(logging.ERROR)
# Activate the pandas and AnnData converters; presumably this is what lets the `-i` / `-o`
# arguments of the %%R cells below move pandas and sparse-matrix objects between Python and R.
# NOTE(review): `pandas2ri` is reached as an attribute of `rpy2.robjects` without an explicit
# `import rpy2.robjects.pandas2ri`; presumably it is loaded as a side effect of importing
# anndata2ri — confirm.
ro.pandas2ri.activate()
anndata2ri.activate()
# Load the rpy2 IPython extension that provides the %%R cell magic used further down.
%load_ext rpy2.ipython


In [None]:
# Environment used to run this notebook: r_cyno

In [None]:
# Prefix of this notebook; it is prepended to the names of the files written further down.
pre = 'M05'

# Folder that receives the package-version list written in the last cell.
base_package_version_path = os.path.join(
    "D:\\",
    "monkey_IZI",
    "analysis",
    "package_versions",
)

#### Sample Cyno1_6hr_S3

In [None]:
ident = 'Cyno1_6hr_S3'

# Cell Ranger "outs" folder of this sample; the filtered and the raw matrix both live here.
cellranger_outs = os.path.join("D:\\","monkey_IZI","nextflow","output_dir","cyno","cellranger","count",ident,"outs")
path_filtered_counts = os.path.join(cellranger_outs, "filtered_feature_bc_matrix.h5")
path_raw_counts = os.path.join(cellranger_outs, "raw_feature_bc_matrix.h5")

adata_filtered_bc = sc.read_10x_h5(path_filtered_counts)
adata_filtered_bc.var_names_make_unique()
adata_raw_bc = sc.read_10x_h5(path_raw_counts)
adata_raw_bc.var_names_make_unique()

# Quick clustering on a throw-away copy; only the cluster labels are kept for SoupX.
adata_pp = adata_filtered_bc.copy()
# normalize_total replaces the deprecated normalize_per_cell and never drops cells,
# so the labels below stay aligned one-to-one with `cells`.
sc.pp.normalize_total(adata_pp)
sc.pp.log1p(adata_pp)

sc.pp.pca(adata_pp)
sc.pp.neighbors(adata_pp)
sc.tl.leiden(adata_pp, key_added="soupx_groups")

# Preprocess variables for SoupX (handed to the %%R cell below through its -i arguments)
soupx_groups = adata_pp.obs["soupx_groups"]

del adata_pp

cells = adata_filtered_bc.obs_names
genes = adata_filtered_bc.var_names
# Transposed so that genes are on rows and barcodes on columns, as the R cell labels them.
data = adata_filtered_bc.X.T

data_tod = adata_raw_bc.X.T

# The R cell only names the rows of `data`, so check here that both matrices share the same
# genes and that every cell barcode has exactly one cluster label.
assert soupx_groups.index.equals(cells), "cluster labels are not aligned with the cell barcodes"
assert adata_raw_bc.var_names.equals(genes), "raw and filtered matrices list different genes"

del adata_raw_bc

In [None]:
%%R -i data -i data_tod -i genes -i cells -i soupx_groups -o out
library(SoupX)

# Label the table of counts: genes on the rows, cell barcodes on the columns
rownames(data) <- genes
colnames(data) <- cells

# Coerce the table of counts and the table of droplets to sparse matrices
data <- as(data, "sparseMatrix")
data_tod <- as(data_tod, "sparseMatrix")

# Build the SoupChannel object without letting SoupX compute the soup profile itself
soup_channel <- SoupChannel(data_tod, data, calcSoupProfile = FALSE)

# Soup profile taken from the table of counts: per-gene totals and their share of all counts
gene_totals <- rowSums(data)
soup_profile <- data.frame(row.names = rownames(data), est = gene_totals / sum(data), counts = gene_totals)
soup_channel <- setSoupProfile(soup_channel, soup_profile)

# Attach the cluster labels to the SoupChannel
soup_channel <- setClusters(soup_channel, soupx_groups)

# Estimate the contamination fraction (no diagnostic plot)
soup_channel <- autoEstCont(soup_channel, doPlot = FALSE)

# Corrected table of counts, rounded to integers; returned to Python as `out`
out <- adjustCounts(soup_channel, roundToInt = TRUE)

In [None]:
# Keep the uncorrected counts in their own layer. Guarded so that re-running this cell,
# when .X already holds the SoupX result, does not overwrite the raw counts with corrected ones.
if "counts" not in adata_filtered_bc.layers:
    adata_filtered_bc.layers["counts"] = adata_filtered_bc.X
# `out` is the genes x cells matrix exported by the %%R cell; transpose back to cells x genes.
adata_filtered_bc.layers["soupX_counts"] = out.T
adata_filtered_bc.X = adata_filtered_bc.layers["soupX_counts"]

# Output folder and file name both derive from `pre`; create the folder if it is missing.
save_dir = os.path.join("D:\\","monkey_IZI","analysis","anndata_objects", pre)
os.makedirs(save_dir, exist_ok=True)
save_object_path = os.path.join(save_dir, pre + '_' + ident + '_bg_corrected.h5ad')
adata_filtered_bc.write(save_object_path)

#### Sample Cyno1_24hr_S6

In [None]:
ident = 'Cyno1_24hr_S6'

# Cell Ranger "outs" folder of this sample; the filtered and the raw matrix both live here.
cellranger_outs = os.path.join("D:\\","monkey_IZI","nextflow","output_dir","cyno","cellranger","count",ident,"outs")
path_filtered_counts = os.path.join(cellranger_outs, "filtered_feature_bc_matrix.h5")
path_raw_counts = os.path.join(cellranger_outs, "raw_feature_bc_matrix.h5")

adata_filtered_bc = sc.read_10x_h5(path_filtered_counts)
adata_filtered_bc.var_names_make_unique()
adata_raw_bc = sc.read_10x_h5(path_raw_counts)
adata_raw_bc.var_names_make_unique()

# Quick clustering on a throw-away copy; only the cluster labels are kept for SoupX.
adata_pp = adata_filtered_bc.copy()
# normalize_total replaces the deprecated normalize_per_cell and never drops cells,
# so the labels below stay aligned one-to-one with `cells`.
sc.pp.normalize_total(adata_pp)
sc.pp.log1p(adata_pp)

sc.pp.pca(adata_pp)
sc.pp.neighbors(adata_pp)
sc.tl.leiden(adata_pp, key_added="soupx_groups")

# Preprocess variables for SoupX (handed to the %%R cell below through its -i arguments)
soupx_groups = adata_pp.obs["soupx_groups"]

del adata_pp

cells = adata_filtered_bc.obs_names
genes = adata_filtered_bc.var_names
# Transposed so that genes are on rows and barcodes on columns, as the R cell labels them.
data = adata_filtered_bc.X.T

data_tod = adata_raw_bc.X.T

# The R cell only names the rows of `data`, so check here that both matrices share the same
# genes and that every cell barcode has exactly one cluster label.
assert soupx_groups.index.equals(cells), "cluster labels are not aligned with the cell barcodes"
assert adata_raw_bc.var_names.equals(genes), "raw and filtered matrices list different genes"

del adata_raw_bc

In [None]:
%%R -i data -i data_tod -i genes -i cells -i soupx_groups -o out
library(SoupX)

# Label the table of counts: genes on the rows, cell barcodes on the columns
rownames(data) <- genes
colnames(data) <- cells

# Coerce the table of counts and the table of droplets to sparse matrices
data <- as(data, "sparseMatrix")
data_tod <- as(data_tod, "sparseMatrix")

# Build the SoupChannel object without letting SoupX compute the soup profile itself
soup_channel <- SoupChannel(data_tod, data, calcSoupProfile = FALSE)

# Soup profile taken from the table of counts: per-gene totals and their share of all counts
gene_totals <- rowSums(data)
soup_profile <- data.frame(row.names = rownames(data), est = gene_totals / sum(data), counts = gene_totals)
soup_channel <- setSoupProfile(soup_channel, soup_profile)

# Attach the cluster labels to the SoupChannel
soup_channel <- setClusters(soup_channel, soupx_groups)

# Estimate the contamination fraction (no diagnostic plot)
soup_channel <- autoEstCont(soup_channel, doPlot = FALSE)

# Corrected table of counts, rounded to integers; returned to Python as `out`
out <- adjustCounts(soup_channel, roundToInt = TRUE)

In [None]:
# Keep the uncorrected counts in their own layer. Guarded so that re-running this cell,
# when .X already holds the SoupX result, does not overwrite the raw counts with corrected ones.
if "counts" not in adata_filtered_bc.layers:
    adata_filtered_bc.layers["counts"] = adata_filtered_bc.X
# `out` is the genes x cells matrix exported by the %%R cell; transpose back to cells x genes.
adata_filtered_bc.layers["soupX_counts"] = out.T
adata_filtered_bc.X = adata_filtered_bc.layers["soupX_counts"]

# Output folder and file name both derive from `pre`; create the folder if it is missing.
save_dir = os.path.join("D:\\","monkey_IZI","analysis","anndata_objects", pre)
os.makedirs(save_dir, exist_ok=True)
save_object_path = os.path.join(save_dir, pre + '_' + ident + '_bg_corrected.h5ad')
adata_filtered_bc.write(save_object_path)

#### Sample Cyno1_TimeZero_S1

In [None]:
ident = 'Cyno1_TimeZero_S1'

# Cell Ranger "outs" folder of this sample; the filtered and the raw matrix both live here.
cellranger_outs = os.path.join("D:\\","monkey_IZI","nextflow","output_dir","cyno","cellranger","count",ident,"outs")
path_filtered_counts = os.path.join(cellranger_outs, "filtered_feature_bc_matrix.h5")
path_raw_counts = os.path.join(cellranger_outs, "raw_feature_bc_matrix.h5")

adata_filtered_bc = sc.read_10x_h5(path_filtered_counts)
adata_filtered_bc.var_names_make_unique()
adata_raw_bc = sc.read_10x_h5(path_raw_counts)
adata_raw_bc.var_names_make_unique()

# Quick clustering on a throw-away copy; only the cluster labels are kept for SoupX.
adata_pp = adata_filtered_bc.copy()
# normalize_total replaces the deprecated normalize_per_cell and never drops cells,
# so the labels below stay aligned one-to-one with `cells`.
sc.pp.normalize_total(adata_pp)
sc.pp.log1p(adata_pp)

sc.pp.pca(adata_pp)
sc.pp.neighbors(adata_pp)
sc.tl.leiden(adata_pp, key_added="soupx_groups")

# Preprocess variables for SoupX (handed to the %%R cell below through its -i arguments)
soupx_groups = adata_pp.obs["soupx_groups"]

del adata_pp

cells = adata_filtered_bc.obs_names
genes = adata_filtered_bc.var_names
# Transposed so that genes are on rows and barcodes on columns, as the R cell labels them.
data = adata_filtered_bc.X.T

data_tod = adata_raw_bc.X.T

# The R cell only names the rows of `data`, so check here that both matrices share the same
# genes and that every cell barcode has exactly one cluster label.
assert soupx_groups.index.equals(cells), "cluster labels are not aligned with the cell barcodes"
assert adata_raw_bc.var_names.equals(genes), "raw and filtered matrices list different genes"

del adata_raw_bc

In [None]:
%%R -i data -i data_tod -i genes -i cells -i soupx_groups -o out
library(SoupX)

# Label the table of counts: genes on the rows, cell barcodes on the columns
rownames(data) <- genes
colnames(data) <- cells

# Coerce the table of counts and the table of droplets to sparse matrices
data <- as(data, "sparseMatrix")
data_tod <- as(data_tod, "sparseMatrix")

# Build the SoupChannel object without letting SoupX compute the soup profile itself
soup_channel <- SoupChannel(data_tod, data, calcSoupProfile = FALSE)

# Soup profile taken from the table of counts: per-gene totals and their share of all counts
gene_totals <- rowSums(data)
soup_profile <- data.frame(row.names = rownames(data), est = gene_totals / sum(data), counts = gene_totals)
soup_channel <- setSoupProfile(soup_channel, soup_profile)

# Attach the cluster labels to the SoupChannel
soup_channel <- setClusters(soup_channel, soupx_groups)

# Estimate the contamination fraction (no diagnostic plot)
soup_channel <- autoEstCont(soup_channel, doPlot = FALSE)

# Corrected table of counts, rounded to integers; returned to Python as `out`
out <- adjustCounts(soup_channel, roundToInt = TRUE)

In [None]:
# Keep the uncorrected counts in their own layer. Guarded so that re-running this cell,
# when .X already holds the SoupX result, does not overwrite the raw counts with corrected ones.
if "counts" not in adata_filtered_bc.layers:
    adata_filtered_bc.layers["counts"] = adata_filtered_bc.X
# `out` is the genes x cells matrix exported by the %%R cell; transpose back to cells x genes.
adata_filtered_bc.layers["soupX_counts"] = out.T
adata_filtered_bc.X = adata_filtered_bc.layers["soupX_counts"]

# Output folder and file name both derive from `pre`; create the folder if it is missing.
save_dir = os.path.join("D:\\","monkey_IZI","analysis","anndata_objects", pre)
os.makedirs(save_dir, exist_ok=True)
save_object_path = os.path.join(save_dir, pre + '_' + ident + '_bg_corrected.h5ad')
adata_filtered_bc.write(save_object_path)

#### Sample Cyno2_6hr_S4

In [None]:
ident = 'Cyno2_6hr_S4'

# Cell Ranger "outs" folder of this sample; the filtered and the raw matrix both live here.
cellranger_outs = os.path.join("D:\\","monkey_IZI","nextflow","output_dir","cyno","cellranger","count",ident,"outs")
path_filtered_counts = os.path.join(cellranger_outs, "filtered_feature_bc_matrix.h5")
path_raw_counts = os.path.join(cellranger_outs, "raw_feature_bc_matrix.h5")

adata_filtered_bc = sc.read_10x_h5(path_filtered_counts)
adata_filtered_bc.var_names_make_unique()
adata_raw_bc = sc.read_10x_h5(path_raw_counts)
adata_raw_bc.var_names_make_unique()

# Quick clustering on a throw-away copy; only the cluster labels are kept for SoupX.
adata_pp = adata_filtered_bc.copy()
# normalize_total replaces the deprecated normalize_per_cell and never drops cells,
# so the labels below stay aligned one-to-one with `cells`.
sc.pp.normalize_total(adata_pp)
sc.pp.log1p(adata_pp)

sc.pp.pca(adata_pp)
sc.pp.neighbors(adata_pp)
sc.tl.leiden(adata_pp, key_added="soupx_groups")

# Preprocess variables for SoupX (handed to the %%R cell below through its -i arguments)
soupx_groups = adata_pp.obs["soupx_groups"]

del adata_pp

cells = adata_filtered_bc.obs_names
genes = adata_filtered_bc.var_names
# Transposed so that genes are on rows and barcodes on columns, as the R cell labels them.
data = adata_filtered_bc.X.T

data_tod = adata_raw_bc.X.T

# The R cell only names the rows of `data`, so check here that both matrices share the same
# genes and that every cell barcode has exactly one cluster label.
assert soupx_groups.index.equals(cells), "cluster labels are not aligned with the cell barcodes"
assert adata_raw_bc.var_names.equals(genes), "raw and filtered matrices list different genes"

del adata_raw_bc

In [None]:
%%R -i data -i data_tod -i genes -i cells -i soupx_groups -o out
library(SoupX)

# Label the table of counts: genes on the rows, cell barcodes on the columns
rownames(data) <- genes
colnames(data) <- cells

# Coerce the table of counts and the table of droplets to sparse matrices
data <- as(data, "sparseMatrix")
data_tod <- as(data_tod, "sparseMatrix")

# Build the SoupChannel object without letting SoupX compute the soup profile itself
soup_channel <- SoupChannel(data_tod, data, calcSoupProfile = FALSE)

# Soup profile taken from the table of counts: per-gene totals and their share of all counts
gene_totals <- rowSums(data)
soup_profile <- data.frame(row.names = rownames(data), est = gene_totals / sum(data), counts = gene_totals)
soup_channel <- setSoupProfile(soup_channel, soup_profile)

# Attach the cluster labels to the SoupChannel
soup_channel <- setClusters(soup_channel, soupx_groups)

# Estimate the contamination fraction (no diagnostic plot)
soup_channel <- autoEstCont(soup_channel, doPlot = FALSE)

# Corrected table of counts, rounded to integers; returned to Python as `out`
out <- adjustCounts(soup_channel, roundToInt = TRUE)

In [None]:
# Keep the uncorrected counts in their own layer. Guarded so that re-running this cell,
# when .X already holds the SoupX result, does not overwrite the raw counts with corrected ones.
if "counts" not in adata_filtered_bc.layers:
    adata_filtered_bc.layers["counts"] = adata_filtered_bc.X
# `out` is the genes x cells matrix exported by the %%R cell; transpose back to cells x genes.
adata_filtered_bc.layers["soupX_counts"] = out.T
adata_filtered_bc.X = adata_filtered_bc.layers["soupX_counts"]

# Output folder and file name both derive from `pre`; create the folder if it is missing.
save_dir = os.path.join("D:\\","monkey_IZI","analysis","anndata_objects", pre)
os.makedirs(save_dir, exist_ok=True)
save_object_path = os.path.join(save_dir, pre + '_' + ident + '_bg_corrected.h5ad')
adata_filtered_bc.write(save_object_path)

#### Sample Cyno2_24hr_S5

In [None]:
ident = 'Cyno2_24hr_S5'

# Cell Ranger "outs" folder of this sample; the filtered and the raw matrix both live here.
cellranger_outs = os.path.join("D:\\","monkey_IZI","nextflow","output_dir","cyno","cellranger","count",ident,"outs")
path_filtered_counts = os.path.join(cellranger_outs, "filtered_feature_bc_matrix.h5")
path_raw_counts = os.path.join(cellranger_outs, "raw_feature_bc_matrix.h5")

adata_filtered_bc = sc.read_10x_h5(path_filtered_counts)
adata_filtered_bc.var_names_make_unique()
adata_raw_bc = sc.read_10x_h5(path_raw_counts)
adata_raw_bc.var_names_make_unique()

# Quick clustering on a throw-away copy; only the cluster labels are kept for SoupX.
adata_pp = adata_filtered_bc.copy()
# normalize_total replaces the deprecated normalize_per_cell and never drops cells,
# so the labels below stay aligned one-to-one with `cells`.
sc.pp.normalize_total(adata_pp)
sc.pp.log1p(adata_pp)

sc.pp.pca(adata_pp)
sc.pp.neighbors(adata_pp)
sc.tl.leiden(adata_pp, key_added="soupx_groups")

# Preprocess variables for SoupX (handed to the %%R cell below through its -i arguments)
soupx_groups = adata_pp.obs["soupx_groups"]

del adata_pp

cells = adata_filtered_bc.obs_names
genes = adata_filtered_bc.var_names
# Transposed so that genes are on rows and barcodes on columns, as the R cell labels them.
data = adata_filtered_bc.X.T

data_tod = adata_raw_bc.X.T

# The R cell only names the rows of `data`, so check here that both matrices share the same
# genes and that every cell barcode has exactly one cluster label.
assert soupx_groups.index.equals(cells), "cluster labels are not aligned with the cell barcodes"
assert adata_raw_bc.var_names.equals(genes), "raw and filtered matrices list different genes"

del adata_raw_bc

In [None]:
%%R -i data -i data_tod -i genes -i cells -i soupx_groups -o out
library(SoupX)

# Label the table of counts: genes on the rows, cell barcodes on the columns
rownames(data) <- genes
colnames(data) <- cells

# Coerce the table of counts and the table of droplets to sparse matrices
data <- as(data, "sparseMatrix")
data_tod <- as(data_tod, "sparseMatrix")

# Build the SoupChannel object without letting SoupX compute the soup profile itself
soup_channel <- SoupChannel(data_tod, data, calcSoupProfile = FALSE)

# Soup profile taken from the table of counts: per-gene totals and their share of all counts
gene_totals <- rowSums(data)
soup_profile <- data.frame(row.names = rownames(data), est = gene_totals / sum(data), counts = gene_totals)
soup_channel <- setSoupProfile(soup_channel, soup_profile)

# Attach the cluster labels to the SoupChannel
soup_channel <- setClusters(soup_channel, soupx_groups)

# Estimate the contamination fraction (no diagnostic plot)
soup_channel <- autoEstCont(soup_channel, doPlot = FALSE)

# Corrected table of counts, rounded to integers; returned to Python as `out`
out <- adjustCounts(soup_channel, roundToInt = TRUE)

In [None]:
# Keep the uncorrected counts in their own layer. Guarded so that re-running this cell,
# when .X already holds the SoupX result, does not overwrite the raw counts with corrected ones.
if "counts" not in adata_filtered_bc.layers:
    adata_filtered_bc.layers["counts"] = adata_filtered_bc.X
# `out` is the genes x cells matrix exported by the %%R cell; transpose back to cells x genes.
adata_filtered_bc.layers["soupX_counts"] = out.T
adata_filtered_bc.X = adata_filtered_bc.layers["soupX_counts"]

# Output folder and file name both derive from `pre`; create the folder if it is missing.
save_dir = os.path.join("D:\\","monkey_IZI","analysis","anndata_objects", pre)
os.makedirs(save_dir, exist_ok=True)
save_object_path = os.path.join(save_dir, pre + '_' + ident + '_bg_corrected.h5ad')
adata_filtered_bc.write(save_object_path)

#### Sample Cyno2_TimeZero_S2

In [None]:
ident = 'Cyno2_TimeZero_S2'

# Cell Ranger "outs" folder of this sample; the filtered and the raw matrix both live here.
cellranger_outs = os.path.join("D:\\","monkey_IZI","nextflow","output_dir","cyno","cellranger","count",ident,"outs")
path_filtered_counts = os.path.join(cellranger_outs, "filtered_feature_bc_matrix.h5")
path_raw_counts = os.path.join(cellranger_outs, "raw_feature_bc_matrix.h5")

adata_filtered_bc = sc.read_10x_h5(path_filtered_counts)
adata_filtered_bc.var_names_make_unique()
adata_raw_bc = sc.read_10x_h5(path_raw_counts)
adata_raw_bc.var_names_make_unique()

# Quick clustering on a throw-away copy; only the cluster labels are kept for SoupX.
adata_pp = adata_filtered_bc.copy()
# normalize_total replaces the deprecated normalize_per_cell and never drops cells,
# so the labels below stay aligned one-to-one with `cells`.
sc.pp.normalize_total(adata_pp)
sc.pp.log1p(adata_pp)

sc.pp.pca(adata_pp)
sc.pp.neighbors(adata_pp)
sc.tl.leiden(adata_pp, key_added="soupx_groups")

# Preprocess variables for SoupX (handed to the %%R cell below through its -i arguments)
soupx_groups = adata_pp.obs["soupx_groups"]

del adata_pp

cells = adata_filtered_bc.obs_names
genes = adata_filtered_bc.var_names
# Transposed so that genes are on rows and barcodes on columns, as the R cell labels them.
data = adata_filtered_bc.X.T

data_tod = adata_raw_bc.X.T

# The R cell only names the rows of `data`, so check here that both matrices share the same
# genes and that every cell barcode has exactly one cluster label.
assert soupx_groups.index.equals(cells), "cluster labels are not aligned with the cell barcodes"
assert adata_raw_bc.var_names.equals(genes), "raw and filtered matrices list different genes"

del adata_raw_bc

In [None]:
%%R -i data -i data_tod -i genes -i cells -i soupx_groups -o out
library(SoupX)

# Label the table of counts: genes on the rows, cell barcodes on the columns
rownames(data) <- genes
colnames(data) <- cells

# Coerce the table of counts and the table of droplets to sparse matrices
data <- as(data, "sparseMatrix")
data_tod <- as(data_tod, "sparseMatrix")

# Build the SoupChannel object without letting SoupX compute the soup profile itself
soup_channel <- SoupChannel(data_tod, data, calcSoupProfile = FALSE)

# Soup profile taken from the table of counts: per-gene totals and their share of all counts
gene_totals <- rowSums(data)
soup_profile <- data.frame(row.names = rownames(data), est = gene_totals / sum(data), counts = gene_totals)
soup_channel <- setSoupProfile(soup_channel, soup_profile)

# Attach the cluster labels to the SoupChannel
soup_channel <- setClusters(soup_channel, soupx_groups)

# Estimate the contamination fraction (no diagnostic plot)
soup_channel <- autoEstCont(soup_channel, doPlot = FALSE)

# Corrected table of counts, rounded to integers; returned to Python as `out`
out <- adjustCounts(soup_channel, roundToInt = TRUE)

In [None]:
# Keep the uncorrected counts in their own layer. Guarded so that re-running this cell,
# when .X already holds the SoupX result, does not overwrite the raw counts with corrected ones.
if "counts" not in adata_filtered_bc.layers:
    adata_filtered_bc.layers["counts"] = adata_filtered_bc.X
# `out` is the genes x cells matrix exported by the %%R cell; transpose back to cells x genes.
adata_filtered_bc.layers["soupX_counts"] = out.T
adata_filtered_bc.X = adata_filtered_bc.layers["soupX_counts"]

# Output folder and file name both derive from `pre`; create the folder if it is missing.
save_dir = os.path.join("D:\\","monkey_IZI","analysis","anndata_objects", pre)
os.makedirs(save_dir, exist_ok=True)
save_object_path = os.path.join(save_dir, pre + '_' + ident + '_bg_corrected.h5ad')
adata_filtered_bc.write(save_object_path)

#### Save package versions

In [None]:
# Write one "<name>==<version>" line per installed distribution to
# <base_package_version_path>/<pre>_package_versions.txt and echo the same lines in the cell output.
import re

try:
    # Standard-library API (Python >= 3.8); pkg_resources is deprecated by setuptools.
    from importlib.metadata import distributions
    installed = [(dist.metadata["Name"], dist.version) for dist in distributions()]
except ImportError:
    # Older interpreters: fall back to the API this cell used originally.
    import pkg_resources
    installed = [(package.project_name, package.version) for package in pkg_resources.working_set]

# Reproduce the key format of pkg_resources: runs of characters outside [A-Za-z0-9.] become "-",
# then lower-case. Entries without a name are skipped, duplicates collapse in the set,
# and sorting keeps the file stable between runs.
package_lines = sorted({
    re.sub("[^A-Za-z0-9.]+", "-", name).lower() + "==" + str(version)
    for name, version in installed
    if name
})

# Create the target folder first so that open() cannot fail on a missing directory.
os.makedirs(base_package_version_path, exist_ok=True)
with open(os.path.join(base_package_version_path, pre + '_package_versions.txt'), "w") as file:
    for line in package_lines:
        file.write(line + "\n")
        print(line)