# Integration Visualization

In [None]:
# load packages
import sys
import scanpy as sc
import os
import sys

In [None]:
# Root directory of the project; the figure directory below is derived from it.
work_dir = "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/"

# Directory scanpy writes figures into when a plotting call is given `save=`.
sc.settings.figdir = os.path.join(
    work_dir,
    "figures",
    "combined",
    "integration",
    "final_figures/",
)

# Use 600 dpi both for rendered figures and for figures saved to disk.
sc.set_figure_params(dpi_save=600, dpi=600)

### Integration by "subproject"

In [None]:
# Load one AnnData object per integration method ("*_integrated-subproject_*" files),
# plus the HVG-only object that the plots below treat as the unintegrated baseline.

#################
# By Subproject #
#################

# All inputs live in the same output directory.
_combined_dir = os.path.join(work_dir, "data", "outputdata", "combined")

# HVG
adata_hvg_sp = sc.read_h5ad(
    os.path.join(_combined_dir, "Combined_SCR_CO2_annotated_2.0_TCR_HVG_22-02-24.h5ad")
)
# Build the "subproject" label as "<project>_P<patient>_<timepoint>".
# NOTE(review): only the HVG object gets this column here; presumably the
# integrated objects below already carry it (the plots colour by it) - confirm.
adata_hvg_sp.obs['subproject'] = (
    adata_hvg_sp.obs['project'].astype(str)
    + "_P"
    + adata_hvg_sp.obs['patient'].astype(str)
    + "_"
    + adata_hvg_sp.obs['timepoint'].astype(str)
)

# scVI
adata_scvi_sp = sc.read_h5ad(
    os.path.join(_combined_dir, "Combined_SCR_CO2_annotated_2.0_TCR_scVI_integrated-subproject_01-03-24.h5ad")
)

# scANVI Annotation 1.0
adata_scanvi_1_sp = sc.read_h5ad(
    os.path.join(_combined_dir, "Combined_SCR_CO2_annotated_2.0_TCR_scANVI_anno1.0_integrated-subproject_01-03-24.h5ad")
)

# scANVI Annotation 2.0
adata_scanvi_2_sp = sc.read_h5ad(
    os.path.join(_combined_dir, "Combined_SCR_CO2_annotated_2.0_TCR_scANVI_anno2.0_integrated-subproject_01-03-24.h5ad")
)

# scGen Annotation 1.0
adata_scgen_1_sp = sc.read_h5ad(
    os.path.join(_combined_dir, "Combined_SCR_CO2_annotated_2.0_TCR_scGen_anno1.0_integrated-subproject_01-03-24.h5ad")
)

# scGen Annotation 2.0
adata_scgen_2_sp = sc.read_h5ad(
    os.path.join(_combined_dir, "Combined_SCR_CO2_annotated_2.0_TCR_scGen_anno2.0_integrated-subproject_01-03-24.h5ad")
)

# BBKNN
adata_bbknn_sp = sc.read_h5ad(
    os.path.join(_combined_dir, "Combined_SCR_CO2_annotated_2.0_TCR_BBKNN_integrated-subproject_01-03-24.h5ad")
)

# Harmony
adata_harmony_sp = sc.read_h5ad(
    os.path.join(_combined_dir, "Combined_SCR_CO2_annotated_2.0_TCR_harmony_integrated-subproject_01-03-24.h5ad")
)

# FastMNN
adata_mnn_sp = sc.read_h5ad(
    os.path.join(_combined_dir, "Combined_SCR_CO2_annotated_2.0_TCR_MNN_integrated-subproject_01-03-24.h5ad")
)

# Scanorama
adata_scanorama_sp = sc.read_h5ad(
    os.path.join(_combined_dir, "Combined_SCR_CO2_annotated_2.0_TCR_scanorama_integrated-subproject_01-03-24.h5ad")
)


In [None]:
# UMAP of the unintegrated (HVG) object: one panel per obs column, one panel per row.
# Legends are switched off; scanpy appends the `save` string to its default
# "umap" file name inside sc.settings.figdir.
noint_sp_umap = sc.pl.umap(
    adata_hvg_sp,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_unintegrated_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the scVI object (subproject integration): one panel per obs column, one panel per row.
# Legends are switched off; scanpy appends the `save` string to its default
# "umap" file name inside sc.settings.figdir.
scvi_sp_umap = sc.pl.umap(
    adata_scvi_sp,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_scVI_integrated-subproject_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the scANVI (Annotation 1.0) object, subproject integration:
# one panel per obs column, one panel per row, legends switched off.
# scanpy appends the `save` string to its default "umap" file name inside sc.settings.figdir.
scanvi_1_sp_umap = sc.pl.umap(
    adata_scanvi_1_sp,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_scANVI_anno1.0_integrated-subproject_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the scANVI (Annotation 2.0) object, subproject integration:
# one panel per obs column, one panel per row, legends switched off.
# scanpy appends the `save` string to its default "umap" file name inside sc.settings.figdir.
scanvi_2_sp_umap = sc.pl.umap(
    adata_scanvi_2_sp,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_scANVI_anno2.0_integrated-subproject_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the scGen (Annotation 1.0) object, subproject integration:
# one panel per obs column, one panel per row, legends switched off.
# scanpy appends the `save` string to its default "umap" file name inside sc.settings.figdir.
scgen_1_sp_umap = sc.pl.umap(
    adata_scgen_1_sp,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_scGen_anno1.0_integrated-subproject_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the scGen (Annotation 2.0) object, subproject integration:
# one panel per obs column, one panel per row, legends switched off.
# scanpy appends the `save` string to its default "umap" file name inside sc.settings.figdir.
scgen_2_sp_umap = sc.pl.umap(
    adata_scgen_2_sp,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_scGen_anno2.0_integrated-subproject_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the BBKNN object (subproject integration): one panel per obs column, one panel per row.
# Legends are switched off; scanpy appends the `save` string to its default
# "umap" file name inside sc.settings.figdir.
bbknn_sp_umap = sc.pl.umap(
    adata_bbknn_sp,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_BBKNN_integrated-subproject_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the Harmony object (subproject integration): one panel per obs column, one panel per row.
# Legends are switched off; scanpy appends the `save` string to its default
# "umap" file name inside sc.settings.figdir.
harmony_sp_umap = sc.pl.umap(
    adata_harmony_sp,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_harmony_integrated-subproject_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the FastMNN object (subproject integration): one panel per obs column, one panel per row.
# Legends are switched off; scanpy appends the `save` string to its default
# "umap" file name inside sc.settings.figdir.
mnn_sp_umap = sc.pl.umap(
    adata_mnn_sp,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_MNN_integrated-subproject_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the Scanorama object (subproject integration): one panel per obs column, one panel per row.
# scanpy appends the `save` string to its default "umap" file name inside sc.settings.figdir.
# NOTE(review): unlike the other UMAP cells, `legend_loc` is not passed here, so
# scanpy's default legend placement applies - presumably intentional so that one
# figure carries the legends; confirm.
scanorama_sp_umap = sc.pl.umap(
    adata_scanorama_sp,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_scanorama_integrated-subproject_integration_plots_umap.png",
    ncols=1,
    frameon=False,
)

### Integration by "sample"

In [None]:
# read anndata objects

#############
# By Sample #
#############

# All inputs live in the same output directory.
_combined_dir = os.path.join(work_dir, "data", "outputdata", "combined")


def _read_with_subproject(filename):
    """Read an .h5ad file from the combined output directory and label its cells.

    Adds an ``obs['subproject']`` column built as
    ``"<project>_P<patient>_<timepoint>"`` from the ``project``, ``patient``
    and ``timepoint`` columns of ``obs``, and returns the AnnData object.

    Loading and labelling in one place guarantees the column is written to the
    object that was just read. The previous copy-pasted version re-labelled
    ``adata_hvg_s`` after loading the scANVI 1.0 object, leaving
    ``adata_scanvi_1_s`` without the 'subproject' column.
    """
    adata = sc.read_h5ad(os.path.join(_combined_dir, filename))
    adata.obs['subproject'] = (
        adata.obs['project'].astype(str)
        + "_P"
        + adata.obs['patient'].astype(str)
        + "_"
        + adata.obs['timepoint'].astype(str)
    )
    return adata


# HVG
adata_hvg_s = _read_with_subproject("Combined_SCR_CO2_annotated_2.0_TCR_HVG_22-02-24.h5ad")

# scVI
adata_scvi_s = _read_with_subproject("Combined_SCR_CO2_annotated_2.0_TCR_scVI_integrated-sample_22-02-24.h5ad")

# scANVI Annotation 1.0
adata_scanvi_1_s = _read_with_subproject("Combined_SCR_CO2_annotated_2.0_TCR_scANVI_anno1.0_integrated-sample_22-02-24.h5ad")

# scANVI Annotation 2.0
adata_scanvi_2_s = _read_with_subproject("Combined_SCR_CO2_annotated_2.0_TCR_scANVI_anno2.0_integrated-sample_22-02-24.h5ad")

# scGen Annotation 1.0
adata_scgen_1_s = _read_with_subproject("Combined_SCR_CO2_annotated_2.0_TCR_scGen_anno1.0_integrated-sample_22-02-24.h5ad")

# scGen Annotation 2.0
adata_scgen_2_s = _read_with_subproject("Combined_SCR_CO2_annotated_2.0_TCR_scGen_anno2.0_integrated-sample_22-02-24.h5ad")

# BBKNN
adata_bbknn_s = _read_with_subproject("Combined_SCR_CO2_annotated_2.0_TCR_BBKNN_integrated-sample_22-02-24.h5ad")

# Harmony
adata_harmony_s = _read_with_subproject("Combined_SCR_CO2_annotated_2.0_TCR_harmony_integrated-sample_22-02-24.h5ad")

# FastMNN
adata_mnn_s = _read_with_subproject("Combined_SCR_CO2_annotated_2.0_TCR_MNN_integrated-sample_22-02-24.h5ad")

# Scanorama
adata_scanorama_s = _read_with_subproject("Combined_SCR_CO2_annotated_2.0_TCR_scanorama_integrated-sample_22-02-24.h5ad")


In [None]:
# Inspection cell: display the obs (per-cell metadata) table of the FastMNN object.
adata_mnn_s.obs

In [None]:
# Inspection cell: display the obs (per-cell metadata) table of the HVG (unintegrated) object.
adata_hvg_s.obs

In [None]:
# Look up every FastMNN cell name in the HVG object's "sample" column and store
# the result in a new "sample1" column (names missing from adata_hvg_s map to NaN).
# NOTE(review): the UMAP cell for this object colours by "sample", not "sample1" -
# confirm whether this column is still needed or was meant to be named "sample".
adata_mnn_s.obs["sample1"]=adata_mnn_s.obs_names.map(adata_hvg_s.obs["sample"])

In [None]:
# Preview only: left-join the FastMNN obs table with the HVG object's "sample"
# column on the cell index. The result is not assigned, so adata_mnn_s.obs is unchanged.
# NOTE(review): joining on "index" assumes both obs indexes are unnamed, so that
# reset_index() produces a column called "index" - confirm.
adata_mnn_s.obs.reset_index().merge(adata_hvg_s.obs[["sample"]].reset_index(), on="index", how="left").set_index("index")

In [None]:
# Inspection cell: display the FastMNN obs table again after the cells above.
adata_mnn_s.obs

In [None]:
# UMAP of the unintegrated (HVG) object: one panel per obs column, one panel per row.
# Legends are switched off; scanpy appends the `save` string to its default
# "umap" file name inside sc.settings.figdir.
# NOTE(review): the `save` name is identical to the one used by the unintegrated
# plot in the "subproject" section, so that file is written again - confirm this is intended.
noint_s_umap = sc.pl.umap(
    adata_hvg_s,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_unintegrated_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the scVI object (sample integration): one panel per obs column, one panel per row.
# Legends are switched off; scanpy appends the `save` string to its default
# "umap" file name inside sc.settings.figdir.
scvi_s_umap = sc.pl.umap(
    adata_scvi_s,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_scVI_integrated-sample_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the scANVI (Annotation 1.0) object, sample integration:
# one panel per obs column, one panel per row, legends switched off.
# scanpy appends the `save` string to its default "umap" file name inside sc.settings.figdir.
scanvi_1_s_umap = sc.pl.umap(
    adata_scanvi_1_s,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_scANVI_anno1.0_integrated-sample_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the scANVI (Annotation 2.0) object, sample integration:
# one panel per obs column, one panel per row, legends switched off.
# scanpy appends the `save` string to its default "umap" file name inside sc.settings.figdir.
scanvi_2_s_umap = sc.pl.umap(
    adata_scanvi_2_s,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_scANVI_anno2.0_integrated-sample_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the scGen (Annotation 1.0) object, sample integration:
# one panel per obs column, one panel per row, legends switched off.
# scanpy appends the `save` string to its default "umap" file name inside sc.settings.figdir.
scgen_1_s_umap = sc.pl.umap(
    adata_scgen_1_s,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_scGen_anno1.0_integrated-sample_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the scGen (Annotation 2.0) object, sample integration:
# one panel per obs column, one panel per row, legends switched off.
# scanpy appends the `save` string to its default "umap" file name inside sc.settings.figdir.
scgen_2_s_umap = sc.pl.umap(
    adata_scgen_2_s,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_scGen_anno2.0_integrated-sample_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the BBKNN object (sample integration): one panel per obs column, one panel per row.
# Legends are switched off; scanpy appends the `save` string to its default
# "umap" file name inside sc.settings.figdir.
bbknn_s_umap = sc.pl.umap(
    adata_bbknn_s,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_BBKNN_integrated-sample_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the Harmony object (sample integration): one panel per obs column, one panel per row.
# Legends are switched off; scanpy appends the `save` string to its default
# "umap" file name inside sc.settings.figdir.
harmony_s_umap = sc.pl.umap(
    adata_harmony_s,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_harmony_integrated-sample_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the FastMNN object (sample integration): one panel per obs column, one panel per row.
# Legends are switched off; scanpy appends the `save` string to its default
# "umap" file name inside sc.settings.figdir.
mnn_s_umap = sc.pl.umap(
    adata_mnn_s,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_MNN_integrated-sample_integration_plots_umap.png",
    ncols=1,
    legend_loc=None,
    frameon=False,
)

In [None]:
# UMAP of the Scanorama object (sample integration): one panel per obs column, one panel per row.
# scanpy appends the `save` string to its default "umap" file name inside sc.settings.figdir.
# NOTE(review): unlike the other UMAP cells, `legend_loc` is not passed here, so
# scanpy's default legend placement applies - presumably intentional so that one
# figure carries the legends; confirm.
scanorama_s_umap = sc.pl.umap(
    adata_scanorama_s,
    color=[
        "Annotation_1.0",
        "Annotation_2.0",
        "sample",
        "subproject",
        "timepoint",
    ],
    title=[
        "Cell Type 1.0",
        "Cell Type 2.0",
        "Sample",
        "Subproject",
        "Timepoint",
    ],
    save="Combined_scanorama_integrated-sample_integration_plots_umap.png",
    ncols=1,
    frameon=False,
)