
# Preprocessing - select best Integration
Hélène Reich

2024-06-28


# Setup


In [None]:
# General
import scipy as sci
import numpy as np
import pandas as pd
import logging
import time
import pickle
from itertools import chain
import session_info
import gc

# Plotting
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import rcParams
from matplotlib.pyplot import rc_context
from matplotlib import cm
import seaborn as sb

# Analysis
import muon as mu
from muon import atac as ac # Import a module with ATAC-seq-related functions
import scanpy as sc
import scanpy.external as sce
import torch
#import scipy.stats as sci
from sklearn.metrics import mean_squared_error

In [None]:
# Settings

import warnings
# Suppress every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings("ignore")

## Directory
# Path fragments are joined by plain string concatenation below, so each one ends with '/'.
base_dir = '/mnt/hdd/'
# NOTE(review): data_dir is not used by the cells shown here; the loading cells spell out the absolute path.
data_dir = 'data/Healthy/'
nb_dir = 'Notebooks/Gut_project/'
# Directories scanpy uses for saved figures and for its cache.
sc.settings.figdir = base_dir + nb_dir + 'Figures'
sc.settings.cachedir = base_dir + 'Cache'

## Scanpy settings
sc.settings.verbosity = 3
# Optional environment reports, disabled:
#sc.logging.print_versions()
#session_info.show()

In [None]:
%run utils.ipynb

In [None]:
# Colormap passed as `cmap=` to the scanpy plots below.
# load_RdOrYl_cmap_settings is presumably defined by the utils notebook run above — TODO confirm.
mymap = load_RdOrYl_cmap_settings()

# Load Data

## aData

In [None]:
# Load the integrated AnnData object. The absolute path equals base_dir + data_dir + file name
# from the settings cell, but is written out in full here.
adata = sc.read_h5ad('/mnt/hdd/data/Healthy/adata_markedDoublets_normalized_initialAnno_rmDoublets_integrated_all.h5ad')

In [None]:
# Relabel the 'Doublets' annotation as the literal string 'NA' (a label, not a missing value).
# NOTE(review): check_if_nan further down only treats "nan", None, float NaN and "ND" as missing,
# so 'NA' cells still enter the covariate regression as a regular category — confirm this is intended.
adata.obs['initial_cell_type'] = adata.obs['initial_cell_type'].replace('Doublets', 'NA')

In [None]:
# Store the enrichment proportion as a categorical column; the legend code near the end of the
# notebook reads its `.cat.categories`.
adata.obs['enrichment proportion'] =adata.obs['enrichment proportion'].astype('category')

In [None]:
adata

## update metadata

In [None]:
## add metadata
metadata_df = read_excel_metadata('/mnt/hdd/data/metadata_mouse_gut.xlsx')

# Remove the rows whose kit is 'Multiome_ATAC_v1', then every row whose condition is not a control.
atac_kit_rows = metadata_df.loc[metadata_df['kit'] == 'Multiome_ATAC_v1'].index
metadata_df = metadata_df.drop(index=atac_kit_rows)
non_control_rows = metadata_df.loc[~metadata_df['condition'].isin(['Ctr','Ctr/WT'])].index
metadata_df = metadata_df.drop(index=non_control_rows)

# Use the folder name as the index so that rows can be looked up by sample later on.
metadata_df = metadata_df.set_index('folder name')

# Columns that are not carried over to the cells.
unused_columns = [
    'Sample Pooling - confounded with Project?',
    'date',
    'Project Name',
    'Link_id',
    'sample name',
    'Cell Count [cells/µl]',
    'Viable Cells [%]',
    'Lib. Concentration [ng/µl]',
    'Lib. Molarity [nM]',
    'Average Lib. Size [bp]',
    'cDNA Cycles',
    'Lib. Cycles',
    '10x Sample Index',
    'Sequencing Depth [reads/cell]',
    'exclusion, reason',
    'sample number Minas',
    "modality (confounded with 'sequencing'",
    'Internal ID',
    'SeqID',
    'Read Length',
]
metadata_df = metadata_df.drop(columns=unused_columns)

In [None]:
metadata_df

In [None]:
# Copy every metadata column onto the cells: each cell's 'sample' value is looked up as a
# row label in metadata_df (which is indexed by 'folder name').
for col in metadata_df.columns:
    try:
        adata.obs[col] = adata.obs['sample'].apply(lambda x: metadata_df.at[x, col])
    except KeyError as err:
        # A sample that is missing from metadata_df aborts this column only:
        # the column is not written, the problem is reported, and the loop continues.
        print(f'no such key: {err} in col {col}')

In [None]:
adata

#### from github HLCA: visualize main covariates

In [None]:
# Number of principal components whose variance is attributed to covariates below.
n_pcs = 50

# Request exactly n_pcs components, so that the later loop over range(n_pcs) and the
# columns of adata.obsm["X_pca"] cannot get out of sync.
sc.tl.pca(adata, n_comps=n_pcs)

# Specify the covariates we want to check (we will quantify their correlation with the
# first n_pcs PCs, to see how much variance each of them can explain):

covariates = [
    "sample",
'doublet_calls', 'final_doublets', 'final_doublets_cat', 'phase', 'proliferation', 'initial_cell_type','Project','sequencing','condition','kit','line','strain','enriched','enrichment proportion','diet','Index Type','sequencing machine'
]

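Create shuffled assignments of `Project` (and of `sequencing machine` if `include_processing_site` is set) — these take the roles that single-cell platform and processing site play in the original HLCA code — to compare the actual variance explained with the variance explained that is expected by chance. All cells of the same sample are assigned the same value.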

In [None]:
# When True, the next cell also builds shuffled copies of 'sequencing machine'
# in addition to the shuffled copies of 'Project'.
include_processing_site =True

In [None]:
# Build sample-level null covariates: the per-sample labels of 'Project' (and of
# 'sequencing machine' if requested) are permuted across samples, so that all cells of
# one sample still share a single value.
shuffle_columns = ["Project"]
if include_processing_site:
    shuffle_columns.append("sequencing machine")

# One row per sample. observed=True keeps only samples that actually occur when 'sample'
# is categorical; unused categories would otherwise add all-NaN rows to the shuffle.
sample_to_scplatform = adata.obs.groupby("sample", observed=True).agg(
    {column: "first" for column in shuffle_columns}
)

# A seeded generator makes the null covariates reproducible across kernel restarts.
shuffle_rng = np.random.default_rng(0)
n_shuffles = 10

for i in range(n_shuffles):
    for column in shuffle_columns:
        shuffled_key = column + "_shuffled_" + str(i)
        # Permute a plain array instead of shuffling the DataFrame column in place:
        # in-place shuffling of a pandas Series relies on integer-position access on a
        # label index and does not write through under pandas copy-on-write.
        permuted_labels = shuffle_rng.permutation(
            sample_to_scplatform[column].to_numpy()
        )
        adata.obs[shuffled_key] = adata.obs["sample"].map(
            dict(zip(sample_to_scplatform.index, permuted_labels))
        )
        # Re-running this cell must not register the same covariate twice.
        if shuffled_key not in covariates:
            covariates.append(shuffled_key)

Now check for every covariate, for every PC how much variance among the cells' PC scores the covariate can explain. Add this variance explained per PC up across PCs for every covariate. This will give us the total amount of variance explained per covariate.

In [None]:
from sklearn.linear_model import LinearRegression

def check_if_nan(value):
    """return Boolean version of value that is True if value is
    some type of NaN (e.g. np.nan, None, "nan" etc). 
    Example use:
    none_entries = subadata.obs.applymap(check_if_nan)
    subadata.obs = subadata.obs.mask(none_entries.values)
    """
    if value == "nan":
        return True
    elif value == None:
        return True
    if isinstance(value, float):
        if np.isnan(value):
            return True
    if value == "ND":
        return True
    return False

In [None]:
# For every covariate and every PC: regress the cells' PC scores on the covariate and
# record the variance of the fitted values, i.e. the variance the covariate can explain.
var_explained = pd.DataFrame(index=range(n_pcs), columns=covariates + ["overall"])

pc_scores = adata.obsm["X_pca"]
for pc in range(n_pcs):
    # Total variance of the PC, computed on all cells (including those with missing covariates).
    var_explained.loc[pc, "overall"] = np.var(pc_scores[:, pc])

# The covariate encoding does not depend on the PC, so it is built once per covariate.
for cov in covariates:
    x = adata.obs[cov].values.copy()
    # otypes keeps np.vectorize usable even if there are no cells at all.
    x_nans = np.vectorize(check_if_nan, otypes=[bool])(x)
    x = x[~x_nans]
    if len(x) == 0:
        # No usable values: the column keeps its initial NaN entries.
        continue
    if x.dtype in ["float32", "float", "float64"]:
        # Numeric covariate: a single regressor column.
        x = x.reshape(-1, 1)
    else:
        if len(set(x)) == 1:
            # A constant covariate cannot explain any variance.
            var_explained.loc[:, cov] = np.nan
            continue
        # Categorical covariate: one indicator column per level.
        x = pd.get_dummies(x)
        # Only the dummy DataFrame has column labels; the numeric branch yields a
        # plain ndarray, so this must not run there.
        x.columns = x.columns.astype(str)
    for pc in range(n_pcs):
        y_true = pc_scores[~x_nans, pc].reshape(-1, 1)
        lrf = LinearRegression(fit_intercept=True).fit(
            x,
            y_true,
        )
        y_pred = lrf.predict(x)
        var_explained.loc[pc, cov] = np.var(y_pred)

# Sum over PCs and express each covariate relative to the overall variance.
total_variance_explained = np.sum(var_explained, axis=0).sort_values(ascending=False)
total_variance_explained_fractions = (
    total_variance_explained / total_variance_explained["overall"]
)

Do the same for the shuffled covariates. Calculate mean over shuffling instances, add as one value to clean fractions:

In [None]:
# Names of the shuffled (null) covariates. The same prefixes are used for removing them
# from the clean series and for summarising them, so the two steps cannot disagree.
project_shuffled_keys = [
    name
    for name in total_variance_explained_fractions.index
    if name.startswith("Project_shuffled")
]
processing_site_shuffled_keys = [
    name
    for name in total_variance_explained_fractions.index
    if name.startswith("sequencing machine_shuffled")
]

# Real covariates only; drop() returns a new Series, so the additions below do not
# touch total_variance_explained_fractions.
total_variance_explained_clean = total_variance_explained_fractions.drop(
    index=project_shuffled_keys + processing_site_shuffled_keys
)

# Collapse the shuffling instances into one mean entry (and keep their spread).
total_variance_explained_clean["Project_shuffled"] = np.mean(
    total_variance_explained_fractions[project_shuffled_keys]
)
stdev_Project_shuffled = np.std(
    total_variance_explained_fractions[project_shuffled_keys]
)
if include_processing_site:
    total_variance_explained_clean["sequencing machine_shuffled"] = np.mean(
        total_variance_explained_fractions[processing_site_shuffled_keys]
    )
    stdev_processing_site_shuffled = np.std(
        total_variance_explained_fractions[processing_site_shuffled_keys]
    )


Sort results:

In [None]:
total_variance_explained_clean.sort_values(ascending=False, inplace=True)

Plot:

In [None]:
# Bar chart of the variance fraction per covariate, drawn in the reverse of the
# series' current order.
fractions_reversed = total_variance_explained_clean[::-1]

plt.figure(figsize=(8, 4))
plt.bar(fractions_reversed.index, fractions_reversed.values)
plt.title("covariate correlation with first 50 PCs", fontsize=14)
plt.xticks(rotation=90)
plt.show()

# select best MultiVI

In [None]:
gc.collect()

In [None]:
# One neighbour graph per MultiVI latent space. Each graph is stored under its own key
# so that leiden/UMAP can pick it via `neighbors_key`.
# The representations 'X_MultiVI' and 'X_MultiVI_rmDoublets' are not processed here.
multivi_graphs = (
    ('X_MultiVI_meta', 'MultiVI_meta'),
    ('X_MultiVI_rmDoublets_meta', 'MultiVI_rmDoublets_meta'),
)
for latent_key, graph_key in multivi_graphs:
    sc.pp.neighbors(adata, use_rep=latent_key, key_added=graph_key)

## MultiVI meta

In [None]:
sc.tl.leiden(adata, neighbors_key='MultiVI_meta')

In [None]:
sc.tl.umap(adata, neighbors_key='MultiVI_meta')

In [None]:
# Overview of the current UMAP: sample/technical metadata first, then QC metrics.
overview_keys = [
    'pretty name', 'kit', 'strain', 'sequencing',
    'n_counts', 'log_counts', 'n_genes', 'log_genes',
    'mt_frac', 'rp_frac', 'doublet_calls',
]
sc.pl.umap(
    adata,
    color=overview_keys,
    size=10,
    alpha=1,
    outline_width=(0.3, 0.0),
    ncols=4,
    wspace=.95,
    frameon=False,
    cmap=mymap,
)

### save leiden into obs and UMAP into obsm

In [None]:
# Keep this clustering under its own column; adata.obs['leiden'] is read again after the
# next sc.tl.leiden run, which writes its result there.
adata.obs['leiden_MultiVI_meta'] = adata.obs['leiden']

In [None]:
# Keep these coordinates under their own obsm key (used as `basis` in the combined plots);
# adata.obsm['X_umap'] is written again by the next sc.tl.umap call.
adata.obsm['UMAP_MultiVI_meta'] = adata.obsm['X_umap']

In [None]:
adata

## MultiVI wo dblts and with meta

In [None]:
sc.tl.leiden(adata, neighbors_key='MultiVI_rmDoublets_meta')

In [None]:
sc.tl.umap(adata, neighbors_key='MultiVI_rmDoublets_meta')

In [None]:
# Overview of the current UMAP: sample/technical metadata first, then QC metrics.
overview_keys = [
    'pretty name', 'kit', 'strain', 'sequencing',
    'n_counts', 'log_counts', 'n_genes', 'log_genes',
    'mt_frac', 'rp_frac', 'doublet_calls',
]
sc.pl.umap(
    adata,
    color=overview_keys,
    size=10,
    alpha=1,
    outline_width=(0.3, 0.0),
    ncols=4,
    wspace=.95,
    frameon=False,
    cmap=mymap,
)

### save leiden into obs and UMAP into obsm

In [None]:
# Keep this clustering under its own column; adata.obs['leiden'] is read again after the
# next sc.tl.leiden run, which writes its result there.
adata.obs['leiden_MultiVI_rmDblts_meta'] = adata.obs['leiden']

In [None]:
# Keep these coordinates under their own obsm key (used as `basis` in the combined plots);
# adata.obsm['X_umap'] is written again by the next sc.tl.umap call.
adata.obsm['UMAP_MultiVI_rmDblts_meta'] = adata.obsm['X_umap']

In [None]:
adata

In [None]:
gc.collect()

## combine plots

### QC variables

'sample','n_counts','log_counts','n_genes','log_genes','mt_frac','rp_frac','final_doublets_cat','doublet_calls'

In [None]:
# QC metrics on both MultiVI embeddings: one row per embedding, one column per metric.
ncols = 6
nrows = 2
figsize = 4
wspace = 0.5
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, nrows * figsize))
plt.subplots_adjust(wspace=wspace)
# axs is an nrows x ncols array of Axes; it is printed for reference.
print("axes:", axs)

qc_metrics = ['n_counts', 'log_counts', 'n_genes', 'log_genes', 'mt_frac', 'doublet_calls']
embedding_rows = [
    ('UMAP_MultiVI_meta', 'MultiVI total counts'),
    ('UMAP_MultiVI_rmDblts_meta', 'MultiVI wo. doublets total counts'),
]

# Only the first panel of a row gets an explicit title. Every call except the very last
# one passes show=False, so the figure is displayed once all panels have been drawn.
for row_idx, (basis, row_title) in enumerate(embedding_rows):
    for col_idx, metric in enumerate(qc_metrics):
        panel_kwargs = dict(
            basis=basis, color=[metric], size=10, ax=axs[row_idx, col_idx],
            cmap=mymap, frameon=False,
        )
        if col_idx == 0:
            panel_kwargs['title'] = row_title
        is_last_panel = (
            row_idx == len(embedding_rows) - 1 and col_idx == len(qc_metrics) - 1
        )
        if not is_last_panel:
            panel_kwargs['show'] = False
        sc.pl.embedding(adata, **panel_kwargs)

### metadata

In [None]:
# Sample-level metadata on both MultiVI embeddings: one row per embedding, one column per key.
ncols = 6
nrows = 2
figsize = 4
wspace = 1.35
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, nrows * figsize))
plt.subplots_adjust(wspace=wspace)
# axs is an nrows x ncols array of Axes; it is printed for reference.
print("axes:", axs)

metadata_keys = ['pretty name', 'sequencing', 'strain', 'Index Type', 'sequencing machine', 'enrichment proportion']
embedding_rows = [
    ('UMAP_MultiVI_meta', 'MultiVI samples'),
    ('UMAP_MultiVI_rmDblts_meta', 'MultiVI wo. doublets samples'),
]

# Only the first panel of a row gets an explicit title. Every call except the very last
# one passes show=False, so the figure is displayed once all panels have been drawn.
for row_idx, (basis, row_title) in enumerate(embedding_rows):
    for col_idx, obs_key in enumerate(metadata_keys):
        panel_kwargs = dict(
            basis=basis, color=[obs_key], size=10, ax=axs[row_idx, col_idx],
            frameon=False,
        )
        if col_idx == 0:
            panel_kwargs['title'] = row_title
        is_last_panel = (
            row_idx == len(embedding_rows) - 1 and col_idx == len(metadata_keys) - 1
        )
        if not is_last_panel:
            panel_kwargs['show'] = False
        sc.pl.embedding(adata, **panel_kwargs)

### other metadata

'Project','condition','kit','line','enriched','diet'

In [None]:
# Further metadata on both MultiVI embeddings: one row per embedding, one column per key.
ncols = 6
nrows = 2
figsize = 4
wspace = 0.7
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize + figsize * wspace * (ncols - 1), nrows * figsize),
)
plt.subplots_adjust(wspace=wspace)
# axs is an nrows x ncols array of Axes; it is printed for reference.
print("axes:", axs)

other_metadata_keys = ['Project', 'strain', 'kit', 'line', 'diet', 'enriched']
# (embedding, title of the first panel in that row); the first title previously read
# 'MultiVI projet', which is corrected here to match the second row.
embedding_rows = [
    ('UMAP_MultiVI_meta', 'MultiVI project'),
    ('UMAP_MultiVI_rmDblts_meta', 'MultiVI wo. doublets project'),
]

# Every call except the very last one passes show=False, so the figure is displayed
# once all panels have been drawn.
for row_idx, (basis, row_title) in enumerate(embedding_rows):
    for col_idx, obs_key in enumerate(other_metadata_keys):
        panel_kwargs = dict(
            basis=basis, color=[obs_key], size=10, ax=axs[row_idx, col_idx],
            frameon=False,
        )
        if col_idx == 0:
            panel_kwargs['title'] = row_title
        is_last_panel = (
            row_idx == len(embedding_rows) - 1
            and col_idx == len(other_metadata_keys) - 1
        )
        if not is_last_panel:
            panel_kwargs['show'] = False
        sc.pl.embedding(adata, **panel_kwargs)

### marker genes

In [None]:
# Candidate marker genes.
# NOTE(review): this list is reassigned further down before it is read — the 2 x 6 marker
# grid below hard-codes its six genes.
marker_genes = ['Foxa2','Neurog3','Tph1','Isl1','Pou2f3','Lgr5','Dmbt1','Hmgb2','Top2a','Defa24','Gna11','Cd52','Muc2','Fcgbp','Lyz1']

In [None]:
adata

In [None]:
# Make the 'sct_logcounts' layer the active expression matrix, presumably so that the gene
# plots below are coloured by these values — TODO confirm (scanpy may read adata.raw instead if it is set).
# NOTE(review): this replaces adata.X for the rest of the notebook and assigns the layer
# object itself (no .copy()).
adata.X = adata.layers['sct_logcounts']

In [None]:
# Six marker genes on both MultiVI embeddings: one row per embedding, one column per gene.
ncols = 6
nrows = 2
figsize = 4
wspace = 0.5
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, nrows * figsize))
plt.subplots_adjust(wspace=wspace)
# axs is an nrows x ncols array of Axes; it is printed for reference.
print("axes:", axs)

umaps = ['UMAP_MultiVI_meta','UMAP_MultiVI_rmDblts_meta']
panel_genes = ['Lgr5', 'Neurog3', 'Lyz1', 'Muc2', 'Pou2f3', 'Tph1']

# Every call except the very last one passes show=False, so the figure is displayed
# once all panels have been drawn.
for row_idx, basis in enumerate(umaps):
    for col_idx, gene in enumerate(panel_genes):
        panel_kwargs = dict(
            basis=basis, color=[gene], size=10, ax=axs[row_idx, col_idx],
            frameon=False, cmap=mymap,
        )
        is_last_panel = row_idx == len(umaps) - 1 and col_idx == len(panel_genes) - 1
        if not is_last_panel:
            panel_kwargs['show'] = False
        sc.pl.embedding(adata, **panel_kwargs)

In [None]:
# Marker genes for the per-gene comparison grid in the next cell (one row per gene).
marker_genes = ['Foxa2','Neurog3','Tph1','Isl1','Pou2f3','Lgr5','Dmbt1','Hmgb2','Top2a','Defa24','Muc2','Lyz1']

In [None]:
# Per-gene comparison: one row per marker gene, one column per embedding.
umaps = ['UMAP_MultiVI_meta','UMAP_MultiVI_rmDblts_meta']

# The grid shape follows the two lists, so editing marker_genes or umaps cannot leave
# empty axes behind or index past the grid.
ncols = len(umaps)
nrows = len(marker_genes)
figsize = 4
wspace = 0.5
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize + figsize * wspace * (ncols - 1), nrows * figsize),
    squeeze=False,  # keep axs two-dimensional even for a single row or column
)
plt.subplots_adjust(wspace=wspace)

# Iterate over the embeddings and marker genes to create each subplot
for i, umap in enumerate(umaps):
    for j, gene in enumerate(marker_genes):
        sc.pl.embedding(adata, basis=umap, color=[gene], size=10, ax=axs[j, i], show=False, frameon=False, cmap=mymap)

# Display the plot
plt.show()

# add scvi and scANVI

In [None]:
# One neighbour graph per latent space, each stored under its own key so that
# leiden/UMAP can pick it via `neighbors_key`.
latent_graphs = (
    ('X_scANVI', 'scANVI'),
    ('X_scVI', 'scVI'),
)
for latent_key, graph_key in latent_graphs:
    sc.pp.neighbors(adata, use_rep=latent_key, key_added=graph_key)

## scANVI

In [None]:
sc.tl.leiden(adata, neighbors_key='scANVI')

In [None]:
sc.tl.umap(adata, neighbors_key='scANVI')

In [None]:
# Overview of the current UMAP: sample/technical metadata first, then QC metrics.
overview_keys = [
    'pretty name', 'kit', 'strain', 'sequencing',
    'n_counts', 'log_counts', 'n_genes', 'log_genes',
    'mt_frac', 'rp_frac', 'doublet_calls',
]
sc.pl.umap(
    adata,
    color=overview_keys,
    size=10,
    alpha=1,
    outline_width=(0.3, 0.0),
    ncols=4,
    wspace=.9,
    cmap=mymap,
    frameon=False,
)

In [None]:
gc.collect()

In [None]:
adata

### save leiden into obs and UMAP into obsm

In [None]:
# Keep this clustering under its own column; adata.obs['leiden'] is read again after the
# next sc.tl.leiden run, which writes its result there.
adata.obs['leiden_scANVI'] = adata.obs['leiden']

In [None]:
# Keep these coordinates under their own obsm key (used as `basis` in the combined plots);
# adata.obsm['X_umap'] is written again by the next sc.tl.umap call.
adata.obsm['UMAP_scANVI'] = adata.obsm['X_umap']

In [None]:
gc.collect()

## scVI

In [None]:
sc.tl.leiden(adata, neighbors_key='scVI')

In [None]:
sc.tl.umap(adata, neighbors_key='scVI')

In [None]:
# Overview of the current UMAP: sample/technical metadata first, then QC metrics.
overview_keys = [
    'pretty name', 'kit', 'strain', 'sequencing',
    'n_counts', 'log_counts', 'n_genes', 'log_genes',
    'mt_frac', 'rp_frac', 'doublet_calls',
]
sc.pl.umap(
    adata,
    color=overview_keys,
    size=10,
    alpha=1,
    outline_width=(0.3, 0.0),
    ncols=4,
    wspace=.9,
    cmap=mymap,
    frameon=False,
)

In [None]:
gc.collect()

In [None]:
adata

### save leiden into obs and UMAP into obsm

In [None]:
# Keep the scVI-based clustering under its own column, alongside the other leiden_* columns.
adata.obs['leiden_scVI'] = adata.obs['leiden']

In [None]:
# Keep the scVI-based coordinates under their own obsm key so the combined plots can
# address them via basis='UMAP_scVI'.
adata.obsm['UMAP_scVI'] = adata.obsm['X_umap']

In [None]:
gc.collect()

## combine plots

### all embeddings

In [None]:
# Embeddings compared in the 2 x 2 figures below. The order must match the `Titles` lists
# defined in those cells, because both are indexed with the same position.
umaps = ['UMAP_scVI','UMAP_scANVI','UMAP_MultiVI_meta','UMAP_MultiVI_rmDblts_meta']

#### samples

In [None]:
# 2 x 2 figure: one panel per embedding, all coloured by sample ('pretty name').
ncols = 2
nrows = 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, nrows * figsize))
plt.subplots_adjust(wspace=wspace)

Titles = ['scVI', 'scANVI', 'MultiVI', 'MultiVI w/o doublets']

# axs.flat walks the grid row by row, i.e. in the same order as `umaps` and `Titles`.
for panel, basis, panel_title in zip(axs.flat, umaps, Titles):
    sc.pl.embedding(
        adata, basis=basis, color=['pretty name'], size=4,
        ax=panel, show=False, title=panel_title,
        legend_loc='none', cmap=mymap, frameon=True,
    )
    panel.set_ylabel('UMAP2')
    panel.set_xlabel('UMAP1')

# Shared legend: categories and the colours stored for them in adata.uns.
categories = adata.obs['pretty name'].cat.categories
colors = adata.uns['pretty name_colors']

legend_patches = []
for category, category_color in zip(categories, colors):
    legend_patches.append(
        plt.Line2D([0], [0], marker='o', color='w', label=category,
                   markersize=10, markerfacecolor=category_color)
    )

fig.legend(handles=legend_patches, loc='center right', title='sample', frameon=False, fontsize=11)

# Leave the right-hand part of the figure free for the legend.
plt.tight_layout(rect=[0, 0, 0.8, 1])
plt.show()
fig.savefig('Embeddings_sample.png', bbox_inches='tight', dpi=300)


#### kit

In [None]:
# 2 x 2 figure: one panel per embedding, all coloured by kit.
ncols = 2
nrows = 2
figsize = 4
wspace = 0.75
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize + figsize * wspace * (ncols - 1), nrows * figsize),
)
plt.subplots_adjust(wspace=wspace)

Titles = ['scVI', 'scANVI', 'MultiVI', 'MultiVI w/o doublets']

# axs.flat walks the grid row by row, i.e. in the same order as `umaps` and `Titles`.
for panel, basis, panel_title in zip(axs.flat, umaps, Titles):
    sc.pl.embedding(adata, basis=basis, color=['kit'], size=4,
                    ax=panel, show=False, title=panel_title,
                    legend_loc='none', cmap=mymap, frameon=True)
    panel.set_ylabel('UMAP2')
    panel.set_xlabel('UMAP1')

# Shared legend: categories and the colours stored for them in adata.uns
# ('kit' must be categorical).
categories = adata.obs['kit'].cat.categories
colors = adata.uns['kit_colors']

legend_patches = [plt.Line2D([0], [0], marker='o', color='w', label=cat,
                             markersize=10, markerfacecolor=color)
                  for cat, color in zip(categories, colors)]

# The legend describes kits; it was previously titled 'sample'.
fig.legend(handles=legend_patches, loc='center right', title='kit', frameon=False, fontsize=11)

# Leave the right-hand part of the figure free for the legend.
plt.tight_layout(rect=[0, 0, 0.8, 1])
plt.show()
fig.savefig('Embeddings_kit.png', bbox_inches='tight', dpi=300)


#### Project

In [None]:
# 2 x 2 figure: one panel per embedding, all coloured by Project.
ncols = 2
nrows = 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, nrows * figsize))
plt.subplots_adjust(wspace=wspace)

Titles = ['scVI','scANVI','MultiVI','MultiVI w/o doublets']

# axs.flat walks the grid row by row, i.e. in the same order as `umaps` and `Titles`.
for panel, basis, panel_title in zip(axs.flat, umaps, Titles):
    sc.pl.embedding(
        adata, basis=basis, color=['Project'], size=4,
        ax=panel, show=False, title=panel_title,
        legend_loc='none', cmap=mymap, frameon=True,
    )
    panel.set_ylabel('UMAP2')
    panel.set_xlabel('UMAP1')

# Shared legend: categories and the colours stored for them in adata.uns.
categories = adata.obs['Project'].cat.categories
colors = adata.uns['Project_colors']

legend_patches = []
for category, category_color in zip(categories, colors):
    legend_patches.append(
        plt.Line2D([0], [0], marker='o', color='w', label=category,
                   markersize=10, markerfacecolor=category_color)
    )

fig.legend(handles=legend_patches, loc='center right', title='Project', frameon=False, fontsize=11)

# Leave the right-hand part of the figure free for the legend.
plt.tight_layout(rect=[0, 0, 0.8, 1])
plt.show()
fig.savefig('Embeddings_Project.png', bbox_inches='tight', dpi=300)

#### sequencing

In [None]:
# 2 x 2 figure: one panel per embedding, all coloured by sequencing.
ncols = 2
nrows = 2
figsize = 4
wspace = 0.75
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize + figsize * wspace * (ncols - 1), nrows * figsize),
)
plt.subplots_adjust(wspace=wspace)

Titles = ['scVI','scANVI','MultiVI','MultiVI w/o doublets']

# axs.flat walks the grid row by row, i.e. in the same order as `umaps` and `Titles`.
for panel, basis, panel_title in zip(axs.flat, umaps, Titles):
    sc.pl.embedding(adata, basis=basis, color=['sequencing'], size=4, ax=panel, show=False, title=panel_title, legend_loc='none', cmap=mymap, frameon=True)
    panel.set_ylabel('UMAP2')
    panel.set_xlabel('UMAP1')

# Shared legend. The colours in adata.uns follow the order of the categories, so the
# labels must come from the category order as well. `.unique()` returns values in order
# of first appearance and could pair labels with the wrong colours.
categories = adata.obs['sequencing'].astype('category').cat.categories
colors = adata.uns['sequencing_colors']

legend_patches = [plt.Line2D([0], [0], marker='o', color='w', label=cat,
                             markersize=10, markerfacecolor=color)
                  for cat, color in zip(categories, colors)]

fig.legend(handles=legend_patches, loc='center right', title='sequencing', frameon=False, fontsize= 11)

# Leave the right-hand part of the figure free for the legend.
plt.tight_layout(rect=[0, 0, 0.8, 1])
plt.show()
fig.savefig('Embeddings_sequencing.png', bbox_inches='tight', dpi=300)

#### strain

In [None]:
# 2 x 2 figure: one panel per embedding, all coloured by strain.
ncols = 2
nrows = 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, nrows * figsize))
plt.subplots_adjust(wspace=wspace)

Titles = ['scVI','scANVI','MultiVI','MultiVI w/o doublets']

# axs.flat walks the grid row by row, i.e. in the same order as `umaps` and `Titles`.
for panel, basis, panel_title in zip(axs.flat, umaps, Titles):
    sc.pl.embedding(
        adata, basis=basis, color=['strain'], size=4,
        ax=panel, show=False, title=panel_title,
        legend_loc='none', cmap=mymap, frameon=True,
    )
    panel.set_ylabel('UMAP2')
    panel.set_xlabel('UMAP1')

# Shared legend: categories and the colours stored for them in adata.uns.
categories = adata.obs['strain'].cat.categories
colors = adata.uns['strain_colors']

legend_patches = []
for category, category_color in zip(categories, colors):
    legend_patches.append(
        plt.Line2D([0], [0], marker='o', color='w', label=category,
                   markersize=10, markerfacecolor=category_color)
    )

fig.legend(handles=legend_patches, loc='center right', title='strain', frameon=False, fontsize=11)

# Leave the right-hand part of the figure free for the legend.
plt.tight_layout(rect=[0, 0, 0.82, 1])
plt.show()
fig.savefig('Embeddings_strain.png', bbox_inches='tight', dpi=300)

#### enriched

In [None]:
# 2 x 2 figure: one panel per embedding, all coloured by the 'enriched' annotation.
ncols = 2
nrows = 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, nrows * figsize))
plt.subplots_adjust(wspace=wspace)

Titles = ['scVI','scANVI','MultiVI','MultiVI w/o doublets']

# axs.flat walks the grid row by row, i.e. in the same order as `umaps` and `Titles`.
for panel, basis, panel_title in zip(axs.flat, umaps, Titles):
    sc.pl.embedding(
        adata, basis=basis, color=['enriched'], size=4,
        ax=panel, show=False, title=panel_title,
        legend_loc='none', cmap=mymap, frameon=True,
    )
    panel.set_ylabel('UMAP2')
    panel.set_xlabel('UMAP1')

# Shared legend: categories and the colours stored for them in adata.uns.
categories = adata.obs['enriched'].cat.categories
colors = adata.uns['enriched_colors']

legend_patches = []
for category, category_color in zip(categories, colors):
    legend_patches.append(
        plt.Line2D([0], [0], marker='o', color='w', label=category,
                   markersize=10, markerfacecolor=category_color)
    )

fig.legend(handles=legend_patches, loc='center right', title='enriched', frameon=False, fontsize=11)

# Leave the right-hand part of the figure free for the legend.
plt.tight_layout(rect=[0, 0, 0.82, 1])
plt.show()
fig.savefig('Embeddings_enriched.png', bbox_inches='tight', dpi=300)

#### enrichment proportion

In [None]:
# 2 x 2 figure: one panel per embedding, all coloured by enrichment proportion.
ncols = 2
nrows = 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, nrows * figsize))
plt.subplots_adjust(wspace=wspace)

Titles = ['scVI','scANVI','MultiVI','MultiVI w/o doublets']

# axs.flat walks the grid row by row, i.e. in the same order as `umaps` and `Titles`.
for panel, basis, panel_title in zip(axs.flat, umaps, Titles):
    sc.pl.embedding(
        adata, basis=basis, color=['enrichment proportion'], size=4,
        ax=panel, show=False, title=panel_title,
        legend_loc='none', cmap=mymap, frameon=True,
    )
    panel.set_ylabel('UMAP2')
    panel.set_xlabel('UMAP1')

# Shared legend: categories and the colours stored for them in adata.uns.
categories = adata.obs['enrichment proportion'].cat.categories
colors = adata.uns['enrichment proportion_colors']

legend_patches = []
for category, category_color in zip(categories, colors):
    legend_patches.append(
        plt.Line2D([0], [0], marker='o', color='w', label=category,
                   markersize=10, markerfacecolor=category_color)
    )

fig.legend(handles=legend_patches, loc='center right', title='enrichment proportion', frameon=False, fontsize=11)

# Leave the right-hand part of the figure free for the (wider) legend.
plt.tight_layout(rect=[0, 0, 0.72, 1])
plt.show()
fig.savefig('Embeddings_enrichment proportion.png', bbox_inches='tight', dpi=300)

#### initial cell type

In [None]:
# 2x2 panel of embeddings coloured by 'initial_cell_type', with one
# figure-level legend instead of a legend per panel.
ncols, nrows = 2, 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
plt.subplots_adjust(wspace=wspace)

# Panel headings; Titles[k] is shown above the embedding umaps[k]
# (assumes both lists are in the same order — TODO confirm).
Titles = ['scVI','scANVI','MultiVI','MultiVI w/o doublets']

# Walk the axes grid in row-major order: panel k uses umaps[k] / Titles[k].
for k, ax in enumerate(axs.flat):
    sc.pl.embedding(
        adata,
        basis=umaps[k],
        color=['initial_cell_type'],
        size=4,
        ax=ax,
        show=False,
        title=f'{Titles[k]}',
        legend_loc='none',
        cmap=mymap,
        frameon=True,
    )
    ax.set_ylabel('UMAP2')
    ax.set_xlabel('UMAP1')

# Category labels and their colours, read back from adata after plotting.
# .cat requires the column to be categorical.
categories = adata.obs['initial_cell_type'].cat.categories
colors = adata.uns['initial_cell_type_colors']

# One legend handle per category: a line-less round marker in that colour.
legend_patches = []
for cat, color in zip(categories, colors):
    legend_patches.append(
        plt.Line2D([0], [0], marker='o', color='w', label=cat,
                   markersize=10, markerfacecolor=color)
    )

# Shared legend on the right; the subplots are confined to the left 85 %.
fig.legend(handles=legend_patches, loc='center right', title='initial cell type', frameon=False, fontsize=11)
plt.tight_layout(rect=[0, 0, 0.85, 1])
plt.show()
# Written under a relative name, i.e. into the current working directory
fig.savefig('Embeddings_initial_cell_type.png', bbox_inches='tight', dpi=300)

#### line

In [None]:
# 2x2 panel of embeddings coloured by 'line', with one figure-level legend
# instead of a legend per panel.
ncols, nrows = 2, 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
plt.subplots_adjust(wspace=wspace)

# Panel headings; Titles[k] is shown above the embedding umaps[k]
# (assumes both lists are in the same order — TODO confirm).
Titles = ['scVI','scANVI','MultiVI','MultiVI w/o doublets']

# Walk the axes grid in row-major order: panel k uses umaps[k] / Titles[k].
for k, ax in enumerate(axs.flat):
    sc.pl.embedding(
        adata,
        basis=umaps[k],
        color=['line'],
        size=4,
        ax=ax,
        show=False,
        title=f'{Titles[k]}',
        legend_loc='none',
        cmap=mymap,
        frameon=True,
    )
    ax.set_ylabel('UMAP2')
    ax.set_xlabel('UMAP1')

# Category labels and their colours, read back from adata after plotting.
# .cat requires the column to be categorical.
categories = adata.obs['line'].cat.categories
colors = adata.uns['line_colors']

# One legend handle per category: a line-less round marker in that colour.
legend_patches = []
for cat, color in zip(categories, colors):
    legend_patches.append(
        plt.Line2D([0], [0], marker='o', color='w', label=cat,
                   markersize=10, markerfacecolor=color)
    )

# Shared legend on the right; the subplots are confined to the left 80 %.
fig.legend(handles=legend_patches, loc='center right', title='mouse line', frameon=False, fontsize=11)
plt.tight_layout(rect=[0, 0, 0.80, 1])
plt.show()
# Written under a relative name, i.e. into the current working directory
fig.savefig('Embeddings_line.png', bbox_inches='tight', dpi=300)

#### Lgr5

In [None]:
# 2x2 panel: 'Lgr5' shown on each of the first four embeddings in `umaps`.
ncols, nrows = 2, 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
plt.subplots_adjust(wspace=wspace)

# show=False keeps scanpy from rendering the figure until every panel is drawn.
# The grid is visited in row-major order, so panel k shows umaps[k].
for k, ax in enumerate(axs.flat):
    basis = umaps[k]
    sc.pl.embedding(
        adata,
        basis=basis,
        color=['Lgr5'],
        size=5,
        ax=ax,
        show=False,
        title=f'{basis} - LGR5',
        frameon=False,
        cmap=mymap,
    )
    ax.set_ylabel('UMAP2')
    ax.set_xlabel('UMAP1')

# Render the finished figure
plt.show()

#### Ngn3

In [None]:
# 2x2 panel: 'Neurog3' (titled "Ngn3") shown on each of the first four
# embeddings in `umaps`.
ncols, nrows = 2, 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
plt.subplots_adjust(wspace=wspace)

# show=False keeps scanpy from rendering the figure until every panel is drawn.
# The grid is visited in row-major order, so panel k shows umaps[k].
for k, ax in enumerate(axs.flat):
    basis = umaps[k]
    sc.pl.embedding(
        adata,
        basis=basis,
        color=['Neurog3'],
        size=5,
        ax=ax,
        show=False,
        title=f'{basis} - Ngn3',
        frameon=False,
        cmap=mymap,
    )
    ax.set_ylabel('UMAP2')
    ax.set_xlabel('UMAP1')

# Render the finished figure
plt.show()

#### Foxa2

In [None]:
# 2x2 panel: 'Foxa2' shown on each of the first four embeddings in `umaps`.
ncols, nrows = 2, 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
plt.subplots_adjust(wspace=wspace)

# show=False keeps scanpy from rendering the figure until every panel is drawn.
# The grid is visited in row-major order, so panel k shows umaps[k].
for k, ax in enumerate(axs.flat):
    basis = umaps[k]
    sc.pl.embedding(
        adata,
        basis=basis,
        color=['Foxa2'],
        size=5,
        ax=ax,
        show=False,
        title=f'{basis} - FoxA2',
        frameon=False,
        cmap=mymap,
    )
    ax.set_ylabel('UMAP2')
    ax.set_xlabel('UMAP1')

# Render the finished figure
plt.show()

#### Tph1

In [None]:
# 2x2 panel: 'Tph1' shown on each of the first four embeddings in `umaps`.
ncols, nrows = 2, 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
plt.subplots_adjust(wspace=wspace)

# show=False keeps scanpy from rendering the figure until every panel is drawn.
# The grid is visited in row-major order, so panel k shows umaps[k].
for k, ax in enumerate(axs.flat):
    basis = umaps[k]
    sc.pl.embedding(
        adata,
        basis=basis,
        color=['Tph1'],
        size=5,
        ax=ax,
        show=False,
        title=f'{basis} - Tph1',
        frameon=False,
        cmap=mymap,
    )
    ax.set_ylabel('UMAP2')
    ax.set_xlabel('UMAP1')

# Render the finished figure
plt.show()

#### Lyz1

In [None]:
# 2x2 panel: 'Lyz1' shown on each of the first four embeddings in `umaps`.
ncols, nrows = 2, 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
plt.subplots_adjust(wspace=wspace)

# show=False keeps scanpy from rendering the figure until every panel is drawn.
# The grid is visited in row-major order, so panel k shows umaps[k].
for k, ax in enumerate(axs.flat):
    basis = umaps[k]
    sc.pl.embedding(
        adata,
        basis=basis,
        color=['Lyz1'],
        size=5,
        ax=ax,
        show=False,
        title=f'{basis} - Lyz1',
        frameon=False,
        cmap=mymap,
    )
    ax.set_ylabel('UMAP2')
    ax.set_xlabel('UMAP1')

# Render the finished figure
plt.show()

#### initial cell type

In [None]:
# 2x2 panel: 'initial_cell_type' shown on each of the first four embeddings
# in `umaps`, each panel keeping its own legend (font size 8).
ncols, nrows = 2, 2
figsize = 4
wspace = 0.75
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
plt.subplots_adjust(wspace=wspace)

# show=False keeps scanpy from rendering the figure until every panel is drawn.
# The grid is visited in row-major order, so panel k shows umaps[k].
for k, ax in enumerate(axs.flat):
    basis = umaps[k]
    sc.pl.embedding(
        adata,
        basis=basis,
        color=['initial_cell_type'],
        size=5,
        legend_fontsize=8,
        ax=ax,
        show=False,
        title=f'{basis} - initial cell type',
        frameon=False,
        cmap=mymap,
    )
    ax.set_ylabel('UMAP2')
    ax.set_xlabel('UMAP1')

# Render the finished figure
plt.show()

#### line

In [None]:
# 2x2 panel: 'line' shown on each of the first four embeddings in `umaps`,
# each panel keeping its own legend (font size 8).
ncols, nrows = 2, 2
figsize = 4
wspace = 0.78
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
plt.subplots_adjust(wspace=wspace)

# show=False keeps scanpy from rendering the figure until every panel is drawn.
# The grid is visited in row-major order, so panel k shows umaps[k].
for k, ax in enumerate(axs.flat):
    basis = umaps[k]
    sc.pl.embedding(
        adata,
        basis=basis,
        color=['line'],
        size=5,
        legend_fontsize=8,
        ax=ax,
        show=False,
        title=f'{basis} - mouse line',
        frameon=False,
        cmap=mymap,
    )
    ax.set_ylabel('UMAP2')
    ax.set_xlabel('UMAP1')

# Render the finished figure
plt.show()

#### enrichment proportion

In [None]:
# 2x2 panel: 'enrichment proportion' shown on each of the first four
# embeddings in `umaps`, each panel keeping its own legend (font size 8).
ncols, nrows = 2, 2
figsize = 4
wspace = 0.78
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
plt.subplots_adjust(wspace=wspace)

# show=False keeps scanpy from rendering the figure until every panel is drawn.
# The grid is visited in row-major order, so panel k shows umaps[k].
for k, ax in enumerate(axs.flat):
    basis = umaps[k]
    sc.pl.embedding(
        adata,
        basis=basis,
        color=['enrichment proportion'],
        size=5,
        legend_fontsize=8,
        ax=ax,
        show=False,
        title=f'{basis} - enrichment proportion',
        frameon=False,
        cmap=mymap,
    )
    ax.set_ylabel('UMAP2')
    ax.set_xlabel('UMAP1')

# Render the finished figure
plt.show()

### QC variables

In [None]:
# Names of the QC variables to colour the embeddings by in the QC overview grid
QC_vars = [
    'n_counts',
    'log_counts',
    'n_genes',
    'log_genes',
    'mt_frac',
    'rp_frac',
    'doublet_calls',
]

In [None]:
# Restrict the overview grids below to two embeddings.
# NOTE: this rebinds `umaps`; the 2x2 cells above index umaps[0..3], so
# re-running them after this cell raises IndexError.
umaps = [
    'UMAP_scVI',
    'UMAP_scANVI',
]

In [None]:
# Overview grid: one row per embedding in `umaps`, one column per entry of
# `QC_vars`. The grid dimensions are derived from the two lists so the figure
# cannot drift out of sync with them.
nrows = len(umaps)
ncols = len(QC_vars)
figsize = 4
wspace = 0.5
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize + figsize * wspace * (ncols - 1), nrows * figsize),
    squeeze=False,  # always a 2-D array, so axs[i, j] works even for one row/column
)
plt.subplots_adjust(wspace=wspace)

# show=False keeps scanpy from rendering the figure before every panel
# has been drawn.
for i, umap in enumerate(umaps):
    for j, qc_var in enumerate(QC_vars):
        sc.pl.embedding(
            adata,
            basis=umap,
            color=[qc_var],
            size=10,
            ax=axs[i, j],
            show=False,
            cmap=mymap,
            title=f'{umap} - {qc_var}',
        )

# Render the finished figure
plt.show()


### metadata

In [None]:
# Metadata columns to colour the embeddings by, split into two groups so that
# each group fits into its own overview figure.
metadata_1 = [
    'sequencing',
    'strain',
    'Index Type',
    'sequencing machine',
    'kit',
]
metadata_2 = [
    'Project',
    'condition',
    'line',
    'enriched',
    'diet',
]

In [None]:
# Overview grid: one row per embedding in `umaps`, one column per entry of
# `metadata_1`. The grid dimensions are derived from the two lists so the
# figure cannot drift out of sync with them.
nrows = len(umaps)
ncols = len(metadata_1)
figsize = 4
wspace = 0.5
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize + figsize * wspace * (ncols - 1), nrows * figsize),
    squeeze=False,  # always a 2-D array, so axs[i, j] works even for one row/column
)
plt.subplots_adjust(wspace=wspace)

# show=False keeps scanpy from rendering the figure before every panel
# has been drawn.
for i, umap in enumerate(umaps):
    for j, key in enumerate(metadata_1):
        sc.pl.embedding(
            adata,
            basis=umap,
            color=[key],
            size=10,
            ax=axs[i, j],
            show=False,
            title=f'{umap} - {key}',
        )

# Render the finished figure
plt.show()

### other metadata

In [None]:
# Overview grid: one row per embedding in `umaps`, one column per entry of
# `metadata_2`. The column count used to be hard-coded to 6 while the list
# holds 5 entries, which left an empty axes at the end of every row; the
# dimensions are now taken from the lists themselves.
nrows = len(umaps)
ncols = len(metadata_2)
figsize = 4
wspace = 0.75
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize + figsize * wspace * (ncols - 1), nrows * figsize),
    squeeze=False,  # always a 2-D array, so axs[i, j] works even for one row/column
)
plt.subplots_adjust(wspace=wspace)

# show=False keeps scanpy from rendering the figure before every panel
# has been drawn.
for i, umap in enumerate(umaps):
    for j, key in enumerate(metadata_2):
        sc.pl.embedding(
            adata,
            basis=umap,
            color=[key],
            size=10,
            ax=axs[i, j],
            show=False,
            title=f'{umap} - {key}',
        )

# Render the finished figure
plt.show()


### marker genes

In [None]:
# Full candidate marker list. NOTE: `marker_genes` is rebound to a shorter
# list two cells further down before it is used for plotting.
# 'Mki67' is the mouse gene symbol for Ki-67 (was misspelled 'Mk67').
marker_genes = ['Foxa2','Neurog3','Tph1','Isl1','Pou2f3','Lgr5','Mki67','Dmbt1','Hmgb2','Top2a','Defa24','Gna11','Cd52','Muc2','Fcgbp','Lyz1']

In [None]:
# Replace adata.X with the 'sct_logcounts' layer. The previous content of X is
# not stored under another name in this cell. The per-gene 2x2 panels further
# up ran before this swap, i.e. against whatever X held at that point.
adata.X = adata.layers['sct_logcounts']

In [None]:
# Marker genes actually plotted in the grid below (one column per gene)
marker_genes = [
    'Foxa2',
    'Neurog3',
    'Tph1',
    'Isl1',
    'Pou2f3',
    'Lgr5',
    'Dmbt1',
    'Hmgb2',
    'Top2a',
    'Defa24',
    'Muc2',
    'Lyz1',
]

In [None]:
# Overview grid: one row per embedding in `umaps`, one column per entry of
# `marker_genes`. The grid dimensions are derived from the two lists so the
# figure cannot drift out of sync with them.
nrows = len(umaps)
ncols = len(marker_genes)
figsize = 4
wspace = 0.5
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize + figsize * wspace * (ncols - 1), nrows * figsize),
    squeeze=False,  # always a 2-D array, so axs[i, j] works even for one row/column
)
plt.subplots_adjust(wspace=wspace)

# show=False keeps scanpy from rendering the figure before every panel
# has been drawn.
for i, umap in enumerate(umaps):
    for j, gene in enumerate(marker_genes):
        sc.pl.embedding(
            adata,
            basis=umap,
            color=[gene],
            size=10,
            ax=axs[i, j],
            show=False,
            cmap=mymap,
            title=f'{umap} - {gene}',
            frameon=False,
        )

# Render the finished figure
plt.show()

In [None]:
# Steps 1-2: pull the raw count layer, the per-cell sample labels and the
# variable names out of adata.
raw_counts = adata.layers['raw_counts']
samples, var_names = adata.obs['sample'], adata.var_names

In [None]:

# Step 3: Create a new AnnData object holding the raw counts as X.
# The class is reached through scanpy (`sc` is imported in the setup cell)
# rather than through an `ad` alias, which the import cell does not define.
# obs/var are passed as copies so that the column drops applied to
# adata_imputation afterwards can never alter adata.obs / adata.var.
adata_imputation = sc.AnnData(X=raw_counts, obs=adata.obs.copy(), var=adata.var.copy())

In [None]:
# Display the freshly built object to check its contents before trimming
adata_imputation

In [None]:
# Per-cell annotation columns that are removed from the export object
obs_columns_to_drop = [
    'log_counts', 'n_counts_rank', 'log_genes', 'is_paneth',
    'final_doublets', 'final_doublets_cat', 'sample_density',
    'Project', 'sequencing', "modality (confounded with 'sequencing'",
    'condition', 'Internal ID', 'SeqID', 'kit', 'linienhintergrund',
    'strain', 'enriched', 'enrichment proportion', 'treatment', 'diet',
    'tissue', 'structure', 'target cell number', 'Read Length',
    'Index Type', 'sequencing machine', 'conditions_combined',
]
# Per-gene annotation columns that are removed from the export object
var_columns_to_drop = [
    'n_counts', 'n_cells', 'means', 'dispersions', 'dispersions_norm',
    'sct.detection_rate', 'sct.gmean', 'sct.variance',
    'sct.residual_variance', 'sct.variable', 'genome',
    'binomial_deviance', 'highly_deviant',
]

# Both drops are in place and raise KeyError if a listed column is missing
# (which also happens when this cell is run a second time).
adata_imputation.obs.drop(columns=obs_columns_to_drop, inplace=True)
adata_imputation.var.drop(columns=var_columns_to_drop, inplace=True)

# Carry the 'X_scANVI' entry of adata.obsm over to the new object
adata_imputation.obsm['X_scANVI'] = adata.obsm['X_scANVI']

In [None]:
# Copy the 'X_scANVI' entry of adata.obsm onto the new object.
# NOTE(review): the same assignment is already made at the end of the
# preceding cell, so this cell is redundant (but harmless).
adata_imputation.obsm['X_scANVI']=adata.obsm['X_scANVI']

In [None]:
# Carry the 'sct_counts' layer of adata over to the new object
adata_imputation.layers['sct_counts']=adata.layers['sct_counts']

In [None]:
# Display the trimmed object as a final check before it is written to disk
adata_imputation

In [None]:
# Write the object to disk. The file name is relative, so the file lands in
# the kernel's current working directory.
adata_imputation.write('adata_for_imputation.h5ad')