## Sample Integration

In [None]:
import os
import pandas as pd
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns
import anndata

# pyscenic env:
#import loompy
#import colorcet as cc
import scvi

# Initialize random seeds.
# Seeding only the stdlib `random` module leaves NumPy and scvi-tools unseeded,
# so each RNG the notebook relies on is seeded explicitly here.
import random
SEED = 111
random.seed(SEED)          # Python stdlib RNG
np.random.seed(SEED)       # NumPy global RNG
scvi.settings.seed = SEED  # scvi-tools global seed used for model training

# set a working directory
# The default is the original analysis location; set the ANALYSIS_WDIR
# environment variable to run the notebook elsewhere without editing this cell.
#wdir = "/ceph/project/tendonhca/akurjan/analysis/"
wdir = os.environ.get(
    "ANALYSIS_WDIR",
    "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/embryonic ScAndSp/",
)
os.chdir( wdir )

# folder structures (all relative to the working directory set above)
NORM_FOLDERNAME = "Normalisation/results/"
RESULTS_FOLDERNAME = "scVI/results/"
FIGURES_FOLDERNAME = "scVI/figures/"

# Create the output folders if they are missing. exist_ok=True replaces the
# separate os.path.exists() check, so there is no check-then-create gap.
for _folder in (RESULTS_FOLDERNAME, FIGURES_FOLDERNAME):
    os.makedirs(_folder, exist_ok=True)

def savesvg(fname: str, fig, folder: str=FIGURES_FOLDERNAME) -> None:
    """
    Write ``fig`` to ``folder/fname`` as an SVG (vector) image.

    ``fig.tight_layout()`` is applied first, then ``fig.savefig`` is called
    with ``format='svg'``; any object exposing those two methods is accepted.
    """
    fig.tight_layout()
    target_path = os.path.join(folder, fname)
    fig.savefig(target_path, format='svg')
    
# Set folder for saving figures into
# (this is where scanpy plotting calls that pass `save=...` write their files)
sc.settings.figdir = FIGURES_FOLDERNAME

# Print date and time of this run, as a provenance record in the notebook output:
import datetime
e = datetime.datetime.now()
print ("Current date and time = %s" % e)

# Set other settings
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
# Record the versions of the loaded packages alongside the results
sc.logging.print_versions()
# dpi applies to inline figures, dpi_save to figures written to disk
sc.set_figure_params(dpi=150, fontsize=10, dpi_save=600)

In [None]:
def plot_umaps(anndata, parameters: list, filename: str):
    """
    Draw one UMAP panel per entry of ``parameters``, stacked vertically, and
    save the figure as SVG via ``savesvg``.

    Parameters
    ----------
    anndata
        Object passed straight to ``sc.pl.umap``. Note that inside this
        function the parameter name shadows the imported ``anndata`` module.
    parameters
        Keys passed one at a time to ``sc.pl.umap(color=...)``; each is also
        used as its panel title.
    filename
        File name handed to ``savesvg``.
    """
    n_plots = len(parameters)
    # squeeze=False always yields a 2-D array of Axes. Without it, a single
    # parameter makes plt.subplots return a bare Axes and `axs[i]` fails.
    fig, axs = plt.subplots(n_plots, 1, figsize=(8, 4*n_plots), squeeze=False)
    for ax, param in zip(axs[:, 0], parameters):
        sc.pl.umap(anndata, color=param, ax=ax, show=False, frameon=False, s=50)
        ax.set_title(param)
    plt.tight_layout()
    savesvg(filename, fig)
    plt.show()

# EMBRYONIC DATA PREP

In [None]:
adata = sc.read_h5ad(os.path.join(NORM_FOLDERNAME, 'sc_normalized_adata.h5ad'))
adata

In [None]:
plot_umaps(adata, ['hospital_id', 'norm_sample_stage', 'samplename', 'kit', 'seq_protocol', 'sex'], 
           filename = 'sc_UMAP_plots_full_unintegrated.svg')

In [None]:
def convert_uint_to_int_single(adata):
    """
    Replace unsigned-integer dtypes on an AnnData-like object, in place.

    * Fields of ``adata.var`` and ``adata.obs``: uint32 -> float32,
      uint64 -> float64.
    * ``adata.X`` and every entry of ``adata.layers``: uint32 -> int32,
      uint64 -> int64.

    One message is printed per conversion performed. Returns None.
    """
    annotation_casts = (('uint32', 'float32'), ('uint64', 'float64'))
    matrix_casts = (('uint32', 'int32'), ('uint64', 'int64'))

    # Annotation tables: var first, then obs
    for attr_name in ('var', 'obs'):
        table = getattr(adata, attr_name)
        if isinstance(table, np.ndarray):
            # Structured array: walk its named fields
            field_names = table.dtype.names
        elif isinstance(table, pd.DataFrame):
            # DataFrame: walk its columns
            field_names = table.columns
        else:
            continue
        for field in field_names:
            for source, target in annotation_casts:
                if table[field].dtype == source:
                    table[field] = table[field].astype(target)
                    print(f"Converted {attr_name}.{field} from {source} to {target}.")
                    break

    # Main matrix
    for source, target in matrix_casts:
        if adata.X.dtype == source:
            adata.X = adata.X.astype(target)
            print(f"Converted X from {source} to {target}.")
            break

    # Layers (only existing keys are reassigned, so the mapping size is stable)
    for layer_key, layer_val in adata.layers.items():
        for source, target in matrix_casts:
            if layer_val.dtype == source:
                adata.layers[layer_key] = layer_val.astype(target)
                print(f"Converted layer {layer_key} from {source} to {target}.")
                break

In [None]:
convert_uint_to_int_single(adata)

In [None]:
adata.X = adata.layers["counts"].copy()
print(adata.X[1:5,1:5])

In [None]:
del adata.raw

In [None]:
adata.obs['norm_sample_stage'].value_counts()

In [None]:
sc.pp.filter_genes(adata, min_cells=20)

In [None]:
adata.var_names = adata.var_names.astype('str')
adata.var_names_make_unique()

In [None]:
adata.obs['samplename'].cat.categories

# Hyperparameter Tuning

In [None]:
import ray
# hyperopt is not referenced by name in this notebook; presumably imported to
# check it is installed for searcher='hyperopt' below — TODO confirm.
import hyperopt
from ray import tune
from scvi import autotune

model_cls = scvi.model.SCVI
# No layer is passed, so the model is registered on adata.X, which an earlier
# cell set to a copy of the "counts" layer.
# NOTE(review): no covariates are registered here, unlike the setup_anndata call
# used for the final model (categorical_covariate_keys=[...]); confirm that
# tuning without covariates is intended.
model_cls.setup_anndata(adata)

# Build the tuner for this model class and print its summary
scvi_tuner = autotune.ModelTuner(model_cls)
scvi_tuner.info()

In [None]:

search_space = {
    "n_latent": tune.choice([10, 14, 6]),
    "n_hidden": tune.choice([256, 128]),
    "n_layers": tune.choice([1, 2, 3]),
    "gene_likelihood": tune.choice(['zinb', 'nb'])
}

In [None]:
ray.init(log_to_driver=False)

In [None]:
results = scvi_tuner.fit(
    adata,
    metric="validation_loss",
    search_space=search_space,
    searcher='hyperopt',
    num_samples=20,
    max_epochs=150,
    resources={"cpu": 20, "gpu": 1},
)

In [None]:
print(results.model_kwargs)
print(results.train_kwargs)

In [None]:
ray.shutdown()

# Integration with scVI
As a first step, we assume that the data is completely unlabelled and we wish to find common axes of variation between the two datasets. There are many methods available in scanpy for this purpose (BBKNN, Scanorama, etc.). In this notebook we present scVI. To run scVI, we simply need to:

1) Register the AnnData object with the correct key to identify the sample and the layer key with the count data.
2) Create an SCVI model object.

In [None]:
scvi.model.SCVI.setup_anndata(adata, #adata_hvg
                              layer="counts", 
                              #batch_key=,
                              categorical_covariate_keys=["samplename", "kit", "seq_protocol", "sex"], 
                              )

#### scVI (single-cell Variational Inference)
*scVI* is a hierarchical Bayesian model for single-cell RNA sequencing data with conditional distributions parametrized by neural networks. Working as a hybrid between a neural network and a bayesian network, scVI performs data harmonization. VAE refers to variational auto-encoders for single-cell gene expression data. scVI is similar to VAE as it tries to bring a more suitable structure to the latent space. While VAE allows users to make observations in a semi-supervised fashion, scVI is easier to train and specific cell-type labels for the dataset are not required in the pure unsupervised case.

Define the scVI model
First, we define the model and its hyperparameters:

n_hidden: number of units in each hidden layer = 256
n_latent: number of dimensions in the shared latent space = 14 (how many dimensions in z)
n_layers: number of hidden layers in the neural network = 3
dispersion: ‘gene’: each gene has its own dispersion parameter; ‘gene-batch’: each gene in each batch has its own dispersion parameter (‘gene-batch’ is used below)



In [None]:
vae = scvi.model.SCVI(adata, n_hidden = 256, n_latent=14, n_layers=3, dropout_rate=0.1, dispersion="gene-batch", gene_likelihood='nb')
vae

In [None]:
vae.view_anndata_setup(adata)

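Then, we train the model on the dataset.
Here train_size=0.9 and validation_size=0.1, so 10% of the cells are held out and their ELBO (`elbo_validation`) is monitored for early stopping. In a purely unsupervised setting one could instead use train_size=1.0 so that all cells are used for training.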

In [None]:
%%time
vae.train(max_epochs = 400, train_size = 0.9, validation_size = 0.1, 
          use_gpu=True, accelerator='gpu', 
          check_val_every_n_epoch=1,
          early_stopping=True,
          early_stopping_patience=20,
          early_stopping_monitor="elbo_validation"
         )

In [None]:
# Ensure convergence
# Build a fresh frame rather than adding a column to vae.history["elbo_train"]
# in place, so the model's stored history is left untouched and the cell can be
# re-run safely.
train_test_results = pd.concat(
    [vae.history["elbo_train"], vae.history["elbo_validation"]], axis=1
)
train_test_results.iloc[10:].plot(logy=True)  # exclude first 10 epochs
plt.show()

In [None]:
vae.save(os.path.join(RESULTS_FOLDERNAME, "EmbryonicFullscVI_SamplenameKitSeqprotSex/"), overwrite=True)

Once the training is done, we can evaluate the latent representation of each cell in the dataset and add it to the AnnData object

In [None]:
vae = scvi.model.SCVI.load(os.path.join(RESULTS_FOLDERNAME, "EmbryonicFullscVI_SamplenameKitSeqprotSex/"), 
                           use_gpu=True, adata=adata)
vae

In [None]:
adata.obsm["X_scVI"] = vae.get_latent_representation()

Now, we use the scVI latent space to generate the same UMAP plots to see if scVI successfully accounts for batch effects in the data.

In [None]:
# use scVI latent space for UMAP generation
sc.pp.neighbors(adata, use_rep="X_scVI", metric='correlation')
sc.tl.umap(adata)

In [None]:
plot_umaps(adata, ['hospital_id', 'norm_sample_stage', 'samplename', 'kit', 'seq_protocol', 'sex'], 
           filename = 'sc_UMAP_plots_scVIfull_samplecorrected.svg')

In [None]:
adata.write(os.path.join(RESULTS_FOLDERNAME, 'sc_scVI.h5ad'))

# FINDING NEm-... TENDON

In [None]:
# Taken from the Supplementary Table 1 from Teichmann Group's human embryonic limb paper.
teichmann_markers_dict = {
    'Proximal Mesenchyme (ProxMes)': ['MEIS2', 'WT1'],
    'Mesenchyme 1 (Mes1)': ['MEIS2'],
    'Mesenchyme 2 (Mes2)': ['KLF2'],
    'Mesenchyme 3 (Mes3)': ['CITED1'],
    'Mesenchyme 4 (Mes4)': ['PRAC1'],
    'ISL1+Mesenchyme (ISL1+Mes)': ['ISL1'],
    'Transitional Mesenchyme (TransMes)': ['IRX1', 'IRX2'],
    'RDH10+ Distal Mesenchyme (RDH10+DistalMes)': ['RDH10'],
    'Distal Mesenchyme (DistalMes)': ['MSX1', 'LHX2'],
    'Mesencondensation cells (MesCond)': ['PAX9'],
    'Osteochondral progenitor (OCP)': ['PRRX1'],
    'InterZone': ['GDF5'],
    'Chondrogenic Progenitor (ChondroProg)': ['SOX9', 'WWP2'],
    'Resting Chondrocyte (RestingChon)': ['UCMA'],
    'Proliferating Chondrocyte (ProlifChon)': ['SOX9', 'COL2A1', 'G2/M/S cell cycle phase'],
    'Prehypertrophic Chondrocyte (PrehyperChon)': ['IHH'],
    'Hypertrophic Chondrocyte (HyperChon)': ['COL10A1'],
    'Perichondrium (PeriChon)': ['THBS2'],
    'Osteoblast (OsteoB)': ['RUNX2', 'SP7'],
    'Articular Chondrocyte (ArtiChon)': ['PRG4'],
    'Tendon Progenitor (TenoProg)': ['SCX'],
    'Tenocyte (Teno)': ['TNMD'],
    'Perimysium': ['GCG'],
    'PAX3+ Myogenic Progenitor (PAX3+MyoProg)': ['PAX3'],
    'PAX7+ Myogenic Progenitor (PAX7+MyoProg)': ['PAX7'],
    'Myoblast (MyoB1/MyoB2)': ['MYOD1'],
    'Myocyte (MyoC1/MyoC2)': ['MYOG'],
    'MYH3+ Myocyte (MYH3+MyoC)': ['MYH3'],
    'MYL3+ Myocyte (MYL3+MyoC)': ['MYL3'],
    'Intermediate Muscular Fibroblast (InterMusFibro)': ['ALDH1A3', 'MYH3'],
    'ADH+ Fibroblast (ADH+Fibro)': ['ADH1B', 'ALDH1A3'],
    'STMN2+ Fibroblast (STMN2+Fibro)': ['STMN2'],
    'MFAP5+ Fibroblast (MFAP5+Fibro)': ['MFAP5'],
    'HOXC5+ Dermal Fibroblast Progenitor (HOXC5+DermFibroProg)': ['HOXC5'],
    'F10+ Dermal Fibroblast Progenitor (F10+DermFibroProg)': ['F10'],
    'Dermal Fibroblast (DermFibro)': ['TWIST2'],
    'Smooth Muscle Progenitor (SMProg)': ['IGFBP7'],
    'Smooth Muscle (SMC)': ['PLN', 'ACTA2'],
    'Pericyte': ['KCNJ8'],
    'Neural Fibroblast (NeuralFibro)': ['FOXS1', 'PI16'],
    'Neuronal': ['SST'],
    'Schwann Progenitor (SchwannProg)': ['MPZ'],
    'Schwann': ['MPZ', 'MBP'],
    'Synaptic Schwann (SynapSchwann)': ['COL20A1'],
    'Melanocyte (Melano)': ['MLANA'],
    'Periderm': ['KRT4'],
    'Basal': ['GJB6'],
    'Apical Ectodermal Ridge Basal (AER-Basal)': ['FGF8'],
    'Arterial Endothelial (ArterialEndo)': ['PRND'],
    'Venous Endothelial (VenousEndo)': ['PLVAP'],
    'Lymphoendothelial (LymphEndo)': ['CCL21'],
    'LMPP/ELP (Lymphoid-primed multipotent progenitor/Early lymphoid progenitors)': ['IL7R'],
    'GMPs (Common Myeloid Progenitors/Granulocyte-monocyte Progenitors)': ['PRTN3'],
    'Natural Killer (NK)': ['KLRC1'],
    'B Cells (CD79B)': ['CD79B'],
    'Myelocyte': ['CAMP'],
    'DC2 (Dendritic Cell 2)': ['CLEC10A'],
    'Monocyte': ['S100A12'],
    'Macrophage (Macro)': ['C1QA'],
    'Mast': ['TPSB2'],
    'Megakaryocyte (Megakaryo)': ['CLEC1B'],
    'Definite Erythrocyte (DefErythro)': ['HBB'],
    'Definite Reticulocyte (DefReticulo)': ['TSPO2'],
    'Primitive Erythrocyte 1 (PrimErythro1)': ['UCA1'],
    'Primitive Erythrocyte 2 (PrimErythro2)': ['HBE1'],
}

In [None]:
# Restrict every marker list to genes that are present in adata.var_names.
# Cell types keep their entry even when no marker survives (empty list).
all_genes_in_adata = set(adata.var_names)

teichmann_markers_dict = {
    cell_type: [gene for gene in genes if gene in all_genes_in_adata]
    for cell_type, genes in teichmann_markers_dict.items()
}
print(teichmann_markers_dict)

In [None]:
#sc.tl.leiden(adata, resolution = 0.4, key_added = 'leiden_04scvi')
#sc.tl.leiden(adata, resolution = 0.6, key_added = 'leiden_06scvi')
sc.tl.leiden(adata, resolution = 0.8, key_added = 'leiden_08scvi')
sc.pl.umap(adata, color=[#"leiden_04scvi", "leiden_06scvi", 
    "leiden_08scvi"], legend_loc="on data", frameon=False)

In [None]:
sc.pl.dotplot(adata, teichmann_markers_dict, 'leiden_08scvi',
              use_raw=False, layer="log1p_norm", 
              vmax=3, vmin=0, 
             #cmap='RdBu_r',
              #key='wilcoxon_05scvi',
              #save='topDEGs05scvi_norm.svg'
             )

In [None]:
sc.pl.umap(adata, color=['SCX', 'MKX', 'TNMD', 'KERA', 'FMOD'], 
           frameon=False,
           layer='log1p_norm',
           vmin=0,
           vmax="p99",
           sort_order=False,
           cmap="Reds",
           save='_embryonicfull_tenocyte.svg')

# TENDON REFERENCE ANNOTATION FROM SPATIAL DATA

In [None]:
adata=sc.read_h5ad(os.path.join(RESULTS_FOLDERNAME, 'sc_scVI.h5ad'))
adata.var_names = adata.var_names.astype('str')
adata.var_names_make_unique()
#adata.obs.index = adata.obs['samplename'].astype('str') + '_' + adata.obs['barcode'].astype('str') + '-1'
print(adata.X[0:5,0:5])

In [None]:
adata.X = adata.layers['counts'].copy()
sc.pp.normalize_total(adata, inplace=True, target_sum=None)
sc.pp.log1p(adata)
adata.layers['normcounts'] = adata.X.copy()
print(adata.X[0:5,0:5])

In [None]:
adata.var

In [None]:
adata.var.index = adata.var['ensembl_gene_id']

In [None]:
SPATIAL_FOLDERNAME = 'Spatial/results/'
tendonref = sc.read_h5ad(os.path.join(SPATIAL_FOLDERNAME, 'trainingset.h5ad'))
tendonref

In [None]:
print(tendonref.X[0:5,0:5])

In [None]:
tendonref.layers['counts'] = tendonref.X.copy()
sc.pp.normalize_total(tendonref, inplace=True)
sc.pp.log1p(tendonref)
tendonref.layers['normcounts'] = tendonref.X.copy()

In [None]:
print(tendonref.X[0:5,0:5])

In [None]:
tendonref.var

In [None]:
# Need to harmonise genes
#training set: tendonref
#testing set: adata

concatenated = sc.concat((tendonref, adata))
concatenated

In [None]:
sc.pp.highly_variable_genes(concatenated, flavor="cell_ranger", n_top_genes=2500, subset=True)

In [None]:
#del adata.obsm['_scvi_extra_categorical_covs']

In [None]:
var_names = concatenated.var_names
test = adata[:, var_names].copy()
train = tendonref[:, var_names]

In [None]:
test

In [None]:
train.obs['Tendon'].value_counts()

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
import random

#training set: tendonref
#testing set: adata

def label_train(x):
    """Return 1 for 'patellar tendon' or 'quadriceps tendon', otherwise 0."""
    tendon_labels = ('patellar tendon', 'quadriceps tendon')
    return 1 if x in tendon_labels else 0
    
# Binary target: 1 for the two tendon labels, 0 for everything else
y = train.obs.Tendon.map(label_train)
sel = RandomForestClassifier(n_estimators=1000, random_state=666) 
# NOTE(review): the figures below look like
# "<n_estimators> and <random_state> gives <number of cells predicted as tendon>"
# — confirm against the prediction count computed in the next cell.
#500 and 50 gives 3115
#1000 and 666 gives 3562 - chosen
#3000 and 666 gives 3395
#3000 and 3 gives 3462
# Train on the reference set's expression matrix restricted to the shared genes
sel.fit(train.X, y)

In [None]:
predictions = sel.predict(test.X)
np.sum(predictions == 1)

In [None]:
test.obs['predictions'] = predictions
sc.pl.umap(test, color=['predictions'], frameon=False,
           layer='normcounts',
           vmin=0,
           vmax="p99",
           sort_order=False,
           cmap="Reds",
           #save='embryonic_tendon_predictions.svg'
          )

In [None]:
test.write(os.path.join(RESULTS_FOLDERNAME, 'predictions.h5ad'))

In [None]:
del concatenated

In [None]:
# `test` was built as adata[:, var_names].copy(), so `predictions` is already in
# adata's cell order; the former placeholder assignment of 0 was overwritten
# immediately and has been dropped.
adata.obs['predictions'] = predictions

In [None]:
adata.obs['predictions'].value_counts()

In [None]:
adata.var.index = adata.var['Gene']

In [None]:
# Drop a stale label column left over from a previous run. On a fresh kernel the
# column does not exist yet (it is created in the next cell), and a bare `del`
# would raise KeyError.
if 'predictionsn' in adata.obs:
    del adata.obs['predictionsn']

In [None]:
# Human-readable version of the 0/1 classifier output.
# An explicit map replaces the chained Series.replace calls on a categorical
# column, which newer pandas versions deprecate.
adata.obs['predictionsn'] = (
    adata.obs['predictions']
    .astype(int)
    .map({1: 'Tendon', 0: 'Not Tendon'})
    .astype('category')
)
adata.obs['predictionsn'].value_counts()

In [None]:
sc.pl.umap(adata, 
           color=['SCX', 'MKX', 'TNMD', 'KERA', 'FMOD', 'THBS2', 'THBS4', 'EGR1', 
                  'ABI3BP', 'GAS2', 'COL3A1', 'COL1A1', 'COL6A6', 'FGF14', 'SOX9', 'predictions'
                 ], 
           layer='normcounts',
           vmin=0,
           vmax="p99",
           sort_order=False,
           frameon=False,
           cmap="Purples",
           save = '_embryonicfull_tenocyte_predictions.png'
          )

In [None]:
adata.write(os.path.join(RESULTS_FOLDERNAME, 'sc_scVI.h5ad'))

In [None]:
adata.var.index = adata.var['ensembl_gene_id']

In [None]:
sc.tl.score_genes(adata, gene_list=tendonref.var_names, ctrl_size=len(tendonref.var_names), score_name='tendon_sp_score',
                  random_state=10)

In [None]:
# Plot the distribution
plt.figure(figsize=(10, 6))
sns.histplot(adata.obs['tendon_sp_score'], bins=50, kde=True, color='skyblue')
plt.title('Distribution of Tendon Sp Score')
plt.xlabel('Tendon Sp Score')
plt.ylabel('Frequency')
plt.show()

In [None]:
sc.pl.umap(adata, color=['tendon_sp_score'], 
           frameon=False,
           vmax=0.225,vmin=0.1,vcenter=0.17,
           sort_order=False,
           cmap="bwr")

# SUBSETTING

In [None]:
adata = sc.read_h5ad(os.path.join(RESULTS_FOLDERNAME, 'sc_scVI.h5ad'))
adata

In [None]:
adata.var_names = adata.var_names.astype('str')
adata.var_names_make_unique()

In [None]:
adata.uns['log1p']['base'] = None
adata.obs['predictions'] = adata.obs['predictions'].astype("category")
sc.tl.rank_genes_groups(adata, groupby='predictions', method='wilcoxon', key_added='wilcoxon_pred', 
                        use_raw=False, layer='log1p_norm')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False, key='wilcoxon_pred')

In [None]:
adata = adata[adata.obs['predictions']==1, :].copy()
adata

In [None]:
# List of batch effect columns to check
columns_to_check = ["samplename", "kit", "seq_protocol", "sex"]

# Threshold for minimum number of cells in each batch category
threshold = 20
mask = pd.Series(True, index=adata.obs.index)

# A cell is kept only if, for every column, its category has >= threshold cells
for column in columns_to_check:
    value_counts = adata.obs[column].value_counts()
    categories_to_exclude = value_counts[value_counts < threshold].index
    mask &= ~adata.obs[column].isin(categories_to_exclude)

# .copy() materialises the subset: the following cells delete .raw and overwrite
# .X, which should not be done on a view of the parent object.
adata = adata[mask.to_numpy()].copy()
adata

In [None]:
del adata.raw

In [None]:
adata.X = adata.layers['counts'].copy()
print(adata.X[0:5,0:5])

In [None]:
print(adata.layers['normcounts'][0:5,0:5])

In [None]:
sc.pp.normalize_total(adata, target_sum=None, inplace=True)
sc.pp.log1p(adata)
adata.layers["log1p_norm"] = adata.X.copy()
print(adata.X[0:5,0:5])

In [None]:
adata.layers["log1p_norm"] = adata.X.copy()

In [None]:
# NOTE(review): adata.X was replaced by normalize_total + log1p output in the
# cells above, so min_counts=5 appears to threshold the per-gene sum of
# log-normalised values rather than raw counts — confirm this is intended, or
# filter on the "counts" layer instead.
sc.pp.filter_genes(adata, min_counts=5, inplace=True)
# Keep cells with at least 200 detected genes
sc.pp.filter_cells(adata, min_genes=200)

In [None]:
adata

In [None]:
sc.pp.highly_variable_genes(adata, flavor="cell_ranger", n_top_genes=2500, subset=False)

In [None]:
sc.pp.scale(adata)
adata.layers["scaled"] = adata.X.copy()

In [None]:
adata.obsm['X_pca'] = sc.pp.pca(adata[:,adata.var.highly_variable].layers["scaled"], n_comps=50, svd_solver="arpack")

In [None]:
sc.pp.neighbors(adata, use_rep="X_pca", metric='correlation')
sc.tl.umap(adata)
plot_umaps(adata, ['hospital_id', 'norm_sample_stage', 'samplename', 'kit', 'seq_protocol', 'sex'], 
           filename = 'sc_UMAP_plots_unintegrated_PredictedSubset.svg')

In [None]:
sc.pl.umap(adata, 
           color=['SCX', 'MKX', 'TNMD', 'KERA', 'FMOD', 'THBS2', 'THBS4', 'EGR1', 
                  'ABI3BP', 'GAS2', 'COL3A1', 'COL1A1', 'COL6A6', 'FGF14', 'SOX9'], 
           layer='log1p_norm',
           vmin=0,
           vmax="p99",
           sort_order=False,
           frameon=False,
           cmap="Purples",
           save = '_embryonicSubsetUnintegrated_tenocyte.svg')

In [None]:
adata.X = adata.layers["counts"].copy()
print(adata.X[1:5,1:5])

In [None]:
import ray
import hyperopt
from ray import tune
from scvi import autotune

model_cls = scvi.model.SCVI
model_cls.setup_anndata(adata)
scvi_tuner = autotune.ModelTuner(model_cls)
scvi_tuner.info()

In [None]:
search_space = {
    "n_latent": tune.choice([10, 14, 6]),
    "n_hidden": tune.choice([128, 256]),
    "n_layers": tune.choice([1, 3]),
    "gene_likelihood": tune.choice(['nb'])
}

In [None]:
ray.init(log_to_driver=False)

In [None]:
results = scvi_tuner.fit(
    adata,
    metric="validation_loss",
    search_space=search_space,
    searcher='hyperopt',
    num_samples=30,
    max_epochs=120,
    resources={"cpu": 20, "gpu": 1},
)

In [None]:
print(results.model_kwargs)
print(results.train_kwargs)

In [None]:
ray.shutdown()

In [None]:
scvi.model.SCVI.setup_anndata(adata, 
                              layer="counts", 
                              categorical_covariate_keys=["samplename", "seq_protocol", "sex"], 
                              )

In [None]:
vae = scvi.model.SCVI(adata, n_hidden = 128, n_latent=14, n_layers=1, dropout_rate=0.1, dispersion="gene-batch", 
                      gene_likelihood='nb')
vae.view_anndata_setup(adata)

In [None]:
%%time
vae.train(max_epochs = 400, train_size = 0.9, validation_size = 0.1, 
          use_gpu=True, accelerator='gpu', 
          check_val_every_n_epoch=1,
          early_stopping=True,
          early_stopping_patience=23,
          early_stopping_monitor="elbo_validation"
         )

In [None]:
# Ensure convergence
# Build a fresh frame rather than adding a column to vae.history["elbo_train"]
# in place, so the model's stored history is left untouched and the cell can be
# re-run safely.
train_test_results = pd.concat(
    [vae.history["elbo_train"], vae.history["elbo_validation"]], axis=1
)
train_test_results.iloc[10:].plot(logy=True)  # exclude first 10 epochs
plt.show()

In [None]:
adata.obsm["X_scVI"] = vae.get_latent_representation()
sc.pp.neighbors(adata, use_rep="X_scVI", metric='correlation')
sc.tl.umap(adata)

In [None]:
# NOTE(review): this repeats the plot_umaps helper defined near the top of the
# notebook; consider deleting this copy and keeping a single definition.
def plot_umaps(anndata, parameters: list, filename: str):
    """
    Draw one UMAP panel per entry of ``parameters``, stacked vertically, and
    save the figure as SVG via ``savesvg``.

    Parameters
    ----------
    anndata
        Object passed straight to ``sc.pl.umap``. Note that inside this
        function the parameter name shadows the imported ``anndata`` module.
    parameters
        Keys passed one at a time to ``sc.pl.umap(color=...)``; each is also
        used as its panel title.
    filename
        File name handed to ``savesvg``.
    """
    n_plots = len(parameters)
    # squeeze=False always yields a 2-D array of Axes. Without it, a single
    # parameter makes plt.subplots return a bare Axes and `axs[i]` fails.
    fig, axs = plt.subplots(n_plots, 1, figsize=(8, 4*n_plots), squeeze=False)
    for ax, param in zip(axs[:, 0], parameters):
        sc.pl.umap(anndata, color=param, ax=ax, show=False, frameon=False, s=50)
        ax.set_title(param)
    plt.tight_layout()
    savesvg(filename, fig)
    plt.show()
plot_umaps(adata, ['hospital_id', 'norm_sample_stage', 'samplename', 'kit', 'seq_protocol', 'sex'],
           filename = 'sc_UMAP_plots_scVI_PredictedSubset.svg')

In [None]:
vae.save(os.path.join(RESULTS_FOLDERNAME, "EmbryonicTENDONscVI_SamplenameSeqprotSex/"), overwrite=True)

In [None]:
adata.write(os.path.join(RESULTS_FOLDERNAME, 'predictedsubset_scVI.h5ad'))

# ANNOTATION

In [None]:
adata = sc.read_h5ad(os.path.join(RESULTS_FOLDERNAME, 'predictedsubset_scVI.h5ad'))
adata

In [None]:
sc.pl.umap(adata, 
           color=['SCX', 'MKX', 'TNMD', 'KERA', 'FMOD', 'THBS2', 'THBS4', 'EGR1', 
                  'ABI3BP', 'GAS2', 'COL3A1', 'COL1A1', 'COL6A6', 'FGF14', 'SOX9', 'NEGR1'], 
           layer='log1p_norm',
           vmin=0,
           vmax="p99",
           sort_order=False,
           frameon=False,
           cmap="Purples",
           save = '_embryonicSubsetscVI_tenocyte.svg')

In [None]:
sc.tl.leiden(adata, resolution = 0.3, key_added = 'leiden_03scvi')
sc.pl.umap(adata, color=["leiden_03scvi"], legend_loc="on data", frameon=False)

In [None]:
# Keep the unmerged clustering for reference
adata.obs['leiden_03scvi_orig'] = adata.obs['leiden_03scvi']
# Merge cluster '4' into cluster '1'. Going through Series.replace and casting
# back keeps the column categorical; np.where returned a plain object array and
# dropped the dtype, which made the earlier astype("category") a no-op.
adata.obs['leiden_03scvi'] = (
    adata.obs['leiden_03scvi']
    .astype(object)
    .replace({'4': '1'})
    .astype('category')
)
adata.obs['leiden_03scvi'].value_counts()

In [None]:
sc.pl.umap(adata, color=["leiden_03scvi"], legend_loc="on data", frameon=False)

In [None]:
#adata.uns['log1p']['base']=None
sc.tl.rank_genes_groups(adata, groupby='leiden_03scvi', method='wilcoxon', key_added='wilcoxon_03scvi', 
                        use_raw=False, layer='log1p_norm')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False, key='wilcoxon_03scvi')

In [None]:
sc.tl.dendrogram(adata, 'leiden_03scvi')
sc.pl.dendrogram(adata, groupby='leiden_03scvi')

In [None]:
adata.var_names = adata.var_names.astype('str')
adata.var_names_make_unique()

In [None]:
sc.pl.rank_genes_groups_dotplot(adata, n_genes=10, groupby='leiden_03scvi',
                                use_raw=False, 
                                layer="log1p_norm", 
                                vmax=3, vmin=0, 
                                #cmap='RdBu_r',
                                key='wilcoxon_03scvi', figsize=(15,3),
                                save='topDEGs03scvi_normupd.svg'
                               )

In [None]:
sc.tl.filter_rank_genes_groups(
    adata,
    min_in_group_fraction=0.25,
    max_out_group_fraction=0.25,
    key="wilcoxon_03scvi",
    key_added="wilcoxon_03scvi_filtered",
)

In [None]:
sc.pl.rank_genes_groups_heatmap(adata, n_genes=15, key="wilcoxon_03scvi_filtered", groupby="leiden_03scvi",
                                layer='log1p_norm', show_gene_labels=True, figsize=(15,10),
                                cmap='YlOrRd', save='heatmap_filteredupd.svg'
                                )

In [None]:
sc.pl.rank_genes_groups_dotplot(adata, n_genes=10, groupby='leiden_03scvi',
                                use_raw=False, 
                                layer="log1p_norm", 
                                vmax=3, vmin=0, figsize=(15,3),
                                #cmap='RdBu_r',
                                key='wilcoxon_03scvi_filtered',
                                save='topDEGs03scvi_norm_filtered.svg'
                               )

In [None]:
# Map each norm_sample_stage label to a numeric age (the number embedded in the
# label; presumably post-conception weeks — TODO confirm).
# NOTE(review): several keys contain stray spaces ('pcw7.1 -1', 'pcw9.3-1 ', ...);
# presumably they mirror the raw labels exactly. Any label missing from this
# dict maps to NaN without an error, so verify the keys against
# adata.obs['norm_sample_stage'].unique().
label_mapping = {
     'pcw7.1 -1': 7.1,
     'pcw5.6': 5.6,
     'pcw9.3-1 ': 9.3,
     'pcw6.1': 6.1,
     'pcw9.0-1 ': 9.0,
     'pcw7.2-2 ': 7.2,
     'pcw9.0-2 ': 9.0,
     'pcw8.0': 8.0,
     'pcw6.5': 6.5,
     'pcw9.3-2': 9.3,
     'pcw8.4-1 ': 8.4,
     'pcw7.2-1 ': 7.2,
     'pcw8.4-2': 8.4
}

# Numeric age per cell, then the number of cells per age
adata.obs['ageint'] = adata.obs['norm_sample_stage'].map(label_mapping)
adata.obs['ageint'].value_counts()

In [None]:
sc.pl.umap(adata, 
           color=['norm_sample_stage', 'ageint', 'sex', 'samplename'], 
           layer='log1p_norm', sort_order=False,
           frameon=False, cmap='gnuplot',
           save = '_subset_tendongenes_agesexsamplename.svg'
          )

In [None]:
sc.pl.umap(adata, 
           color=['norm_sample_stage', 'ageint', 'phase', 'samplename'], 
           layer='log1p_norm', sort_order=False,
           frameon=False, cmap='gnuplot',
           save = '_subset_tendongenes_agephasesamplename.svg'
          )

In [None]:
sc.pl.umap(adata, 
           color=['SCX', 'MKX', 'TNMD', 'KERA', 'FMOD', 'THBS2', 'THBS4', 'EGR1', 
                  'ABI3BP', 'GAS2', 'COL3A1', 'COL1A1', 'FGF14', 'FSTL5', 'SOX9', 'leiden_03scvi'], 
           layer='log1p_norm',
           vmin=0,
           vmax="p99",
           sort_order=False,
           frameon=False,
           cmap="Reds",
           save = '_subset_scVI_tendongenes.svg'
          )

In [None]:
# Flatten the unfiltered Wilcoxon results into one table with a
# "<cluster>_<field>" column for every cluster/field pair.
result = adata.uns['wilcoxon_03scvi']
# Cluster labels are the field names of the structured 'names' array
groups = result['names'].dtype.names
df = pd.DataFrame(
    {group + '_' + key: result[key][group]
    for group in groups 
    for key in ['names','scores','logfoldchanges', 'pvals', 'pvals_adj']})
# CSV export of the unfiltered table is currently disabled
#df.to_csv(os.path.join(RESULTS_FOLDERNAME, 'wilcoxon_DGE_leiden03scvi.csv'))
df.head(5)

In [None]:
# Flatten the filtered Wilcoxon results into one table with a
# "<cluster>_<field>" column for every cluster/field pair, and export it.
# The key must match key_added of sc.tl.filter_rank_genes_groups above
# ('wilcoxon_03scvi_filtered'); 'wilcoxon_03scvi_filteredupd' is never created
# in this notebook and raised KeyError on a fresh run.
result = adata.uns['wilcoxon_03scvi_filtered']
# Cluster labels are the field names of the structured 'names' array
groups = result['names'].dtype.names
df = pd.DataFrame(
    {group + '_' + key: result[key][group]
    for group in groups 
    for key in ['names','scores','logfoldchanges', 'pvals', 'pvals_adj']})
df.to_csv(os.path.join(RESULTS_FOLDERNAME, 'wilcoxon_DGE_leiden03scvi_filtered.csv'))
df.head(5)

In [None]:
sc.pl.umap(adata, 
           color=['FGF7', 'TNXB', 'LUM', 'DCN', 'FST',
                  'COL23A1', 'NAV3', 'NRXN1', 'ERG', 'ADAM22', 'TWIST2', 'CREB5', 'CELF2',
                  'COL9A1', 'ACAN', 'RUNX2', 'SOX5', 'BGN', 'MEOX2',
                  'leiden_03scvi'], 
           layer='log1p_norm',
           vmin=0,
           vmax="p99",
           sort_order=False,
           frameon=False,
           cmap="Reds",
           #save = '_subset_DEGannot.svg'
          )

In [None]:
foetal = sc.read_h5ad(os.path.join('../foetal/results/scVI/dev_scANVI.h5ad'))
foetal

In [None]:
foetal = foetal[foetal.obs['age'] == '12w', :].copy()
foetal.n_obs

In [None]:
# Keep the original scANVI labels for reference
foetal.obs['C_scANVI_orig'] = foetal.obs['C_scANVI']

# Collapse the two ABI3BP GAS2 fibroblast sub-clusters into a single label.
# Series.replace with one mapping replaces the repeated np.where calls, which
# returned plain object arrays and dropped the categorical dtype.
merge_labels = {
    'ABI3BP GAS2 Fibroblasts 1': 'ABI3BP GAS2 Fibroblasts',
    'ABI3BP GAS2 Fibroblasts 2': 'ABI3BP GAS2 Fibroblasts',
    # Further merges that were considered but are currently disabled:
    #'COL6A6 FNDC1 Fibroblasts': 'COL3A1 PI16 Fibroblasts',
    #'Smooth Myocytes': 'Mural Cells',
}
foetal.obs['C_scANVI'] = (
    foetal.obs['C_scANVI']
    .astype(object)
    .replace(merge_labels)
    .astype('category')
)

print(foetal.obs['C_scANVI'].value_counts())

In [None]:
# Rank genes for each 'C_scANVI' group of the foetal subset with the Wilcoxon method
# on the 'log1p_norm' layer, store the result under 'wilcoxon_scanvi12w', and plot
# the top 25 genes per group.
sc.tl.rank_genes_groups(
    foetal,
    groupby='C_scANVI',
    method='wilcoxon',
    use_raw=False,
    layer='log1p_norm',
    key_added='wilcoxon_scanvi12w',
)
sc.pl.rank_genes_groups(foetal, key='wilcoxon_scanvi12w', n_genes=25, sharey=False)

In [None]:
# Reference marker sets: for every foetal 'C_scANVI' label keep the first 100
# gene names from the 'wilcoxon_scanvi12w' ranking.
foetal_ranked_names = foetal.uns['wilcoxon_scanvi12w']['names']
foetal_markers_dict = {
    cell_type: foetal_ranked_names[cell_type][0:100]
    for cell_type in foetal.obs['C_scANVI'].unique()
}

In [None]:
# Compare the foetal reference marker sets with the markers stored under
# 'wilcoxon_03scvi' on adata: once with default settings and once normalised
# by the reference sets.
cell_annotation = sc.tl.marker_gene_overlap(adata, foetal_markers_dict, key='wilcoxon_03scvi')
cell_annotation_norm = sc.tl.marker_gene_overlap(adata, foetal_markers_dict, key='wilcoxon_03scvi', normalize='reference')

# Annotated heatmap of the normalised overlap.
fig, ax = plt.subplots(figsize=(6,6))
sns.heatmap(cell_annotation_norm, linewidths=0.5, linecolor='white', cbar=True, annot=True, ax=ax)
plt.grid(False)
# Save the figure as SVG. savesvg expects a Figure, so pass `fig` rather than the
# pyplot module (which only worked through duck typing on the current figure).
savesvg('C_scANVIvsEmbTendonClusters03Leiden.svg', fig)
plt.show()

In [None]:
# Marker panels shown in the dot plot, keyed by the panel label.
gene_markers = {
    'ECM genes': [
        'ABI3BP', 'COL1A1', "COL12A1", 'COL3A1', 'COL6A6', "SPARC",
        "POSTN", "DCN", "BGN", 'KERA', 'LUM', 'FBN1',
    ],
    'Tenocyte genes': ['SCX', 'MKX', 'TNMD', 'FMOD', 'THBS2', 'THBS4', 'EGR1'],
    'Enth Prog': ['SOX9', 'ACAN', 'GLI1'],
}
# Dot plot per 'leiden_03scvi' cluster using the 'log1p_norm' layer; the colour
# scale is fixed to [0, 3] and the figure is saved by scanpy.
sc.pl.dotplot(
    adata,
    gene_markers,
    groupby='leiden_03scvi',
    dendrogram=True,
    layer='log1p_norm',
    vmin=0,
    vmax=3,
    save='_subset_tenocytemarkers.svg',
)

In [None]:
# Cast the foetal variable (gene) names to strings, then make them unique.
# NOTE(review): no later cell in view reads foetal before it is deleted — confirm this step is still needed.
foetal.var_names = foetal.var_names.astype('str')
foetal.var_names_make_unique()

In [None]:
# Candidate marker genes per cell population, keyed by the label shown on the plot.
# Genes that were commented out in earlier versions are listed in the notes so the
# history of each panel is not lost.
markers_full_dict = {
    'chondrocytes': ['COL2A1', 'SOX9', 'COL9A1', 'ACAN', 'COMP', 'HAPLN1', 'MATN1'],
    # disabled: "BGN"
    'fibroblasts (general)': ["COL1A1", "COL12A1", "SPARC", "POSTN", "DCN"],
    # disabled: "COL24A1" (could regulate type I collagen fibrillogenesis,
    # upregulated in human tendinopathy)
    'fibroblasts (type 1)': [
        "SOX5", "COL11A1", "ABI3BP", "GAS2", "FMOD", "TNMD", "MKX", "KERA", "SCX",
    ],
    'fibroblasts (type 2)': [
        "COL3A1", "COL6A6", "DCLK1", "EBF1", "TSHZ2", "PLAGL1", "VCAN", "FBN1", "NOVA1",
        "NEGR1", "NAV3", "LUM", "SEMA5A", "TNXB", "PI16", "SCN7A", "CDH18",
    ],
    'fibroblasts (type 3)': ["FGF14", "FSTL5", "THBS4", "BMP5", "CDH12"],
    'dividing cells': ["MKI67", "DIAPH3", "CENPK", "CENPP", "TOP2A"],
    # HSPG2 = perlecan, NID1 = nidogen-1
    # NOTE(review): 'maxtrix' looks like a typo for 'matrix'; the key is kept
    # unchanged because it is used as a label at runtime.
    'interfascicular maxtrix (general)': [
        'COL4A1', 'COL4A2', 'HSPG2', 'NID1', 'LAMB1', 'LAMC1', 'ITGA6', 'ITGB1',
    ],
    # CDH5 = Cadherin 5, VWF = Von Willebrand factor,
    # PECAM1 = Platelet and endothelial cell adhesion molecule 1
    'vascular endothelial cells': ["CDH5", "VWF", "PECAM1", "CD34", "ENG", "TEK"],
    # LYVE1 = Lymphatic vessel endothelial hyaluronan receptor 1,
    # PROX1 = Prospero homeobox 1, FLT4 = Fms-related tyrosine kinase 4 (VEGFR3)
    'lymphatic endothelial cells': ["LYVE1", "PROX1", "FLT4"],
    'nervous system cells': [
        "NRXN1", "XKR4", "SLC35F1", "NCAM2", "PTPRZ1",
        "FIGN", "IL1RAPL2", "CDH6", "GRID2", "SOX10",
    ],
    # ACTA2 = Alpha-smooth muscle actin, MYH11 = Smooth muscle myosin heavy chain,
    # CALD1 = Caldesmon; disabled: "DES" (Desmin), "VIM" (Vimentin), "SMTN" (Smoothelin)
    'smooth myocytes': ["ACTA2", "MYH11", "NOTCH3", "CALD1", "PDGFRB"],
    'satellite/myoblast cells': [
        'PAX7', 'DES', 'CDH15', 'NES', 'DLK1', 'NCAM1',
        'MYO18B', 'RYR3', 'MYF5', 'MYOD1', 'MYOG',
    ],
    'skeletal myocytes': ['TNNT3', 'TTN', 'MYH3', 'COL22A1', 'TNNC1'],
    # disabled: 'FOXP3'
    'immune cells': ['PTPRC', 'CD44', 'IKZF1', 'RUNX1', 'DOCK2', 'INPP5D'],
    # disabled: 'LRMDA', 'ITPR2'
    'macrophages': ['MRC1', 'F13A1', 'CSF1R', 'CD163', 'CD68', 'CD36', 'LGMN'],
    'lymphoid': [
        'SKAP1', 'THEMIS', 'CD247', 'EPB41', 'PIP4K2A', 'PRKCB', 'CD96', 'CD38',
    ],
    'unknown progenitors': ['KIT', 'TFRC', 'IL18R1', 'MITF', 'BMP2K', 'GATA2'],
}

# Drop every marker gene that is not present in adata.var_names; the dictionary
# is updated in place, one population at a time, keeping the original gene order.
all_genes_in_adata = set(adata.var_names)
for cell_type in list(markers_full_dict):
    markers_full_dict[cell_type] = [
        gene for gene in markers_full_dict[cell_type] if gene in all_genes_in_adata
    ]

# Dot plot of the remaining markers per 'leiden_03scvi' cluster ('log1p_norm' layer,
# colour scale fixed to [0, 3]); saving is currently disabled.
sc.pl.dotplot(
    adata,
    markers_full_dict,
    groupby='leiden_03scvi',
    dendrogram=True,
    layer='log1p_norm',
    vmin=0,
    vmax=3,
    #save='_subset_tenocytemarkers.svg',
)

In [None]:
# Remove the foetal object from the namespace; foetal_markers_dict, which was
# built from it, is a separate variable and remains available to later cells.
del foetal

In [None]:
# Manual annotation: integer 'leiden_03scvi' cluster id -> cell type label.
# 'Unknown' is the label later passed to scANVI as the unlabeled category.
cell_type_names = {
    0: 'FGF14 SCX Progenitors',
    1: 'SOX9 SCX Progenitors',
    2: 'COL6A6 TWIST2 Progenitors',
    3: 'Unknown',
}
# Cluster ids are cast to int so they match the dictionary keys; ids that are not
# in the dictionary are left as they are by replace().
cluster_ids = adata.obs['leiden_03scvi'].astype(int)
adata.obs['cell_type'] = cluster_ids.replace(cell_type_names)

In [None]:
# Number of cells assigned to each manual cell type label.
adata.obs['cell_type'].value_counts()

# scANVI INTEGRATION

In [None]:
# Load the previously saved scVI model from the results folder, attach the current
# adata to it, and display the model summary.
scvi_model_dir = os.path.join(RESULTS_FOLDERNAME, "EmbryonicTENDONscVI_SamplenameKitSeqprotSex/")
vae = scvi.model.SCVI.load(scvi_model_dir, adata=adata, use_gpu=True)
vae

In [None]:
# Initialise a scANVI model from the loaded scVI model. Labels come from the
# 'cell_type' column; cells labelled "Unknown" are treated as unlabeled.
scanvi_label_setup = dict(labels_key="cell_type", unlabeled_category="Unknown")
lvae = scvi.model.SCANVI.from_scvi_model(vae, adata=adata, **scanvi_label_setup)

In [None]:
# Train the scANVI model for at most 20 epochs.
lvae.train(max_epochs=20)

In [None]:
# Save the trained scANVI model under the results folder, replacing any existing save.
lvae.save(os.path.join(RESULTS_FOLDERNAME, "EmbryonicTENDONscVI2_scANVI/"), overwrite=True)

In [None]:
# Store the scANVI label predictions in obs['C_scANVI'] and the scANVI latent
# representation in obsm['X_scANVI'].
adata.obs["C_scANVI"] = lvae.predict(adata)
adata.obsm["X_scANVI"] = lvae.get_latent_representation(adata)

In [None]:
# Compute the neighbour graph on the scANVI latent representation using the
# correlation metric, then compute the UMAP embedding.
sc.pp.neighbors(adata, use_rep="X_scANVI", metric='correlation')
sc.tl.umap(adata)

In [None]:
# One UMAP panel per metadata / annotation column, saved as a single SVG.
scanvi_metadata_columns = [
    'hospital_id',
    'norm_sample_stage',
    'samplename',
    'seq_protocol',
    'sex',
    'leiden_03scvi',
    'cell_type',
    'C_scANVI',
]
plot_umaps(adata, scanvi_metadata_columns, filename='embryoTendonSubset_scANVImetadata.svg')

In [None]:
# Same UMAP layout restricted to the sample-level metadata columns (including 'kit').
sample_metadata_columns = [
    'hospital_id',
    'norm_sample_stage',
    'samplename',
    'kit',
    'seq_protocol',
    'sex',
]
plot_umaps(adata, sample_metadata_columns, filename='embryoTendonSubset_scANVImetadataorig.svg')

In [None]:
# Cell counts per manual label, for comparison with the scANVI predictions in the next cell.
adata.obs['cell_type'].value_counts()

In [None]:
# Cell counts per scANVI-predicted label.
adata.obs['C_scANVI'].value_counts()

In [None]:
# Cross-tabulate manual labels (rows) against scANVI predictions (columns), then
# divide every row by its total so each row holds fractions of that manual label.
df = adata.obs.groupby(["cell_type", "C_scANVI"]).size().unstack(fill_value=0)
row_totals = df.sum(axis=1)
conf_mat = df.div(row_totals, axis=0)

In [None]:
# Heat map of the row-normalised confusion matrix.
plt.figure(figsize=(8, 8))
_ = plt.pcolor(conf_mat)
# Tick marks are placed at the centre of each cell (integer position + 0.5).
column_centres = np.arange(len(df.columns)) + 0.5
row_centres = np.arange(len(df.index)) + 0.5
_ = plt.xticks(column_centres, df.columns, rotation=90)
_ = plt.yticks(row_centres, df.index)
plt.xlabel("Predicted")
plt.ylabel("Observed")
# saving this figure is currently disabled:
#savesvg('scANVI_label_predictions.svg', plt)

In [None]:
# Leiden clustering at resolution 0.3, stored under 'leiden_03scanvi', followed by
# a UMAP of the scANVI predictions next to the new clusters with on-plot legends.
sc.tl.leiden(adata, key_added='leiden_03scanvi', resolution=0.3)
sc.pl.umap(
    adata,
    color=['C_scANVI', 'leiden_03scanvi'],
    add_outline=True,
    frameon=False,
    legend_loc='on data',
    legend_fontsize=6,
    legend_fontoutline=1,
)

In [None]:
# Display the AnnData summary to inspect which fields are currently stored.
adata

In [None]:
# Rank genes for each 'C_scANVI' label with the Wilcoxon method on the 'log1p_norm'
# layer, store the result under 'wilcoxon_cscanvi', and plot the top 25 genes per label.
sc.tl.rank_genes_groups(
    adata,
    groupby='C_scANVI',
    method='wilcoxon',
    use_raw=False,
    layer='log1p_norm',
    key_added='wilcoxon_cscanvi',
)
sc.pl.rank_genes_groups(adata, key='wilcoxon_cscanvi', n_genes=25, sharey=False)

In [None]:
# Flatten the 'wilcoxon_cscanvi' ranking into a wide table with one
# "<group>_<statistic>" column per pair, write it to CSV and preview it.
result = adata.uns['wilcoxon_cscanvi']
groups = result['names'].dtype.names
columns = {}
for group in groups:
    for key in ('names', 'scores', 'logfoldchanges', 'pvals', 'pvals_adj'):
        columns[f'{group}_{key}'] = result[key][group]
df = pd.DataFrame(columns)
csv_path = os.path.join(RESULTS_FOLDERNAME, 'wilcoxon_DGE_C_scANVI.csv')
df.to_csv(csv_path)
df.head(5)

In [None]:
# Filter the 'wilcoxon_cscanvi' ranking with in-group / out-group fraction thresholds
# of 0.25 (all other settings are left at the library defaults) and store the
# filtered copy under 'wilcoxon_cscanvi_filtered'.
fraction_thresholds = dict(min_in_group_fraction=0.25, max_out_group_fraction=0.25)
sc.tl.filter_rank_genes_groups(
    adata,
    key="wilcoxon_cscanvi",
    key_added="wilcoxon_cscanvi_filtered",
    **fraction_thresholds,
)

In [None]:
# Flatten the filtered 'wilcoxon_cscanvi_filtered' ranking into the same wide
# "<group>_<statistic>" layout, write it to CSV and preview it.
result = adata.uns['wilcoxon_cscanvi_filtered']
groups = result['names'].dtype.names
columns = {}
for group in groups:
    for key in ('names', 'scores', 'logfoldchanges', 'pvals', 'pvals_adj'):
        columns[f'{group}_{key}'] = result[key][group]
df = pd.DataFrame(columns)
csv_path = os.path.join(RESULTS_FOLDERNAME, 'wilcoxon_DGE_C_scANVI_filtered.csv')
df.to_csv(csv_path)
df.head(5)

In [None]:
# Dot plot of the top 10 genes per 'C_scANVI' label from the unfiltered ranking,
# drawn from the 'log1p_norm' layer with the colour scale fixed to [0, 3].
sc.pl.rank_genes_groups_dotplot(
    adata,
    key='wilcoxon_cscanvi',
    groupby='C_scANVI',
    n_genes=10,
    use_raw=False,
    layer="log1p_norm",
    vmin=0,
    vmax=3,
    #cmap='RdBu_r',
    figsize=(15,3),
    save='topDEGsCscanvi_normupd.svg',
)

In [None]:
# Re-create the filtered ranking (same call and thresholds as the earlier
# filtering cell) so this cell can be executed on its own.
fraction_thresholds = dict(min_in_group_fraction=0.25, max_out_group_fraction=0.25)
sc.tl.filter_rank_genes_groups(
    adata,
    key="wilcoxon_cscanvi",
    key_added="wilcoxon_cscanvi_filtered",
    **fraction_thresholds,
)

# Heatmap of the top 15 filtered genes per 'C_scANVI' label, with gene labels shown.
sc.pl.rank_genes_groups_heatmap(
    adata,
    key="wilcoxon_cscanvi_filtered",
    groupby="C_scANVI",
    n_genes=15,
    layer='log1p_norm',
    show_gene_labels=True,
    figsize=(15,10),
    cmap='YlOrRd',
    save='heatmap_filteredCscanvi.svg',
)

In [None]:
# Dot plot of the top 15 genes per 'C_scANVI' label from the filtered ranking,
# drawn from the 'log1p_norm' layer with the colour scale fixed to [0, 3].
sc.pl.rank_genes_groups_dotplot(
    adata,
    key='wilcoxon_cscanvi_filtered',
    groupby='C_scANVI',
    n_genes=15,
    use_raw=False,
    layer="log1p_norm",
    vmin=0,
    vmax=3,
    figsize=(15,3),
    #cmap='RdBu_r',
    save='topDEGsCscanvi_norm_filtered.svg',
)

In [None]:
# Genes to overlay on the scANVI UMAP; the last entry colours by the 'C_scANVI' labels.
tendon_gene_colors = [
    'SCX', 'MKX', 'TNMD', 'KERA', 'FMOD', 'THBS2', 'THBS4', 'EGR1',
    'ABI3BP', 'GAS2', 'COL3A1', 'COL1A1', 'FGF14', 'FSTL5', 'SOX9',
    'C_scANVI',
]
sc.pl.umap(
    adata,
    color=tendon_gene_colors,
    layer='log1p_norm',   # expression values are read from this layer
    vmin=0,
    vmax="p99",           # colour scale capped at the 99th percentile
    sort_order=False,
    frameon=False,
    cmap="Greens",
    save='_subset_scANVI_tendongenes.svg',
)

In [None]:
# Second gene panel for the scANVI UMAP; the last entry colours by the 'C_scANVI' labels.
deg_annot_cscanvi_colors = [
    'COL6A6', 'DCLK1', 'CELF2', 'NAV3', 'CREB5', 'FST', 'TWIST2', 'TNXB',
    'COL9A1', 'HAPLN1', 'MATN4', 'RUNX2',
    'CHODL', 'CDH12', 'GCG',
    'C_scANVI',
]
sc.pl.umap(
    adata,
    color=deg_annot_cscanvi_colors,
    layer='log1p_norm',   # expression values are read from this layer
    vmin=0,
    vmax="p99",           # colour scale capped at the 99th percentile
    sort_order=False,
    frameon=False,
    cmap="Greens",
    save='_subset_DEGannotCscANVI.svg',
)

In [None]:
# Compare the foetal reference marker sets with the 'wilcoxon_cscanvi' ranking on
# adata: once with default settings and once normalised by the reference sets.
cell_annotation = sc.tl.marker_gene_overlap(adata, foetal_markers_dict, key='wilcoxon_cscanvi')
cell_annotation_norm = sc.tl.marker_gene_overlap(adata, foetal_markers_dict, key='wilcoxon_cscanvi', normalize='reference')

In [None]:
# Annotated heatmap of the reference-normalised marker overlap.
fig, ax = plt.subplots(figsize=(5,6))
sns.heatmap(cell_annotation_norm, linewidths=0.5, linecolor='white', cbar=True, annot=True, ax=ax)
plt.grid(False)
# Save the figure as SVG. savesvg expects a Figure, so pass `fig` rather than the
# pyplot module (which only worked through duck typing on the current figure).
savesvg('C_scANVIvsEmbTendonClustersCscANVI.svg', fig)
plt.show()

In [None]:
# UMAP panels coloured by four entries: 'phase', 'ageint', 'sex' and 'samplename'.
sample_annotation_colors = ['phase', 'ageint', 'sex', 'samplename']
sc.pl.umap(
    adata,
    color=sample_annotation_colors,
    layer='log1p_norm',
    sort_order=False,
    frameon=False,
    cmap='gnuplot',
    save='_subset_tendongenes_agesexsamplename.svg',
)

In [None]:
# Display the AnnData summary to check which entries are stored before cleaning up.
adata

In [None]:
# Other removals that are currently disabled:
#del adata.uns['wilcoxon_03scanvi_filtered'], adata.uns['wilcoxon_cscanvi_filtered']
#del adata.uns['wilcoxon_03scanvi']
#del adata.uns['wilcoxon_cscanvi']
#del adata.uns['_scvi_manager_uuid'], adata.uns['_scvi_uuid']

# Remove the 'wilcoxon_03scvi_filtered' entry from adata.uns. The membership check
# keeps the cell re-runnable: an unconditional `del` raises KeyError on a second
# execution or when the entry was never created.
# NOTE(review): presumably removed ahead of writing the object to disk in the next
# cell — confirm whether the other '*_filtered' entries should be removed as well.
if 'wilcoxon_03scvi_filtered' in adata.uns:
    del adata.uns['wilcoxon_03scvi_filtered']

In [None]:
# Write the annotated AnnData object to 'tendonsubset_scANVI.h5ad' in the results folder.
adata.write(os.path.join(RESULTS_FOLDERNAME, 'tendonsubset_scANVI.h5ad'))