# Label Transfer of Healthy Control cell compartments via Haber 2020 to spatial dataset

In [1]:
from pathlib import Path
import scanpy as sc
import squidpy as sq
import pandas as pd
import os
import tempfile

import anndata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scvi
import seaborn as sns
import torch
from scvi.model.utils import mde

  from .autonotebook import tqdm as notebook_tqdm


## Set Device

In [2]:
# Report which CUDA devices PyTorch can see before picking one.
cuda_ready = torch.cuda.is_available()
if not cuda_ready:
    print("No CUDA devices available")
else:
    n_devices = torch.cuda.device_count()
    print(f"Number of available CUDA devices: {n_devices}")
    # One line per device: its index and the name reported by the driver.
    for i in range(n_devices):
        device_name = torch.cuda.get_device_name(i)
        print(f"Device {i}: {device_name}")

Number of available CUDA devices: 2
Device 0: NVIDIA RTX 6000 Ada Generation
Device 1: NVIDIA RTX 6000 Ada Generation


In [3]:
# Keep a handle on CUDA device index 0 for later cells.
# NOTE(review): torch.cuda.device(0) builds a context-manager object, not a
# torch.device; it only switches the current CUDA device when entered via
# `with device:`. If later cells pass `device` to `.to(...)` or similar,
# torch.device("cuda:0") is probably what was intended -- confirm against usage.
device = torch.cuda.device(0)

## Data Prep

In [4]:
# All data and figure locations are resolved relative to the current working directory.
working_dir = Path.cwd()

# '.data' folder one level above the working directory (the '..' segment is kept unresolved).
global_repo_data = working_dir.joinpath('..', '.data')
# '.data' folder inside the working directory.
local_folder_data = working_dir.joinpath('.data')

# Output folder for figures; created if missing, left alone when it already exists.
figures_dir = working_dir.joinpath('figures')
figures_dir.mkdir(exist_ok=True)


In [5]:
# Labelled reference dataset
reference_h5ad = global_repo_data / 'Marburg_cell_states_locked_ctl240709.raw.h5ad'
adata_copd = sc.read_h5ad(reference_h5ad)

# Keep only the observations whose `group` is 'healthy_ctrl'; copy so the
# result is an independent AnnData rather than a view of the full object.
healthy_mask = adata_copd.obs['group'] == 'healthy_ctrl'
adata_copd = adata_copd[healthy_mask].copy()

# Labels and batch identifiers of the retained observations.
cell_states = adata_copd.obs['cell_states']
batches_copd = [*adata_copd.obs['batch'].unique()]

# Last expression of the cell: shows the AnnData summary.
adata_copd

AnnData object with n_obs × n_vars = 22588 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', 'IAV_score', 'group', 'Viral_score', 'cell_type', 'cell_states', 'leiden', 'cell_compartment', '_scvi_batch', '_scvi_labels', 'C_scANVI', 'viral_counts', 'infected_status', 'seed_labels', 'batch-scANVI'
    var: 'mt', 'ribo'
    uns: 'cell_compartment_colors', 'cell_states_colors', 'disease_colors', 'group_colors', 'infection_colors'
    obsm: 'X_scANVI', 'X_umap'

In [6]:
# Unlabelled query dataset
spatial_h5ad = local_folder_data / 'Franzen_L_2024_human.h5ad'
adata_spatial = sc.read_h5ad(spatial_h5ad)

# Last expression of the cell: shows the AnnData summary.
adata_spatial

AnnData object with n_obs × n_vars = 101699 × 27260
    obs: 'in_tissue', 'array_row', 'array_col', 'orig.ident', 'sample_name_x', 'sample_id', 'slide_id_x', 'slide_ca_x', 'condition_x', 'subject_alias_x', 'species_x', 'tissue_x', 'subject_gender_x', 'tissue_alias_x', 'B_tissue_selection_x', 'fibrotic_extent_score_by_pathologist_0.3', 'replicate_x', 'annotation', 'annotation_FF', 'nCount_RNA', 'nFeature_RNA', 'nCount_SCT', 'nFeature_SCT', 'percent.mt', 'percent.hb', 'percent.rp', 'percent.chrY', 'factor_14_cutoff', 'f14_subclusters', 'f14_c0_nbs_clusters', 'd_c0', 'r_dist_F14_C0', 'c2l_AT1', 'c2l_AT2', 'c2l_B.Cells', 'c2l_Basal', 'c2l_Ciliated', 'c2l_Differentiating.Ciliated', 'c2l_Endothelial.Cells', 'c2l_Fibroblasts', 'c2l_HAS1.High.Fibroblasts', 'c2l_KRT5..KRT17.', 'c2l_Lymphatic.Endothelial.Cells', 'c2l_MUC5AC..High', 'c2l_MUC5B.', 'c2l_Macrophages', 'c2l_Mast.Cells', 'c2l_Mesothelial.Cells', 'c2l_Monocytes', 'c2l_Myofibroblasts', 'c2l_NK.Cells', 'c2l_PLIN2..Fibroblasts', 'c2l_Plas