## Notebook for the Cancer reference stem - immune cells preparation

**Developed by**: Anna Maguza  
**Institute of Computational Biology - Computational Health Centre - Hemlholtz Munich**  
**30 June 2023**  

#### Load required packages

In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as an
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import scipy as sci

In [2]:
import scrublet

#### Setup Cells

In [3]:
%matplotlib inline

In [4]:
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor='white')

  from .autonotebook import tqdm as notebook_tqdm


scanpy==1.9.3 anndata==0.8.0 umap==0.5.3 numpy==1.23.5 scipy==1.9.1 pandas==1.3.5 scikit-learn==1.2.2 statsmodels==0.13.5 pynndescent==0.5.8


In [5]:
def X_is_raw(adata):
    return np.array_equal(adata.X.sum(axis=0).astype(int), adata.X.sum(axis=0))

#### Upload Data

In [6]:
adata = sc.read_h5ad('/Users/anna.maguza/Desktop/Data/Gut_project/Joanito_cancer/anndata/Joanito_raw_anndata_tumor_cells.h5ad')
X_is_raw(adata)

True

In [7]:
adata_epi = sc.read_h5ad('/Users/anna.maguza/Desktop/Data/Processed_datasets/Cancer_dataset_integration/Labels_transfer/scBalance/Joanito_predicted_labels_with_scBalance_7000.h5ad')
X_is_raw(adata_epi)

False

In [8]:
# Return to the raw counts
adata_epi = adata_epi.raw.to_adata()

In [9]:
X_is_raw(adata_epi)

True

### Data filtering

In [11]:
adata_epi = adata_epi[adata_epi.obs['Predicted Label'].isin(['Stem cells'])]

In [16]:
adata_epi.obs['Unified_Cell_States'] = adata_epi.obs['Predicted Label']

  adata_epi.obs['Unified_Cell_States'] = adata_epi.obs['Predicted Label']


In [10]:
adata.obs['Unified_Cell_States'] = adata.obs['Cell Type']

In [14]:
adata = adata[adata.obs['Unified_Cell_States'].isin(['T cells', 'McDC', 'Plasma cells', 'B cells', 'Neutrophils', 'Mast', 'pDC'])]

In [17]:
# Merge dataset
adata_final = adata.concatenate(adata_epi, index_unique=None)

  warn(
  [AnnData(sparse.csr_matrix(a.shape), obs=a.obs) for a in all_adatas],


In [22]:
adata.write_h5ad('/Users/anna.maguza/Desktop/Data/Processed_datasets/Cancer_dataset_integration/input_files/Stem_immune_cells/Cancer_stem_immune_cells.h5ad')