# Integration of BAL Samples (v12)

In [1]:
import warnings
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

import sc_utils
# Silence every FutureWarning for the rest of the session.
# NOTE(review): this also hides deprecation notices emitted as FutureWarning
# by the libraries used below — consider narrowing the filter.
warnings.simplefilter(action = 'ignore', category = FutureWarning)

In [2]:
# Root directory of the project data; the input and output paths used in this
# notebook are built relative to it.
data_dir = "/projects/b1038/Pulmonary/cpuritz/PASC/data"

In [3]:
# Location of the previous (v11) integrated object, relative to `data_dir`.
in_name = "01integrated_BAL_v11"
in_dir = "01BAL/01integrated_BAL_v11"

# Read the integrated AnnData object from disk and show its summary.
adata = sc.read_h5ad(
    f"{data_dir}/{in_dir}/{in_name}.h5ad"
)
adata

AnnData object with n_obs × n_vars = 241434 × 1000
    obs: 'SC ID', 'is_PASC', 'Status', 'Study_ID', 'old_cell_type', 'n_genes_by_counts', 'total_counts', 'n_genes', '_scvi_batch', 'cluster', 'cell_type'
    var: 'gene_ids', 'feature_types', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection'
    uns: '_scvi_manager_uuid', '_scvi_uuid', 'cell_type_colors', 'cluster_colors', 'hvg', 'leiden', 'leiden_scVI_colors', 'log1p', 'neighbors', 'old_cell_type_colors', 'rank_genes_groups', 'umap'
    obsm: 'X_scVI', 'X_umap'
    layers: 'counts'
    obsp: 'connectivities', 'distances'

In [4]:
# Remove the superseded annotation column from the per-cell metadata.
adata.obs = adata.obs.drop(columns = ["old_cell_type"])

In [5]:
# Relabel the two monocyte populations. `rename_categories` is the supported
# way to rename labels of a categorical column (`Series.replace` on a
# categorical is deprecated in recent pandas). Labels missing from the mapping
# are passed through unchanged and the category order is preserved.
adata.obs['cell_type'] = adata.obs['cell_type'].cat.rename_categories({
    'Classical monocytes': 'Monocytes-1',
    'Non-classical monocytes': 'Monocytes-2',
})

In [6]:
# Swap the labels of MoAM-3 and MoAM-4 in a single step. The dict passed to
# `rename_categories` is applied to every category at once, so no temporary
# placeholder label is needed and the category order is left unchanged.
# NOTE: this cell is not idempotent — running it a second time swaps the two
# labels back, so execute it exactly once after loading the data.
adata.obs['cell_type'] = adata.obs['cell_type'].cat.rename_categories({
    'MoAM-3': 'MoAM-4',
    'MoAM-4': 'MoAM-3',
})

In [7]:
# Display the cell type labels after relabelling.
adata.obs["cell_type"].cat.categories

Index(['TRAM-1', 'TRAM-2', 'CD4 T cells-1', 'TRAM-3', 'CD8 T cells-1',
       'MoAM-1', 'Monocytes-2', 'Monocytes-1', 'TRAM-4', 'MoAM-2',
       'CD8 T cells-2', 'MoAM-4', 'CD8 T cells-3', 'TRAM-5', 'MoAM-3',
       'CD4 T cells-2', 'Perivascular macrophages',
       'Proliferating macrophages', 'TRAM-6', 'DC2', 'gdT cells and NK cells',
       'TRAM-7', 'Tregs', 'Proliferating T cells', 'B cells', 'DC1',
       'Migratory DC', 'pDC', 'Mast cells', 'Plasma cells', 'Epithelial cells',
       'SARS-CoV-2'],
      dtype='object')

## Recompute cluster markers

In [8]:
# Reset the stored log1p base before ranking genes.
# NOTE(review): presumably a workaround for rank_genes_groups failing on a
# missing 'base' entry after reloading from .h5ad — confirm against the
# installed scanpy version.
adata.uns["log1p"]["base"] = None

# Wilcoxon marker test for each `cell_type` label, requesting 200 genes.
sc.tl.rank_genes_groups(
    adata,
    groupby = "cell_type",
    method = "wilcoxon",
    n_genes = 200,
)

  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group

In [9]:
# Collect the marker table for the `cell_type` grouping.
markers = sc_utils.get_markers(adata, "cell_type")

# Allow long tables to be rendered in full below.
pd.set_option("display.max_rows", 500)

# Show the ten rows with the highest avg_logFC within each cluster.
(
    markers
    .groupby("cluster")
    .apply(lambda grp: grp.sort_values(by = "avg_logFC", ascending = False).head(10))
)

Unnamed: 0_level_0,Unnamed: 1_level_0,p_val,avg_logFC,pct.1,pct.2,p_val_adj,cluster,gene
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
B cells,4800,0.0,12.459498,0.935294,0.001331,0.0,B cells,MS4A1
B cells,4801,0.0,10.24131,0.829412,0.003016,0.0,B cells,CD79A
B cells,4987,2.07224e-40,9.794456,0.190588,0.000476,2.07224e-40,B cells,PAX5
B cells,4842,1.1038849999999999e-200,9.614582,0.428235,0.001752,1.1038849999999999e-200,B cells,LINC00926
B cells,4817,2.6670279999999997e-270,9.535253,0.497647,0.00332,2.6670279999999997e-270,B cells,CD19
B cells,4958,4.592954e-56,9.253481,0.225294,0.000784,4.592954e-56,B cells,FCRL5
B cells,4898,6.473911e-97,9.243884,0.297059,0.001064,6.473911e-97,B cells,FCRLA
B cells,4847,2.665895e-190,9.054291,0.417059,0.001485,2.665895e-190,B cells,TNFRSF13B
B cells,4884,2.5549250000000002e-118,8.543936,0.328235,0.000667,2.5549250000000002e-118,B cells,IGHG2
B cells,4944,9.400136e-64,8.533154,0.240588,0.000888,9.400136e-64,B cells,TNFRSF13C


## Write output

In [10]:
# Version tag shared by the output directory and all output file names.
out_version = "v12"
# Base name of the output files.
out_name = f"01integrated_BAL_{out_version}"
# Output directory, relative to `data_dir`.
out_dir = f"01BAL/01integrated_BAL_{out_version}"

In [11]:
# Create the versioned output directory first: the folder for a new version
# may not exist yet, and pandas `to_csv` raises OSError instead of creating
# missing parent directories.
out_path = Path(data_dir) / out_dir
out_path.mkdir(parents = True, exist_ok = True)

# Persist the relabelled object, its per-cell metadata and the marker table.
adata.write(out_path / f"{out_name}.h5ad")
adata.obs.to_csv(out_path / f"{out_name}-metadata.csv")
markers.to_csv(out_path / f"{out_name}-markers.csv")