In [1]:
import scanpy as sc
import numpy as np
import pandas as pd
import os
# import rapids_singlecell as rsc
from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import session_info
# Show the session/package information reported by session_info, so the
# environment this notebook ran in is recorded next to its outputs.
session_info.show()

  mod_version = _find_version(mod.__version__)


In [3]:
# Set scanpy's global figure parameters, with the resolution fixed at 120 dpi.
sc.settings.set_figure_params(dpi=120)

# Read in

In [4]:
# Directory that holds the Xenium AnnData (.h5ad) objects read and written below.
# Set the XENIUM_OBJECTS_DIR environment variable to run this notebook against
# another location; when it is unset, the original cluster path is used.
objects_dir = os.environ.get(
    'XENIUM_OBJECTS_DIR',
    '/home/kk837/rds/rds-teichlab-C9woKbOCf2Y/kk837/Foetal/anndata_objects/Xenium',
)

In [5]:
# The two tissue blocks that get merged in this notebook.
sample_id_1, sample_id_2 = 'C194-HEA-0-FFPE-1', 'Hst45-HEA-0-FFPE-1'

# Each sample's input file lives in objects_dir and follows the same naming pattern.
path_adata_1, path_adata_2 = (
    f'{objects_dir}/{block_id}_5K_filtered_raw.h5ad'
    for block_id in (sample_id_1, sample_id_2)
)

In [6]:
# Load the first sample's AnnData from its .h5ad file.
adata_1 = sc.read_h5ad(path_adata_1)
# Bare name as the last expression: the notebook displays the object's summary.
adata_1

AnnData object with n_obs × n_vars = 144916 × 5001
    obs: 'cell_id', 'x_centroid', 'y_centroid', 'transcript_counts', 'control_probe_counts', 'genomic_control_counts', 'control_codeword_counts', 'unassigned_codeword_counts', 'deprecated_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area', 'nucleus_count', 'segmentation_method', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'pct_counts_in_top_10_genes', 'pct_counts_in_top_20_genes', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_150_genes', 'n_counts', 'n_genes'
    var: 'gene_ids', 'feature_types', 'genome', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'
    obsm: 'spatial'

In [7]:
# Load the second sample's AnnData from its .h5ad file.
adata_2 = sc.read_h5ad(path_adata_2)
# Bare name as the last expression: the notebook displays the object's summary.
adata_2

AnnData object with n_obs × n_vars = 217361 × 5001
    obs: 'cell_id', 'x_centroid', 'y_centroid', 'transcript_counts', 'control_probe_counts', 'genomic_control_counts', 'control_codeword_counts', 'unassigned_codeword_counts', 'deprecated_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area', 'nucleus_count', 'segmentation_method', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'pct_counts_in_top_10_genes', 'pct_counts_in_top_20_genes', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_150_genes', 'n_counts', 'n_genes'
    var: 'gene_ids', 'feature_types', 'genome', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'
    obsm: 'spatial'

# Concatenate

In [8]:
# tidy obs columns
# Both samples are reduced to the same obs schema. The column list is defined
# once so the two objects cannot drift apart through a copy-paste edit.
obs_columns = [
    'cell_id', 'x_centroid', 'y_centroid', 'transcript_counts',
    'control_probe_counts', 'genomic_control_counts',
    'control_codeword_counts', 'unassigned_codeword_counts',
    'deprecated_codeword_counts', 'total_counts', 'cell_area',
    'nucleus_area', 'nucleus_count', 'segmentation_method',
    'n_genes_by_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts',
    'pct_counts_in_top_10_genes', 'pct_counts_in_top_20_genes',
    'pct_counts_in_top_50_genes', 'pct_counts_in_top_150_genes', 'n_counts',
    'n_genes',
]

for sample_adata, sample_id in ((adata_1, sample_id_1), (adata_2, sample_id_2)):
    # Keep only the shared columns and tag every cell with the tissue block it
    # came from. `.assign` returns a new DataFrame, so the column is never
    # written into a slice of the previous obs frame.
    sample_adata.obs = sample_adata.obs[obs_columns].assign(tissue_block_id=sample_id)

In [9]:
# concatenate
# `AnnData.concatenate` is deprecated in favour of `anndata.concat`, which scanpy
# re-exports as `sc.concat`.
#   - join='inner' keeps only the genes present in both samples, as before.
#   - index_unique=None leaves obs_names untouched, so barcodes may repeat across
#     samples until they are made unique in a later step.
#   - merge='same' keeps the var columns whose values are identical in both
#     samples. NOTE(review): the deprecated method additionally kept differing
#     var columns as per-batch copies with a '-0'/'-1' suffix; those are not
#     produced here — confirm nothing downstream reads them.
adata = sc.concat(
    [adata_1, adata_2],
    join='inner',
    label=None,
    index_unique=None,
    merge='same',
)
# Sanity check: number of cells contributed by each tissue block.
adata.obs['tissue_block_id'].value_counts()

  adata = adata_1.concatenate(adata_2,
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")


tissue_block_id
Hst45-HEA-0-FFPE-1    217361
C194-HEA-0-FFPE-1     144916
Name: count, dtype: int64

# Update obs_names

In [10]:
# Make obs_names unique across samples by prefixing each cell with its tissue block.
# The name is built from the 'cell_id' column instead of the current obs_names so
# that re-running this cell gives the same result rather than stacking a second
# prefix. NOTE(review): this relies on 'cell_id' holding the per-sample barcode
# that was used as obs_names before this step — confirm for both samples.
adata.obs_names = (
    adata.obs['tissue_block_id'].astype('str') + '_' + adata.obs['cell_id'].astype('str')
).to_numpy()
# Fail before anything is saved if two cells still share a name.
assert adata.obs_names.is_unique, 'obs_names are not unique after prefixing with tissue_block_id'
adata.obs.head()

Unnamed: 0,cell_id,x_centroid,y_centroid,transcript_counts,control_probe_counts,genomic_control_counts,control_codeword_counts,unassigned_codeword_counts,deprecated_codeword_counts,total_counts,...,n_genes_by_counts,log1p_n_genes_by_counts,log1p_total_counts,pct_counts_in_top_10_genes,pct_counts_in_top_20_genes,pct_counts_in_top_50_genes,pct_counts_in_top_150_genes,n_counts,n_genes,tissue_block_id
C194-HEA-0-FFPE-1_aaaafiob-1,aaaafiob-1,782.026245,1025.860474,754,0,0,0,0,0,754.0,...,510,6.23637,6.626718,8.62069,14.323607,26.525199,52.254642,754.0,510,C194-HEA-0-FFPE-1
C194-HEA-0-FFPE-1_aaaajopp-1,aaaajopp-1,784.114746,1034.30835,145,0,0,0,0,0,145.0,...,120,4.795791,4.983607,20.689655,31.034483,51.724138,100.0,145.0,120,C194-HEA-0-FFPE-1
C194-HEA-0-FFPE-1_aaaanald-1,aaaanald-1,791.2005,1028.298462,569,0,0,0,0,0,569.0,...,369,5.913503,6.345636,17.223199,23.374341,35.852373,61.511424,569.0,369,C194-HEA-0-FFPE-1
C194-HEA-0-FFPE-1_aaabdjka-1,aaabdjka-1,819.555664,1032.847168,659,0,0,0,1,0,659.0,...,449,6.109248,6.49224,10.318665,16.691958,28.831563,54.628225,659.0,449,C194-HEA-0-FFPE-1
C194-HEA-0-FFPE-1_aaabglmg-1,aaabglmg-1,819.384583,1007.73291,83,0,0,0,0,0,83.0,...,67,4.219508,4.430817,31.325301,43.373494,79.518072,100.0,83.0,67,C194-HEA-0-FFPE-1


# Save

In [11]:
# Output path for the concatenated object. The file name is derived from the two
# sample IDs so it always matches the samples that were actually merged.
path_adata = f'{objects_dir}/{sample_id_1}_{sample_id_2}_concat_5K_filtered_raw.h5ad'

In [12]:
# Write the concatenated AnnData to path_adata in .h5ad format, then preview
# the first rows of obs.
adata.write_h5ad(path_adata)
adata.obs.head(5)

Unnamed: 0,cell_id,x_centroid,y_centroid,transcript_counts,control_probe_counts,genomic_control_counts,control_codeword_counts,unassigned_codeword_counts,deprecated_codeword_counts,total_counts,...,n_genes_by_counts,log1p_n_genes_by_counts,log1p_total_counts,pct_counts_in_top_10_genes,pct_counts_in_top_20_genes,pct_counts_in_top_50_genes,pct_counts_in_top_150_genes,n_counts,n_genes,tissue_block_id
C194-HEA-0-FFPE-1_aaaafiob-1,aaaafiob-1,782.026245,1025.860474,754,0,0,0,0,0,754.0,...,510,6.23637,6.626718,8.62069,14.323607,26.525199,52.254642,754.0,510,C194-HEA-0-FFPE-1
C194-HEA-0-FFPE-1_aaaajopp-1,aaaajopp-1,784.114746,1034.30835,145,0,0,0,0,0,145.0,...,120,4.795791,4.983607,20.689655,31.034483,51.724138,100.0,145.0,120,C194-HEA-0-FFPE-1
C194-HEA-0-FFPE-1_aaaanald-1,aaaanald-1,791.2005,1028.298462,569,0,0,0,0,0,569.0,...,369,5.913503,6.345636,17.223199,23.374341,35.852373,61.511424,569.0,369,C194-HEA-0-FFPE-1
C194-HEA-0-FFPE-1_aaabdjka-1,aaabdjka-1,819.555664,1032.847168,659,0,0,0,1,0,659.0,...,449,6.109248,6.49224,10.318665,16.691958,28.831563,54.628225,659.0,449,C194-HEA-0-FFPE-1
C194-HEA-0-FFPE-1_aaabglmg-1,aaabglmg-1,819.384583,1007.73291,83,0,0,0,0,0,83.0,...,67,4.219508,4.430817,31.325301,43.373494,79.518072,100.0,83.0,67,C194-HEA-0-FFPE-1


In [13]:
# Peek at the first five entries of X's `.data` array as a quick look at the
# stored values (presumably the non-zero entries of a sparse count matrix — confirm).
adata.X.data[:5]

array([5., 1., 1., 2., 2.], dtype=float32)