### Merge BTC DPT Visium Cohorts

In [1]:
import tarfile
import warnings
from glob import glob
import os
import anndata as ad
import pandas as pd
import scanpy as sc
import squidpy as sq
import re
from cirro import DataPortal

# set up cirro data portal access
# `portal` is a module-level client that get_cirro_h5ad_files() reads as a global.
# The recorded output of this cell shows a browser sign-in prompt, so expect an
# interactive authorization step when running on a fresh kernel.
portal = DataPortal()

# default figure size for scanpy plots
sc.set_figure_params(figsize=(4, 4))
sc.settings.verbosity = 2  # verbosity: errors (0), warnings (1), info (2), hints (3)
# record the scanpy header and squidpy version in the notebook output
sc.logging.print_header()
print(f"squidpy=={sq.__version__}")

  return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)


To sign in, use a web browser to open the page https://breakthroughcancer.cirro.bio/authorize?user_code=A4YOUZY2
squidpy==1.6.5


In [2]:
# adapt cirro code into function to pull in h5ad files
def get_cirro_h5ad_files(project_name, dataset_id, pattern="*/rctd.h5ad"):
    """Select the files of one Cirro dataset whose path matches ``pattern``.

    Uses the module-level ``portal`` client, so the portal setup cell must have
    been run first.

    Parameters
    ----------
    project_name : str
        Name of the Cirro project to look up.
    dataset_id : str
        ID of the dataset inside that project (datasets are looked up by ID,
        not by name).
    pattern : str, optional
        Pattern handed to ``filter_by_pattern`` on the dataset's file listing.
        Defaults to ``"*/rctd.h5ad"``, i.e. the previous hard-coded behaviour.

    Returns
    -------
    The Cirro file collection returned by ``filter_by_pattern``; it is iterated
    by ``load_cirro_h5ad_files``. A one-line description of the selection is
    printed as a side effect.
    """
    # access the cirro project
    project = portal.get_project_by_name(project_name)
    # access the dataset of interest within the project by its ID
    dataset = project.list_datasets().get_by_id(dataset_id)
    # get the complete list of files in that dataset, then keep the matching ones
    matched_files = dataset.list_files().filter_by_pattern(pattern)
    print(f"Selected the file: {matched_files.description()}")
    return matched_files

In [3]:
# Cirro project and dataset IDs for the two cohorts; per the recorded output the
# first dataset holds the *_visium samples and the second the *_visiumHD samples.
BTC_PROJECT_NAME = 'BTC-DPT-Development'
VISIUM_ST_DATASET_ID = '3a6e2d6c-3167-4c8c-b43c-da50b930ed11'
VISIUM_HD_DATASET_ID = 'e87dd656-833c-4433-b7a7-9f2fd38eb10f'

visST = get_cirro_h5ad_files(BTC_PROJECT_NAME, VISIUM_ST_DATASET_ID)
visHD = get_cirro_h5ad_files(BTC_PROJECT_NAME, VISIUM_HD_DATASET_ID)

Selected the file: data/rctd/BTC01_visium/rctd.h5ad (298.80 MB)

---

data/rctd/BTC03_visium/rctd.h5ad (343.67 MB)

---

data/rctd/BTC09_visium/rctd.h5ad (415.15 MB)

---

data/rctd/BTC13_visium/rctd.h5ad (451.86 MB)
Selected the file: data/rctd/HC01BTC_visiumHD/rctd.h5ad (640.37 MB)

---

data/rctd/HC03BTC_visiumHD/rctd.h5ad (478.49 MB)

---

data/rctd/HC04BTC_visiumHD/rctd.h5ad (468.65 MB)

---

data/rctd/HC05BTC_visiumHD/rctd.h5ad (302.23 MB)

---

data/rctd/HC07BTC_visiumHD/rctd.h5ad (289.04 MB)

---

data/rctd/HC08BTC_visiumHD/rctd.h5ad (262.03 MB)


In [4]:
def load_cirro_h5ad_files(file_list):
    """Read every file in ``file_list`` and return the results keyed by sample.

    Parameters
    ----------
    file_list : iterable
        Cirro file collection (output of ``get_cirro_h5ad_files``). Each item
        must expose a ``name`` path string and a ``read_h5ad()`` method.

    Returns
    -------
    dict
        Maps the parent-directory name of each file (the second-to-last
        ``/``-separated component of ``name``) to the object returned by
        ``read_h5ad()``. Files sharing a parent-directory name overwrite each
        other; the last one read wins.
    """
    loaded = {}
    for h5ad_file in file_list:
        # e.g. "data/rctd/BTC01_visium/rctd.h5ad" -> "BTC01_visium"
        path_parts = h5ad_file.name.split("/")
        name = path_parts[-2]
        print(f"Loading dataset: {name}")
        loaded[name] = h5ad_file.read_h5ad()
    return loaded

In [5]:
# read both cohorts into memory: one dict per cohort, keyed by sample name
rctd_samples = load_cirro_h5ad_files(visST)
rctd_hd_samples = load_cirro_h5ad_files(visHD)
# list the sample names that were loaded for each cohort
print("Standard samples:", [*rctd_samples])
print("HD samples:", [*rctd_hd_samples])

Loading dataset: BTC01_visium
Loading dataset: BTC03_visium
Loading dataset: BTC09_visium
Loading dataset: BTC13_visium
Loading dataset: HC01BTC_visiumHD
Loading dataset: HC03BTC_visiumHD
Loading dataset: HC04BTC_visiumHD
Loading dataset: HC05BTC_visiumHD
Loading dataset: HC07BTC_visiumHD
Loading dataset: HC08BTC_visiumHD
Standard samples: ['BTC01_visium', 'BTC03_visium', 'BTC09_visium', 'BTC13_visium']
HD samples: ['HC01BTC_visiumHD', 'HC03BTC_visiumHD', 'HC04BTC_visiumHD', 'HC05BTC_visiumHD', 'HC07BTC_visiumHD', 'HC08BTC_visiumHD']


In [6]:
# add metadata to each object for merging (sample name and type);
# the standard cohort is tagged first, then the HD cohort
for sample_type, samples in (("standard", rctd_samples), ("HD", rctd_hd_samples)):
    for name, adata in samples.items():
        print(name)
        adata.obs["BTC_sample_name"] = name
        adata.obs["sample_type"] = sample_type

# merge all objects into one: standard samples first, then HD samples
all_adatas = [*rctd_samples.values(), *rctd_hd_samples.values()]

# index_unique="-" appends "-<position in all_adatas>" to every barcode (see the
# "-0" ... "-9" suffixes in the obs tables below), so names cannot collide.
# uns_merge="unique" is meant to keep each sample's spatial entry under its own
# key -- NOTE(review): confirm against the anndata concat documentation.
combined_adata = sc.concat(
    all_adatas,
    index_unique="-",
    uns_merge="unique",
)

BTC01_visium
BTC03_visium
BTC09_visium
BTC13_visium
HC01BTC_visiumHD
HC03BTC_visiumHD
HC04BTC_visiumHD
HC05BTC_visiumHD
HC07BTC_visiumHD
HC08BTC_visiumHD


In [None]:
# Build globally unique observation names of the form "<sampleID>_<barcode>".
# Capture the post-concat barcode only once: on a re-run obs_names already carry
# the sampleID prefix, and copying them again would stack a second prefix.
if "barcode" not in combined_adata.obs.columns:
    combined_adata.obs["barcode"] = combined_adata.obs_names
# sampleID is the part of BTC_sample_name before the first underscore
# (e.g. "BTC01_visium" -> "BTC01")
combined_adata.obs["sampleID"] = combined_adata.obs["BTC_sample_name"].str.split("_").str[0]
# paste together sampleID and barcode to make unique cell names
combined_adata.obs["sample_barcode"] = (
    combined_adata.obs["sampleID"].astype(str)
    + "_"
    + combined_adata.obs["barcode"].astype(str)
)
# set as obs names
combined_adata.obs_names = combined_adata.obs["sample_barcode"]
# downstream subsetting relies on one row per name
assert combined_adata.obs_names.is_unique, "sample_barcode values are not unique"

In [8]:
# summary of the merged object: dimensions plus obs / uns / obsm keys
combined_adata

AnnData object with n_obs × n_vars = 1010315 × 18082
    obs: 'in_tissue', 'array_row', 'array_col', 'region', 'cell_type', 'BTC_sample_name', 'sample_type', 'barcode', 'sampleID', 'sample_barcode'
    uns: 'layout', 'spatial', 'spatial_neighbors', 'spatialdata_attrs'
    obsm: 'spatial'

In [9]:
# per-observation metadata of the merged object, indexed by sample_barcode
combined_adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,region,cell_type,BTC_sample_name,sample_type,barcode,sampleID,sample_barcode
sample_barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
BTC01_AACACCTACTATCGAA-1-0,1.0,0.0,122.0,BTC01_visium,FIBROBLASTS,BTC01_visium,standard,AACACCTACTATCGAA-1-0,BTC01,BTC01_AACACCTACTATCGAA-1-0
BTC01_AACACGTGCATCGCAC-1-0,1.0,76.0,22.0,BTC01_visium,PDAC,BTC01_visium,standard,AACACGTGCATCGCAC-1-0,BTC01,BTC01_AACACGTGCATCGCAC-1-0
BTC01_AACACTTGGCAAGGAA-1-0,1.0,47.0,71.0,BTC01_visium,FIBROBLASTS,BTC01_visium,standard,AACACTTGGCAAGGAA-1-0,BTC01,BTC01_AACACTTGGCAAGGAA-1-0
BTC01_AACAGGAAGAGCATAG-1-0,1.0,69.0,7.0,BTC01_visium,FIBROBLASTS,BTC01_visium,standard,AACAGGAAGAGCATAG-1-0,BTC01,BTC01_AACAGGAAGAGCATAG-1-0
BTC01_AACAGGATTCATAGTT-1-0,1.0,49.0,43.0,BTC01_visium,FIBROBLASTS,BTC01_visium,standard,AACAGGATTCATAGTT-1-0,BTC01,BTC01_AACAGGATTCATAGTT-1-0
...,...,...,...,...,...,...,...,...,...,...
HC08BTC_s_016um_00193_00227-1-9,1.0,193.0,227.0,HC08BTC_visiumHD_square_016um,,HC08BTC_visiumHD,HD,s_016um_00193_00227-1-9,HC08BTC,HC08BTC_s_016um_00193_00227-1-9
HC08BTC_s_016um_00109_00223-1-9,1.0,109.0,223.0,HC08BTC_visiumHD_square_016um,,HC08BTC_visiumHD,HD,s_016um_00109_00223-1-9,HC08BTC,HC08BTC_s_016um_00109_00223-1-9
HC08BTC_s_016um_00039_00175-1-9,1.0,39.0,175.0,HC08BTC_visiumHD_square_016um,,HC08BTC_visiumHD,HD,s_016um_00039_00175-1-9,HC08BTC,HC08BTC_s_016um_00039_00175-1-9
HC08BTC_s_016um_00037_00193-1-9,1.0,37.0,193.0,HC08BTC_visiumHD_square_016um,,HC08BTC_visiumHD,HD,s_016um_00037_00193-1-9,HC08BTC,HC08BTC_s_016um_00037_00193-1-9


In [10]:
# number of observations contributed by each sample
combined_adata.obs["BTC_sample_name"].value_counts()

BTC_sample_name
HC03BTC_visiumHD    175561
HC07BTC_visiumHD    175561
HC01BTC_visiumHD    169550
HC05BTC_visiumHD    164263
HC04BTC_visiumHD    163383
HC08BTC_visiumHD    142989
BTC03_visium          4992
BTC09_visium          4992
BTC13_visium          4601
BTC01_visium          4423
Name: count, dtype: int64

In [11]:
# observations per annotated cell type; value_counts() omits missing labels by
# default, so these counts add up to fewer rows than the merged object holds
combined_adata.obs["cell_type"].value_counts()

cell_type
FIBROBLASTS        330668
PDAC               107504
ACINAR              38239
PERICYTES           26445
MYELOID             25857
ENDOTHELIAL         18075
ENDOCRINE           12768
TNK                  8529
PLASMA               5061
CYCLING MYELOID      4156
B CELLS              2136
MAST                  621
CYCLING TNK           272
Name: count, dtype: int64

In [12]:
# define cell types of interest (labels matched against obs["cell_type"])
ductal_cells = ["PDAC"]
# the fibroblast + ductal selection extends the ductal one
fibro_ductal_cells = ["FIBROBLASTS", *ductal_cells]

In [13]:
# subset combined object to cell types of interest; .copy() makes each subset an
# independent AnnData instead of a view onto combined_adata
cell_type_labels = combined_adata.obs["cell_type"]
ductal_adata = combined_adata[cell_type_labels.isin(ductal_cells)].copy()
fibro_ductal_adata = combined_adata[cell_type_labels.isin(fibro_ductal_cells)].copy()

In [14]:
# summary of the ductal-only subset
ductal_adata

AnnData object with n_obs × n_vars = 107504 × 18082
    obs: 'in_tissue', 'array_row', 'array_col', 'region', 'cell_type', 'BTC_sample_name', 'sample_type', 'barcode', 'sampleID', 'sample_barcode'
    uns: 'layout', 'spatial', 'spatial_neighbors', 'spatialdata_attrs'
    obsm: 'spatial'

In [15]:
# per-observation metadata of the ductal-only subset
ductal_adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,region,cell_type,BTC_sample_name,sample_type,barcode,sampleID,sample_barcode
sample_barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
BTC01_AACACGTGCATCGCAC-1-0,1.0,76.0,22.0,BTC01_visium,PDAC,BTC01_visium,standard,AACACGTGCATCGCAC-1-0,BTC01,BTC01_AACACGTGCATCGCAC-1-0
BTC01_AACATACTCATATGCG-1-0,1.0,50.0,6.0,BTC01_visium,PDAC,BTC01_visium,standard,AACATACTCATATGCG-1-0,BTC01,BTC01_AACATACTCATATGCG-1-0
BTC01_AACCAAGGTATCAGGC-1-0,1.0,38.0,104.0,BTC01_visium,PDAC,BTC01_visium,standard,AACCAAGGTATCAGGC-1-0,BTC01,BTC01_AACCAAGGTATCAGGC-1-0
BTC01_AACCACTGCCATAGCC-1-0,1.0,29.0,49.0,BTC01_visium,PDAC,BTC01_visium,standard,AACCACTGCCATAGCC-1-0,BTC01,BTC01_AACCACTGCCATAGCC-1-0
BTC01_AACCGCATGTATGTTA-1-0,1.0,19.0,119.0,BTC01_visium,PDAC,BTC01_visium,standard,AACCGCATGTATGTTA-1-0,BTC01,BTC01_AACCGCATGTATGTTA-1-0
...,...,...,...,...,...,...,...,...,...,...
HC08BTC_s_016um_00193_00258-1-9,1.0,193.0,258.0,HC08BTC_visiumHD_square_016um,PDAC,HC08BTC_visiumHD,HD,s_016um_00193_00258-1-9,HC08BTC,HC08BTC_s_016um_00193_00258-1-9
HC08BTC_s_016um_00247_00325-1-9,1.0,247.0,325.0,HC08BTC_visiumHD_square_016um,PDAC,HC08BTC_visiumHD,HD,s_016um_00247_00325-1-9,HC08BTC,HC08BTC_s_016um_00247_00325-1-9
HC08BTC_s_016um_00225_00261-1-9,1.0,225.0,261.0,HC08BTC_visiumHD_square_016um,PDAC,HC08BTC_visiumHD,HD,s_016um_00225_00261-1-9,HC08BTC,HC08BTC_s_016um_00225_00261-1-9
HC08BTC_s_016um_00220_00218-1-9,1.0,220.0,218.0,HC08BTC_visiumHD_square_016um,PDAC,HC08BTC_visiumHD,HD,s_016um_00220_00218-1-9,HC08BTC,HC08BTC_s_016um_00220_00218-1-9


In [16]:
# ductal observations per sample
ductal_adata.obs["BTC_sample_name"].value_counts()

BTC_sample_name
HC01BTC_visiumHD    45709
HC03BTC_visiumHD    23372
HC04BTC_visiumHD    18410
HC05BTC_visiumHD    12073
HC07BTC_visiumHD     3295
BTC01_visium         1356
BTC03_visium         1230
BTC13_visium          735
BTC09_visium          697
HC08BTC_visiumHD      627
Name: count, dtype: int64

In [17]:
# sanity check: only the labels listed in ductal_cells should remain
ductal_adata.obs["cell_type"].value_counts()

cell_type
PDAC    107504
Name: count, dtype: int64

In [18]:
# summary of the fibroblast + ductal subset
fibro_ductal_adata

AnnData object with n_obs × n_vars = 438172 × 18082
    obs: 'in_tissue', 'array_row', 'array_col', 'region', 'cell_type', 'BTC_sample_name', 'sample_type', 'barcode', 'sampleID', 'sample_barcode'
    uns: 'layout', 'spatial', 'spatial_neighbors', 'spatialdata_attrs'
    obsm: 'spatial'

In [19]:
# per-observation metadata of the fibroblast + ductal subset
fibro_ductal_adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,region,cell_type,BTC_sample_name,sample_type,barcode,sampleID,sample_barcode
sample_barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
BTC01_AACACCTACTATCGAA-1-0,1.0,0.0,122.0,BTC01_visium,FIBROBLASTS,BTC01_visium,standard,AACACCTACTATCGAA-1-0,BTC01,BTC01_AACACCTACTATCGAA-1-0
BTC01_AACACGTGCATCGCAC-1-0,1.0,76.0,22.0,BTC01_visium,PDAC,BTC01_visium,standard,AACACGTGCATCGCAC-1-0,BTC01,BTC01_AACACGTGCATCGCAC-1-0
BTC01_AACACTTGGCAAGGAA-1-0,1.0,47.0,71.0,BTC01_visium,FIBROBLASTS,BTC01_visium,standard,AACACTTGGCAAGGAA-1-0,BTC01,BTC01_AACACTTGGCAAGGAA-1-0
BTC01_AACAGGAAGAGCATAG-1-0,1.0,69.0,7.0,BTC01_visium,FIBROBLASTS,BTC01_visium,standard,AACAGGAAGAGCATAG-1-0,BTC01,BTC01_AACAGGAAGAGCATAG-1-0
BTC01_AACAGGATTCATAGTT-1-0,1.0,49.0,43.0,BTC01_visium,FIBROBLASTS,BTC01_visium,standard,AACAGGATTCATAGTT-1-0,BTC01,BTC01_AACAGGATTCATAGTT-1-0
...,...,...,...,...,...,...,...,...,...,...
HC08BTC_s_016um_00217_00238-1-9,1.0,217.0,238.0,HC08BTC_visiumHD_square_016um,FIBROBLASTS,HC08BTC_visiumHD,HD,s_016um_00217_00238-1-9,HC08BTC,HC08BTC_s_016um_00217_00238-1-9
HC08BTC_s_016um_00122_00358-1-9,1.0,122.0,358.0,HC08BTC_visiumHD_square_016um,FIBROBLASTS,HC08BTC_visiumHD,HD,s_016um_00122_00358-1-9,HC08BTC,HC08BTC_s_016um_00122_00358-1-9
HC08BTC_s_016um_00197_00110-1-9,1.0,197.0,110.0,HC08BTC_visiumHD_square_016um,FIBROBLASTS,HC08BTC_visiumHD,HD,s_016um_00197_00110-1-9,HC08BTC,HC08BTC_s_016um_00197_00110-1-9
HC08BTC_s_016um_00224_00113-1-9,1.0,224.0,113.0,HC08BTC_visiumHD_square_016um,FIBROBLASTS,HC08BTC_visiumHD,HD,s_016um_00224_00113-1-9,HC08BTC,HC08BTC_s_016um_00224_00113-1-9


In [20]:
# fibroblast + ductal observations per sample
fibro_ductal_adata.obs["BTC_sample_name"].value_counts()

BTC_sample_name
HC03BTC_visiumHD    130643
HC01BTC_visiumHD    115094
HC04BTC_visiumHD     88478
HC07BTC_visiumHD     41768
HC05BTC_visiumHD     23314
HC08BTC_visiumHD     21152
BTC03_visium          4785
BTC09_visium          4741
BTC13_visium          4235
BTC01_visium          3962
Name: count, dtype: int64

In [21]:
# sanity check: only the labels listed in fibro_ductal_cells should remain
fibro_ductal_adata.obs["cell_type"].value_counts()

cell_type
FIBROBLASTS    330668
PDAC           107504
Name: count, dtype: int64