In [None]:
import sys
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import seaborn as sns
import scvi

import cell2location

from matplotlib import rcParams
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text for PDFs (fonttype 42 = TrueType)

# Presumably the folder for analysis outputs; it is not referenced by the cells visible here.
results_folder = './results/BPD/'

In [None]:
# path = sp_data_folder + 'rawdata/'
def read_and_qc(path, sample_name, count_file='filtered_feature_bc_matrix.h5'):
    r""" Read one 10X Visium experiment into an anndata object and compute QC metrics.

    Genes are re-indexed by ENSEMBL ID (gene symbols are kept in ``var['SYMBOL']``),
    spot names are prefixed with ``'s<sample_name>_'`` and the entry of
    ``uns['spatial']`` is re-keyed to the sample name.
    Modify this function if required by your workflow.

    :param path: path to the folder holding one sub-folder per sample; it is
        concatenated as-is, so it must end with a path separator
    :param sample_name: Name of the sample (also the name of its sub-folder)
    :param count_file: name of the count matrix file inside the sample folder
    :return: anndata object of the sample with QC columns added to ``obs`` / ``var``
    """
    # One string form of the sample name is used everywhere (folder, obs column,
    # spot prefix, uns['spatial'] key) so that later look-ups agree with each other.
    sample = str(sample_name)

    adata = sc.read_visium(path + sample + '/', count_file=count_file, load_images=True)
    adata.obs['sample'] = sample

    # Index genes by ENSEMBL ID and keep the symbols as a regular column.
    adata.var['SYMBOL'] = adata.var_names
    adata.var.rename(columns={'gene_ids': 'ENSEMBL'}, inplace=True)
    adata.var_names = adata.var['ENSEMBL']
    adata.var.drop(columns='ENSEMBL', inplace=True)

    # just in case there are non-unique ENSEMBL IDs
    adata.var_names_make_unique()

    # Calculate QC metrics
    sc.pp.calculate_qc_metrics(adata, inplace=True)
    # Only symbols starting with lower-case 'mt-' are flagged.
    # NOTE(review): references that name these genes 'MT-' would not match - confirm species.
    adata.var['mt'] = [gene.startswith('mt-') for gene in adata.var['SYMBOL']]
    # np.asarray(...).ravel() always yields a 1-D vector of length n_obs, whether
    # the row sum comes back as np.matrix (sparse X) or ndarray (dense X), and it
    # does not collapse to a scalar when the sample has a single spot.
    mt_counts = np.asarray(adata[:, adata.var['mt'].values].X.sum(axis=1)).ravel()
    adata.obs['mt_frac'] = mt_counts / adata.obs['total_counts']

    # add sample name to obs names
    adata.obs_names = ['s' + sample + '_' + barcode for barcode in adata.obs_names]
    adata.obs.index.name = 'spot_id'

    # Re-key the spatial entry to the sample name. The guard matters: when the
    # library id already equals the sample name, "copy then delete" would remove
    # the only entry and leave uns['spatial'] empty.
    spatial = adata.uns['spatial']
    file = list(spatial.keys())[0]
    if file != sample:
        spatial[sample] = spatial.pop(file)
    print(adata.uns['spatial'].keys())

    return adata

In [None]:
from anndata import concat

def read_all_and_qc(
    path,
    sample_annot, Sample_ID_col, spot_selection,
    count_file='filtered_feature_bc_matrix.h5',
):
    """
    Read all Visium samples, keep only the selected spots and concatenate them.

    :param path: path to the folder holding one sub-folder per sample
        (passed unchanged to ``read_and_qc``)
    :param sample_annot: DataFrame with one row per sample; it is NOT modified,
        a stringified copy is used internally
    :param Sample_ID_col: column of ``sample_annot`` that holds the sample names
    :param spot_selection: DataFrame with a ``'Sample'`` column (``'D'`` + sample
        name) and a ``'barcode'`` column listing the spots to keep
    :param count_file: name of the count matrix file inside each sample folder
    :return: concatenated anndata object; every column of ``sample_annot`` is
        added to ``obs`` as strings, and ``obs['batch']`` holds the sample name
    """
    # Work on a stringified copy so the caller's table keeps its index and dtypes
    # and re-running the cell always starts from the same input.
    annot = sample_annot.astype(str)
    annot.index = annot[Sample_ID_col]
    sample_ids = annot[Sample_ID_col].tolist()

    # read all samples and store them in a list
    adatas = []
    for s in sample_ids:
        adata_i = read_and_qc(path, s, count_file=count_file)
        spots_i = spot_selection[spot_selection['Sample'] == 'D' + s]

        # read_and_qc names spots 's<sample>_<barcode>'; build the same names here
        prefix = 's' + s + '_'
        vals = prefix + spots_i['barcode'].astype(str)

        # .copy() detaches the subset from the full per-sample object, so the
        # unselected spots are not kept alive through a view
        adatas.append(adata_i[adata_i.obs_names.isin(vals), :].copy())

    # combine individual samples
    adata = concat(
        adatas,
        merge="unique",
        uns_merge="unique",
        label="batch",
        keys=sample_ids,
        index_unique=None
    )

    # broadcast the per-sample annotation onto every spot of that sample
    adata.obs[annot.columns] = annot.reindex(index=adata.obs['sample']).values

    return adata



In [None]:
# Root folder of the spatial data and the per-sample annotation table stored in it.
# The table's 'sample_name' column is later passed to read_all_and_qc as the sample ID column.
sp_data_folder = './data/'
sample_data = pd.read_csv(sp_data_folder + 'ST_samples.csv')

# show the annotation table
sample_data

In [None]:
# Tab-separated table of the spots to keep; read_all_and_qc reads its
# 'Sample' and 'barcode' columns.
spot_selection = pd.read_csv('./spots.selected.txt', delimiter="\t")

# show the selection table
spot_selection

In [None]:

# Read every sample listed in sample_data, keep only the selected spots and merge them.
# NOTE: sp_data_folder already ends with '/', so the path below is './data//rawdata/'
# (a double slash, which the file system treats like a single one).
adata = read_all_and_qc(
    path=sp_data_folder+'/rawdata/', 
    sample_annot=sample_data, 
    Sample_ID_col='sample_name', 
    spot_selection=spot_selection,
    count_file='filtered_feature_bc_matrix.h5',
)

# show a summary of the merged object
adata

In [None]:
# Sanity check: (rows, columns) of the selection table, i.e. how many spots were selected in total.
spot_selection.shape

In [None]:
# Sanity check: selected spots per sample (read_all_and_qc matches these labels as 'D' + sample name).
spot_selection['Sample'].value_counts()

In [None]:
# Sanity check: spots per sample in the merged object; compare with the selection counts.
adata.obs['sample'].value_counts()

In [None]:
def select_slide(adata, s, s_col='sample'):
    r""" This function selects the data for one slide from the spatial anndata object.

    The returned object contains only the locations of experiment ``s`` and a
    ``uns['spatial']`` dictionary reduced to the single entry of that experiment.
    The input object is left unchanged.

    :param adata: Anndata object with multiple spatial experiments
    :param s: name of selected experiment
    :param s_col: column in adata.obs listing experiment name for each location
    :return: independent copy of the selected slide
    :raises IndexError: if no key of ``uns['spatial']`` matches ``s``
    """

    # Explicit copy: uns is reassigned below, and doing that on a view would
    # trigger an implicit copy anyway.
    slide = adata[adata.obs[s_col].isin([s]), :].copy()
    spatial = slide.uns['spatial']

    if s in spatial:
        # An exact key match wins, so that 'S1' never resolves to 'S10'.
        s_spatial = s
    else:
        # Fall back to the first key that contains the name as a substring.
        matches = [k for k in spatial if s in k]
        if not matches:
            raise IndexError(
                f"no entry for experiment {s!r} in adata.uns['spatial'] "
                f"(available keys: {list(spatial)})"
            )
        s_spatial = matches[0]

    slide.uns['spatial'] = {s_spatial: spatial[s_spatial]}

    return slide

In [None]:
# Save the merged object to 'st.h5ad' in the current working directory
# (results_folder is not used for this path).
adata.write(filename='st.h5ad')