# Run Thor on Visium HD data

In this notebook, we show how to infer a cell-level spatial transcriptome from a Visium HD dataset of a bladder cancer patient sample.

For installation of Thor, please refer to [this installation guide](../installation.rst).

## Import the packages

In [None]:
import sys
import os
import logging
import datetime

# Attach a handler with a "<logger name> - <level> - <message>" layout to the root
# logger (basicConfig does nothing if the root logger already has handlers), then
# let INFO-and-above records through.
logging.basicConfig(format='%(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Record when this run started.
now = datetime.datetime.now()
logger.info(f"Current Time: {now}")

In [None]:
# Render inline matplotlib figures in the high-resolution 'retina' format.
%config InlineBackend.figure_format = 'retina'

import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
# Scanpy figure defaults: 80 dpi on screen, 300 dpi when figures are saved.
sc.set_figure_params(scanpy=True, dpi=80, dpi_save=300)
# Keep scanpy quiet: only error-level messages are shown.
sc.settings.verbosity = 'error'


# Thor building blocks: image/spatial preprocessing (thor.pp), the fineST predictor,
# plotting helpers (thor.pl) and AnnData utilities (thor.utils).
from thor.pp import WholeSlideImage, Spatial
from thor.finest import fineST
from thor.pl import single_molecule, plot_spot,get_nuclei_pixels
from thor.utils import get_adata_layer_array

from PIL import Image
# Setting the limit to None turns off Pillow's decompression-bomb size check;
# presumably needed because the whole slide image is very large. Only open trusted images.
Image.MAX_IMAGE_PIXELS = None

## Predicting cell-level gene expression using Markov graph diffusion 

The segmentation step is skipped here because it takes a long time without a GPU. Instead, we provide a pre-computed segmentation folder that includes both the cell feature file and the segmentation files.

In [3]:
# Optional and disabled by default: nuclei segmentation of the whole slide image.
# The code is kept commented out because pre-computed segmentation outputs are used instead.
# NOTE(review): before uncommenting, confirm the paths. `image_process_dir` is only defined
# in a later cell, and `image_path` here starts with "./" while the later path-configuration
# cell uses "../".

# name = "demo_visiumhd"
# image_path = f"./{name}_Scan1.qptiff.tiff"

# Option A: segment the nuclei from scratch with the "stardist" method.
# wsi = WholeSlideImage(image_path, name=name)
# wsi.process(method="stardist")

# Option B: reuse an existing nuclei mask stored as an .npz mask array.
# cell_mask_path = os.path.join(image_process_dir, "nuclei_mask.npz")
# wsi = WholeSlideImage(image_path, name=name, nuclei_seg_path=cell_mask_path, nuclei_seg_format='mask_array_npz')
# wsi.process()

### Preprocessing cell-level spatial transcriptome.
Using the standard SCANPY pipeline, we created the VisiumHD 002um bin `adata` from the Space Ranger output directory (`demo_visiumhd`). This `adata` contains the expression matrix, bin locations mapped as pixel coordinates on the whole slide image (WSI), and both high and low-resolution images with their associated scale factors.

In [4]:
# Sample identifier; it is embedded in every file and directory name below.
name = "demo_visiumhd"

# Whole slide image. NOTE(review): this path is relative to the parent directory,
# whereas `spatial_dir` below is relative to the current one — confirm the layout.
image_path = f"../{name}_Scan1.qptiff.tiff"

# Pre-computed image-processing outputs live in WSI_<name> under the working directory.
outdir = os.getcwd()
image_process_dir = os.path.join(outdir, f'WSI_{name}')
cell_mask_path, cell_feature_path = (
    os.path.join(image_process_dir, file_name)
    for file_name in ("nuclei_mask.npz", "cell_features.csv")
)

# Directory holding the processed 2 µm-bin AnnData file.
spatial_dir = f"./{name}"
spot_adata_path = os.path.join(spatial_dir, f"{name}_processed_002um.h5ad")

In [5]:
# Bin-level AnnData read from the processed 2 µm .h5ad file.
spot = sc.read_h5ad(spot_adata_path)
# Per-cell feature table; the first CSV column becomes the index.
cell_feature = pd.read_csv(cell_feature_path, index_col=0)

#### We need to use a new function tailored to Visium HD 2 µm square-bin data (bins smaller than a cell).

In [None]:
# Visium HD-specific helper shipped with Thor.
from thor.utilities.VisiumHD_cell_mapping import HD2cell

# Derive a cell-level AnnData from the bin-level data and the per-cell feature table.
# NOTE(review): presumably bins are assigned to segmented cells and aggregated per cell;
# `assignments` is not used again in this notebook — confirm its content in HD2cell's docs.
adata_cell, assignments = HD2cell(adata_spot=spot, node_feat=cell_feature)

In [None]:
# Create the Thor sample from the whole slide image, the 2 µm-bin AnnData file and
# the pre-computed per-cell features, then build its input with a mapping margin of 10.
sample = fineST(image_path, name=name, spot_adata_path=spot_adata_path,
                cell_features_csv_path=cell_feature_path)
sample.prepare_input(mapping_margin=10)

In [8]:
# Scale every cell to 10,000 total counts, then apply log(1 + x).
# Both calls modify `adata_cell` in place, so re-running this cell would transform
# already-transformed values; re-create `adata_cell` first if it has to be run again.
sc.pp.normalize_total(adata_cell,target_sum=10000)
sc.pp.log1p(adata_cell)

In [9]:
# Replace the expression matrix and the cell names of `sample.adata` with the
# cell-level values held in `adata_cell`. The transfer is purely positional, so
# both objects must have identical dimensions and the same gene order.
# NOTE(review): rows are assumed to be in the same cell order in both objects; this
# cannot be verified here because the cell names themselves are being overwritten.
if sample.adata.shape != adata_cell.shape:
    raise ValueError(
        f"Cannot transfer expression: sample.adata has shape {sample.adata.shape} "
        f"but adata_cell has shape {adata_cell.shape}."
    )
if not sample.adata.var_names.equals(adata_cell.var_names):
    # Equal shapes with different gene names/order would silently assign values to
    # the wrong genes, so make the mismatch visible instead of failing quietly.
    logging.getLogger().warning(
        "var_names of sample.adata and adata_cell differ; "
        "expression columns may be misaligned."
    )
sample.adata.X = adata_cell.X
sample.adata.obs_names = adata_cell.obs_names

In [10]:
# Compute a PCA of the transferred expression matrix; scanpy stores the embedding in
# `sample.adata.obsm["X_pca"]`, the key passed to `set_params` further down.
sc.tl.pca(sample.adata)

In [11]:
# Marker genes of interest stored on the sample (full gene names in the trailing comments).
# NOTE(review): presumably this is the list the prediction is restricted to — confirm.
sample.genes = [
    "VIM",  # Vimentin
    "ACTA2",  # Alpha Smooth Muscle Actin (αSMA)
    "CAV1",  # Caveolin 1
    "CAV2",  # Caveolin 2
    "PDGFRA",  # Platelet Derived Growth Factor Receptor Alpha
    "CD34",  # CD34 Molecule
    "GJA1",  # Gap Junction Protein Alpha 1 (Connexin 43)
    "KIT",  # KIT Proto-Oncogene, Receptor Tyrosine Kinase
    "CDH11",  # Cadherin 11
    "PDGFRB",  # Platelet Derived Growth Factor Receptor Beta
    "CSPG4",  # Chondroitin Sulfate Proteoglycan 4 (NG2)
    "PECAM1",  # Platelet And Endothelial Cell Adhesion Molecule 1 (CD31)
    "FAP",  # Fibroblast Activation Protein Alpha
    "TNC",  # Tenascin C
    "THY1",  # Thy-1 Cell Surface Antigen (CD90)
    "S100A4",  # S100 Calcium Binding Protein A4 (FSP1)
    "DLL4",  # Delta Like Canonical Notch Ligand 4
    "CCR7"  # C-C Motif Chemokine Receptor 7 (plotted at the end of the notebook)
]

In [12]:
# Register the genes to predict.
# NOTE(review): with genes_selection_key=None this presumably falls back to the list
# already stored in `sample.genes` — confirm against Thor's documentation.
sample.set_genes_for_prediction(genes_selection_key=None)

In [13]:
# Configure the prediction run.
# NOTE(review): apart from the remarks below, the meaning of each option is defined by
# Thor's `set_params`; consult its documentation before changing values.
sample.recipe = 'gene'
sample.set_params(
    is_rawCount=False,  # the matrix was normalised and log1p-transformed in an earlier cell
    out_prefix="fineST",  # presumably, with n_iter=20, this names the 'fineST_20.npz' file loaded later
    write_freq=20,
    n_iter=20,
    conn_csr_matrix="force",
    smoothing_scale=0.8,
    node_features_obs_list=['spot_heterogeneity'],
    n_neighbors=10,
    geom_morph_ratio=0.5,
    geom_constraint=0,
    inflation_percentage=None,
    regulate_expression_mean=False,
    stochastic_expression_neighbors_level='spot',
    smooth_predicted_expression_steps=0,
    reduced_dimension_transcriptome_obsm_key="X_pca",  # embedding written by sc.tl.pca in an earlier cell
    adjust_cell_network_by_transcriptome_scale=0,
    n_jobs=20)  # NOTE(review): hard-coded; presumably a worker count — lower it on smaller machines

In [None]:
# Run the prediction with the recipe and parameters configured above.
# NOTE(review): presumably the long-running step of the notebook, writing its result to disk.
sample.predict_gene_expression()

In [None]:
# Load the saved prediction and display its summary as the cell output.
# NOTE(review): the file name presumably follows "<out_prefix>_<iteration>.npz"; keep it in
# sync with `out_prefix` and `n_iter` passed to `set_params`.
ad_thor = sample.load_result('fineST_20.npz')
ad_thor

In [16]:
# Save the cell-level prediction inside `spatial_dir`, the directory that also holds
# the input AnnData, instead of re-typing that directory as a literal path.
# NOTE(review): "finesST" in the file name looks like a typo of "fineST"; it is kept
# unchanged so that anything already reading this file still finds it.
ad_thor.write(os.path.join(spatial_dir, f"{name}_finesST_20_result_002um.h5ad"))

### Compare gene expression profiles between the Thor results and the Visium HD 008μm data (bin size close to the actual cell size).

In [17]:
# Load the processed Visium HD 8 µm-bin AnnData from `spatial_dir`, the same directory
# the 2 µm input is read from, rather than repeating the directory as a literal.
ad_HD = sc.read_h5ad(os.path.join(spatial_dir, f"{name}_processed_008um.h5ad"))

In [None]:
# Spatial map of CCR7 from the Thor cell-level prediction. An explicit title is set
# because the default title is just the gene name, which is identical for every CCR7 plot.
sc.pl.spatial(ad_thor, color="CCR7", spot_size=50, title="CCR7 - Thor (cell level)")

In [None]:
# Spatial map of CCR7 from the Visium HD 8 µm bins, used as the reference. An explicit
# title is set because the default title is just the gene name, identical for every CCR7 plot.
sc.pl.spatial(ad_HD, color="CCR7", spot_size=50, title="CCR7 - Visium HD 008um bins")