# Spatial Transcriptomics Analysis Pipeline

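### This notebook loads spatial transcriptomics metal-abundance data, removes spots near the outer edge of the spot layout, computes local hot-spot statistics for a chosen metal, overlays them on the histology image, and models gene expression against hot-spot status and cell abundance.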

In [None]:
from esda import G_Local
import pandas as pd
import pickle5 as p
import numpy as np
from sklearn.metrics import pairwise_distances
import alphashape
from shapely.geometry import MultiPoint, Point, LineString, LinearRing, Polygon
import geopandas as gpd
import libpysal
from matplotlib.patches import Wedge
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import AxesGrid
import tifffile
import cv2
import seaborn as sns
import tqdm
from scipy.stats import ranksums
import statsmodels.formula.api as smf
import statsmodels.api as sm

## Load metal abundances data

In [None]:
# NOTE: unpickling can execute arbitrary code; only load pickles from a trusted source.
with open("./grouped_ST_metal_spots_records.pkl", "rb") as fh:
    metal_records = p.load(fh)
# Rebuild the table from the pickled records and discard its "index" column.
metal_abundances = pd.DataFrame.from_records(metal_records).drop(columns=["index"])

## Compute minimum nonzero pairwise distance

In [None]:
# All pairwise distances between spot coordinates, flattened to one dimension.
spot_coords = metal_abundances[["ST_x", "ST_y"]]
s = pd.Series(pairwise_distances(spot_coords).ravel())
# Zero distances (each spot to itself, plus any coincident spots) are skipped so
# that the result is the smallest non-zero spacing between two spots.
min_distance = s.loc[s > 0].min()

## Generate Alpha Shape for spatial filtering

In [None]:
# Seed NumPy's global RNG for this cell.
np.random.seed(42)
n_neighbors_drop = 2

# Length reused throughout this cell: sqrt(2) times the minimum spot spacing.
diag_step = min_distance * np.sqrt(2)

# Alpha shape of the spot coordinates, with alpha set to the inverse of that length.
alpha_shape = alphashape.alphashape(metal_abundances[["ST_x", "ST_y"]].values, 1 / diag_step)

# Band around the alpha-shape outline, `n_neighbors_drop` steps wide on each side.
# NOTE(review): `.exterior` assumes the alpha shape is a single Polygon - confirm.
wide_band = alpha_shape.exterior.buffer(diag_step * n_neighbors_drop)
band_centroid = np.array(wide_band.centroid.coords[0])

# Wedge outline (radius 25000, from 237 to 292 degrees) built at the origin and
# translated so that its apex sits on the centroid of the wide band.
ring_coords = Wedge((0, 0), 25000, 237, 292)._path.vertices
ring_coords = ring_coords + band_centroid
ring = Polygon(ring_coords)

# Exclusion zone: the wide band only where it falls inside the wedge (padded by
# 200 units), combined with a one-step band around the whole outline.
narrow_band = alpha_shape.exterior.buffer(diag_step)
alpha_shape_exterior = wide_band.intersection(ring.buffer(200)).union(narrow_band)

## Filter metal abundances

In [None]:
# One shapely Point per spot, in the same row order as `metal_abundances`.
xy_rows = metal_abundances[["ST_x", "ST_y"]].values.tolist()
points = gpd.GeoSeries([Point(x, y) for x, y in xy_rows])
# Positional boolean mask of spots that touch the exclusion zone; keep the rest.
in_edge_zone = points.intersects(alpha_shape_exterior).values
metal_abundances = metal_abundances[~in_edge_zone]

## Save filtered data

In [None]:
# Convert to a record array (with default arguments the DataFrame index is
# stored as a field too) and pickle it next to the input file.
edge_filtered_records = metal_abundances.to_records()
pd.to_pickle(edge_filtered_records, "./grouped_ST_metal_spots_records_edge_filtered.pkl")

## Load cell abundance and gene counts data

In [None]:
# NOTE: unpickling can execute arbitrary code; only load pickles from a trusted source.
with open("./SC_deconvolved_ST.pkl", "rb") as fh:
    deconvolved_records = p.load(fh)
cell_abundance = pd.DataFrame.from_records(deconvolved_records).drop(columns=["index"])
# Keep only the text after the last underscore of each column name.
cell_abundance.columns = [name.split("_")[-1] for name in cell_abundance.columns]
# Keep rows whose index label appears among the retained ST spot IDs.
# NOTE(review): this compares the frame's row index (the "index" column was
# dropped above) with `ST_spot_ID` - confirm the two share the same labels.
retained_spot_ids = metal_abundances["ST_spot_ID"].values
cell_abundance = cell_abundance.loc[cell_abundance.index.isin(retained_spot_ids)]

In [None]:
# NOTE: unpickling can execute arbitrary code; only load pickles from a trusted source.
with open("./gene_counts_JL.pkl", "rb") as fh:
    gene_records = p.load(fh)
gene_counts = pd.DataFrame.from_records(gene_records).drop(columns=["index"])
# Keep rows whose index label appears among the retained ST spot IDs.
# NOTE(review): this compares the frame's row index (the "index" column was
# dropped above) with `ST_spot_ID` - confirm the two share the same labels.
retained_ids = metal_abundances["ST_spot_ID"].values
gene_counts = gene_counts.loc[gene_counts.index.isin(retained_ids)]

## Load histology image

In [None]:
# Read the slide image from disk.
hne_image = tifffile.imread('./_SS12251_092842.svs')
# Shrink by a factor of 15 along both axes; the output size is derived from the
# scale factors because no explicit size is given.
shrink = 1 / 15
im_small = cv2.resize(hne_image, None, fx=shrink, fy=shrink)

## Perform spatial statistics on metal elements

In [None]:
# Seed NumPy's global RNG (presumably for the permutation inference done by
# G_Local - confirm).
np.random.seed(42)
element = "Cu63"
threshold_mult = 1
# Neighbourhood radius: sqrt(2) times the minimum spot spacing, scaled by
# `threshold_mult`, plus a fixed margin of 5 coordinate units.
neighbor_radius = (min_distance * np.sqrt(2)) * threshold_mult + 5
w = libpysal.weights.DistanceBand(metal_abundances[["ST_x", "ST_y"]], threshold=neighbor_radius)
# Local Getis-Ord statistic for the chosen element; star=True requests the
# G* variant, which includes each spot's own value.
lg = G_Local(metal_abundances[element], w, star=True)

## Plot metal hot-spot z-scores over the histology image

In [None]:
# Range of the standardized local statistic.
# NOTE(review): vmax/vmin are not passed to the scatter call below, so the
# colour scale is whatever matplotlib picks from the data.
vmax = lg.Zs.max()
vmin = lg.Zs.min()

# Histology image as background; spot coordinates are divided by 15 to match
# the downsampled image.
plt.imshow(im_small)
scaled_x, scaled_y = (metal_abundances[["ST_x", "ST_y"]].values / 15).T
plt.scatter(scaled_x, scaled_y, c=lg.Zs, cmap="seismic", s=0.25)
plt.xlabel("x coord")
plt.ylabel("y coord")
plt.colorbar(label=f"{element} HotSpot")
plt.show()

## Statistical analysis with a negative binomial GLM

In [None]:
# SFRP2 modelled on hot-spot status, T-cell abundance and their interaction.
# Q("...") quotes the column name because it contains a space.
formula = 'SFRP2 ~ metal + Q("T cells") + metal * Q("T cells")'
# Boolean flag: True where the standardized local statistic is positive.
# NOTE(review): the flag is attached by position, so the concatenated frame is
# assumed to have the same row count and order as `metal_abundances` - confirm.
is_hotspot = lg.Zs > 0
model_data = pd.concat([gene_counts, cell_abundance], axis=1).assign(metal=is_hotspot)
nb_family = sm.families.NegativeBinomial()
model = smf.glm(formula=formula, data=model_data, family=nb_family).fit()

In [None]:
# Show the fitted coefficients followed by their p-values.
coefficients, p_values = model.params, model.pvalues
print(coefficients, p_values)