In [None]:
import scanpy as sc
import decoupler as dc

# Only needed for processing and plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import liana as li

#### Pre-process

In [None]:
# Load the 10x Visium sample "V1_Human_Lymph_Node" through scanpy's dataset helper
adata = sc.datasets.visium_sge(sample_id="V1_Human_Lymph_Node")
# De-duplicate gene names in place so they can serve as unique identifiers
adata.var_names_make_unique()
# Display a summary of the loaded object
adata

In [None]:
# Basic filtering: keep spots with >= 200 detected genes and genes seen in >= 10 spots
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=10)

# Annotate the group of mitochondrial genes as 'mt'
adata.var['mt'] = adata.var_names.str.startswith('MT-')
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

# Filter spots following standard QC criteria (< 20% mitochondrial counts).
# Boolean indexing returns a *view* of the parent object; take an explicit copy so
# the in-place normalisation below operates on an independent AnnData.
adata = adata[adata.obs.pct_counts_mt < 20, :].copy()

# Normalize the data
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Identify highly variable genes
sc.pp.highly_variable_genes(adata)

# Keep the full log-normalised matrix in .raw, then filter highly variable genes.
# Copy again: the subset is a view and `sc.pp.scale` modifies .X in place.
adata.raw = adata
adata = adata[:, adata.var.highly_variable].copy()

# Scale the data (values are capped at 10)
sc.pp.scale(adata, max_value=10)

In [None]:
# Generate PCA features
sc.tl.pca(adata, svd_solver='arpack')

# Compute distances in the PCA space, and find spot neighbors
sc.pp.neighbors(adata)

# Run leiden clustering algorithm (the labels are used below through the 'leiden' key)
sc.tl.leiden(adata)

# Visualize: two spatial panels, one for `None` and one coloured by the 'leiden' labels
# NOTE(review): the `None` panel presumably shows the bare tissue image — confirm
sc.pl.spatial(adata, color=[None, 'leiden'], size=1.5, wspace=0)

In [None]:
# Persist the processed object; it is read back from this path later in the notebook
adata.write_h5ad("processed_visium.h5ad")

In [None]:
# Retrieve the human PROGENy network through decoupler
# NOTE(review): `top=1000` presumably limits the targets kept per pathway — confirm
model = dc.get_progeny(organism='human', top=1000)
model

In [None]:
# Run decoupler's `run_mlm` on `adata` with the PROGENy network; the network
# columns are addressed by name ('source', 'target', 'weight')
dc.run_mlm(mat=adata, net=model, source='source', target='target', weight='weight', verbose=True)

# Store them in a different key: the results are read from the generic 'mlm_*'
# entries of .obsm, which the DoRothEA run further down reads from as well
adata.obsm['progeny_mlm_estimate'] = adata.obsm['mlm_estimate'].copy()
adata.obsm['progeny_mlm_pvals'] = adata.obsm['mlm_pvals'].copy()

In [None]:
# Extract the PROGENy estimates stored under .obsm into their own object
acts = dc.get_acts(adata, obsm_key='progeny_mlm_estimate')
acts

In [None]:
# Save the PROGENy activities; this file is reloaded later as `progeny`
acts.write_h5ad('progeny_acts.h5ad')

In [None]:
# Retrieve the human DoRothEA network through decoupler (rebinds `model`)
model = dc.get_dorothea(organism='human')

In [None]:
# Run decoupler's `run_mlm` again, this time with the DoRothEA network in `model`
dc.run_mlm(mat=adata, net=model, source='source', target='target', weight='weight', verbose=True)

# Store them in a different key: copy the generic 'mlm_*' results to DoRothEA-specific entries
adata.obsm['dorothea_mlm_estimate'] = adata.obsm['mlm_estimate'].copy()
adata.obsm['dorothea_mlm_pvals'] = adata.obsm['mlm_pvals'].copy()

In [None]:
# Extract the DoRothEA estimates stored under .obsm into their own object (rebinds `acts`)
acts = dc.get_acts(adata, obsm_key='dorothea_mlm_estimate')
acts

In [None]:
# Save the DoRothEA activities; this file is reloaded later as `dorothea`
acts.write_h5ad('dorothea_acts.h5ad')

In [None]:
# Display the most recently extracted activities (DoRothEA at this point)
acts

## Any X,Y combo

`SpatialDM` becomes `SpatialLRMethod`.

`SpatialMethod` is the general case for any X, Y pair.

In [1]:
import liana as li

In [2]:

import scanpy as sc
import pandas as pd
import numpy as np

In [3]:
from mudata import MuData

In [4]:
from liana.utils._utils import _get_props
from scipy.sparse import csr_matrix

In [5]:
# Reload the processed Visium object written earlier in this notebook
adata = sc.read_h5ad("processed_visium.h5ad")

In [6]:
# Reload the PROGENy activities saved earlier
progeny = sc.read_h5ad('progeny_acts.h5ad')

In [7]:
# Reload the DoRothEA activities saved earlier
dorothea = sc.read_h5ad('dorothea_acts.h5ad')

In [8]:
# Bundle expression and both activity objects as named modalities of one MuData
mdata = MuData({"expr": adata, 'progeny': progeny, 'dorothea': dorothea})



In [9]:
# transfer coordinates: expose the spatial coordinates of the expression object
# at the MuData level, where the proximity computation below is run
mdata.obsm["spatial"] = adata.obsm["spatial"]

In [31]:
# Compute spot-to-spot proximities on the MuData object; the result is read back
# from mdata.obsm['proximity'] in later cells.
# NOTE(review): a later cell calls this function as `li.method.sp.get_spatial_proximity`
# — confirm which import path is the current one.
li.mt.get_spatial_proximity(adata=mdata, parameter=100, bypass_diagonal=False, cutoff=0.1)

In [32]:
from liana.method.sp._spatialdm import _global_spatialdm, _local_to_dataframe, _local_spatialdm

In [33]:
from liana.method.sp._spatial_pipe import global_bivariate_pipe, _get_ordered_matrix

In [34]:
from liana.method.sp._bivariate_funs import _masked_coexpressions, _vectorized_correlations, _vectorized_wcosine, _vectorized_jaccard

In [35]:
# Names of the `mdata` modalities used as the x and y sides of the bivariate analysis
x_mod = 'progeny'
y_mod = 'dorothea'

In [36]:
# Build the table of x/y pairs (`xy_stats`) together with `x_pos` / `y_pos`,
# which are handed to `_get_ordered_matrix` as `pos` further down
xy_stats, x_pos, y_pos = global_bivariate_pipe(mdata, x_mod=x_mod, y_mod=y_mod, nz_threshold=0)

In [37]:
# Display the pair table (one row per x&y interaction)
xy_stats

Unnamed: 0,interaction,x_entity,x_means,x_non_zero,y_entity,y_means,y_non_zero
0,Androgen&AHR,Androgen,1.055404,1.0,AHR,-0.134946,1.0
1,EGFR&AHR,EGFR,0.913506,1.0,AHR,-0.134946,1.0
2,Estrogen&AHR,Estrogen,-2.512781,1.0,AHR,-0.134946,1.0
3,Hypoxia&AHR,Hypoxia,1.044591,1.0,AHR,-0.134946,1.0
4,JAK-STAT&AHR,JAK-STAT,6.993168,1.0,AHR,-0.134946,1.0
...,...,...,...,...,...,...,...
4111,TNFa&ZNF740,TNFa,5.252833,1.0,ZNF740,0.856168,1.0
4112,Trail&ZNF740,Trail,-2.640625,1.0,ZNF740,0.856168,1.0
4113,VEGF&ZNF740,VEGF,0.928763,1.0,ZNF740,0.856168,1.0
4114,WNT&ZNF740,WNT,-0.208129,1.0,ZNF740,0.856168,1.0


To SpatialMethod--->

In [38]:
# Proximity matrix stored on the MuData object, plus a dense float64 copy of it
dist = mdata.obsm['proximity']
weight = dist.A.astype(np.float64)

In [39]:
# Build the x and y matrices, ordered to follow the rows of `xy_stats`.
# NOTE(review): later cells index these as `x_mat.A[3446, :]` next to a per-spot weight
# vector and pass `x_mat.T` to a function expecting spots on the first axis, which
# implies an (interactions x spots) layout rather than spot_n x lr_n — confirm.
x_mat = _get_ordered_matrix(mat=mdata[x_mod].X,
                            pos=x_pos,
                            order=xy_stats['x_entity'])
y_mat = _get_ordered_matrix(mat=mdata[y_mod].X,
                            pos=y_pos,
                            order=xy_stats['y_entity'])

In [40]:
# Check the precision of the dense weights
weight.dtype

dtype('float64')

In [127]:
def _vectorized_correlations(x_mat, y_mat, dist, method="pearson"):
    if method not in ["pearson", "spearman"]:
        raise ValueError("method must be one of 'pearson', 'spearman'")
    
    # transpose
    x_mat, y_mat = x_mat.T, y_mat.T
    
    weight = dist.A.T
    weight_sums = np.sum(weight, axis = 0).flatten()
        
    if method=="spearman":
        x_mat = rankdata(x_mat, axis=1)
        y_mat = rankdata(y_mat, axis=1)
    
    # standard pearson
    n1 = (((x_mat * y_mat).dot(weight)) * weight_sums)
    n2 = (x_mat.dot(weight)) * (y_mat.dot(weight))
    numerator = n1 - n2
    
    denominator_x = (weight_sums * (x_mat ** 2).dot(weight)) - (x_mat.dot(weight))**2
    denominator_y = (weight_sums * (y_mat ** 2).dot(weight)) - (y_mat.dot(weight))**2
    denominator = (denominator_x * denominator_y)
    
    # numpy sum is unstable below 1e-6?
    denominator[denominator < 1e-6] = 0
    denominator = denominator ** 0.5
    
    zeros = np.zeros(numerator.shape)
    local_corrs = np.divide(numerator, denominator, out=zeros, where=denominator!=0)
    
    # fix numpy imprecision TODO related to numba rounding issue?
    local_corrs = np.clip(local_corrs, -1, 1, out=local_corrs)
    
    return local_corrs

In [128]:
# Local weighted Pearson for every pair; the matrices are passed transposed and
# densified because `_vectorized_correlations` transposes its inputs internally
masked_pc = _vectorized_correlations(x_mat.T.A, y_mat.T.A, dist, method='pearson')

In [129]:
# Replace any NaN in the result by 0
masked_pc = np.nan_to_num(masked_pc, nan=0)

In [131]:
# Sanity check: largest local correlation
np.max(masked_pc)

1.0

In [132]:
# Inspect a single entry (row 140, column 2720) of the result
masked_pc[140, 2720]

1.0

In [133]:
# Sanity check: smallest local correlation
np.min(masked_pc)

-1.0

In [135]:
from wcorr import WeightedCorr

In [136]:
# Row 3446 of the dense x matrix, used for a manual cross-check
x = x_mat.A[3446,:]

In [137]:
# Matching row 3446 of the dense y matrix
y = y_mat.A[3446,:]

In [138]:
# Column 5 of the dense weights, to be compared against masked_pc[3446, 5]
w = weight[:, 5]

In [139]:
# Assemble the x / y / weight vectors in the frame layout passed to WeightedCorr
my_data = pd.DataFrame({'x': x, 'y': y, 'w': w})

In [140]:
# Reference weighted Pearson from the `wcorr` package for the same vectors
WeightedCorr(xyw=my_data[['x', 'y', 'w']])(method='pearson')

0.0926351278505682

In [141]:
# Value from the vectorised implementation for the same (row 3446, spot 5) entry
masked_pc[3446, 5]

0.09263724088668823

In [None]:
# Densify the x / y matrices (no transpose happens on this line).
# NOTE: this rebinds `x_mat` / `y_mat`, so the cell is not idempotent.
x_mat, y_mat = x_mat.A, y_mat.A

# transposed dense weights and their column sums
weight = dist.A.T
weight_sums = np.sum(weight, axis = 0).flatten()

In [None]:


# Weighted Pearson on the dense matrices, written out step by step.
# The weighted sums of x and y appear in several terms, so compute them once.
xw = x_mat @ weight
yw = y_mat @ weight

# numerator: sum(w) * sum(w*x*y) - sum(w*x) * sum(w*y)
n1 = ((x_mat * y_mat) @ weight) * weight_sums
n2 = xw * yw
numerator = n1 - n2

# denominator: square root of the product of the two weighted variance terms
denominator_x = (weight_sums * ((x_mat ** 2) @ weight)) - xw**2
denominator_y = (weight_sums * ((y_mat ** 2) @ weight)) - yw**2
denominator = (denominator_x * denominator_y)**0.5

# divide only where the denominator is non-zero; other entries stay 0
zeros = np.zeros(numerator.shape)
local_corrs = np.divide(numerator, denominator, out=zeros, where=denominator!=0)


In [None]:
from matplotlib.pyplot import hist

In [None]:
# Distribution of the per-column weight totals
hist(weight_sums)

In [None]:
import squidpy as sq

In [None]:
# load the pre-processed dataset
img = sq.datasets.visium_hne_image()
adata = sq.datasets.visium_hne_adata()

In [None]:
li.method.sp.get_spatial_proximity(adata=adata, parameter=100, bypass_diagonal=True, cutoff=0.1)

In [None]:
adata.obsm["proximity"].sum(axis=0)

In [None]:
hist()

In [None]:
from scipy.stats import rankdata

In [None]:
# Zero out NaN entries in place
masked_pc[np.isnan(masked_pc)] = 0

In [None]:
# Locate every (row, column) position that holds the minimum value
np.where(np.min(masked_pc) == masked_pc)

In [None]:
from matplotlib.pyplot import hist

In [None]:
# Shape of the proximity matrix
dist.shape

In [None]:
# Shape of the local-correlation result
masked_pc.shape

In [None]:
# Scratch check: a zero array with the shape and dtype of `denominator`
np.zeros_like(denominator)

In [None]:
# Distribution of the local correlations in column 908
hist(masked_pc[:,908])

In [None]:
# n / sum(W) for Moran's I: number of rows of the proximity matrix over its total weight
norm_factor = mdata.obsm['proximity'].shape[0] / mdata.obsm['proximity'].sum()
# rebinds `dist` to the rescaled proximities, stored as CSR
dist = csr_matrix(norm_factor * mdata.obsm['proximity'])

In [None]:
###  -> bivariate metrics (later SpatialDM)



In [None]:
# x is taken from the PROGENy activities and y from the DoRothEA activities;
# `x_pos` / `y_pos` and the 'x_entity' / 'y_entity' columns of `xy_stats` tell the
# helpers which entity of each matrix belongs to which pair.
# Global results are written straight into `xy_stats` as two new columns.
xy_stats['global_r'], xy_stats['global_pvals'] = \
    _global_spatialdm(x_mat=progeny.X,
                      y_mat=dorothea.X,
                      x_pos=x_pos,
                      y_pos=y_pos,
                      xy_dataframe=xy_stats,
                      dist=dist,
                      seed=0,
                      n_perm=1000,
                      pvalue_method="analytical",
                      positive_only=False,
                      x_key = 'x_entity',
                      y_key = 'y_entity'
                      )
# Local (per-spot) statistics and p-values, computed with the same arguments
local_r, local_pvals = _local_spatialdm(x_mat=progeny.X,
                                        y_mat=dorothea.X,
                                        x_pos=x_pos,
                                        y_pos=y_pos,
                                        xy_dataframe=xy_stats,
                                        dist=dist,  # TODO msq?
                                        seed=0,
                                        n_perm=1000,
                                        pvalue_method="analytical",
                                        positive_only=False,
                                        x_key = 'x_entity',
                                        y_key = 'y_entity'
                                        )

In [None]:
# convert to dataframes
# The pair table is `xy_stats` (it carries the 'interaction' column); the name
# `xy_res` used here before is never assigned in this notebook.
# NOTE(review): `adata` must still be the object loaded from processed_visium.h5ad
# at this point — verify it has not been rebound by an intermediate cell.
local_r = _local_to_dataframe(array=local_r,
                              idx=adata.obs.index,
                              columns=xy_stats.interaction)
local_pvals = _local_to_dataframe(array=local_pvals,
                                  idx=adata.obs.index,
                                  columns=xy_stats.interaction)

In [None]:
# Attach the results to `adata`: the pair table with the global statistics goes
# to .uns, the per-spot frames to .obsm.
# The table is `xy_stats`; `xy_res` is never assigned in this notebook.
adata.uns['global_res'] = xy_stats
adata.obsm['local_r'] = local_r
adata.obsm['local_pvals'] = local_pvals

In [None]:
# Ten pairs with the highest global statistic ('global_r' lives on `xy_stats`;
# `xy_res` is never assigned in this notebook)
xy_stats.sort_values(by='global_r', ascending=False).head(10)

In [None]:
# Turn the per-spot results stored under .obsm into standalone objects for plotting
# (rebinds `local_pvals`)
local_pvals = li.ut.obsm_to_adata(adata, obsm_key='local_pvals')
local_rs = li.ut.obsm_to_adata(adata, obsm_key='local_r')

In [None]:
# Spatial map of the local statistic for the JAK-STAT & STAT2 pair
sc.pl.spatial(local_rs, color=['JAK-STAT&STAT2'], cmap='coolwarm', vmax=5)

In [None]:
# Spatial map of the local p-values for the same pair (reversed colour map)
sc.pl.spatial(local_pvals, color=['JAK-STAT&STAT2'], cmap='coolwarm_r')

In [None]:
# Spatial map of the JAK-STAT activity on its own, for comparison with the pair above
sc.pl.spatial(progeny, color=['JAK-STAT'], use_raw=False, cmap='coolwarm', vmax=5)

In [None]:
# Spatial map of the STAT2 activity on its own, for comparison with the pair above
sc.pl.spatial(dorothea, color=['STAT2'], use_raw=False, cmap='coolwarm', vmax=5)

In [None]:
# Display the object holding the stored results
adata