In [8]:
import random
from pathlib import Path

import anndata as ad
import igraph as ig
import leidenalg as la
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scipy as sp
import seaborn as sns
import squidpy as sq
from multispaeti import MultispatiPCA
from scipy.sparse import find
from sklearn.metrics import adjusted_rand_score

from leiden_utils import *

In [2]:
# Input and output locations, given relative to the notebook's working directory
data_dir = Path("./data/LIBD_DLPFC")
result_dir = Path("./results/LIBD_DLPFC")

# Single seed handed to every `seed=` / `random_state=` argument in the cells below
seed = 42

In [3]:
# Sample sheet: one row per sample directory (used as the index), keeping only
# the `n_clusters` column that is handed to the clustering helpers below.
metadata = pd.read_table(
    data_dir / "samples.tsv",
    usecols=["directory", "n_clusters"],
    index_col="directory",
)

# Results

## Impact of layer weight ratio

In [4]:
# Single sample used for the layer-weight sweep in the next cell
sample = metadata.loc["Br8100_151673"]

In [5]:
# Sweep the weight of the second (spatial) layer for one sample while the first
# layer's weight stays fixed at 1; every run writes its labels to its own TSV.
n_genes = 3_000
n_pcs = 30

out_dir = result_dir / "weightratio_impact" / sample.name

adata = get_anndata(data_dir / sample.name)
preprocess_adata(adata, genes=n_genes, n_pcs=n_pcs, seed=seed)

# Baseline run without the spatial layer; `res` is reused below as the
# resolution parameter of the latent partition.
leiden_df, res = run_leiden(adata, sample.n_clusters, seed=seed)

out_dir.mkdir(parents=True, exist_ok=True)
leiden_df.to_csv(out_dir / "leiden.tsv", sep="\t", index_label="")

# Spatial neighbourhood graph on the spot grid (6 neighbours per spot)
sq.gr.spatial_neighbors(adata, coord_type="grid", n_neighs=6)

for weight_ratio in (0, 0.2, 0.4, 0.6, 0.8, 1, 5, 10):
    multiplex_df, res_multi = run_leiden_multiplex(
        adata,
        sample.n_clusters,
        layer_weights=(1, weight_ratio),
        directed=(False, False),
        scale_graph_weights=(False, False),
        latent_partition_kwargs=dict(resolution_parameter=res),
        seed=seed,
    )
    # One file per ratio; the ratio is encoded with one decimal in the name
    sweep_path = out_dir / f"spatial_leiden_w{weight_ratio:.1f}.tsv"
    multiplex_df.to_csv(sweep_path, sep="\t", index_label="")

  from .autonotebook import tqdm as notebook_tqdm

 To achieve the future defaults please pass: flavor="igraph" and n_iterations=2.  directed must also be False to work with igraph's implementation.
  sc.tl.leiden(adata, resolution=res, random_state=seed, **kwargs)


## Cluster all samples

### HVGs

In [6]:
# Cluster every sample listed in `metadata`: once with plain Leiden and once
# with the multiplex variant that adds the spatial neighbour graph as a layer.
n_pcs = 30
n_genes = 3_000
weight_spatial = 0.8


for name, sample in metadata.iterrows():
    print("Processing " + name)

    sample_dir = data_dir / name
    out_dir = result_dir / name

    adata = get_anndata(sample_dir)
    preprocess_adata(adata, genes=n_genes, n_pcs=n_pcs, seed=seed)

    # Plain Leiden; the returned resolution is reused for the latent layer below
    label_leiden, res = run_leiden(adata, sample.n_clusters, seed=seed)

    # Multiplex: spatial grid graph (6 neighbours per spot) as second layer
    sq.gr.spatial_neighbors(adata, coord_type="grid", n_neighs=6)
    label_leiden_multi, _ = run_leiden_multiplex(
        adata,
        sample.n_clusters,
        layer_weights=(1, weight_spatial),
        directed=(False, False),
        scale_graph_weights=(False, False),
        latent_partition_kwargs=dict(resolution_parameter=res),
        seed=seed,
    )

    # Write both label tables next to each other in the sample's result folder
    out_dir.mkdir(parents=True, exist_ok=True)
    for filename, labels in (
        ("leiden.tsv", label_leiden),
        ("spatial_leiden.tsv", label_leiden_multi),
    ):
        labels.to_csv(out_dir / filename, sep="\t", index_label="")

Processing Br5292_151507
Processing Br5292_151508
Processing Br5292_151509
Processing Br5292_151510
Processing Br5595_151669
Processing Br5595_151670
Processing Br5595_151671
Processing Br5595_151672
Processing Br8100_151673
Processing Br8100_151674
Processing Br8100_151675
Processing Br8100_151676


### SVGs

In [9]:
# Cluster every sample on its spatially variable genes (SVGs), i.e. the genes
# with the largest Moran's I. Four label tables are written per sample:
#   - Leiden / multiplex Leiden on a regular PCA of the SVGs
#   - Leiden / multiplex Leiden on a MULTISPATI-PCA of the SVGs
n_pcs = 30
n_genes = 3_000
weight_spatial = 0.8

for name, sample in metadata.iterrows():
    print("Processing " + name)

    sample_dir = data_dir / name
    out_dir = result_dir / name

    adata = get_anndata(sample_dir)
    preprocess_adata(adata, genes=n_genes, n_pcs=n_pcs, seed=seed)

    # Spatial grid graph (6 neighbours per spot). squidpy stores it under
    # obsp["spatial_connectivities"]; it is computed once here and reused for
    # Moran's I, MULTISPATI-PCA and (presumably) the spatial layer of
    # run_leiden_multiplex, so it is not recomputed further down.
    sq.gr.spatial_neighbors(adata, coord_type="grid", n_neighs=6)
    sq.gr.spatial_autocorr(adata, genes=adata.var_names, mode="moran", seed=seed)

    # keep="all" retains ties at the cut-off, so more than n_genes may be selected
    genes = adata.uns["moranI"].nlargest(n_genes, columns="I", keep="all").index

    # Regular PCA restricted to the SVGs, followed by the expression kNN graph
    # (written by scanpy to obsp["connectivities"])
    adata.obsm["X_svg_pca"] = sc.tl.pca(
        adata[:, genes].X, n_comps=n_pcs, random_state=seed
    )
    sc.pp.neighbors(adata, use_rep="X_svg_pca", random_state=seed)

    # Plain Leiden; the returned resolution is reused for the latent layer below
    label_leiden, res = run_leiden(adata, sample.n_clusters, seed=seed)

    # Multiplex
    label_leiden_multi, _ = run_leiden_multiplex(
        adata,
        sample.n_clusters,
        directed=(False, False),
        scale_graph_weights=(False, False),
        layer_weights=(1, weight_spatial),
        latent_partition_kwargs={"resolution_parameter": res},
        seed=seed,
    )

    # Multiplex and MULTISPATI-PCA
    # MULTISPATI-PCA has to be given the *spatial* graph. obsp["connectivities"]
    # holds the expression-space kNN graph from sc.pp.neighbors above, so the
    # squidpy key "spatial_connectivities" is used instead.
    adata.obsm["X_mspca"] = MultispatiPCA(
        n_pcs, connectivity=adata.obsp["spatial_connectivities"]
    ).fit_transform(adata[:, genes].X)
    # Rebuild the expression kNN graph on the MULTISPATI embedding
    sc.pp.neighbors(adata, use_rep="X_mspca", random_state=seed)

    # `res` is deliberately overwritten: the multiplex run below must use the
    # resolution found for the MULTISPATI-based graph
    label_leiden_msPCA, res = run_leiden(adata, sample.n_clusters, seed=seed)
    label_leiden_multi_msPCA, _ = run_leiden_multiplex(
        adata,
        sample.n_clusters,
        directed=(False, False),
        scale_graph_weights=(False, False),
        layer_weights=(1, weight_spatial),
        latent_partition_kwargs={"resolution_parameter": res},
        seed=seed,
    )

    ## Write output
    out_dir.mkdir(parents=True, exist_ok=True)
    label_leiden.to_csv(out_dir / "leiden_svg.tsv", sep="\t", index_label="")
    label_leiden_multi.to_csv(
        out_dir / "spatial_leiden_svg.tsv", sep="\t", index_label=""
    )
    label_leiden_msPCA.to_csv(
        out_dir / "leiden_svg_multispati.tsv", sep="\t", index_label=""
    )
    label_leiden_multi_msPCA.to_csv(
        out_dir / "spatial_leiden_svg_multispati.tsv", sep="\t", index_label=""
    )

Processing Br5292_151507
Processing Br5292_151508
Processing Br5292_151509
Processing Br5292_151510
Processing Br5595_151669
Processing Br5595_151670
Processing Br5595_151671
Processing Br5595_151672
Processing Br8100_151673
Processing Br8100_151674
Processing Br8100_151675
Processing Br8100_151676
