In [1]:
import os

import gspa
import leidenalg
import numpy as np
import pandas as pd
import phate
import scanpy
import scprep

2024-03-21 17:08:59.398412: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-21 17:08:59.400360: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-21 17:08:59.505938: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-21 17:08:59.518186: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Read the acute and chronic T-cell AnnData files and stack them into one object.
acute, chronic = (
    scanpy.read_h5ad(h5ad_path)
    for h5ad_path in ('data/acute_tcells.h5ad', 'data/chronic_tcells.h5ad')
)
adata = scanpy.concat([acute, chronic])

# Store the batch label as a pandas categorical.
# NOTE(review): assumes both inputs carry a 'batch' column in .obs so that it
# survives the concatenation -- confirm against the input files.
batch_labels = adata.obs['batch']
adata.obs['batch'] = batch_labels.astype('category')

In [3]:
# Cell-level PHATE: fit on the full expression matrix and keep the embedding
# in adata.obsm['X_phate']. `phate_op` is reused later for its graph.
# This cell is slow: the recorded run below took ~487 s in total.
phate_op = phate.PHATE(
    knn=30,
    n_jobs=-1,
    random_state=42,
)
cell_embedding = phate_op.fit_transform(adata.to_df())
adata.obsm['X_phate'] = cell_embedding

Calculating PHATE...
  Running PHATE on 39704 observations and 14152 variables.
  Calculating graph and diffusion operator...
    Calculating PCA...
    Calculated PCA in 54.86 seconds.
    Calculating KNN search...
    Calculated KNN search in 302.97 seconds.
    Calculating affinities...
    Calculated affinities in 37.12 seconds.
  Calculated graph and diffusion operator in 398.18 seconds.
  Calculating landmark operator...
    Calculating SVD...
    Calculated SVD in 40.31 seconds.
    Calculating KMeans...
    Calculated KMeans in 21.40 seconds.
  Calculated landmark operator in 66.39 seconds.
  Calculating optimal t...
    Automatically selected t = 20
  Calculated optimal t in 6.45 seconds.
  Calculating diffusion potential...
  Calculated diffusion potential in 0.91 seconds.
  Calculating metric MDS...
  Calculated metric MDS in 14.97 seconds.
Calculated PHATE in 486.96 seconds.


In [4]:
# Highly variable gene selection with scprep (percentile=90). The gene names are
# passed as a second argument so that the names of the retained genes are
# returned next to the expression data.
data, data_hvgs = scprep.select.highly_variable_genes(
    adata.to_df(),
    adata.var_names,
    percentile=90,
)

# Keep only the columns named in `data_hvgs`.
# NOTE(review): `data` is presumably already restricted to these genes by
# scprep, which would make this selection a no-op -- confirm before removing.
data_hvg = data.loc[:, data_hvgs]

In [None]:
# Set up GSPA on the graph PHATE built in the cell-level embedding step, then
# construct the diffusion operator and the wavelet dictionary (in that order).
gspa_op = gspa.GSPA(
    graph=phate_op.graph,
    J=5,
    qr_decompose=False,
)
gspa_op.build_diffusion_operator()
gspa_op.build_wavelet_dictionary()

# Gene signals are the transposed highly-variable-gene matrix (`data_hvg.T`).
gene_signals = data_hvg.T
gene_ae, gene_pc = gspa_op.get_gene_embeddings(gene_signals)

# Collect the outputs used downstream. The localization score is computed
# after the embeddings, matching the original call order.
results = {
    'signal_embedding': gene_ae,
    'localization_score': gspa_op.calculate_localization(),
}

### cluster analysis

In [None]:
# Gene-level PHATE: run PHATE on the GSPA signal embedding. `gene_phate_op`
# is kept because its graph is used for clustering afterwards.
gene_phate_op = phate.PHATE(random_state=0)
gene_phate_op_input = results['signal_embedding']
data_phate = gene_phate_op.fit_transform(gene_phate_op_input)

In [None]:
# Leiden clustering (modularity objective, seed=0) on the gene-level PHATE graph.
# NOTE(review): no `weights=` argument is passed to find_partition, so any edge
# weights on the igraph object are presumably not used -- confirm this is intended.
gene_igraph = gene_phate_op.graph.to_igraph()
gene_partition = leidenalg.find_partition(
    gene_igraph,
    leidenalg.ModularityVertexPartition,
    seed=0,
)

# One cluster label per gene, indexed by the highly variable gene names.
gene_clusters = pd.Series(
    np.array(gene_partition.membership),
    index=data_hvgs.values,
)

In [None]:
# Persist the gene embedding, localization scores, gene names and cluster
# labels to a single .npz archive.
gspa_output_path = './results/GSPA_gene_embedding.npz'

# np.savez does not create missing parent directories; make sure the output
# folder exists so a fresh checkout can run this cell without a manual mkdir.
os.makedirs(os.path.dirname(gspa_output_path), exist_ok=True)

np.savez(gspa_output_path,
         signal_embedding=results['signal_embedding'],
         localization_score=results['localization_score'],
         # Cast gene names to a fixed-width string array. An object-dtype array
         # would be pickled inside the archive and could then only be read back
         # with np.load(..., allow_pickle=True).
         genes=np.asarray(data_hvgs.values, dtype=str),
         clusters=gene_clusters.values)