In [6]:
import os

import leidenalg
import scanpy, phate
import numpy as np
import scprep
import pandas as pd
import gspa
from run.run_eigenscore import run_eigenscore

In [2]:
# Read the acute and chronic T-cell AnnData files and stack them into one object.
acute_path, chronic_path = 'data/acute_tcells.h5ad', 'data/chronic_tcells.h5ad'
acute = scanpy.read_h5ad(acute_path)
chronic = scanpy.read_h5ad(chronic_path)
adata = scanpy.concat([acute, chronic])

# Store the batch labels as a pandas categorical.
# NOTE(review): assumes both input files already carry a 'batch' column in .obs — confirm.
batch_labels = adata.obs['batch']
adata.obs['batch'] = batch_labels.astype('category')

In [3]:
# Fit PHATE on the cell-by-gene expression table and keep the embedding on the
# AnnData object; the fitted operator's graph is reused by a later cell.
phate_op = phate.PHATE(
    knn=30,
    n_jobs=-1,
    random_state=42,
)
cell_expression = adata.to_df()
adata.obsm['X_phate'] = phate_op.fit_transform(cell_expression)

Calculating PHATE...
  Running PHATE on 39704 observations and 14152 variables.
  Calculating graph and diffusion operator...
    Calculating PCA...
    Calculated PCA in 16.43 seconds.
    Calculating KNN search...
    Calculated KNN search in 116.81 seconds.
    Calculating affinities...
    Calculated affinities in 11.53 seconds.
  Calculated graph and diffusion operator in 145.95 seconds.
  Calculating landmark operator...
    Calculating SVD...
    Calculated SVD in 14.66 seconds.
    Calculating KMeans...
    Calculated KMeans in 4.25 seconds.
  Calculated landmark operator in 20.77 seconds.
  Calculating optimal t...
    Automatically selected t = 20
  Calculated optimal t in 1.67 seconds.
  Calculating diffusion potential...
  Calculated diffusion potential in 0.49 seconds.
  Calculating metric MDS...
  Calculated metric MDS in 3.92 seconds.
Calculated PHATE in 172.82 seconds.


In [4]:
# Select highly variable genes (percentile=90), then scale every selected gene
# column to unit L2 norm.
data, data_hvgs = scprep.select.highly_variable_genes(
    adata.to_df(),
    adata.var_names,
    percentile=90,
)
hvg_expression = data[data_hvgs]
gene_norms = np.linalg.norm(hvg_expression, axis=0)  # one norm per gene column
data_hvg = hvg_expression / gene_norms

In [5]:
# Constant signal with one entry per observation, scaled so that its single row
# has unit L2 norm.
uniform_signal = np.ones((1, adata.n_obs))
row_norms = np.linalg.norm(uniform_signal, axis=1, keepdims=True)  # shape (1, 1)
uniform_signal = uniform_signal / row_norms

In [None]:
# Container for the outputs that the following cells fill in.
results = {}

# Run the eigenscore on the normalized HVG signals (transposed so genes are rows)
# over the graph built by the cell-level PHATE operator, then reduce the
# resulting representation with gspa's SVD helper.
cell_graph = phate_op.graph.to_pygsp()
gene_signals = data_hvg.T
signal_representation = run_eigenscore(cell_graph, gene_signals, args=None)
signal_reduced = gspa.embedding.svd(signal_representation)



In [None]:
# Embed the reduced representation with gspa's run_ae
# (presumably an autoencoder — confirm against the gspa docs).
signal_embedding = gspa.embedding.run_ae(signal_reduced)
results['signal_embedding'] = signal_embedding

# Localization score: L2 norm of each row of the unreduced representation.
row_l2_norms = np.linalg.norm(signal_representation, axis=1)
results['localization_score'] = row_l2_norms

### cluster analysis

In [None]:
# Second PHATE run, this time on the gene signal embedding; the fitted
# operator's graph is what the clustering cell below partitions.
gene_phate_op = phate.PHATE(random_state=0)
gene_embedding = results['signal_embedding']
data_phate = gene_phate_op.fit_transform(gene_embedding)

In [None]:
# Partition the gene-level PHATE graph with Leiden using the modularity
# objective and a fixed seed.
# NOTE(review): no `weights` argument is passed to find_partition, so edge
# weights are presumably not used — confirm this is intended.
gene_graph = gene_phate_op.graph.to_igraph()
gene_partition = leidenalg.find_partition(
    gene_graph,
    leidenalg.ModularityVertexPartition,
    seed=0,
)

# One cluster label per gene, indexed by the selected gene names.
gene_clusters = pd.Series(
    np.array(gene_partition.membership),
    index=data_hvgs.values,
)

In [None]:
# Persist the gene embedding, localization scores, gene names and cluster
# labels into a single .npz archive.
output_path = './results/Eigenscore_gene_embedding.npz'

# np.savez does not create missing parent directories; make sure the target
# folder exists so the save does not fail after the expensive steps above.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

np.savez(output_path,
         signal_embedding=results['signal_embedding'],
         localization_score=results['localization_score'],
         genes=data_hvgs.values, clusters=gene_clusters.values)