In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import phate, scprep
import matplotlib.pyplot as plt
import leidenalg
from scipy.spatial.distance import cdist

In [34]:
# Read the AnnData file and convert its expression matrix to a DataFrame.
# NOTE(review): the path is an absolute, user-specific location — consider a
# configurable data directory.
data = sc.read_h5ad('/home/av622/palmer_scratch/all_cd8_t_cells.h5ad').to_df()

# Keep only the highly variable genes (percentile=90). The column labels are
# passed as extra data so the matching subset of gene names comes back
# alongside the filtered frame.
data, data_hvgs = scprep.select.highly_variable_genes(
    data,
    data.columns,
    percentile=90,
)

# Selected gene names followed by one extra 'uniform' label.
# presumably this labels the additional embedding row stored after the genes
# (index 1416 in later cells) — TODO confirm.
genes = [*data_hvgs.to_list(), 'uniform']

In [37]:
# Open the saved siVAE results archive.
# allow_pickle=True permits unpickling of object arrays stored in the file;
# unpickling can execute arbitrary code, so only load archives from a trusted
# source.
siVAE_results = np.load(
    'results/siVAE_gene_embedding_tcells_hvg_loc.npz',
    allow_pickle=True,
)

In [57]:
# Embed the first 1416 rows of the siVAE signal embedding with PHATE.
# presumably these rows are the highly variable genes and the row after them
# is the 'uniform' reference — TODO confirm.
gene_signal_embedding = siVAE_results['signal_embedding'][:1416]

# Fixed random_state so the PHATE layout is reproducible across runs.
gene_phate_op = phate.PHATE(random_state=0)
data_phate = gene_phate_op.fit_transform(gene_signal_embedding)

Calculating PHATE...
  Running PHATE on 1416 observations and 128 variables.
  Calculating graph and diffusion operator...
    Calculating PCA...
    Calculated PCA in 0.02 seconds.
    Calculating KNN search...
    Calculated KNN search in 0.10 seconds.
    Calculating affinities...
  Calculated graph and diffusion operator in 0.13 seconds.
  Calculating optimal t...
    Automatically selected t = 23
  Calculated optimal t in 0.80 seconds.
  Calculating diffusion potential...
  Calculated diffusion potential in 0.33 seconds.
  Calculating metric MDS...
  Calculated metric MDS in 2.20 seconds.
Calculated PHATE in 3.48 seconds.


In [61]:
# Cluster genes by running Leiden (modularity objective, fixed seed) on the
# graph that the fitted PHATE operator built over the gene embeddings.
# NOTE(review): no `weights=` argument is passed to the partition, so any edge
# weights carried by the exported igraph are not used — confirm this is intended.
gene_graph = gene_phate_op.graph.to_igraph()
gene_partition = leidenalg.find_partition(
    gene_graph,
    leidenalg.ModularityVertexPartition,
    seed=0,
)

# One cluster label per graph vertex, indexed by the selected gene names.
gene_clusters = pd.Series(
    np.array(gene_partition.membership),
    index=data_hvgs.values,
)

In [45]:
# Localization score: distance (cdist default metric, Euclidean) from the
# embedding row at index 1416 to each of the first 1416 rows.
# presumably row 1416 is the 'uniform' reference and the rows before it are
# the genes — TODO confirm.
signal_embedding = siVAE_results['signal_embedding']

# cdist requires both inputs to be 2-D. Indexing a single row yields a 1-D
# vector, which makes cdist raise "XA must be a 2-dimensional array", so the
# leading axis is restored to give a (1, n_features) matrix. Integer indexing
# (rather than a 1416:1417 slice) still raises IndexError if the row is absent.
reference_row = signal_embedding[1416][np.newaxis, :]

# The result has shape (1, 1416); flatten it to one score per gene.
localization_score = cdist(reference_row, signal_embedding[:1416]).reshape(-1,)

In [63]:
# Collect everything needed downstream and write it to a single .npz archive.
# Every array is expected to have one entry per gene (the first 1416 embedding
# rows); the key order matches the order in which entries are written.
gene_embedding_arrays = {
    'signal_embedding': siVAE_results['signal_embedding'][:1416],
    'localization_score': localization_score,
    'genes': data_hvgs.values,
    'clusters': gene_clusters.values,
}
np.savez('./results/siVAE_gene_embedding.npz', **gene_embedding_arrays)