# Setup and Preprocessing

In [2]:
import numpy as np
import pandas as pd
import hotspot
import matplotlib.pyplot as plt
import matplotlib.colors
import seaborn as sns
import mplscience

from ete3 import Tree
from scipy.io import mmread
from scipy.sparse import csr_matrix

import scanpy as sc

In [3]:
# Expression matrix: read the Matrix Market file and keep it in CSR form.
counts_raw = csr_matrix(mmread("./../data/GSM3302829_embryo3_SeuratNorm.mtx.gz"))

# Barcodes: first column of the header-less table, each suffixed with '-1'
# so that the names match the newick file.
barcodes = [
    bc + '-1'
    for bc in pd.read_table(
        "./../data/GSM3302829_embryo3_SeuratBarcodes.tsv.gz", header=None
    )[0]
]

# Gene names: first column of the header-less table.
genes = pd.read_table(
    "./../data/GSM3302829_embryo3_SeuratGenes.tsv.gz", header=None
)[0]

In [6]:
# Load the lineage tree and gather the names of all of its leaf nodes.
# `Tree` (ete3) is imported in the notebook's top import cell.
tree = Tree("./../data/0726_E2-2_tree_greedy_priors.processed.txt", format=1)

# A set gives fast membership checks when barcodes are matched against leaves.
leaves = {node.name for node in tree.traverse('postorder') if node.is_leaf()}

# Display the number of distinct leaf names.
len(leaves)

1756

In [7]:
# Identify the cells whose barcode appears among the tree leaves, i.e. the
# cells where the lineage was recoverable. This cell only builds the mask,
# the positions, and the matching barcodes.
is_valid = [bc in leaves for bc in barcodes]
(is_valid_indices,) = np.nonzero(is_valid)
valid_barcodes = [bc for bc, keep in zip(barcodes, is_valid) if keep]

In [8]:
# Wrap the transposed matrix in an AnnData object and label both axes with
# the barcodes (observations) and gene names (variables).
adata = sc.AnnData(counts_raw.transpose())
adata.obs_names = barcodes
adata.var_names = genes

# Keep only the cells found in the lineage tree. Indexing an AnnData returns
# a view, and filter_genes below modifies its argument in place, so take an
# explicit copy rather than relying on the view being converted implicitly
# (which emits a warning).
adata = adata[valid_barcodes].copy()

# Drop genes that pass the min_cells=10 threshold in too few cells.
sc.pp.filter_genes(adata, min_cells=10)

  adata = sc.AnnData(counts_raw.transpose())
  adata.var['n_cells'] = number


# Using Hotspot to get the pairwise relationships

In [9]:
# Instantiate Hotspot on the filtered AnnData, using the 'normal' model and
# handing over the lineage tree via `tree=`.
hs = hotspot.Hotspot(
    adata,
    model='normal',
    tree=tree,
)

# Build the neighborhood graph: unweighted, with n_neighbors=30.
# TODO: Replace this with a TE-based calculation?
hs.create_knn_graph(weighted_graph=False, n_neighbors=30)

100%|██████████| 1756/1756 [00:11<00:00, 156.16it/s]


In [11]:
# Run Hotspot's autocorrelation computation with 20 jobs and keep the
# resulting table for inspection below.
hs_results = hs.compute_autocorrelations(
    jobs=20,
)

100%|██████████| 12440/12440 [00:16<00:00, 751.61it/s]


Unnamed: 0_level_0,C,Z,Pval,FDR
Gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Rhox9,0.397749,67.377366,0.0,0.0
Gjb3,0.324565,58.648372,0.0,0.0
Gm9,0.29976,52.619236,0.0,0.0
Utf1,0.305452,51.4779,0.0,0.0
Ldoc1,0.292335,51.123571,0.0,0.0
Rhox6,0.281248,49.578977,0.0,0.0
Tfap2c,0.275263,45.350145,0.0,0.0
Pramef12,0.234209,43.074083,0.0,0.0
S100g,0.222336,39.79837,0.0,0.0
Apom,0.214624,38.925053,0.0,0.0


In [None]:
# Display the first 15 rows of the autocorrelation results table.
hs_results.head(15)

# Extracting the Edges of the Hypergraph

In [None]:
# TODO: revisit this step; it may not make sense for what Hotspot gives you.