In [3]:
import numpy as np
import os
import warnings

In [4]:
from tree_inference.utilities import *
from tree_inference.tree_optimizer import TreeOptimizer
from tree_inference.mutation_detection import filter_mutations, likelihood_matrices
from tree_inference.data_generator import DataGenerator

## Comparison using the demo data of DENDRO

In [3]:
# Load the two DENDRO demo matrices in one pass: the 'ref' file first, then the 'alt' file.
ref_raw, alt_raw = map(np.loadtxt, ('dendro_demo_ref.txt', 'dendro_demo_alt.txt'))
# Last expression of the cell: display the dimensions of the raw matrix.
ref_raw.shape

(130, 200)

In [4]:
# Filter the raw matrices with the 'threshold' method (t = 0.5). The call returns the
# filtered ref/alt matrices plus two further values, gt1 and gt2, which are passed
# unchanged to likelihood_matrices (presumably the two genotypes per kept locus --
# confirm in tree_inference.mutation_detection).
ref, alt, gt1, gt2 = filter_mutations(ref_raw, alt_raw, method = 'threshold', t = 0.5)
# Build the two likelihood matrices that the tree optimizer is fitted on.
likelihoods1, likelihoods2 = likelihood_matrices(ref, alt, gt1, gt2)
# Display the shape after filtering; in the recorded run this is (130, 58), down from (130, 200).
ref.shape

(130, 58)

In [5]:
# Create a tree optimizer with default settings and hand it both likelihood matrices.
# fit() is called with reversible=True; see TreeOptimizer.fit for what the flag controls.
optimizer = TreeOptimizer()
optimizer.fit(likelihoods1, likelihoods2, reversible=True)

In [6]:
# Run the optimization. The recorded log shows passes alternating between the cell tree
# space and the mutation tree space; the last two passes over each space make 0 moves.
optimizer.optimize()

[Cell Tree Space] convergence after 11605 steps and 344 move(s).
[Mutation Tree Space] convergence after 3888 steps and 24 move(s).
[Cell Tree Space] convergence after 650 steps and 0 move(s).
[Mutation Tree Space] convergence after 580 steps and 0 move(s).
[Cell Tree Space] convergence after 650 steps and 0 move(s).
[Mutation Tree Space] convergence after 580 steps and 0 move(s).


In [7]:
import graphviz

# Labels are shifted down by one so they can index color_map directly
# (presumably the file stores them 1-based -- confirm against the demo data).
labels = np.loadtxt('dendro_demo_labels.txt', dtype=int) - 1
color_map = ['red', 'green', 'blue']
colors = [color_map[cluster] for cluster in labels]

g = graphviz.Graph(filename='tree.svg', engine='neato')
for node in optimizer.ct.nodes:
    if not node.isleaf:
        # Non-leaf nodes are drawn as small gray points.
        g.node(str(node.ID), label='', shape='point', style='filled', color='gray')
    else:
        # Leaves are drawn as filled circles, colored by the label looked up via the node ID.
        g.node(str(node.ID), label='', shape='circle', style='filled', color=colors[node.ID])

    # Every node except the root gets an edge from its parent.
    if node.isroot:
        continue
    g.edge(str(node.parent.ID), str(node.ID))

# Write the graph source to disk; the returned path is shown as the cell output.
g.save('./figures/ct_graphviz.txt')

'./figures/ct_graphviz.txt'

## Generate data for comparison

In [7]:
def generate_comparison_data(n_cells=130, n_loci=200, size=100, path='./comparison_data/', seed=None):
    """Simulate `size` random data sets and write each one to `path` as text files.

    For every index ``i`` in ``range(size)`` a fresh ``DataGenerator(n_cells, n_loci)``
    is created; it draws a random tree, random mutations (``mut_prop=0.75``,
    ``genotype_freq=[1/3, 1/3, 1/3]``) and reads. Four files are then written with
    ``np.savetxt``:

    * ``ref_<i>`` and ``alt_<i>``: the transposed matrices returned by ``generate_reads()``
    * ``parent_vec_<i>``: ``generator.tree.parent_vec``
    * ``mut_indicator_<i>``: the transposed ``generator.mut_indicator``

    Parameters
    ----------
    n_cells, n_loci
        Forwarded unchanged to ``DataGenerator``.
    size : int
        Number of data sets to generate; ``0`` only prepares the output directory.
    path : str
        Output directory. It is created (with a ``UserWarning``) if it does not exist.
    seed : int or None
        If not ``None``, ``np.random.seed(seed)`` is called once before any data is
        generated. This makes runs repeatable provided ``DataGenerator`` draws from
        NumPy's global random state (not visible from here -- confirm). The default
        ``None`` leaves the random state untouched, as before.
    """
    if seed is not None:
        np.random.seed(seed)

    if not os.path.exists(path):
        warnings.warn('Target path does not exist and will be created.', UserWarning)
        # exist_ok guards against the directory appearing between the check and the call.
        os.makedirs(path, exist_ok=True)

    for i in range(size):
        generator = DataGenerator(n_cells, n_loci)
        generator.random_tree()
        generator.random_mutations(mut_prop=0.75, genotype_freq=[1/3, 1/3, 1/3])
        ref, alt = generator.generate_reads()

        # The matrices are written transposed relative to how the generator holds them.
        np.savetxt(os.path.join(path, 'ref_%i' % i), ref.T)
        np.savetxt(os.path.join(path, 'alt_%i' % i), alt.T)
        np.savetxt(os.path.join(path, 'parent_vec_%i' % i), generator.tree.parent_vec)
        np.savetxt(os.path.join(path, 'mut_indicator_%i' % i), generator.mut_indicator.T)

In [8]:
# Generate a single data set (size=1), leaving every other argument at its default.
generate_comparison_data(size=1)