In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os, sys
from tqdm import tqdm

In [None]:
sys.path.append('../')
from tree_inference.utilities import path_len_dist
from tree_inference.mutation_detection import filter_mutations, likelihood_matrices
from tree_inference.tree import CellTree, MutationTree
from tree_inference.tree_optimizer import TreeOptimizer

In [None]:
def make_boxplot(ax, data, colors = None, positions = None):
    """Draw a filled boxplot of ``data`` on ``ax`` with a light-gray background.

    Parameters
    ----------
    ax : axes-like object
        Must provide ``set_facecolor``, ``boxplot`` and ``yaxis.grid``
        (a matplotlib ``Axes`` in this notebook).
    data : array-like
        Passed unchanged to ``ax.boxplot``.
    colors : None, str or sequence, optional
        ``None`` falls back to ``'lightblue'``. A single string is applied to
        every box. A sequence is paired with the boxes in order; if it is
        shorter than the number of boxes the remaining boxes keep their
        default face colour.
    positions : optional
        Forwarded unchanged to ``ax.boxplot``.

    Returns
    -------
    The object returned by ``ax.boxplot`` (indexed here with ``'boxes'``).
    """
    ax.set_facecolor('lightgray')
    bplot = ax.boxplot(data, patch_artist = True, positions = positions)
    boxes = bplot['boxes']
    if colors is None:
        colors = 'lightblue'
    if isinstance(colors, str):
        # Size the colour list from the boxes that were actually drawn rather
        # than from data.shape[0]: for a 2-D array the boxes correspond to
        # columns, not rows, and list input has no .shape at all.
        colors = [colors] * len(boxes)
    for patch, color in zip(boxes, colors):
        patch.set_facecolor(color)
    ax.yaxis.grid(color = 'white')
    return bplot

In [None]:
def get_dist_data(n_tests=100, n_cells=130, data_dir='./comparison_data'):
    """Collect pairwise tree distances for a series of stored test cases.

    For every test index ``i`` in ``range(n_tests)`` the parent vectors of the
    true, DENDRO and inferred trees are read from ``data_dir`` and compared
    with ``path_len_dist``.

    Parameters
    ----------
    n_tests : int
        Number of test cases; files with indices ``0 .. n_tests-1`` are read.
    n_cells : int
        Argument passed to every ``CellTree`` constructor (130 by default,
        the value that was previously hard-coded).
    data_dir : str
        Directory holding ``parent_vec_{i}.txt``, ``dendro_parent_vec_{i}.txt``
        and ``inferred_parent_vec_{i}.txt``.

    Returns
    -------
    numpy.ndarray of shape ``(n_tests, 4)`` whose columns are, in order:
    true vs. inferred, true vs. DENDRO, true vs. random, inferred vs. DENDRO.

    Notes
    -----
    The random tree is randomized once, before the loop, so every test case
    is compared against the same random tree.
    """
    ct_true = CellTree(n_cells)
    ct_dendro = CellTree(n_cells)
    ct_inferred = CellTree(n_cells)
    ct_random = CellTree(n_cells)
    ct_random.randomize()

    def load_parent_vec(prefix, idx):
        # All three parent-vector files share one naming scheme and dtype.
        return np.loadtxt(os.path.join(data_dir, f'{prefix}parent_vec_{idx}.txt'), dtype=int)

    pair_dist = np.empty((n_tests, 4))

    for i in tqdm(range(n_tests)):
        ct_true.parent_vec = load_parent_vec('', i)
        ct_dendro.parent_vec = load_parent_vec('dendro_', i)
        ct_inferred.parent_vec = load_parent_vec('inferred_', i)

        pair_dist[i,0] = path_len_dist(ct_true, ct_inferred)
        pair_dist[i,1] = path_len_dist(ct_true, ct_dendro)
        pair_dist[i,2] = path_len_dist(ct_true, ct_random)
        pair_dist[i,3] = path_len_dist(ct_inferred, ct_dendro)

    return pair_dist

In [None]:
# Compute the pairwise tree distances and cache them on disk.
# The plotting cell reads './figures/pair_dist.txt', so the result is written
# to that exact path; saving elsewhere would leave the plot using a stale or
# missing file after a fresh run.
pair_dist = get_dist_data()
np.savetxt('./figures/pair_dist.txt', pair_dist)

In [None]:
# Reload the cached distances so the figure can be redrawn without
# recomputing them.
pair_dist = np.loadtxt('./figures/pair_dist.txt')

# Labels follow the column order produced by get_dist_data.
fig, ax = plt.subplots(figsize = (6, 6))
bplot = make_boxplot(ax, pair_dist)
ax.set_xticklabels([
    'true-inferred',
    'true-dendro',
    'true-random',
    'inferred-dendro',
])

fig.savefig('./figures/tree_distances.pdf')

In [None]:
def get_likelihood_diff(n_tests=100, n_cells=130, data_dir='./comparison_data'):
    """Compare tree likelihoods against the true tree for stored test cases.

    For every test index ``i`` in ``range(n_tests)`` the read-count files and
    the three parent vectors are loaded from ``data_dir``. The optimizer is
    fitted to the likelihood matrices, and its ``ct_mean_likelihood`` is read
    for the true, inferred, DENDRO and random trees in that order.

    Parameters
    ----------
    n_tests : int
        Number of test cases; files with indices ``0 .. n_tests-1`` are read.
    n_cells : int
        Argument passed to every ``CellTree`` constructor (130 by default,
        the value that was previously hard-coded).
    data_dir : str
        Directory holding ``ref_{i}.txt``, ``alt_{i}.txt`` and the three
        ``*parent_vec_{i}.txt`` files.

    Returns
    -------
    numpy.ndarray of shape ``(n_tests, 3)``. Each column holds
    ``ct_mean_likelihood`` of a tree minus that of the true tree, for the
    inferred, DENDRO and random tree respectively.

    Notes
    -----
    The random tree is randomized once, before the loop, so every test case
    uses the same random tree.
    """
    ct_true = CellTree(n_cells)
    ct_dendro = CellTree(n_cells)
    ct_inferred = CellTree(n_cells)
    ct_random = CellTree(n_cells)
    ct_random.randomize()

    optimizer = TreeOptimizer()

    def load(name, idx, **kwargs):
        # os.path.join now actually joins directory and file name; the
        # original call passed a single argument, which is a no-op.
        return np.loadtxt(os.path.join(data_dir, f'{name}_{idx}.txt'), **kwargs)

    def mean_likelihood(ct):
        # Same three steps the original repeated for each tree.
        # NOTE(review): presumably update_ct() recomputes ct_mean_likelihood
        # for the newly assigned tree - confirm in TreeOptimizer.
        optimizer.ct = ct
        optimizer.update_ct()
        return optimizer.ct_mean_likelihood

    likelihood_diff = np.empty((n_tests, 3))

    for i in tqdm(range(n_tests)):
        ct_true.parent_vec = load('parent_vec', i, dtype=int)
        ct_dendro.parent_vec = load('dendro_parent_vec', i, dtype=int)
        ct_inferred.parent_vec = load('inferred_parent_vec', i, dtype=int)

        ref = load('ref', i)
        alt = load('alt', i)

        ref, alt, gt1, gt2 = filter_mutations(ref, alt, method='threshold', t=0.5)
        likelihoods1, likelihoods2 = likelihood_matrices(ref, alt, gt1, gt2)
        optimizer.fit(likelihoods1, likelihoods2)

        # The true tree is evaluated first and serves as the baseline.
        true_likelihood = mean_likelihood(ct_true)

        # Column order: inferred, DENDRO, random (same as the original).
        for col, ct in enumerate((ct_inferred, ct_dendro, ct_random)):
            likelihood_diff[i, col] = mean_likelihood(ct) - true_likelihood

    return likelihood_diff

In [None]:
# Run the likelihood comparison on 100 stored test cases and keep the raw
# numbers next to the figures.
likelihood_diff = get_likelihood_diff(n_tests = 100)
np.savetxt('./figures/likelihood_diff.txt', likelihood_diff)

In [None]:
# Write the same array to the working directory as well; the plotting cell
# below loads it from './likelihood_diff.txt'.
np.savetxt('./likelihood_diff.txt', likelihood_diff)

In [None]:
# Reload the cached likelihood differences so the figure can be redrawn
# without refitting the optimizer.
likelihood_diff = np.loadtxt('./likelihood_diff.txt')

# Labels follow the column order produced by get_likelihood_diff.
fig, ax = plt.subplots(figsize = (6, 6))
bplot = make_boxplot(ax, likelihood_diff)
ax.set_xticklabels([
    'inferred',
    'dendro',
    'random',
])

fig.savefig('./figures/likelihood_diff.pdf')