In [None]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
#import seaborn as sns

In [None]:
from random import random
from tqdm.notebook import tqdm

In [None]:
import time
import multiprocess as mp

### Read data

### Mutation detection

In [None]:
from mutation_detection import *
from LOH_detection import *

In [None]:
# Compute the posteriors with get_posteriors (passing n_threads = 6), report
# how long the call took, and write the result to ./posteriors.csv so that the
# following cell can reload it without recomputing.
# NOTE(review): `ref`, `alt` and `df_ref` are not defined in any visible cell
# (the "Read data" section is empty) — they must be loaded before running this.
start_time = time.perf_counter()  # monotonic clock: unaffected by system clock adjustments
posteriors = get_posteriors(ref, alt, n_threads = 6)
print('Runtime:', (time.perf_counter() - start_time) / 60, 'min')

# One CSV column per posterior column; rows reuse the index of df_ref.
pd.DataFrame(
    posteriors,
    columns = ['R', 'H', 'A', 'RH', 'HA'],
    index = df_ref.index,
).to_csv('./posteriors.csv')

In [None]:
# Reload the cached posteriors; the first two CSV columns become the row
# MultiIndex, and the remaining values are exposed as a plain array.
df_posteriors = pd.read_csv(
    './posteriors.csv',
    index_col = [0, 1],
)
posteriors = df_posteriors.to_numpy()

In [None]:
# Select (row, column) pairs whose posterior in columns 3 onward ('RH' and 'HA'
# in the CSV layout) exceeds 1 - 1/n, where n is the number of rows.
n_rows = posteriors.shape[0]
threshold = 1 - 1 / n_rows
mutated_idx, mut_type_idx = np.where(posteriors[:, 3:] > threshold)
n_mutated = len(mutated_idx)

# Column offset 0 maps to 'R' and offset 1 maps to 'A'.
homo_labels = ('R', 'A')
homos = [homo_labels[col] for col in mut_type_idx]

In [None]:
# Restrict both read matrices to the selected rows and pass them, together
# with the per-row 'R'/'A' labels, to get_corr_posteriors with a prior of 0.5.
corr_posteriors = get_corr_posteriors(
    ref[mutated_idx, :],
    alt[mutated_idx, :],
    homos,
    corr_prior = 0.5,
)

In [None]:
# Store the correlation posteriors next to the mutation type ('H' + 'R'/'A')
# for each selected row, indexed like the matching rows of df_posteriors.
# NOTE(review): a single 0 is appended to corr_posteriors, so the column only
# matches the length of `homos` and of the index if get_corr_posteriors returns
# one value fewer than the number of selected rows — confirm this is intended.
corr_table = pd.DataFrame(
    data = {
        'correlation posterior': np.concatenate((corr_posteriors, [0])),
        'mutation type': [f'H{h}' for h in homos],
    },
    index = df_posteriors.index[mutated_idx],
)
corr_table.to_csv('./corr_posteriors.csv')

### Data generator

In [None]:
from data_generator import *

In [None]:
# Create a DataGenerator with arguments (100, 200) and call random_tree() on
# it; later cells read the result back through `dg.tree`.
# NOTE(review): presumably the two arguments are the number of cells and the
# number of mutations/loci — confirm against data_generator.
dg = DataGenerator(100, 200)
dg.random_tree()

In [None]:
# Generate the two raw read matrices from the generator.
# NOTE(review): the "Other tests" cell at the end of the notebook calls
# dg.random_mutations() between random_tree() and generate_reads(); that call
# is absent here — confirm generate_reads() does not depend on it.
ref_raw, alt_raw = dg.generate_reads()

### Tree inference with generated data

In [None]:
from tree_inference import *
from mutation_detection import likelihood_matrices
from utilities import path_len_dist

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Filter the generated reads; returns filtered read matrices plus two further
# outputs gt1 and gt2 (presumably genotype labels per kept mutation — confirm).
# NOTE(review): this rebinds `ref` and `alt`, which the "Mutation detection"
# cells also use — running cells out of order will mix the two datasets.
ref, alt, gt1, gt2 = filter_mutations(ref_raw, alt_raw)

In [None]:
# Display the number of entries in gt1 returned by filter_mutations.
len(gt1)

In [None]:
# Build the two likelihood matrices that the TreeOptimizer.fit calls below consume.
likelihoods1, likelihoods2 = likelihood_matrices(ref, alt, gt1, gt2)

#### True tree

In [None]:
# Fit a fresh optimizer, then replace its cell tree with a copy of the
# generator's tree so the values inspected below refer to the simulated tree.
optz = TreeOptimizer()
optz.fit(likelihoods1, likelihoods2, reversible = True)
# Work on a copy so the generator's own tree object is not shared with the optimizer.
optz.ct = dg.tree.copy()
# Carry the optimizer's n_mut over to the newly assigned tree.
optz.ct.n_mut = optz.n_mut
# NOTE(review): presumably recomputes the quantities derived from optz.ct
# (e.g. ct_joint) — confirm in tree_inference.
optz.update_ct()

In [None]:
# Display ct_joint for the optimizer whose cell tree was copied from dg.tree.
optz.ct_joint

In [None]:
# Sanity check: optz.ct was copied from dg.tree above, so this compares the
# generator's tree with its own copy.
print('Distance matrix MSE to real tree:', path_len_dist(optz.ct, dg.tree))

In [None]:
# Fit the structure of optz.mt to the current cell tree.
optz.mt.fit_structure(optz.ct)
# Fill the column of mt_L belonging to the mt root with the row sums of likelihoods1.
optz.mt_L[:,optz.mt.root.ID] = np.sum(optz.likelihoods1, axis = 1)
# NOTE(review): presumably refreshes the values derived from optz.mt
# (e.g. mt_joint) — confirm in tree_inference.
optz.update_mt()

In [None]:
# Display mt_joint divided by the number of entries in likelihoods1.
optz.mt_joint / likelihoods1.size

#### All mutations reversible

In [None]:
# Start from a fresh optimizer fitted with reversible = True and report how far
# its initial cell tree is from the generator's tree.
# NOTE(review): the likelihood arguments are passed as (likelihoods2,
# likelihoods1) here, while the other two fit() calls in this notebook pass
# (likelihoods1, likelihoods2) — confirm the swap is intentional.
optz = TreeOptimizer()
optz.fit(likelihoods2, likelihoods1, reversible = True)
print('Distance matrix MSE to real tree:', path_len_dist(optz.ct, dg.tree))

In [None]:
# Run the optimizer with spaces = ['m'].
# NOTE(review): presumably 'm' restricts the search to the mutation-tree
# space — confirm against TreeOptimizer.optimize.
optz.optimize(spaces = ['m'])

In [None]:
# Divide every recorded likelihood by the number of entries in likelihoods1
# and plot the resulting trace.
history = np.array(optz.likelihood_history)
mean_likelihoods = history / likelihoods1.size
plt.plot(mean_likelihoods)

In [None]:
# Summary after optimization: distance to the generator's tree, and both joint
# values divided by the number of entries in likelihoods1.
n_entries = likelihoods1.size
print('MSE to real distance matrix:', path_len_dist(optz.ct, dg.tree))
print('Cell tree mean loglikelihood:', optz.ct_joint / n_entries)
print('Mutation tree mean loglikelihood:', optz.mt_joint / n_entries)

#### No mutations reversible

In [None]:
# Start from a fresh optimizer fitted with reversible = False and report how
# far its initial cell tree is from the generator's tree.
optz = TreeOptimizer()
optz.fit(likelihoods1, likelihoods2, reversible = False)
print('Distance matrix MSE to real tree:', path_len_dist(optz.ct, dg.tree))

In [None]:
# Run the optimizer with its default arguments.
optz.optimize()

In [None]:
# Divide every recorded likelihood by the number of entries in likelihoods1
# and plot the resulting trace.
history = np.array(optz.likelihood_history)
mean_likelihoods = history / likelihoods1.size
plt.plot(mean_likelihoods)

In [None]:
# Summary after optimization: distance to the generator's tree, and both joint
# values divided by the number of entries in likelihoods1.
n_entries = likelihoods1.size
print('MSE to real distance matrix:', path_len_dist(optz.ct, dg.tree))
print('Cell tree mean loglikelihood:', optz.ct_joint / n_entries)
print('Mutation tree mean loglikelihood:', optz.mt_joint / n_entries)

### Other tests

In [1]:
from data_generator import *
from mutation_detection import *

In [2]:
# Smaller generator run with arguments (25, 100): build a random tree, call
# random_mutations(), then generate the two read matrices.
# NOTE(review): this rebinds `dg`, `ref` and `alt`, which earlier sections of
# the notebook also use — re-running earlier cells afterwards will pick up
# these values.
dg = DataGenerator(25, 100)
dg.random_tree()
dg.random_mutations()
ref, alt = dg.generate_reads()