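This notebook runs the ABC (approximate Bayesian computation) analysis on variant allele frequencies (VAFs) using the `SubClonalSelection.jl` package. Julia is not called in-process; each run is started in a separately spawned terminal (Windows `cmd`).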

Before running it, a Julia sysimage file that includes the `SubClonalSelection.jl` package and all its dependencies has to be generated; its location is configured in `path_to_sysimage` below. Further information on how to create a sysimage can be found [here](https://docs.julialang.org/en/v1/devdocs/sysimg/).

In [None]:
import numpy as np
import os
from concurrent.futures import ThreadPoolExecutor
import covid_utilities as cu

In [None]:
# Template for VAF file paths: `base_path.format(name)` fills the `{}` placeholder
# with a file name given without its `.vaf` extension.
base_path = cu.DATA_PATH + r"\vafs\{}.vaf"
# Sysimage handed to `julia -J` in `do_ABC`; every backslash of `cu.COVID_PATH` is doubled here.
path_to_sysimage = cu.COVID_PATH.replace("\\", r"\\") + r"\\SubClonalSelection\\sysimage_runabc.so" # your actual path here, backslashes must be escaped
# Number of worker threads for the thread pool that processes all VAF files.
nthreads = 6

In [None]:
def read_vaf(path):
    """Load the values of a VAF file into a NumPy array.

    The file is expected to hold one number per line. Blank lines (for
    example an empty line at the end of the file) are skipped instead of
    aborting the whole read.

    Parameters
    ----------
    path : str
        Location of the VAF file.

    Returns
    -------
    numpy.ndarray
        1-D float array with one entry per non-blank line; empty if the file
        holds no values.

    Raises
    ------
    ValueError
        If a non-blank line cannot be parsed as a float.
    """
    with open(path, "r") as f:
        # Iterate the file lazily; float() itself tolerates the trailing newline.
        return np.array([float(line) for line in f if line.strip()], dtype=float)

In [None]:
def write_vafs(path, array):
    """Write the values of `array` to a new file, one value per line.

    An existing file is never overwritten. The file is opened in exclusive
    creation mode, so the existence check and the creation happen in one step
    and two workers cannot both pass the check and overwrite each other.

    Parameters
    ----------
    path : str
        Destination file; must not exist yet.
    array : iterable
        Values to write; each one is converted with `str`.

    Raises
    ------
    FileExistsError
        If `path` already exists.
    """
    try:
        f = open(path, "x")
    except FileExistsError:
        # Keep the message callers saw from the former explicit check.
        raise FileExistsError("VAF file exists already") from None
    with f:
        # No trailing newline: values are only separated by "\n".
        f.write("\n".join(map(str, array)))

---

In [None]:
# Lower VAF cutoff: `prepare_file` drops values below it and embeds it in the
# names of the filtered files; it is also passed to `do_ABC` as `minvaf`.
Minvaf = 0.01
# Passed to `fitABCmodels` as `Nmax` and embedded in the name of each result.
Nmax = 2000

In [None]:
def prepare_file(filename):
    """Filter one VAF file with the current `Minvaf` cutoff and start its ABC run.

    Three kinds of file names are distinguished:

    * names containing ``filtered{Minvaf}`` are treated as already filtered
      with the current cutoff and are read as they are;
    * names containing ``filtered`` with any other cutoff are skipped;
    * all other names are treated as unfiltered: values below `Minvaf` are
      dropped and the result is written next to the input as
      ``{base}_{n_mutations}-filtered{Minvaf}.vaf`` unless that file exists.

    The read depth is parsed from the file name: the last ``_``-separated
    field of an unfiltered name, the second to last of a filtered one.

    `do_ABC` is only called when at least 100 values remain and no entry named
    ``{filtered_filename}_Nmax{Nmax}`` exists in the ``ABC_results`` folder
    below `cu.DATA_PATH`, the same folder `do_ABC` hands to Julia as
    ``resultsdirectory``.

    Parameters
    ----------
    filename : str
        File name (with extension) inside the ``vafs`` folder.
    """
    # Read the module-level cutoff once so the whole call uses one value.
    minvaf = Minvaf
    # Strip only the last extension; the cutoff itself contains a dot.
    stem = ".".join(filename.split(".")[:-1])

    if f"filtered{minvaf}" in filename:
        filtered_filename = stem
        read_depth = int(stem.split("_")[-2])
        filtered_vafs = read_vaf(base_path.format(stem))
        n_mutations = len(filtered_vafs)
    elif "filtered" in filename: # sort out files which are filtered, but with different threshold
        return
    else:
        read_depth = int(stem.split("_")[-1])
        vafs = read_vaf(base_path.format(stem))
        filtered_vafs = vafs[vafs >= minvaf]
        n_mutations = len(filtered_vafs)
        filtered_filename = f"{stem}_{n_mutations}-filtered{minvaf}"

    filtered_path = base_path.format(filtered_filename)
    if not os.path.exists(filtered_path):
        write_vafs(filtered_path, filtered_vafs)

    # Look for existing results where do_ABC actually writes them, instead of
    # a machine-specific absolute path.
    results_dir = cu.DATA_PATH.replace("\\", "/") + "/ABC_results"
    result_path = f"{results_dir}/{filtered_filename}_Nmax{Nmax}"
    if n_mutations >= 100 and not os.path.exists(result_path):
        do_ABC(filtered_filename, read_depth, minvaf = minvaf, Nmax = Nmax)

In [None]:
def do_ABC(input_file, read_depth, minvaf = 0.01, Nmax = 2000, birth_rate = None): # nparticles = 500, d = 0
    """Run `fitABCmodels` from SubClonalSelection on one VAF file in a spawned terminal.

    A Julia command line is assembled that loads the sysimage given by
    `path_to_sysimage`, fixes the random seed with ``Random.seed!(123)`` and
    calls `fitABCmodels` on ``{cu.DATA_PATH}/vafs/{input_file}.vaf``, with
    ``{cu.DATA_PATH}/ABC_results`` as results directory. The command is
    executed through ``start /wait cmd /c``, so the call blocks until the
    spawned window has finished.

    Parameters
    ----------
    input_file : str
        Name of the VAF file without the ``.vaf`` extension.
    read_depth : int
        Forwarded to `fitABCmodels` as ``read_depth``.
    minvaf : float
        Forwarded to `fitABCmodels` as ``minvaf``.
    Nmax : int
        Forwarded to `fitABCmodels` as ``Nmax`` and embedded in the result name.
    birth_rate : float, optional
        Forwarded to `fitABCmodels` as ``b``. When given, the result name gets
        the suffix ``_b-log(x)`` with ``x = exp(birth_rate)`` printed with one
        decimal. When omitted, ``log(3)`` is used and no suffix is added.

    Returns
    -------
    None
        A non-zero return code of the spawned command is printed together
        with `input_file`.
    """
    if birth_rate is not None:
        out_file = f"{input_file}_Nmax{Nmax}_b-log({np.exp(birth_rate):.1f})"
    else:
        birth_rate = np.log(3)
        out_file = f"{input_file}_Nmax{Nmax}"

    # Convert the path outside the f-string: quotes and backslashes inside a
    # replacement field only parse on Python >= 3.12.
    data_dir = cu.DATA_PATH.replace("\\", "/")
    # The \" sequences keep the Julia string delimiters intact inside the
    # double-quoted -e argument built below.
    function_call = (
        f"fitABCmodels(\\\"{data_dir}/vafs/{input_file}.vaf\\\", \\\"{out_file}\\\", "
        f"read_depth = {read_depth}, minvaf = {minvaf}, fmax = 1.0, maxiterations = 2*10^5, "
        f"Nmax = {Nmax}, resultsdirectory = \\\"{data_dir}/ABC_results\\\", "
        "progress = true, verbose = true, save = true, Nmaxinf = 10^6, ploidy = 1, "
        f"b = {birth_rate}, convergence = 0.005, mincellularity = 0.95, maxcellularity = 1.05)"
    )
    statements = ["using SubClonalSelection", "using Random", "Random.seed!(123)", function_call]
    julia_code = "; ".join(statements)
    call = f'julia -J "{path_to_sysimage}" -e "{julia_code}"'
    returncode = os.system(f'start /wait cmd /c {call}')
    if returncode != 0:
        print(returncode, input_file)

In [None]:
# The directory is listed once; files written during the passes below are not
# picked up again within this cell.
files = os.listdir(cu.DATA_PATH + r"\vafs")
for mv in [0.01, 0.001, 0.0001]:
    # prepare_file reads the module-level Minvaf, so set it before dispatching.
    Minvaf = mv
    with ThreadPoolExecutor(max_workers = nthreads) as executor:
        futures = {name: executor.submit(prepare_file, name) for name in files}
    # Leaving the with-block waits for every worker, so Minvaf is not changed
    # while a pass is still running and all futures are finished here.
    for name, future in futures.items():
        error = future.exception()
        if error is not None:
            # Report the failure instead of silently dropping it; the remaining
            # files and cutoffs are still processed.
            print(f"prepare_file failed for {name} (minvaf = {mv}): {error!r}")

---

Run ABC on specific files with specialised parameters

In [None]:
def prepare_file_b(filename, birth_rate, minVAF = None):
    """Filter one VAF file and start its ABC run with a given birth rate.

    Three kinds of file names are distinguished:

    * names containing ``filtered{minVAF}`` are treated as already filtered
      with this cutoff and are read as they are;
    * names containing ``filtered`` with any other cutoff are skipped;
    * all other names are treated as unfiltered: values below `minVAF` are
      dropped and the result is written next to the input as
      ``{base}_{n_mutations}-filtered{minVAF}.vaf`` unless that file exists.

    `do_ABC` is only called when at least 100 values remain and no entry named
    ``{filtered_filename}_Nmax{Nmax}_b-log(x)`` (``x = exp(birth_rate)`` with
    one decimal) exists in the ``ABC_results`` folder below `cu.DATA_PATH`.

    Parameters
    ----------
    filename : str
        File name (with extension) inside the ``vafs`` folder.
    birth_rate : float
        Forwarded to `do_ABC` as ``birth_rate``.
    minVAF : float, optional
        VAF cutoff. When omitted, the module-level `Minvaf` is looked up at
        call time, so assigning `Minvaf` after this function was defined
        takes effect.
    """
    if minVAF is None:
        # A default of `Minvaf` in the signature would be frozen at definition
        # time and ignore later assignments to the module-level value.
        minVAF = Minvaf
    # Strip only the last extension; the cutoff itself contains a dot.
    stem = ".".join(filename.split(".")[:-1])

    if f"filtered{minVAF}" in filename:
        filtered_filename = stem
        read_depth = int(stem.split("_")[-2])
        filtered_vafs = read_vaf(base_path.format(stem))
        n_mutations = len(filtered_vafs)
    elif "filtered" in filename: # sort out files which are filtered, but with different threshold
        return
    else:
        read_depth = int(stem.split("_")[-1])
        vafs = read_vaf(base_path.format(stem))
        filtered_vafs = vafs[vafs >= minVAF]
        n_mutations = len(filtered_vafs)
        filtered_filename = f"{stem}_{n_mutations}-filtered{minVAF}"

    filtered_path = base_path.format(filtered_filename)
    if not os.path.exists(filtered_path):
        write_vafs(filtered_path, filtered_vafs)

    # Convert the path outside the f-string: quotes and backslashes inside a
    # replacement field only parse on Python >= 3.12.
    results_dir = cu.DATA_PATH.replace("\\", "/") + "/ABC_results"
    result_name = f"{filtered_filename}_Nmax{Nmax}_b-log({np.exp(birth_rate):.1f})"
    if n_mutations >= 100 and not os.path.exists(f"{results_dir}/{result_name}"):
        do_ABC(filtered_filename, read_depth, minvaf = minVAF, Nmax = Nmax, birth_rate = birth_rate)

In [None]:
# run the ABC with birth rate adapted to R0

In [None]:
files = [
    # alpha
    'm_United-Kingdom_2021-1_42335_150-filtered0.01.vaf',
    'm_United-Kingdom_2021-2_19586_150-filtered0.01.vaf',
    'm_United-Kingdom_2021-3_722_187-filtered0.01.vaf',
    'm_United-Kingdom_2021-4_1090_231-filtered0.01.vaf',
    # delta
    'm_United-Kingdom_2021-5_3445_240-filtered0.01.vaf',
    'm_United-Kingdom_2021-6_4438_131-filtered0.01.vaf',
    'm_United-Kingdom_2021-7_6862_101-filtered0.01.vaf',
    'm_United-Kingdom_2021-9_4463_115-filtered0.01.vaf',
    'm_United-Kingdom_2021-10_4817_151-filtered0.01.vaf',
    'm_United-Kingdom_2021-11_3674_167-filtered0.01.vaf'
]
# One birth rate per entry of `files`, in the same order.
birth_rates = np.log([
    # alpha
    4.5,
    4.5,
    4.5,
    4.5,
    # delta
    7,
    7,
    7,
    7,
    7,
    7
])
Minvaf = 0.01
# Pairing is positional; a length mismatch would silently drop trailing entries.
assert len(files) == len(birth_rates), "each file needs exactly one birth rate"
with ThreadPoolExecutor(max_workers = nthreads) as executor:
    # Pass the cutoff explicitly so the run does not depend on whatever default
    # `prepare_file_b` resolves for `minVAF`.
    futures = {
        name: executor.submit(prepare_file_b, name, rate, minVAF = Minvaf)
        for name, rate in zip(files, birth_rates)
    }
# All workers are finished once the with-block has been left.
for name, future in futures.items():
    error = future.exception()
    if error is not None:
        # Report failures instead of silently dropping them.
        print(f"prepare_file_b failed for {name}: {error!r}")

In [None]:
# run ABC with birth rate adapted to Re (will fail, since it is smaller than 1)

In [None]:
# Inputs for the run announced in the preceding cell's comment (birth rate
# adapted to Re). Note that `files` and `birth_rates` are re-assigned here.
files = [
    'm_United-Kingdom_2020-6_4774_115-filtered0.01.vaf',
    'm_United-Kingdom_2020-7_2157_154-filtered0.01.vaf',
    # EU1
    'm_United-Kingdom_2020-8_5185_223-filtered0.01.vaf',
    'm_United-Kingdom_2020-9_9567_233-filtered0.01.vaf',
    'm_United-Kingdom_2020-10_16288_200-filtered0.01.vaf',
    'm_United-Kingdom_2020-11_20799_237-filtered0.01.vaf',
    # alpha
    'm_United-Kingdom_2020-12_20282_141-filtered0.01.vaf',
    'm_United-Kingdom_2021-1_42335_150-filtered0.01.vaf',
    'm_United-Kingdom_2021-2_19586_150-filtered0.01.vaf',
    'm_United-Kingdom_2021-3_722_187-filtered0.01.vaf',
    'm_United-Kingdom_2021-4_1090_231-filtered0.01.vaf',
    # delta
    'm_United-Kingdom_2021-5_3445_240-filtered0.01.vaf',
    'm_United-Kingdom_2021-6_4438_131-filtered0.01.vaf',
    'm_United-Kingdom_2021-7_6862_101-filtered0.01.vaf',
    'm_United-Kingdom_2021-9_4463_115-filtered0.01.vaf',
    'm_United-Kingdom_2021-10_4817_151-filtered0.01.vaf',
    'm_United-Kingdom_2021-11_3674_167-filtered0.01.vaf'
]
# One value per entry of `files`, in the same order. Values below 1 give a
# negative birth rate after np.log, and 1.0 gives exactly 0.
birth_rates = np.log([
    0.8,
    0.8,
    # EU1
    0.9,
    1.1,
    1.4,
    1.1,
    # alpha
    1.0,
    1.1,
    0.8,
    0.8,
    0.9,
    # delta
    0.9,
    1.2,
    1.3,
    1.0,
    1.0,
    1.0
])
# The dispatch is left disabled; the preceding cell's comment notes that this run fails.
# with ThreadPoolExecutor(max_workers = 6) as executor:
#     threads = executor.map(prepare_file_b, files, birth_rates)