In [2]:
# import argparse

# parser = argparse.ArgumentParser(description='Supply reference fasta and bam file')
# parser.add_argument('ref',
#                     help='reference fasta')
# parser.add_argument('bam',
#                     help='bam file')


# args = parser.parse_args()
# ref_fname = args.ref
# bam_fname = args.bam
ref_fname = 'refchrm.fa'
bam_fname = 'data/bam/in10.bam'
genomes_fname ='311humans.fasta'

In [3]:
import os
from collections import Counter
import pysam
import numpy as np
from tqdm import tqdm
from IPython.display import clear_output
from scipy.special import binom
import scipy.stats as st
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import subprocess
from multiprocess import Pool
%load_ext cython

In [4]:
#Cython part

In [56]:
%%cython -a --compile-args=-O3
# cython: linetrace=True
# cython: binding=True
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
from tqdm import tqdm
import pysam
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from collections import Counter
cimport cython
import numpy as np
from cython.parallel import prange
from libc.math cimport pow
from scipy.special import binom
# from mc_lib.rndm cimport RndmWrapper


def get_num_reads(str bam_fname):
    samfile = pysam.AlignmentFile(bam_fname, "rb" )
    num_reads = 0
    for read in samfile.fetch('chrM'):
        if not read.is_mapped:
            continue
        num_reads += 1
    samfile.close()
    return num_reads

def bam2consensus(
        ref_fname, bam_fname, double ac_threshold=0, double af_threshold=0):

    cdef str consensus = ''
    cdef int max_count, total_cgeount
    cdef str allele
    cdef str max_allele
    for record in SeqIO.parse(ref_fname, "fasta"):
        assert record.id == 'chrM'
        # consensus = "N" * len(record)

    with pysam.AlignmentFile(bam_fname, "rb") as bam:
        allele_counter = Counter()
        for pileup_column in tqdm(bam.pileup(), total=16569, desc = 'consensus dna'):
            assert pileup_column.reference_name == 'chrM'
            pos = pileup_column.reference_pos

            allele_counter.clear()
            for pileup_read in pileup_column.pileups:
                if pileup_read.is_del:
                    allele = "-"
                else:
                    allele = pileup_read.alignment.query_sequence[
                        pileup_read.query_position]
                allele_counter[allele] += 1

            max_allele = "N"
            max_count, total_count = 0, 0
            for allele, count in allele_counter.items():
                if count > max_count:
                    max_count = count
                    max_allele = allele
                total_count += count

            assert max_allele in "ACGTN-"
            if (max_count >= ac_threshold and
                max_count / total_count >= af_threshold):
                consensus += max_allele

    return consensus


@cython.cdivision(True)
@cython.wraparound(False)
@cython.boundscheck(False)
def get_MN(char[:, :] genomes,str bam_fname):
    bam = pysam.AlignmentFile(bam_fname, "rb")
    cdef double[:, :] N, M
    cdef int k, i, j, pos
    cdef double correct, incorrect, P_cor
    cdef str seq
    cdef long num_reads = get_num_reads(bam_fname)
    cdef int num_genomes = genomes.shape[0]
    N = np.zeros((num_reads, num_genomes))
    M = np.zeros((num_reads, num_genomes))
    i = 0
    j = 0
    for read in tqdm(bam.fetch('chrM'), total = bam.count(), desc = 'MN tables'):
        seq = read.query_sequence
        pos = read.reference_start
        qual = read.query_qualities
        # print(qual)
        if not read.is_mapped:
            continue
        for j in range(num_genomes):
            correct = 0
            incorrect = 0
            for k in range(len(seq)):
                if k+pos >= genomes.shape[1]:
                    break
                if chr(genomes[j][k+pos]).upper() == '-':
                    # print('FFFFF')
                    M[i, j] = -1
                    N[i, j] = -1
                if seq[k] == chr(genomes[j][k+pos]).upper():
                    P_cor = 1 - 10**(- qual[k]/10)
                    correct += P_cor
                    incorrect += 1 - P_cor
                    assert(0 <= P_cor <= 1, 'probabilities error')
                else:
                    # print(seq[k], chr(genomes[j][k+pos]).upper())
                    P_cor = (10**(- qual[k]/10))/3
                    incorrect += 1 - P_cor
                    correct += P_cor
                    assert(0 <= P_cor <= 1, 'probabilities error')
            if M[i, j] != -1:
                M[i, j] = correct
                N[i, j] = incorrect
        i += 1
    bam.close()
    return np.array(M, dtype=np.float64), np.array(N, dtype=np.float64)

@cython.cdivision(True)
@cython.wraparound(False)
@cython.boundscheck(False)
def get_mc(double[:, ::1] m, double[:, ::1] n, double eps):
    cdef double[:,::1] mc
    cdef long num_reads = m.shape[0]
    cdef long num_genomes = m.shape[1] 
    cdef long j
    mc = np.zeros((num_reads, num_genomes))
    for i in range(num_reads):
        for j in range(num_genomes):
            if m[i, j] == -1:
                mc[i, j] = 0
            else:
                mc[i, j] = binom(m[i, j] + n[i, j], m[i, j]) * pow((1 - eps),(m[i, j])) * pow(eps,(n[i, j]))
    return np.asarray(mc)
    




@cython.cdivision(True)
@cython.wraparound(False)
@cython.boundscheck(False)
def get_Zi(double[:,::1] mc, double[::1] p,double eps, long i):
    cdef long num_reads = mc.shape[0]
    cdef long num_genomes = mc.shape[1] 
    cdef long j
    cdef long Z
    cdef double[:] probs
    cdef double s
    s = 0
    probs = np.zeros(num_genomes, dtype = float)
    

    for j in range(num_genomes):
        probs[j] = mc[i, j] * p[j]
        s += probs[j]
    # if s == 0:
    #     return np.random.randint(0, num_genomes)
    # else:
    for j in range(num_genomes):
        probs[j] =probs[j] / s
    
    # Z = np.random.choice(np.arange(0, num_genomes), p = probs)
    Z = random_choice(probs)
    # print(np.asarray(probs))
    return Z


@cython.cdivision(True)
@cython.wraparound(False)
@cython.boundscheck(False)
def get_eta(long[:] z,int num_genomes):
    cdef long[:] eta
    eta = np.zeros(num_genomes, dtype = int)
    cdef int num_reads
    num_reads = z.shape[0]
    cdef int i
    for i in range(num_reads):
        eta[z[i]] += 1
    return np.array(eta)

cdef extern from "stdlib.h":
    double drand48()
    void srand48(long int seedval)

@cython.cdivision(True)
@cython.wraparound(False)
@cython.boundscheck(False)
def random_choice(double[:] probs):
    '''Returns random number from 0 to n-1 according to probs
        Check if probs do not sum up to 1!'''
    
    cdef double s = 0
    cdef int i
    cdef int l = len(probs)
    
    

    
    cdef double x = drand48()
    cdef double cum_probs = 0
    cdef long n = 0
    while x > cum_probs:
        cum_probs += probs[n]
        n += 1
    n -= 1
    return n


In [57]:
def preprocess(ref_fname, genomes_fname, bam_fname):
    base = bam_fname[:-4]
    pysam.index(bam_fname);
    os.system(f"samtools view {bam_fname} chrM -o {base+'_mt.bam'}")
    base = base+'_mt'
    print('#EXTRACTING MTDNA OK')
    consensus = bam2consensus(ref_fname, bam_fname)
    consensus_fa = '>chrM\n'+''.join(consensus) +'\n'
    with open(f'{base}.fa', 'w') as new_genomes:
        new_genomes.write(consensus_fa)
        
    os.system(f'cat {base}.fa {genomes_fname} > {base}_genomes.fa');
    os.system(f'mafft {base}_genomes.fa >  {base}_aligned.fa')
    aligned_genomes = f'{base}_aligned.fa'
    print("#ALL GENOMES ARE READY")
    new_cons = list(SeqIO.parse(f'{base}_aligned.fa', "fasta"))[0]
    SeqIO.write(new_cons, f'{base}.real.fa', "fasta")
    os.system(f'bwa index -a bwtsw {base}.real.fa')
    os.system(f'samtools faidx {base}.real.fa')
    os.system(f'rm {base}.dict')
    os.system(f'picard CreateSequenceDictionary R={base}.fa O={base}.dict')
    os.system(f'samtools fastq {bam_fname} > {base}.fq')
    os.system(f'bwa aln -l 1000 -t 10 {base}.real.fa {base}.fq > {base}_ra.sai')
    os.system(f"bwa samse -r '@RG\\tID:{base}\\tLB:{base}_L1\\tPL:ILLUMINA\\tSM:{base}' {base}.real.fa {base}_ra.sai {base}.fq | samtools view -bh -q 30 | samtools sort -O BAM -o {base}_ra.sort.bam")
    os.system(f'picard MarkDuplicates I={base}_ra.sort.bam O={base}_ra.sort.rmdup.bam METRICS_FILE=metrics.txt REMOVE_DUPLICATES=true ASSUME_SORTED=false VALIDATION_STRINGENCY=LENIENT')
    pysam.index(f'{base}_ra.sort.rmdup.bam');
    os.system(f'samtools calmd -Erb {base}_ra.sort.rmdup.bam {base}.fa > {base}_ra.final.bam 2>/dev/null');
    bam_final = f'{base}_ra.final.bam'
    os.system(f'samtools index {base}_ra.final.bam')
    os.system(f'rm {base}_ra.sai')
    print("#BAM FILE IS READY")
    return bam_final, aligned_genomes

In [58]:
def make_genomes_arr(genomes_fname):
    genomes = list()
    for record in SeqIO.parse(genomes_fname, "fasta"):
            genomes.append(str(record.seq))
    genomes_arr = np.array([list(x) for x in genomes], dtype = 'S1')
    return genomes_arr

In [59]:
def get_same(genomes_arr):
    same_positions = []
    same_bases = []
    for i in range(genomes_arr.shape[1]):
        if len(np.unique(genomes_arr[:,i])) == 1:
            same_positions.append(i)
            same_bases.append(genomes_arr[0,i])
    same = dict(zip( same_positions, same_bases ))
    return same

In [60]:
def get_base_err(bam_fname, same_dict):
    samfile = pysam.AlignmentFile(bam_fname, "rb" )
    same_positions = list(same_dict.keys())
    correct = 0
    total = 0
    samfile = pysam.AlignmentFile(bam_fname, "rb" )
    for pileupcolumn in tqdm(samfile.pileup("chrM")):
        pos = pileupcolumn.pos
        if pos not in same_positions:
            continue

        for pileupread in pileupcolumn.pileups:
            if not pileupread.is_del and not pileupread.is_refskip:
                total += 1
                # query position is None if is_del or is_refskip is set.
                nbase =  pileupread.alignment.query_sequence[pileupread.query_position]
                if nbase == same[pos].decode('ascii').upper():
                    correct += 1
    base_err = 1 - correct/total
    samfile.close()
    return base_err

In [61]:
# samfile.close()
# del genomes

In [62]:
# def Pr_Dep(p, m, n, i, eps):
#     num_reads, num_genomes = M.shape
#     ret = 0
#     for j in range(num_genomes):
#         ret += p[j]* Pr_De(m, n, i, j, eps)
#     return ret

In [63]:
# def get_Z(m, n, p, eps):
#     num_reads, num_genomes = M.shape
#     Z = np.zeros(num_reads)
#     for i in tqdm(range(num_reads), desc = f'gettitng Z'):
#         probs = np.array([Pr_De(m, n, i, j, eps) * p[j] for j in range(num_genomes)])
#         probs = probs/probs.sum()
#         Z[i] = np.random.choice(np.arange(0, num_genomes), p = probs)
#         # print(f'p = {p}\nprobs={probs}\nZ={Z[j]}')
#     return Z

In [64]:
# def get_eta(z, num_genomes):
#     return np.array([len(z[z==j]) for j in range(num_genomes)])

In [65]:
# p = np.array(np.random.dirichlet([1]*num_genomes)
def do_mcmc(n_iterations = 50000, output_file = '', n_threads = 8):
    if output_file != '':
        res = open(output_file,'w')
    num_reads, num_genomes  = MC.shape
    print(MC.shape)
    p = np.random.dirichlet([1]*num_genomes)
    pool = Pool(n_threads)
    for i in tqdm(range(n_iterations) ):
        
        func = lambda x: get_Zi(MC, p, base_err, x)
        
        # Z = np.array(pool.map_async(func, range(num_reads)).get())
        Z = np.array([func(s) for s in range(num_reads) ])
        
        eta = get_eta(Z, num_genomes)
        p0 = np.random.beta(1 + eta[0],1+num_reads-eta[0])
        p_other = np.random.dirichlet(1+ eta[1:])
        p_other *= (1-p0)/p_other.sum()
        p[0] = p0
        p[1:] = p_other
        if output_file != '':
            res.write(f'iteration {i}')
            res.write(str(p[0]))
        if i % 100 == 0:
            print(p[0], p[1:].sum()) 
    pool.close()
    if output_file != '':
        res.close()

In [66]:
# p = np.array([0.5] + [1/(num_genomes-1)]*(num_genomes-1))

In [20]:
bam, genomes = preprocess(ref_fname, genomes_fname, bam_fname)

#EXTRACTING MTDNA OK


consensus dna: 100%|██████████| 16569/16569 [01:03<00:00, 261.18it/s]
nthread = 0
nthreadpair = 0
nthreadtb = 0
ppenalty_ex = 0
stacksize: 8176 kb
generating a scoring matrix for nucleotide (dist=200) ... done
Gap Penalty = -1.53, +0.00, +0.00



Making a distance matrix ..

There are 18 ambiguous characters.
  301 / 312
done.

Constructing a UPGMA tree (efffree=0) ... 
  310 / 312
done.

Progressive alignment 1/2... 
STEP   311 / 311 
done.

Making a distance matrix from msa.. 
  300 / 312
done.

Constructing a UPGMA tree (efffree=1) ... 
  310 / 312
done.

Progressive alignment 2/2... 
STEP   311 / 311 
done.

disttbfast (nuc) Version 7.490
alg=A, model=DNA200 (2), 1.53 (4.59), -0.00 (-0.00), noshift, amax=0.0
0 thread(s)


Strategy:
 FFT-NS-2 (Fast but rough)
 Progressive method (guide trees were built 2 times.)

If unsure which option to use, try 'mafft --auto input > output'.
For more information, see 'mafft --help', 'mafft --man' and the mafft page.

The default gap scoring schem

#ALL GENOMES ARE READY


INFO	2022-07-22 18:53:15	CreateSequenceDictionary	

********** NOTE: Picard's command line syntax is changing.
**********
********** For more information, please see:
********** https://github.com/broadinstitute/picard/wiki/Command-Line-Syntax-Transition-For-Users-(Pre-Transition)
**********
********** The command line looks like this in the new syntax:
**********
**********    CreateSequenceDictionary -R data/bam/in10_mt.fa -O data/bam/in10_mt.dict
**********


18:53:15.882 INFO  NativeLibraryLoader - Loading libgkl_compression.dylib from jar:file:/opt/miniconda3/envs/genomic/share/picard-2.18.29-0/picard.jar!/com/intel/gkl/native/libgkl_compression.dylib
18:53:15.888 WARN  NativeLibraryLoader - Unable to load libgkl_compression.dylib from native/libgkl_compression.dylib (/private/var/folders/6l/0qdfm_q1033f77sclr7_5nrm0000gn/T/nikita/libgkl_compression13826274285572933224.dylib: dlopen(/private/var/folders/6l/0qdfm_q1033f77sclr7_5nrm0000gn/T/nikita/libgkl_compression13826274285572933

#BAM FILE IS READY


In [67]:
genomes_arr = make_genomes_arr(genomes)

In [68]:
same = get_same(genomes_arr)

In [69]:
base_err = get_base_err(bam, same)

16559it [00:02, 5678.38it/s]


In [70]:
M, N = get_MN(genomes_arr, bam)

MN tables: 100%|██████████| 32236/32236 [03:22<00:00, 159.44it/s]


In [71]:
MC = get_mc(M, N, base_err)

In [72]:
do_mcmc(50000, n_threads=1)

(32236, 312)


  0%|          | 2/50000 [00:00<1:02:14, 13.39it/s]

0.04478581957448133 0.9552141804255186


  0%|          | 102/50000 [00:06<58:25, 14.24it/s]

0.8119057716605996 0.18809422833940037


  0%|          | 204/50000 [00:13<53:08, 15.62it/s]

0.805926642132328 0.194073357867672


  1%|          | 304/50000 [00:19<54:57, 15.07it/s]

0.8058115115916894 0.19418848840831057


  1%|          | 404/50000 [00:26<51:20, 16.10it/s]

0.8063268036944478 0.19367319630555224


  1%|          | 504/50000 [00:32<50:30, 16.33it/s]

0.8089566213914774 0.1910433786085226


  1%|          | 604/50000 [00:38<50:43, 16.23it/s]

0.8135745751587284 0.18642542484127153


  1%|▏         | 704/50000 [00:44<52:16, 15.72it/s]

0.8110157583693457 0.1889842416306543


  2%|▏         | 804/50000 [00:51<49:40, 16.50it/s]

0.8115378511746687 0.1884621488253313


  2%|▏         | 904/50000 [00:57<50:12, 16.30it/s]

0.8124239850107307 0.18757601498926935


  2%|▏         | 1004/50000 [01:03<53:02, 15.40it/s]

0.813899167166726 0.18610083283327405


  2%|▏         | 1104/50000 [01:10<52:23, 15.55it/s]

0.8122436531940593 0.18775634680594067


  2%|▏         | 1204/50000 [01:16<52:03, 15.62it/s]

0.8090917923290747 0.19090820767092534


  3%|▎         | 1304/50000 [01:22<51:00, 15.91it/s]

0.8079607784656193 0.1920392215343807


  3%|▎         | 1404/50000 [01:29<52:27, 15.44it/s]

0.8107240521470139 0.18927594785298607


  3%|▎         | 1504/50000 [01:35<49:34, 16.31it/s]

0.8176568254130446 0.18234317458695537


  3%|▎         | 1604/50000 [01:41<49:59, 16.14it/s]

0.8160887828333898 0.1839112171666102


  3%|▎         | 1704/50000 [01:47<49:38, 16.21it/s]

0.8080587667044782 0.19194123329552182


  4%|▎         | 1804/50000 [01:53<48:59, 16.39it/s]

0.8104837718361596 0.18951622816384042


  4%|▍         | 1904/50000 [01:59<48:58, 16.37it/s]

0.8127742204044296 0.1872257795955704


  4%|▍         | 2004/50000 [02:05<50:54, 15.71it/s]

0.8148141852655487 0.18518581473445128


  4%|▍         | 2104/50000 [02:11<48:31, 16.45it/s]

0.8141787057643097 0.1858212942356903


  4%|▍         | 2204/50000 [02:18<48:40, 16.37it/s]

0.8122678061551057 0.18773219384489426


  5%|▍         | 2304/50000 [02:24<48:48, 16.29it/s]

0.8144897499474807 0.18551025005251928


  5%|▍         | 2404/50000 [02:30<48:20, 16.41it/s]

0.8152198145650723 0.18478018543492772


  5%|▌         | 2504/50000 [02:36<48:56, 16.18it/s]

0.8102987833816594 0.18970121661834063


  5%|▌         | 2604/50000 [02:42<48:36, 16.25it/s]

0.8130923328174828 0.18690766718251714


  5%|▌         | 2704/50000 [02:48<49:37, 15.89it/s]

0.810648014568168 0.189351985431832


  6%|▌         | 2804/50000 [02:54<47:36, 16.52it/s]

0.8128236052032913 0.18717639479670867


  6%|▌         | 2904/50000 [03:00<47:35, 16.49it/s]

0.8109403994306982 0.18905960056930185


  6%|▌         | 3004/50000 [03:06<48:19, 16.21it/s]

0.8097405442878006 0.1902594557121994


  6%|▌         | 3104/50000 [03:12<48:06, 16.25it/s]

0.8135378723697977 0.1864621276302023


  6%|▋         | 3202/50000 [03:18<51:24, 15.17it/s]

0.7979198863892839 0.2020801136107161


  7%|▋         | 3304/50000 [03:25<49:58, 15.57it/s]

0.8122412396328237 0.18775876036717634


  7%|▋         | 3402/50000 [03:31<51:22, 15.12it/s]

0.8146493321129529 0.18535066788704713


  7%|▋         | 3502/50000 [03:37<53:01, 14.62it/s]

0.8088153698597497 0.19118463014025033


  7%|▋         | 3604/50000 [03:44<51:01, 15.16it/s]

0.806564580739211 0.193435419260789


  7%|▋         | 3704/50000 [03:50<51:01, 15.12it/s]

0.8138435545794178 0.18615644542058218


  8%|▊         | 3804/50000 [03:57<47:54, 16.07it/s]

0.8141200706124814 0.18587992938751863


  8%|▊         | 3904/50000 [04:03<51:26, 14.93it/s]

0.8164126931535485 0.18358730684645153


  8%|▊         | 4004/50000 [04:10<51:11, 14.98it/s]

0.8152219335003819 0.18477806649961817


  8%|▊         | 4104/50000 [04:16<47:47, 16.01it/s]

0.8112375489926662 0.1887624510073338


  8%|▊         | 4204/50000 [04:22<47:23, 16.11it/s]

0.8118076228313419 0.1881923771686581


  9%|▊         | 4304/50000 [04:29<46:55, 16.23it/s]

0.8035934957739223 0.19640650422607775


  9%|▉         | 4404/50000 [04:35<47:48, 15.89it/s]

0.8165203099223812 0.18347969007761877


  9%|▉         | 4504/50000 [04:41<47:18, 16.03it/s]

0.8162053378130393 0.18379466218696072


  9%|▉         | 4604/50000 [04:47<45:48, 16.52it/s]

0.8182110728983881 0.18178892710161187


  9%|▉         | 4704/50000 [04:53<45:31, 16.59it/s]

0.8125566371268231 0.18744336287317687


 10%|▉         | 4804/50000 [04:59<46:14, 16.29it/s]

0.8008452885063551 0.1991547114936449


 10%|▉         | 4904/50000 [05:05<45:42, 16.44it/s]

0.8064268636717601 0.19357313632823997


 10%|█         | 5004/50000 [05:11<45:05, 16.63it/s]

0.8097188184878207 0.1902811815121793


 10%|█         | 5104/50000 [05:17<46:23, 16.13it/s]

0.8099040643156005 0.19009593568439953


 10%|█         | 5204/50000 [05:23<45:29, 16.41it/s]

0.8100318201493735 0.18996817985062653


 11%|█         | 5304/50000 [05:30<45:40, 16.31it/s]

0.8024469541963614 0.19755304580363864


 11%|█         | 5404/50000 [05:36<46:05, 16.12it/s]

0.8094709498005276 0.19052905019947244


 11%|█         | 5504/50000 [05:42<44:07, 16.81it/s]

0.8142068414549128 0.1857931585450872


 11%|█         | 5604/50000 [05:48<44:48, 16.51it/s]

0.816366809387113 0.18363319061288697


 11%|█▏        | 5704/50000 [05:54<45:43, 16.15it/s]

0.8159325239442694 0.1840674760557306


 12%|█▏        | 5804/50000 [06:00<45:48, 16.08it/s]

0.8090447233463788 0.19095527665362122


 12%|█▏        | 5902/50000 [06:06<47:53, 15.34it/s]

0.8126905673473516 0.18730943265264843


 12%|█▏        | 6004/50000 [06:13<45:08, 16.24it/s]

0.8093063254510879 0.1906936745489121


 12%|█▏        | 6102/50000 [06:19<46:20, 15.79it/s]

0.8079367658749455 0.19206323412505452


 12%|█▏        | 6204/50000 [06:25<46:17, 15.77it/s]

0.8148180038156674 0.18518199618433268


 13%|█▎        | 6304/50000 [06:31<45:31, 15.99it/s]

0.8079853414818061 0.19201465851819385


 13%|█▎        | 6404/50000 [06:38<46:30, 15.62it/s]

0.8124884946670637 0.18751150533293628


 13%|█▎        | 6502/50000 [06:44<50:55, 14.24it/s]

0.8066311549469108 0.19336884505308913


 13%|█▎        | 6604/50000 [06:51<47:57, 15.08it/s]

0.8178094678532337 0.18219053214676628


 13%|█▎        | 6704/50000 [06:57<47:26, 15.21it/s]

0.8090700950225923 0.1909299049774077


 14%|█▎        | 6804/50000 [07:04<43:44, 16.46it/s]

0.8110768759044373 0.1889231240955627


 14%|█▍        | 6904/50000 [07:10<43:51, 16.38it/s]

0.8082328462422365 0.19176715375776357


 14%|█▍        | 6998/50000 [07:15<44:39, 16.05it/s]


KeyboardInterrupt: 

Process ForkPoolWorker-82:
Traceback (most recent call last):
  File "/opt/miniconda3/envs/genomic/lib/python3.10/site-packages/multiprocess/process.py", line 315, in _bootstrap
    self.run()
  File "/opt/miniconda3/envs/genomic/lib/python3.10/site-packages/multiprocess/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/miniconda3/envs/genomic/lib/python3.10/site-packages/multiprocess/pool.py", line 114, in worker
    task = get()
  File "/opt/miniconda3/envs/genomic/lib/python3.10/site-packages/multiprocess/queues.py", line 369, in get
    res = self._reader.recv_bytes()
  File "/opt/miniconda3/envs/genomic/lib/python3.10/site-packages/multiprocess/connection.py", line 224, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/opt/miniconda3/envs/genomic/lib/python3.10/site-packages/multiprocess/connection.py", line 422, in _recv_bytes
    buf = self._recv(4)
  File "/opt/miniconda3/envs/genomic/lib/python3.10/site-packages/multiprocess/co

In [51]:
num_reads, num_genomes = MC.shape

In [53]:
p = np.array([0.5] + [1/(num_genomes-1)]*(num_genomes-1))

In [None]:
%timeit get_Zi(MC, p, num_genomes, 2)