# Summarise Results

## Dependencies

In [1]:
import re, os
import numpy as np
from kagami.comm import smap, pmap, paste, pick, drop, fold, filePath, filePrefix
from kagami.portals import textPortal, tablePortal
from kagami.dtypes import Table

## Import VCFTools Results

Load result tables

In [2]:
def _loadafs(fname, bg, tg):
    """Load a five-column TSV of per-locus allele frequencies into a Table.

    Every row of the file is treated as data (no header row is skipped).
    Columns are unpacked as scaffold, position, two allele-frequency values
    and a fifth column that is ignored.

    Args:
        fname: path of the TSV file to read.
        bg: label used to name the second frequency column.
        tg: label used to name the first frequency column.

    Returns:
        Table with one row per locus named ``<scaffold>_<pos>`` and the
        float columns ``<tg>_AFs`` and ``<bg>_AFs``, in that order.
    """
    records = np.array(tablePortal.loadtsv(fname))
    scaffolds, positions, first_af, second_af, _ = records.T

    # locus ids are "<scaffold>_<pos>"
    locus_ids = smap(zip(scaffolds, positions), lambda pair: paste(pair, sep='_'))
    freqs = np.vstack([first_af, second_af]).astype(float).T

    return Table(
        freqs,
        rownames = locus_ids,
        colnames = [f'{tg}_AFs', f'{bg}_AFs'],
    )

In [3]:
# Allele-frequency tables for the two comparisons (B/M and M/T).
bmafs = _loadafs(
    '../data/temporal/BMT_filter_withoutAF_90percent_BM_AFs',
    bg = 'B', tg = 'M',
)
mtafs = _loadafs(
    '../data/temporal/BMT_filter_withoutAF_90percent_MT_AFs',
    bg = 'M', tg = 'T',
)

Check value consistency and merge tables

In [4]:
# Restrict both tables to the loci they have in common, in the same order.
sids = np.intersect1d(bmafs.rows_, mtafs.rows_)
bmafs = bmafs[sids]
mtafs = mtafs[sids]

In [5]:
# The M_AFs column appears in both tables; it must hold identical values
# before the tables can be merged on it.
assert (bmafs[:,'M_AFs'].X_[:,0] == mtafs[:,'M_AFs'].X_[:,0]).all()

# Merged table columns: B_AFs, M_AFs (taken from the BM table), T_AFs.
aftab = bmafs[:,['B_AFs', 'M_AFs']].append(
    mtafs[:,'T_AFs'],
    axis = 1,
)

Save shared loci for gene filtering

In [None]:
# Write the shared locus ids as a single-column CSV (one id per row).
tablePortal.savecsv(
    np.array(aftab.rows_)[:, np.newaxis],
    '../data/temporal/BMT_filter_withoutAF_90percent_BMT_shared_loci',
)

## Import Allele Counts

In [6]:
def _loadcnts(fname, pop):
    """Load a six-column TSV of per-locus allele counts into a Table.

    The first row of the file is skipped. The remaining rows are unpacked as
    scaffold, position, two ignored columns, and two count columns.

    Args:
        fname: path of the TSV file to read.
        pop: label used as prefix of the two output column names.

    Returns:
        Table with one row per locus named ``<scaffold>_<pos>`` and the
        integer columns ``<pop>_Counts0`` and ``<pop>_Counts1``.
    """
    records = np.array(tablePortal.loadtsv(fname)[1:])
    scaffolds, positions, _, _, first_cnt, second_cnt = records.T

    # locus ids are "<scaffold>_<pos>"
    locus_ids = smap(zip(scaffolds, positions), lambda pair: paste(pair, sep='_'))
    counts = np.vstack([first_cnt, second_cnt]).T.astype(int)

    return Table(
        counts,
        rownames = locus_ids,
        colnames = [f'{pop}_Counts0', f'{pop}_Counts1'],
    )

In [7]:
# Allele-count tables, one per population (B, M, T).
bcnts = _loadcnts(
    '../data/temporal/BMT_filter_withoutAF_90percent.filtered.B.frq.count',
    pop = 'B',
)
mcnts = _loadcnts(
    '../data/temporal/BMT_filter_withoutAF_90percent.filtered.M.frq.count',
    pop = 'M',
)
tcnts = _loadcnts(
    '../data/temporal/BMT_filter_withoutAF_90percent.filtered.T.frq.count',
    pop = 'T',
)

In [8]:
# Join the three count tables column-wise, left to right: B, then M, then T.
cntab = fold(
    [bcnts, mcnts, tcnts],
    lambda left, right: left.append(right, axis = 1),
)

## Import TAFT Results

In [9]:
# Locus id pattern: "scaffold" + exactly five digits + "_" + one or more digits.
# Written as a raw string so that `\d` reaches the regex engine untouched;
# in a normal string literal `\d` is an invalid escape sequence and triggers
# a DeprecationWarning / SyntaxWarning on recent Python versions.
_sre = re.compile(r'(scaffold\d{5}_\d+)')

def _loadtab(ifname):
    """Load one info file and its matching ``*_out.csv`` results as a Table.

    Locus ids are taken from the info file: for every line, the first match
    of the module-level ``_sre`` pattern is kept, and lines without a match
    are dropped. The results are read from ``filePrefix(ifname) + '_out.csv'``;
    its first row supplies the column names (minus the first cell) and the
    remaining rows supply the values (minus the first and the last cell).

    Args:
        ifname: path of the info file.

    Returns:
        Table of floats with the loci as row names and the stripped header
        cells as column names.

    Raises:
        AssertionError: if the number of loci differs from the number of
            result rows.
    """
    lines = textPortal.loadlns(ifname)
    first_hits = smap(
        lines,
        _sre.findall,
        lambda found: None if len(found) == 0 else found[0],
    )
    loc = np.array(drop(first_hits, None))

    rfname = filePrefix(ifname) + '_out.csv'
    rows = tablePortal.loadcsv(rfname)
    header = rows[0][1:]
    values = np.array(rows[1:])[:,1:-1]

    assert len(loc) == len(values), 'loci and results size not match'
    return Table(
        values.astype(float),
        rownames = loc,
        colnames = [name.strip() for name in header],
    )

def _loadtfs(idxfile, cmp):
    """Load every info file listed in an index file and stack the results.

    The index file is read as CSV and flattened into a list of file names,
    which are resolved relative to the index file's own directory. Each file
    is loaded with ``_loadtab`` (through ``pmap``) and the resulting tables
    are appended one after another.

    Args:
        idxfile: path of the index file.
        cmp: label prepended (as ``<cmp>_``) to every column name.

    Returns:
        The combined Table with prefixed column names.
    """
    basedir = filePath(idxfile)
    listed = np.array(tablePortal.loadcsv(idxfile)).flatten()
    ifnames = [os.path.join(basedir, name) for name in listed]

    parts = pmap(ifnames, _loadtab)
    tftab = fold(parts, lambda acc, nxt: acc.append(nxt))

    tftab.cols_ = [f'{cmp}_{col}' for col in tftab.cols_]
    return tftab

In [10]:
# TAFT results for the BM and MT comparisons.
bmtfs = _loadtfs(
    idxfile = '../../results/temporal/fst_hmm/BMT_count_AF_BM_MT_Fst_waples_AF_filtered_724321_BM_info_files/index.txt',
    cmp = 'BM',
)
mttfs = _loadtfs(
    idxfile = '../../results/temporal/fst_hmm/BMT_count_AF_BM_MT_Fst_waples_AF_filtered_724321_MT_info_files/index.txt',
    cmp = 'MT',
)

# Keep only loci present in both result sets, then join them column-wise.
sids = np.intersect1d(bmtfs.rows_, mttfs.rows_)
tftab = bmtfs[sids].append(
    mttfs[sids],
    axis = 1,
)

## Import Fst Results

In [11]:
# Read the space-delimited Fst file into a 2-D array of strings.
# The next cell skips row 0 and unpacks three columns (locus id, BM Fst, MT Fst).
fstdm = np.array(tablePortal.load(
    '../../results/temporal/fst_hmm/final_BM_MT_Fst_724321_AF_filtered_loci', 
    delimiter = ' ',
))

In [12]:
# Row 0 is skipped (presumably a header line); the remaining rows hold the
# locus id, the BM Fst and the MT Fst.
sids, bmfst, mtfst = fstdm[1:].T

# 'NA' cannot be parsed as a float, so missing values are respelled 'nan'.
# bmfst / mtfst are views, so this also rewrites fstdm in place.
bmfst[bmfst == 'NA'] = 'nan'
mtfst[mtfst == 'NA'] = 'nan'

fstab = Table(
    np.vstack([bmfst, mtfst]).T,
    dtype = float,
    rownames = sids,
    colnames = ['BM_Fst', 'MT_Fst'],
)

# Drop every locus that has a missing Fst in at least one comparison.
nrids = np.isnan(fstab.X_).any(axis = 1)
fstab = fstab[~nrids]

Save Fst for HMM modelling

In [None]:
# Column-major flattening: all BM_Fst values first, then all MT_Fst values.
fsts = fstab.X_.flatten(order = 'F')

# Single-column CSV whose first row is the header 'x'.
tablePortal.savecsv(
    np.hstack(['x', fsts])[:, np.newaxis],
    '../data/temporal/BMT_filter_withoutAF_90percent_BMT_shared_loci_filtered_fst',
)

## Import HMM States Results

In [13]:
# Skip the first row of the space-delimited HMM output and keep its second
# column as integer state labels.
stats = np.array(
    tablePortal.load(
        '../../results/temporal/fst_hmm/final_BM_MT_Fst_724321_AF_filtered_loci_fst_results/_3state_HMMstates.txt',
        delimiter = ' ',
    )[1:]
)[:,1].astype(int)

In [14]:
# Same column-major layout as the Fst vector: BM values first, then MT values.
fsts = fstab.X_.flatten(order = 'F')

# Values at or below the 0.1% quantile get no HMM state and keep the
# placeholder -1; `stats` must supply exactly one state for every other value.
orids = fsts <= np.quantile(fsts, 0.001)

hmms = np.full(fstab.nrow * fstab.ncol, -1.0)
hmms[~orids] = stats

# Fold the flat vector back into two columns (BM, MT), one row per locus.
hmtab = Table(
    hmms.reshape((-1,2), order = 'F'),
    dtype = float,
    rownames = fstab.rows_,
    colnames = ['BM_HMM_State', 'MT_HMM_State'],
)

## Merge Tables

In [15]:
# Tables to merge, listed in the column order of the final table:
# allele counts, allele frequencies, TAFT results, Fst values, HMM states.
tabs = [cntab, aftab, tftab, fstab, hmtab]

In [16]:
# Loci present in every table ...
sids = fold([tab.rows_ for tab in tabs], np.intersect1d)

# ... and each table restricted to them, in that shared order.
tabs = [tab[sids] for tab in tabs]

In [17]:
# Join all tables column-wise, left to right, into one merged table.
mtab = fold(tabs, lambda x,y: x.append(y, axis = 1))

Sort loci IDs

In [18]:
# Sort loci numerically by scaffold number, then by position.
# Ids look like "<scaffold>_<pos>"; the first 8 characters of the scaffold
# part (the length of the word "scaffold") are stripped to get its number.
sids = sorted(
    mtab.rows_,
    key = lambda rid: (
        int(rid.split('_',1)[0][8:]),
        int(rid.split('_',1)[1]),
    ),
)
mtab = mtab[sids]

Insert scaffold and position row indices

In [19]:
# Split every "<scaffold>_<pos>" row id into its two parts and attach them
# to the table as row indices; positions are stored as integers.
mtab.ridx_ = {
    'scaffold': [rid.split('_',1)[0] for rid in mtab.rows_],
    'pos': [int(rid.split('_',1)[1]) for rid in mtab.rows_],
}

Insert gene info

In [20]:
# Gene ids to keep, flattened into one 1-D array.
topg = tablePortal.loadcsv('../data/temporal/BMT_filter_withoutAF_90percent_BMT_shared_loci_filtered_genes')
topg = np.array(topg).flatten()

# Build a set once for the membership test below: `value in ndarray` scans
# the whole array on every call, which is quadratic over all gene records.
topgset = set(topg.tolist())

# Gene range records; only those whose last field (the gene id) is listed
# in `topg` are kept.
grng = tablePortal.loadcsv('../data/references/all_scaffolds_all_genes_range')
grng = pick(grng, lambda x: x[-1] in topgset)

In [21]:
# Empty gene column, filled per scaffold afterwards.
# NumPy silently truncates strings assigned into a narrower fixed-width
# 'U<n>' array, so the width is taken from the longest gene id in `grng`
# (never narrower than the original 16 characters).
gidwidth = max([16] + [len(str(rec[-1])) for rec in grng])
mtab.ridx_['gene'] = np.array([''] * mtab.nrow, dtype = f'U{gidwidth}')

def _insert(scf):
    """Fill the ``gene`` row index of ``mtab`` for all loci on one scaffold.

    Reads the module-level ``grng`` (gene range records: scaffold first, gene
    id last, start and end in fields 1 and 2) and writes into the module-level
    ``mtab``. Does nothing when the scaffold has no gene records.

    Args:
        scf: scaffold name to process.

    Returns:
        None.
    """
    srng = np.array(pick(grng, lambda rec: rec[0] == scf))
    if len(srng) == 0:
        return

    gids = srng[:,-1]
    rngs = srng[:,[1,2]].astype(int)

    # Per-base lookup: gpos[p-1] holds the 1-based index of the gene covering
    # position p, or 0 for no gene. Ranges are 1-based and inclusive; where
    # ranges overlap, the later record wins. uint16 caps the index at 65535.
    gpos = np.zeros(np.max(rngs), dtype = np.uint16)
    for gnum, (st, ed) in enumerate(rngs, start = 1):
        gpos[st-1:ed] = gnum

    def _geneat(p):
        # positions beyond the last annotated base have no gene
        code = gpos[p-1] if p <= gpos.shape[0] else 0
        return '' if code == 0 else gids[code-1]

    spos = mtab.ridx_.pos[mtab.ridx_.scaffold == scf]
    glst = smap(spos, _geneat)
    mtab.ridx_.gene[mtab.ridx_.scaffold == scf] = glst
    
# Run _insert once per distinct scaffold; it is called for its side effect on
# mtab only, so the list of return values is discarded into `_`.
_ = smap(np.unique(mtab.ridx_.scaffold), _insert)

Save to files

In [None]:
# Persist the merged table as HDF.
mtab.savehdf(
    '../data/temporal/BMT_filter_withoutAF_90percent.filtered.merged_table.hdf',
)

# Persist the merged table as CSV as well, written without transposing.
mtab.savecsv(
    '../data/temporal/BMT_filter_withoutAF_90percent.filtered.merged_table.csv',
    transpose = False,
)