# Summarise Results

## Dependencies

In [1]:
import re, os
import numpy as np
from kagami.comm import smap, pmap, paste, drop, fold, filePath, filePrefix
from kagami.portals import textPortal, tablePortal
from kagami.dtypes import Table

## Import VCFTools Results

Load result tables

In [2]:
def _loadafs(fname, bg, tg):
    """Read a five-column TSV of per-locus allele frequencies into a Table.

    The file is unpacked column-wise as scaffold, position, two frequency
    columns and one trailing column that is discarded.

    Args:
        fname: path handed to tablePortal.loadtsv.
        bg: label used for the SECOND frequency column ('<bg>_AFs').
        tg: label used for the FIRST frequency column ('<tg>_AFs').

    Returns:
        Table with one row per locus, named '<scaffold>_<pos>', two float
        columns, and a row index holding scaffold and integer position.
    """
    scf, pos, af0, af1, _ = np.array(tablePortal.loadtsv(fname)).T

    # Frequencies arrive as text; stack them as (n_loci, 2) floats.
    afs = np.vstack([af0, af1]).astype(float).T
    locids = smap(zip(scf, pos), lambda sp: paste(sp, sep='_'))

    return Table(
        afs,
        rownames = locids,
        colnames = [f'{tg}_AFs', f'{bg}_AFs'],
        rowindex = {'scaffold': scf, 'pos': pos.astype(int)},
    )

In [3]:
# Allele-frequency tables for the two comparisons, BM and MT.
bmafs = _loadafs(
    '../data/temporal/BMT_filter_withoutAF_90percent_BM_AFs',
    bg = 'B', tg = 'M',
)
mtafs = _loadafs(
    '../data/temporal/BMT_filter_withoutAF_90percent_MT_AFs',
    bg = 'M', tg = 'T',
)

Check value consistency and merge tables

In [4]:
# Loci present in both tables; np.intersect1d returns them sorted and unique.
sids = np.intersect1d(bmafs.rows_, mtafs.rows_)

# Restrict both tables to the shared loci, in the same row order.
bmafs = bmafs[sids]
mtafs = mtafs[sids]

In [5]:
# Both tables carry an 'M_AFs' column; the values must agree exactly.
m_in_bm = bmafs[:,'M_AFs'].X_[:,0]
m_in_mt = mtafs[:,'M_AFs'].X_[:,0]
assert np.all(m_in_bm == m_in_mt)

# Merged column order: B and M from the BM table, then T from the MT table.
bm_part = bmafs[:,['B_AFs', 'M_AFs']]
t_part = mtafs[:,'T_AFs']
aftab = bm_part.append(t_part, axis = 1)

Save shared loci for gene filtering

In [6]:
# Write the locus IDs as a single-column table, one ID per row.
locus_column = np.reshape(np.array(aftab.rows_), (-1, 1))
tablePortal.savecsv(
    locus_column,
    '../data/temporal/BMT_filter_withoutAF_90percent_BMT_shared_loci',
)



True

## Import TAFT Results

In [7]:
# Matches locus IDs such as 'scaffold00012_3456' (five-digit scaffold number,
# underscore, position). Written as a raw string: '\d' inside a plain literal
# is an invalid escape sequence, which Python warns about and plans to reject.
_sre = re.compile(r'(scaffold\d{5}_\d+)')

def _loadtab(ifname):
    """Build a result Table for one TAFT input chunk.

    Locus IDs are taken from the lines of `ifname` (first regex match per
    line; lines without a match are skipped). The numeric results are read
    from the companion file '<prefix of ifname>_out.csv'.

    Args:
        ifname: path of the TAFT input chunk.

    Returns:
        Table of floats with locus IDs as row names and the stripped header
        fields of the result file as column names.

    Raises:
        AssertionError: if the number of loci differs from the number of
            result rows.
    """
    lns = textPortal.loadlns(ifname)
    # findall yields a list per line; keep the first hit, mark misses as None and drop them.
    loc = np.array(drop(smap(lns, _sre.findall, lambda x: None if len(x) == 0 else x[0]), None))
    
    rfname = filePrefix(ifname) + '_out.csv'
    rdm = tablePortal.loadcsv(rfname)
    # First row is the header. The first field is dropped from both header and data.
    # NOTE(review): data rows also drop their LAST field while the header keeps its own;
    # presumably data rows carry an extra trailing field -- confirm against an *_out.csv file.
    rhd, rdm = rdm[0][1:], np.array(rdm[1:])[:,1:-1]
    
    assert len(loc) == len(rdm), 'loci and results size not match'
    rtab = Table(
        rdm.astype(float), 
        rownames = loc, colnames = smap(rhd, lambda x: x.strip())        
    )
    return rtab

In [8]:
idxfile = '../data/temporal/taft_inputs/index.csv'

# Every cell of the index file is treated as one chunk file name.
iflst = np.array(tablePortal.loadcsv(idxfile)).flatten()

# Chunk names are joined onto whatever filePath returns for the index file
# (presumably its directory -- confirm against kagami).
ipath = filePath(idxfile)
ifnames = smap(iflst, lambda name: os.path.join(ipath, name))

In [9]:
# One Table per TAFT input chunk.
tftabs = pmap(ifnames, _loadtab)

# Chain the per-chunk tables into one with Table.append (default axis).
tftab = fold(
    tftabs,
    lambda merged, chunk: merged.append(chunk),
)

OSError: input file [/home/albert/Projects/Pop_genomics_daphnia/revision/definitive/toolkit/../data/temporal/taft_inputs/info_chunk0_out.csv] not found

## Import Fst Results

In [10]:
# Space-delimited Fst results, kept as a raw array for now.
fst_path = '../../results/temporal/fst_hmm/final_BM_MT_Fst_724321_AF_filtered_loci'
fstdm = np.array(tablePortal.load(fst_path, delimiter = ' '))

In [11]:
# Skip the first row; the remaining columns are locus ID, BM Fst and MT Fst.
sids, bmfst, mtfst = fstdm[1:].T

# Rewrite 'NA' as 'nan' so the float conversion below yields NaN.
# These are views, so fstdm itself is edited in place.
for _col in (bmfst, mtfst):
    _col[_col == 'NA'] = 'nan'

fstvals = np.vstack([bmfst, mtfst]).T
fstab = Table(
    fstvals, dtype = float,
    rownames = sids, colnames = ['BM_Fst', 'MT_Fst'],
)

# Drop every locus with a NaN in either Fst column.
nrids = np.isnan(fstab.X_).any(axis = 1)
fstab = fstab[~nrids]

Save Fst for HMM modelling

In [16]:
# Column-major flattening: all BM_Fst values first, then all MT_Fst values.
fsts = fstab.X_.flatten(order = 'F')

# Single-column output with 'x' as the header cell above the values.
fst_column = np.reshape(np.hstack(['x', fsts]), (-1, 1))
tablePortal.savecsv(
    fst_column,
    '../data/temporal/BMT_filter_withoutAF_90percent_BMT_shared_loci_filtered_fst',
)

True

## Import HMM States Results

In [100]:
hmm_rows = tablePortal.load(
    '../../results/temporal/fst_hmm/final_BM_MT_Fst_724321_AF_filtered_loci_fst_results/_3state_HMMstates.txt', 
    delimiter = ' ',
)

# Skip the first row and keep the second column as integer state labels.
stats = np.array(hmm_rows[1:])[:,1].astype(int)

In [101]:
# Same column-major order as the Fst table: BM values first, then MT.
fsts = fstab.X_.flatten(order = 'F')

# Values at or below the 0.1% quantile receive no HMM state and stay at -1.
# NOTE(review): this requires len(stats) to equal the number of remaining
# values -- presumably the HMM was fitted without them; confirm.
orids = fsts <= np.quantile(fsts, 0.001)
hmms = np.full(fstab.nrow * fstab.ncol, -1.0)
hmms[~orids] = stats

# Fold the flat vector back into two columns, matching the flattening order.
hmtab = Table(
    hmms.reshape((-1,2), order = 'F'), dtype = float,
    rownames = fstab.rows_, colnames = ['BM_HMM', 'MT_HMM'],
)

## Merge Tables

In [105]:
# Tables to merge, in the column order of the final table.
tabs = [
    aftab,  # allele frequencies
    tftab,  # TAFT results
    fstab,  # Fst values
    hmtab,  # HMM states
]

In [106]:
# Row names common to every table.
rowsets = smap(tabs, lambda tab: tab.rows_)
sids = fold(rowsets, lambda acc, rows: np.intersect1d(acc, rows))

# Restrict each table to those rows, in that shared order.
tabs = smap(tabs, lambda tab: tab[sids])

In [107]:
# Join the tables column-wise (axis = 1), left to right.
mtab = fold(
    tabs,
    lambda left, right: left.append(right, axis = 1),
)

Sort loci IDs

In [108]:
def _locuskey(sid):
    """Sort key for an ID like 'scaffold00012_345': (scaffold number, position)."""
    parts = sid.split('_', 1)
    # [8:] skips the first eight characters, i.e. the 'scaffold' prefix.
    return int(parts[0][8:]), int(parts[1])

# Order loci numerically by scaffold, then by position within the scaffold.
sids = sorted(mtab.rows_, key = _locuskey)
mtab = mtab[sids]

In [110]:
# Write the merged table in both formats, HDF first and then CSV.
hdf_path = '../data/temporal/BMT_filter_withoutAF_90percent.filtered.merged_table.hdf'
csv_path = '../data/temporal/BMT_filter_withoutAF_90percent.filtered.merged_table.csv'

mtab.savehdf(hdf_path)
mtab.savecsv(csv_path, transpose = False)



True