Skip to content

Commit

Permalink
Merge pull request #73 from rjchallis/master
Browse files Browse the repository at this point in the history
support BlobToolKit viewer with experimental view format
  • Loading branch information
DRL committed May 29, 2018
2 parents 5d11dda + 2aa3a7c commit a14630e
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 58 deletions.
184 changes: 127 additions & 57 deletions lib/BtCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from os.path import abspath, isfile, basename, isdir, join
from os import getcwd, mkdir
import json
import re
from collections import defaultdict

class BlobDb():
'''
Expand Down Expand Up @@ -716,63 +718,120 @@ def output(self):
BtIO.writeJson(tax, rank_f, indent=1)

class ExperimentalViewObj():
def __init__(self, name='experimental', view_dir=''):
def __init__(self, name='experimental', view_dir='',blobDb={}):
self.name = name
self.view_dir = view_dir
self.view_dir = re.sub(".blobDB", "", view_dir)
self.length = []
self.gc = []
self.n_count = []
self.names = []
self.tax = {}
self.covs = {}
self.read_covs = defaultdict(list)
self.tax_scores = {}
self.blobDb = blobDb
self.meta = {}
BtIO.create_dir(self.view_dir)

def _format_float(self,l,min_val=-float("inf")):
if min_val:
l = map(lambda x:max(x,min_val),l)
return map(lambda x:float("%.4f" % x),l)

def _remove_cov_suffix(self,id,meta):
rep_list = ['.bam','.bam.cov','.cas','.cas.cov','.cov','.sam','.sam.cov']
rep_list += list(map(lambda x: "%s." % x, self.view_dir.split('.')))
name = id
if id in meta:
name = re.sub("|".join(rep_list), "", basename(meta[id]['f']))
return name if name else id

def get_meta(self):
meta = {
"id": self.view_dir,
"name": self.view_dir,
"datatypes":{
"identifiers": {"name" : "identifiers", "type" : "identifier"},
"len": {"name" : "length","type" : "integer", "min" : min(self.length), "max" : max(self.length)},
"gc": {"name" : "gc","type" : "percentage", "min" : min(self.gc), "max" : max(self.gc)},
"covs": {"name" : "covs","type" : "float", "min" : 0.02, "max" : max(self.covs["covsum"])},
"taxrule":{"name" : "taxrule","type" : "category"},
},
"hierarchy":{
"covs": [cov for cov in self.covs],
"taxrule": [taxrule for taxrule in self.tax],
"besthit": [rank for rank in BtTax.RANKS],
"bestsum": [rank for rank in BtTax.RANKS],
#"superkingdom":["bitscore","cindex","evalue"],
#"phylum":["bitscore","cindex","evalue"]
},
"rows" : len(self.names),
"mainplot":{
"type":"hexbin",
"x":{
"datatype":["gc"],
"bounds":[0,100],
"scale":"linear"
},
"y":{
"datatype": [cov_lib for cov_lib in self.covs],
"bounds":[0.001,1000000],
"scale":"log10"
},
"series":{
"datatype":["taxrule","besthit","superkingdom"],
"scale":"sqrt"
}
"records": len(self.names),
"record_type": "contigs",
"fields":[
{ "id":"length", "name":"Length", "type":"variable", "datatype":"integer", "range":[min(self.length),max(self.length)], "scale":"scaleLog", "preload":True },
{ "id":"gc", "name":"GC", "type":"variable", "datatype":"float", "range":self._format_float([min(self.gc),max(self.gc)]), "scale":"scaleLinear", "preload":True },
],
"plot":{
"x":"gc",
"z":"length"
}
#"besthit": [rank for rank in BtTax.RANKS],
#"bestsum": [rank for rank in BtTax.RANKS],
}
for cov_lib in self.covs:
meta['datatypes'][cov_lib] = {"name" : cov_lib, "type" : "float"}
cov_names = filter(lambda name: name != "covsum",self.covs)
for taxrule in self.tax:
meta['datatypes'][taxrule] = {"name": taxrule, "type":"category"}
for rank in BtTax.RANKS:
meta['datatypes'][rank] = {"name": rank, "type":"category", "levels" : 7}
self.tax_scores[taxrule] = defaultdict(lambda: {'score':[],'c_index':[]})
for id in self.blobDb.order_of_blobs:
blob = self.blobDb.dict_of_blobs[id]
self.read_covs['covsum'].append(0)
for cov_name in cov_names:
self.read_covs[cov_name].append(blob['read_cov'][cov_name])
self.read_covs['covsum'][-1] += blob['read_cov'][cov_name]
for taxrule in self.tax:
for rank in self.tax[taxrule]:
self.tax_scores[taxrule][rank]['score'].append(blob['taxonomy'][taxrule][rank]['score'])
self.tax_scores[taxrule][rank]['c_index'].append(blob['taxonomy'][taxrule][rank]['c_index'])
self.n_count.append(blob['length']-blob['agct_count'])
if max(self.n_count) > 0:
meta['fields'].append({ "id":"ncount", "name":"N count", "type":"variable", "datatype":"integer", "range":[max(0.1,min(self.n_count)),max(self.n_count)], "scale":"scaleLinear"})
if len(self.covs) > 0:
for cov in ['cov','read_cov']:
cov_libs = []
for cov_name in self.covs:
name = self._remove_cov_suffix(cov_name,self.blobDb.covLibs)
id = "%s_%s" % (name,cov)
cov_lib_meta = {"id":id, "name":name }
if cov_name == "cov0" and cov == "cov":
cov_lib_meta["preload"] = True
meta['plot']['y'] = id
cov_libs.append(cov_lib_meta)
cov_meta = {"id":"%s" % cov, "name":"Coverage", "type":"variable", "datatype":"float", "scale":"scaleLog", "range":self._format_float([0.02,max(self.covs["covsum"])])}
if cov == 'read_cov':
cov_meta['name'] = "Read coverage"
cov_meta['datatype'] = "integer"
cov_meta['range'] = [0.2,max(self.read_covs["covsum"])]
cov_meta['children'] = sorted(cov_libs, key=lambda k: k['name'])
meta['fields'].append(cov_meta)
if len(self.tax) > 0:
tax_rules = []
for taxrule in self.tax:
taxrule_meta = {"id":taxrule, "name":taxrule, "children":[] }
for rank in self.tax[taxrule]:
id = "%s_%s" % (taxrule,rank)
tax_rank_data = []
tax_rank_data.append({ "id":"%s_score" % id, "name":"%s score" % id, "type":"variable", "datatype":"float", "scale":"scaleLog", "range":[0.2,max(self.tax_scores[taxrule][rank]['score'])], "preload":False, "active":False })
tax_rank_data.append({ "id":"%s_cindex" % id, "name":"%s c-index" % id, "type":"variable", "datatype":"integer", "scale":"scaleLinear", "range":[0,max(self.tax_scores[taxrule][rank]['c_index'])], "preload":False, "active":False })
tax_rank_meta = { "id":id, "name":id, "data": tax_rank_data }
if rank == "phylum":
tax_rank_meta["preload"] = True
meta['plot']['cat'] = id
taxrule_meta['children'].append(tax_rank_meta)
tax_rules.append(taxrule_meta)
tax_meta = {"id":"taxonomy", "name":"Taxonomy", "type":"category", "datatype":"string"}
tax_meta['children'] = sorted(tax_rules, key=lambda k: k['name'])
meta['fields'].append(tax_meta)
#for taxrule in self.tax:
# meta['datatypes'][taxrule] = {"name": taxrule, "type":"category"}
#for rank in BtTax.RANKS:
# meta['datatypes'][rank] = {"name": rank, "type":"category", "levels" : 7}
return meta

def _keyed_list(self,l):
d = {}
i = 0
o = []
for v in l:
if v not in d:
d[v] = i
i += 1
o.append(d[v])
return {'values':o,'keys':sorted(d, key=d.get)}

def output(self):
# meta
meta = self.get_meta()
Expand All @@ -781,33 +840,44 @@ def output(self):
# gc
gc_f = join(self.view_dir, "gc.json")
print BtLog.status_d['13'] % (gc_f)
BtIO.writeJson(self.gc, gc_f, indent=1)
BtIO.writeJson({"values":self._format_float(self.gc)}, gc_f, indent=1)
# length
length_f = join(self.view_dir, "length.json")
print BtLog.status_d['13'] % (length_f)
BtIO.writeJson(self.length, length_f, indent=1)
# names
names_f = join(self.view_dir, "names.json")
print BtLog.status_d['13'] % (names_f)
BtIO.writeJson(self.names, names_f, indent=1)
BtIO.writeJson({"values":self.length}, length_f, indent=1)
# Ns
if max(self.n_count) > 0:
n_f = join(self.view_dir, "ncount.json")
print BtLog.status_d['13'] % (n_f)
BtIO.writeJson({"values":map(lambda x:max(x,0.2),self.n_count)}, n_f, indent=1)
# identifiers
ids_f = join(self.view_dir, "identifiers.json")
print BtLog.status_d['13'] % (ids_f)
BtIO.writeJson(self.names, ids_f, indent=1)
# cov
cov_d = join(self.view_dir, "covs")
BtIO.create_dir(directory=cov_d)
for cov_lib, cov in self.covs.items():
cov_f = join(cov_d, "%s.json" % cov_lib)
for cov_name, cov in self.covs.items():
name = self._remove_cov_suffix(cov_name,self.blobDb.covLibs)
cov_f = join(self.view_dir, "%s_cov.json" % name)
print BtLog.status_d['13'] % (cov_f)
BtIO.writeJson(cov, cov_f, indent=1)
BtIO.writeJson({"values":self._format_float(cov,0.02)}, cov_f, indent=1)
# read_cov
for cov_name, cov in self.read_covs.items():
name = self._remove_cov_suffix(cov_name,self.blobDb.covLibs)
cov_f = join(self.view_dir, "%s_read_cov.json" % name)
print BtLog.status_d['13'] % (cov_f)
BtIO.writeJson({"values":map(lambda x:max(x,0.2),cov)}, cov_f, indent=1)
# tax
taxrule_d = join(self.view_dir, "taxrule")
BtIO.create_dir(directory=taxrule_d)
for taxrule in self.tax:
tax_d = join(taxrule_d, taxrule)
BtIO.create_dir(directory=tax_d)
for rank in self.tax[taxrule]:
tax = self.tax[taxrule][rank]
rank_f = join(tax_d, "%s.json" % rank)
tax = self._keyed_list(self.tax[taxrule][rank])
rank_f = join(self.view_dir, "%s_%s.json" % (taxrule,rank))
BtIO.writeJson(tax, rank_f, indent=1)

score = self.tax_scores[taxrule][rank]['score']
score_f = join(self.view_dir, "%s_%s_score.json" % (taxrule,rank))
BtIO.writeJson({"values":map(lambda x:max(x,0.2),score)}, score_f, indent=1)
cindex = self.tax_scores[taxrule][rank]['c_index']
cindex_f = join(self.view_dir, "%s_%s_cindex.json" % (taxrule,rank))
BtIO.writeJson({"values":cindex}, cindex_f, indent=1)

if __name__ == '__main__':
pass
2 changes: 1 addition & 1 deletion lib/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def main():
tableView = BtCore.ViewObj(name="table", out_f=out_f, suffix="table.txt", body=[])
viewObjs.append(tableView)
if (experimental):
experimentalView = BtCore.ExperimentalViewObj(name = "experimental", view_dir=out_f)
experimentalView = BtCore.ExperimentalViewObj(name = "experimental", view_dir=out_f, blobDb=blobDb)
viewObjs.append(experimentalView)
if (concoct):
concoctTaxView = None
Expand Down

0 comments on commit a14630e

Please sign in to comment.