In [1]:
import json

from pymongo import MongoClient
from IPython.display import Markdown, display, Image

In [2]:
def print_logs(log, level='INFO'):
    '''Render a log collection as Markdown: its name, then each matching message.

    log   -- object exposing ``name`` and ``find(query)``; the notebook
             passes a pymongo collection.
    level -- only entries whose 'level' field equals this value are shown.
    '''
    heading = '##### ' + log.name
    printmd(heading)
    matching_entries = log.find({'level': level})
    # Each entry's 'message' is serialised to JSON text before being rendered.
    for entry in matching_entries:
        message_json = json.dumps(entry['message'])
        printmd(message_json)
        
def printmd(string):
    '''Show ``string`` in the cell output, rendered as Markdown.

    Credit: http://stackoverflow.com/questions/32026727/format-output-of-code-cell-with-markdown
    '''
    rendered = Markdown(string)
    display(rendered)

In [3]:
def test_DLC(database):
    docs = list(database.GNC.find())
    return [d['_id'] for d in docs if not d['lf']['strong_DLC']]

In [4]:
# Open the MongoDB connection shared by every cell below. No host or port is
# passed, so the client library's defaults apply.
# NOTE(review): the run logs rendered further down record "db_host": "r2081";
# confirm this default connection reaches the same server.
client = MongoClient()

In [5]:
# Databases to report on, in display order, and the heading shown for each.
datasets = ['hum_xen_fug', 'mammals', 'ants', 'introns']
ds_names = dict(
    mammals='Mammals',
    ants='Ants',
    hum_xen_fug='Human/Xenopus/Fugu',
    introns='Primate Introns',
)
for dataset in datasets:
    printmd('#### {0}'.format(ds_names[dataset]))
    # The collection name contains a dot, so it is looked up with getattr
    # rather than plain attribute syntax.
    print_logs(
        getattr(
            getattr(client, dataset),
            'GNC.log',
        )
    )

#### Human/Xenopus/Fugu

##### GNC.log

{"function": "ml.ml", "start_over": true, "log_level": "DEBUG", "no_mpi_main_loop": false, "kwargs_file": "../config/GNC.json", "input_collection": "hum_xen_fug.data", "output_collection": "hum_xen_fug.GNC", "output_collections_file": null, "output_collections": ["hum_xen_fug.GNC"], "db_host": "r2081", "input_collections_file": null, "kwargs": {"model": "GNC"}, "log_name": "log", "input_collections": ["hum_xen_fug.data"]}

{"mong": "0.0.10-dev", "monglog": "0.0.1-dev", "map_collection": "0.0.8-dev", "masterslave": "0.0.10-dev", "ml": "0.0.11-dev"}

#### Mammals

##### GNC.log

{"function": "ml.ml", "start_over": true, "log_level": "DEBUG", "no_mpi_main_loop": false, "kwargs_file": "../config/GNC.json", "input_collection": "mammals.data", "output_collection": "mammals.GNC", "output_collections_file": null, "output_collections": ["mammals.GNC"], "db_host": "r2081", "input_collections_file": null, "kwargs": {"model": "GNC"}, "log_name": "log", "input_collections": ["mammals.data"]}

{"mong": "0.0.10-dev", "monglog": "0.0.1-dev", "map_collection": "0.0.8-dev", "masterslave": "0.0.10-dev", "ml": "0.0.11-dev"}

#### Ants

##### GNC.log

{"function": "ml.ml", "start_over": true, "log_level": "DEBUG", "no_mpi_main_loop": false, "kwargs_file": "../config/GNC.json", "input_collection": "ants.data", "output_collection": "ants.GNC", "output_collections_file": null, "output_collections": ["ants.GNC"], "db_host": "r2081", "input_collections_file": null, "kwargs": {"model": "GNC"}, "log_name": "log", "input_collections": ["ants.data"]}

{"mong": "0.0.10-dev", "monglog": "0.0.1-dev", "map_collection": "0.0.8-dev", "masterslave": "0.0.10-dev", "ml": "0.0.11-dev"}

#### Primate Introns

##### GNC.log

{"function": "ml.ml", "start_over": true, "log_level": "DEBUG", "no_mpi_main_loop": false, "kwargs_file": "../config/GNC_no_stop.json", "input_collection": "introns.data", "output_collection": "introns.GNC", "output_collections_file": null, "output_collections": ["introns.GNC"], "db_host": "r2081", "input_collections_file": null, "kwargs": {"model": "GNC", "gc": "FFLLSSSSYYZOCCUWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"}, "log_name": "log", "input_collections": ["introns.data"]}

{"mong": "0.0.10-dev", "monglog": "0.0.1-dev", "map_collection": "0.0.8-dev", "masterslave": "0.0.10-dev", "ml": "0.0.11-dev"}

In [6]:
# For each dataset, store the list of _ids returned by test_DLC under the
# dataset's name, announcing completion as each one finishes.
dlc_failures = dict()
for dataset in datasets:
    dlc_failures[dataset] = test_DLC(
        getattr(client, dataset)
    )
    printmd('##### Done {0}'.format(dataset))

##### Done hum_xen_fug

##### Done mammals

##### Done ants

##### Done introns

In [7]:
# Report how many _ids were collected for each dataset.
# A single pre-formatted string is printed so the cell emits the same
# "name count" line under both the Python 2 print statement and the
# Python 3 print function (the bare print statement is a SyntaxError on 3).
for dataset in dlc_failures:
    print('%s %d' % (dataset, len(dlc_failures[dataset])))

introns 0
mammals 17
hum_xen_fug 879
ants 1


In [8]:
# Persist the per-dataset results as JSON, at a path relative to the
# current working directory.
with open('dlc-failures.json', 'w') as dout:
    json.dump(obj=dlc_failures, fp=dout)