In [4]:
# preamble
from time import time

import clgen
from clgen import dbutil
from clgen import sampler
from clgen import model
from clgen import preprocess

In [2]:
# evaluation function
def evaluate(model, sampler):
    # sample kernels in 1 batch without checking
    sampler.batch_size = 50 # 5000
    sampler.max_batches = 1
    sampler.static_checker = False
    sampler.dynamic_checker = False

    # clear the caches
    sampler.cache(model).empty()

    # sample kernels and time
    tstart = time()
    sampler.sample(model, quiet=True)
    tend = time()
    elapsed = tend - tstart

    # preprocess sample
    sample_db = sampler.cache(model)["kernels.db"]
    preprocess.preprocess_db(sample_db)

    num_kernels = dbutil.num_rows_in(sample_db, "ContentFiles")
    num_good_kernels = dbutil.num_good_kernels(sample_db)
    num_ugly_kernels = dbutil.num_rows_in(sample_db, "PreprocessedFiles",
                                          "WHERE status=2")
    discard_rate = 1 - (num_good_kernels / num_kernels)
    ugly_rate = 1 - (num_ugly_kernels / num_kernels)


    total_charcount = dbutil.cc(sample_db, "ContentFiles")
    good_charcount = dbutil.cc(sample_db, "PreprocessedFiles",
                               condition="WHERE status=0")

    efficiency = good_charcount / total_charcount
    throughput = good_charcount / elapsed

    return {
        "elapsed": elapsed,
        "num_kernels": num_kernels,
        "num_good_kernels": num_good_kernels,
        "discard_rate": discard_rate,
        "ugly_rate": ugly_rate,
        "total_charcount": total_charcount,
        "good_charcount": good_charcount,
        "efficiency": efficiency,  # good_chars/total_chars
        "throughput": throughput,  # good_chars/second
        "corpus_dir": model.corpus.cache.path,
        "model_dir": model.cache.path,
        "sampler_dir": sampler.cache(model).path,
    }

In [3]:
m = model.from_json({
    "corpus": {
        "path": "~/clgen-data/corpus",
        "github": False
    },
    "train_opts": {
        "model_type": "lstm",
        "rnn_size": 128,
        "num_layers": 2,
        "max_epochs": 5
    }
})
s = sampler.from_json({
    "kernels": {
        "args": [
            "__global float*",
            "__global float*",
            "__global float*",
            "const int"
        ],
        "max_length": 5000,
        "temperature": 1
    },
    "sampler": {}
})

info = evaluate(m, s)
info

beginning batch 1 ...
 - sample 1
 - sample 2
 - sample 3
 - sample 4
 - sample 5
 - sample 6
 - sample 7
 - sample 8
 - sample 9
 - sample 10
 - sample 11
 - sample 12
 - sample 13
 - sample 14
 - sample 15
 - sample 16
 - sample 17
 - sample 18
 - sample 19
 - sample 20
 - sample 21
 - sample 22
 - sample 23
 - sample 24
 - sample 25
 - sample 26
 - sample 27
 - sample 28
 - sample 29
 - sample 30
 - sample 31
 - sample 32
 - sample 33
 - sample 34
 - sample 35
 - sample 36
 - sample 37
 - sample 38
 - sample 39
 - sample 40
 - sample 41
 - sample 42
 - sample 43
 - sample 44
 - sample 45
 - sample 46
 - sample 47
 - sample 48
 - sample 49
 - sample 50


Number of content files:            78

Number of unique content files:     78 (100%)
Total content line count:           1,097
Content file line counts:           min: 2, med: 10, avg: 14, max: 84

Number of good preprocessed files:  0 (0%)
Lines of good preprocessed code:    0 (0%)
Good preprocessed line counts:      min: 0, med: 0

{'corpus_dir': '/Users/cec/.cache/clgen/0.1.7/corpus/db160e3638cf02daa0cde17b260e3337c8f855ed',
 'discard_rate': 0.9871794871794872,
 'efficiency': 0.004441624365482234,
 'elapsed': 31.363599061965942,
 'good_charcount': 119,
 'model_dir': '/Users/cec/.cache/clgen/0.1.7/model/380265ee43862639af2a551c78aa7f2b46dd1166',
 'num_good_kernels': 1,
 'num_kernels': 78,
 'sampler_dir': '/Users/cec/.cache/clgen/0.1.7/sampler/b0616bd26abb92c3127d790b958f586dd40b51b2',
 'throughput': 3.794207411110197,
 'total_charcount': 26792,
 'ugly_rate': 0.8717948717948718}