In [5]:
import sqlalchemy as sql
import cldrive
import sys
from labm8 import crypto

import analyze
import db
from db import *
%run util.py

# Initialise the project's db module for the given host (the recorded output
# of this cell is a mysql:// URL on that host).
hostname = "cc1"
db.init(hostname)

'mysql://cc1:3306/project_b'

In [None]:
# Upper bound compared against `tables.meta.cumtime` when selecting results.
# 24 * 3600 is presumably 24 hours expressed in seconds -- TODO confirm units.
TIME_LIMIT = 24 * 3600

# Bug Report generator

In [None]:
def comment(*msg, prefix=''):
    """
    Format text as a block of `//` line comments.

    The positional string arguments are joined with single spaces, the result
    is stripped of leading/trailing whitespace, and every line of it is
    emitted as `// <prefix><line>`.

    Arguments:
        *msg: String fragments making up the message.
        prefix: Text inserted between the `// ` marker and each line.

    Returns:
        The commented lines joined with newlines.
    """
    text = " ".join(msg).strip()
    commented = []
    for line in text.split('\n'):
        commented.append(f'// {prefix}{line}')
    return '\n'.join(commented)

def get_bug_report(session: session_t, tables: Tableset, result_id: int, report_type: str="bf"):
    """
    Render a bug report for a single result as C source with a comment header.

    The report is a `//`-commented header (bug type, report ID, testbed
    metadata and, depending on `report_type`, the captured output) followed
    by the source of the program that produced the result.

    Arguments:
        session: Database session; only forwarded to
            `analyze.get_majority_output()` for wrong-code reports. The result
            itself is looked up in a separate, non-committing `Session`.
        tables: Table set; its `results` table is queried and its `name` is
            hashed into the report ID.
        result_id: ID of the row in `tables.results` to report.
        report_type: One of "bf" (compilation failure), "bc" (compiler crash),
            "c" (runtime crash) or "w" (wrong-code).

    Returns:
        The report text (header + program source), stripped of surrounding
        whitespace.

    Raises:
        KeyError: If no result has the given ID, if `report_type` is not one
            of the supported values, or if a CLgen program has no harness for
            the result's parameters.
        AssertionError: For "w" reports, if the majority output renders the
            same as the result's own output.
    """
    with Session(commit=False) as s:
        result = s.query(tables.results).filter(tables.results.id == result_id).first()

        if not result:
            raise KeyError(f"no result with ID {result_id}")

        # generate bug report
        report_id = crypto.md5_str(tables.name) + "-" + str(result.id)
        bug_type = {
            "bf": "compilation failure",
            "bc": "compiler crash",
            "c": "runtime crash",
            "w": "wrong-code"
        }[report_type]

        header = f"""\
// {bug_type} bug report {report_id}.c
// 
// Metadata:
//   OpenCL platform:        {result.testbed.platform}
//   OpenCL device:          {result.testbed.device}
//   Driver version:         {result.testbed.driver}
//   OpenCL version:         {result.testbed.opencl}
//   Host operating system:  {result.testbed.host}
//   OpenCL optimizations:   {result.params.optimizations_on_off}
"""
        if report_type == "bc":
            result_output = comment(result.stderr, prefix='  ')
            header += f"""\
//
// Output:
{result_output}
//   [Return code {result.status}]
//
"""
        elif report_type == "w":
            stderr = comment(result.stderr, prefix='  ')
            result_output = comment(result.stdout, prefix='  ')
            majority_output = comment(analyze.get_majority_output(session, tables, result), prefix='  ')
            # A wrong-code report only makes sense if the output actually differs.
            assert majority_output != result_output
            header += f"""\
//
// Expected output:
{majority_output}
// Actual output:
{result_output}
//
// stderr:
{stderr}
//
"""
        elif report_type == "c":
            # The "stdout:" section must show stdout (it previously repeated stderr).
            stdout = comment(result.stdout, prefix='  ')
            stderr = comment(result.stderr, prefix='  ')
            header += f"""\
//
// stdout:
{stdout}
//
// stderr:
{stderr}
//   [Return code {result.status}]
//
"""

        if isinstance(result.program, CLgenProgram):
            # CLgen programs are reported via the harness source stored for
            # this (program, params) pair rather than the program's own source.
            harness = s.query(CLgenHarness).filter(
                CLgenHarness.program_id == result.program.id,
                CLgenHarness.params_id == result.params.id).first()
            if harness is None:
                raise KeyError(
                    f"no harness for program {result.program.id} "
                    f"with params {result.params.id}")
            src = harness.src
        else:
            src = result.program.src
        return (header + src).strip()

# coCLgenResults

In [None]:
# TABLE is the results table queried by the export cell that follows;
# TABLE_NAME is a name for it (presumably a label -- verify).
TABLE_NAME = "CLgen"
TABLE = coCLgenResult

In [None]:
from collections import defaultdict
from types import SimpleNamespace
from labm8 import fs

# Export one bug report file per (testbed, program) pair for results in TABLE
# with status 139 that are neither submitted nor flagged as duplicates.
# Later results with an already-seen (testbed, program) key are flagged as
# duplicates instead of being exported.
outbox = fs.path("outbox", "coCLgenResults", "segfaults")
fs.mkdir(outbox)

# get_bug_report() reads `tables.results` and `tables.name` as attributes, so
# a plain dict cannot be passed; wrap the table in an attribute namespace.
report_tables = SimpleNamespace(name=TABLE_NAME, results=TABLE)

with Session(commit=True) as s:
    q = s.query(TABLE)\
        .filter(TABLE.status == 139,
                sql.or_(TABLE.submitted == None, TABLE.submitted == 0),
                sql.or_(TABLE.dupe == None, TABLE.dupe == 0))\
        .order_by(TABLE.id)

    dupes = 0
    errs = set()
    for result in q:
        key = result.testbed_id, result.program_id

        if key in errs:
            dupes += 1
            result.dupe = 1
            continue
        errs.add(key)

        vendor = PLATFORMS_2_VENDORS[result.testbed.platform]
        outpath = fs.path(outbox, f"bug-report-{vendor}-{result.id}.c")

        # NOTE(review): "bf" is rendered as "compilation failure" in the
        # report header, while this cell selects status 139 into a
        # "segfaults" outbox -- confirm the intended report type.
        report = get_bug_report(
            session=s,
            tables=report_tables,
            result_id=result.id,
            report_type="bf",
        )

        with open(outpath, "w") as outfile:
            print(report, file=outfile)
        result.submitted = 1
        print(outpath)
print("done")
print(f"{dupes} duplicates flagged")

# Build-crash bugs

In [6]:
# Table set used by the build-crash export cell below.
tables = CLGEN_TABLES

In [9]:
# Export a "bc" bug report for every unsubmitted, non-duplicate result whose
# outcome is "bc". Only the first result per (testbed, program) pair is
# exported; later ones are flagged as duplicates.
outbox = fs.path("outbox", tables.name, "bc")
fs.mkdir(outbox)

# NOTE(review): the session is opened with commit=False, so the `dupe` and
# `submitted` flags set below are presumably not persisted -- confirm intent.
with Session(commit=False) as s:
    results_t = tables.results
    is_build_crash = results_t.outcome == "bc"
    not_submitted = sql.or_(results_t.submitted == None, results_t.submitted == 0)
    not_dupe = sql.or_(results_t.dupe == None, results_t.dupe == 0)
    q = (
        s.query(results_t)
        .filter(is_build_crash, not_submitted, not_dupe)
        .order_by(results_t.id)
    )

    dupes = 0
    errs = set()
    for result in q:
        key = (result.testbed_id, result.program_id)

        if key not in errs:
            errs.add(key)

            vendor = PLATFORMS_2_VENDORS[result.testbed.platform]
            outpath = fs.path(outbox, f"bug-report-{vendor}-{result.id}.c")

            report = get_bug_report(
                session=s,
                tables=tables,
                result_id=result.id,
                report_type="bc",
            )

            with open(outpath, "w") as outfile:
                print(report, file=outfile)
            result.submitted = 1
            print(outpath)
        else:
            # Same testbed and program already reported in this run.
            dupes += 1
            result.dupe = 1

print("done")
print(f"{dupes} duplicates flagged")

NameError: name 'get_bug_report' is not defined

# Runtime crashes

In [2]:
# Table set used by the runtime-crash export cell below.
tables = CLSMITH_TABLES

In [3]:
# Export a "c" bug report for every result classified "c" whose meta cumtime
# is below TIME_LIMIT. Only the first result per (testbed, program) pair is
# exported; later ones are flagged as duplicates.
outbox = fs.path("outbox", tables.name, "c")
fs.mkdir(outbox)

# NOTE(review): the session is opened with commit=False, so the `dupe` and
# `submitted` flags set below are presumably not persisted -- confirm intent.
with Session(commit=False) as s:
    q = (
        s.query(tables.results)
        .join(tables.meta)
        .filter(tables.results.classification == "c",
                tables.meta.cumtime < TIME_LIMIT)
        .order_by(tables.results.id)
    )

    dupes = 0
    errs = set()
    for result in q:
        key = (result.testbed_id, result.program_id)

        if key not in errs:
            errs.add(key)

            vendor = vendor_str(result.testbed.platform)
            outpath = fs.path(outbox, f"bug-report-{vendor}-{result.id}.c")

            report = get_bug_report(
                session=s,
                tables=tables,
                result_id=result.id,
                report_type="c",
            )

            with open(outpath, "w") as outfile:
                print(report, file=outfile)
            result.submitted = 1
            print(outpath)
        else:
            # Same testbed and program already reported in this run.
            dupes += 1
            result.dupe = 1

print("done")
print(f"{dupes} duplicates flagged")

NameError: name 'TIME_LIMIT' is not defined

# Wrong-code bug reports

In [None]:
# Table set used by the wrong-code export cell below.
tables = CLGEN_TABLES

In [None]:
# Export a "w" bug report for every result classified "w" whose meta cumtime
# is below TIME_LIMIT. Only the first result per (testbed, program) pair is
# exported; later ones are flagged as duplicates.
outbox = fs.path("outbox", tables.name, "w")
fs.mkdir(outbox)

# NOTE(review): the session is opened with commit=False, so the `dupe` and
# `submitted` flags set below are presumably not persisted -- confirm intent.
with Session(commit=False) as s:
    q = (
        s.query(tables.results)
        .join(tables.meta)
        .filter(tables.results.classification == "w",
                tables.meta.cumtime < TIME_LIMIT)
        .order_by(tables.results.id)
    )

    dupes = 0
    errs = set()
    for result in q:
        # FIXME: temp hacks until re-analyzed
        # For the "CLgen" table set, skip programs that are not gpuverified,
        # whose source mentions "float", or whose stderr mentions "warning".
        if tables.name == "CLgen" and (
                not result.program.gpuverified
                or "float" in result.program.src
                or "warning" in result.stderr):
            continue

        key = (result.testbed_id, result.program_id)

        if key not in errs:
            errs.add(key)

            vendor = vendor_str(result.testbed.platform)
            outpath = fs.path(outbox, f"bug-report-{vendor}-{result.id}.c")

            report = get_bug_report(
                session=s,
                tables=tables,
                result_id=result.id,
                report_type="w",
            )

            with open(outpath, "w") as outfile:
                print(report, file=outfile)
            result.submitted = 1
            print(outpath)
        else:
            # Same testbed and program already reported in this run.
            dupes += 1
            result.dupe = 1

print("done")
print(f"{dupes} duplicates flagged")