In [61]:
import time
import crash_similarity
import utils
import numpy as np
from collections import Counter
import json

In [66]:
def clean_func(func):
    """Normalize one stack frame name.

    The name is lower-cased and newline characters are removed. If the
    result contains the marker '@0x', everything after the first
    occurrence of that marker is dropped (the marker itself is kept), so
    frames that differ only in the text following '@0x' collapse to the
    same string.
    """
    normalized = func.lower().replace('\n', '')
    # partition() yields an empty separator when '@0x' is absent, so the
    # concatenation below returns the whole string in that case.
    prefix, marker, _rest = normalized.partition('@0x')
    return prefix + marker


def preprocess(stack_trace):
    """Split a ' | '-separated stack trace and normalize every frame.

    Returns a list with one `clean_func`-normalized string per frame, in
    the order the frames appear in `stack_trace`.
    """
    frames = stack_trace.split(' | ')
    # XXX: open question carried over from the original code -- should only
    # 10 frames be kept (`[:10]`) or all of them? Currently all are kept.
    return list(map(clean_func, frames))


def should_skip(stack_trace):
    """Return True for stack traces without symbols, which are excluded.

    A trace is treated as lacking symbols when it contains any of the
    substrings 'xul.dll@', 'XUL@' or 'libxul.so@'. The check is
    case-sensitive and runs on the raw trace string.
    """
    unsymbolicated_markers = ('xul.dll@', 'XUL@', 'libxul.so@')
    return any(marker in stack_trace for marker in unsymbolicated_markers)


def read_corpus(fnames):
    """Load crash reports and return de-duplicated (frames, signature) pairs.

    Every line yielded by `utils.read_files(fnames)` is parsed as a JSON
    object; its 'proto_signature' value is the raw stack trace and its
    'signature' value is the crash signature.

    Records are dropped when `should_skip` rejects the raw trace, or when
    a previously kept record had the same *set* of preprocessed frames.

    Returns a list of `(processed_frames, signature)` tuples in input
    order, where `processed_frames` is the list returned by `preprocess`.
    """
    elems = []
    already_selected = set()
    for line in utils.read_files(fnames):
        data = json.loads(line)
        proto_signature = data['proto_signature']

        if should_skip(proto_signature):
            continue

        processed = preprocess(proto_signature)

        # Build the dedup key once instead of once for the lookup and once
        # for the insert.
        # NOTE(review): a frozenset ignores frame order and repetition, so
        # two traces with the same frames in a different order count as
        # duplicates -- confirm this is intended.
        key = frozenset(processed)
        if key in already_selected:
            continue
        already_selected.add(key)
        elems.append((processed, data['signature']))

    # The list already has the desired shape; no identity copy is needed.
    return elems

In [69]:
# Seven daily crash dumps (2016-11-03 through 2016-11-09), newest first.
paths = [
    'crashsimilarity_data/firefox-crashes-2016-11-09.json.gz',
    'crashsimilarity_data/firefox-crashes-2016-11-08.json.gz',
    'crashsimilarity_data/firefox-crashes-2016-11-07.json.gz',
    'crashsimilarity_data/firefox-crashes-2016-11-06.json.gz',
    'crashsimilarity_data/firefox-crashes-2016-11-05.json.gz',
    'crashsimilarity_data/firefox-crashes-2016-11-04.json.gz',
    'crashsimilarity_data/firefox-crashes-2016-11-03.json.gz',
]
# Each corpus entry is a (preprocessed frames, signature) tuple.
corpus = read_corpus(paths)

In [91]:
# Number of kept traces and the mean number of frames per trace.
len(corpus), np.mean([len(trace) for trace, _signature in corpus])

(373196, 20.464399404066494)

In [128]:
def get_windows(words, length):
    """Return every contiguous run of `length` items from `words`.

    The windows are returned as tuples, in order of their start index.
    When `words` has fewer than `length` items the result is an empty
    list.
    """
    last_start = len(words) - length
    return [
        tuple(words[start:start + length])
        for start in range(last_start + 1)
    ]

In [133]:
# Count how often each run of 10 consecutive frames occurs across all
# traces; Counter tallies the tuples as the generator yields them.
windows = Counter(
    frame_window
    for entry in corpus
    for frame_window in get_windows(entry[0], 10)
)
# Number of distinct 10-frame windows.
len(windows)

1738695

In [134]:
# Show the ten most frequent 10-frame windows together with their counts.
windows.most_common(10)

[(('@0x', '@0x', '@0x', '@0x', '@0x', '@0x', '@0x', '@0x', '@0x', '@0x'),
  25163),
 (('npswf32_23_0_0_205.dll@0x',
   'npswf32_23_0_0_205.dll@0x',
   'npswf32_23_0_0_205.dll@0x',
   'npswf32_23_0_0_205.dll@0x',
   'npswf32_23_0_0_205.dll@0x',
   'npswf32_23_0_0_205.dll@0x',
   'npswf32_23_0_0_205.dll@0x',
   'npswf32_23_0_0_205.dll@0x',
   'npswf32_23_0_0_205.dll@0x',
   'npswf32_23_0_0_205.dll@0x'),
  5524),
 (('mozilla::ipc::messagepump::run',
   'mozilla::ipc::messagepumpforchildprocess::run',
   'messageloop::runhandler',
   'messageloop::run',
   'nsbaseappshell::run',
   'nsappshell::run',
   'xre_runappshell',
   'mozilla::ipc::messagepumpforchildprocess::run',
   'messageloop::runhandler',
   'messageloop::run'),
  4299),
 (('mozilla::ipc::messagepumpforchildprocess::run',
   'messageloop::runhandler',
   'messageloop::run',
   'nsbaseappshell::run',
   'nsappshell::run',
   'xre_runappshell',
   'mozilla::ipc::messagepumpforchildprocess::run',
   'messageloop::runhandler',
  

In [74]:
# Tally how many kept traces carry each crash signature (the second item
# of every corpus entry), then show the ten most frequent ones.
signatures = Counter(signature for _trace, signature in corpus)
signatures.most_common(10)

[('OOM | small', 29698),
 ('IPCError-browser | ShutDownKill', 14256),
 ('moz_abort | pages_commit', 3052),
 ('js::ObjectGroup::sweep', 3014),
 ('BlobSet::Flush', 1769),
 ('js::GCMarker::processMarkStackTop', 1724),
 ('nsXPCWrappedJS::nsXPCWrappedJS', 1667),
 ('chtbrkg.dll@0x1beb1', 1652),
 ('jit | CORRUPT_CODE', 1611),
 ('js::GCMarker::eagerlyMarkChildren', 1590)]

In [79]:
# For every prefix depth from 1 to 10, count how often each space-joined
# sequence of the first `depth` frames occurs across the corpus.
head_calls = {}
for depth in range(1, 11):
    head_calls[depth] = Counter(
        ' '.join(entry[0][:depth]) for entry in corpus
    )

In [90]:
# Ten most common prefixes built from the first 10 frames of each trace.
head_calls[10].most_common(10)

[('mozalloc_abort mozalloc_handle_oom moz_xmalloc nstarray_base<t>::ensurecapacity nstarray_impl<t>::appendelements<t> assignrangealgorithm<t>::implementation<t> nstarray_impl<t>::appendelements<t> nsmediaqueryresultcachekey::nsmediaqueryresultcachekey mozilla::makeunique<t> nscssruleprocessor::clonemqcachekey',
  1377),
 ('@0x @0x @0x @0x @0x @0x @0x @0x @0x @0x', 830),
 ('ntusergetmessage ntusergetmessage ntusergetmessage waitforcompletionmessage documentpropertieswthunk printchangeproperties _alloca_probe internalcallwinproc usercalldlgproccheckwow defdlgprocworker',
  645),
 ('kifastsystemcallret zwwaitforsingleobject waitforsingleobjectex waitforsingleobject pr_waitcondvar mozilla::condvar::wait nseventqueue::getevent nsthread::processnextevent ns_processnextevent nsthread::shutdown',
  573),
 ('ipseng32.dll@0x ipseng32.dll@0x ipseng32.dll@0x ipseng32.dll@0x ipseng32.dll@0x ipseng32.dll@0x ipseng32.dll@0x ipseng32.dll@0x ipseng32.dll@0x ipseng32.dll@0x',
  572),
 ('js::autoenteroo