# KaiRo's telemetry tinkering

In [1]:
import IPython

from __future__ import division
from moztelemetry.spark import get_pings, get_pings_properties

%pylab inline
IPython.core.pylabtools.figsize(16, 7)

Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.
Populating the interactive namespace from numpy and matplotlib


In [2]:
# Selection criteria handed to get_pings() for both the "main" and "crash" queries.
# The trailing comments keep the tuple alternatives noted by the author
# (presumably ranges -- confirm against get_pings).
PING_OPTIONS = dict(
    app="Firefox",
    channel="beta",
    version="45.0",
    build_id="20160215141016",  # alternative: ("20160215141016", "20160223142613")
    submission_date="20160222",  # alternative: ("20160219", "20160225")
    fraction=1,
)

# Property paths extracted from each main ping.
MAIN_PING_PROPERTIES = [
    "clientId",
    "environment/settings/e10sEnabled",
    "payload/info/subsessionLength",
    "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content",
    "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin",
]

# Property paths extracted from each crash ping.
CRASH_PING_PROPERTIES = [
    "clientId",
    "environment/settings/e10sEnabled",
]

# Only pings that carry a clientId are kept, for both ping types.
main_pings = (
    get_pings(sc, doc_type="main", **PING_OPTIONS)
    .filter(lambda ping: "clientId" in ping)
)
main_ping_data = get_pings_properties(main_pings, MAIN_PING_PROPERTIES)

crash_pings = (
    get_pings(sc, doc_type="crash", **PING_OPTIONS)
    .filter(lambda ping: "clientId" in ping)
)
crash_ping_data = get_pings_properties(crash_pings, CRASH_PING_PROPERTIES)

In [6]:
# Number of distinct clientIds that appear in the main pings.
# distinct() de-duplicates the ids directly, so no per-key value groups have to be
# built (as groupByKey() did) just to count the keys; the resulting count is the same.
main_ping_installations = main_ping_data.map(lambda p: p["clientId"]).distinct().count()

In [7]:
# Number of distinct clientIds that appear in the crash pings.
# distinct() de-duplicates the ids directly, so no per-key value groups have to be
# built (as groupByKey() did) just to count the keys; the resulting count is the same.
crash_ping_installations = crash_ping_data.map(lambda p: p["clientId"]).distinct().count()

In [8]:
# Ping volume next to the number of distinct clientIds, for main and crash pings.
# Each count() is evaluated right before its line is printed.
main_ping_total = main_ping_data.count()
print("main pings: " + str(main_ping_total))
print("main installations: " + str(main_ping_installations))

crash_ping_total = crash_ping_data.count()
print("crash pings: " + str(crash_ping_total))
print("crash ping installations: " + str(crash_ping_installations))

main pings: 3023372
main installations: 1219772
crash pings: 76034
crash ping installations: 52130


In [10]:
class Accumulators():
    """Bundle of four accumulators created from ``cx``, each starting at 0.

    Attributes: ``main_crashes``, ``content_crashes``, ``plugin_crashes`` and
    ``session_seconds``. ``cx`` only needs an ``accumulator(initial)`` method.
    """

    def __init__(self, cx):
        # Created in this order: main, content, plugin crashes, then session seconds.
        for name in ("main_crashes", "content_crashes",
                     "plugin_crashes", "session_seconds"):
            setattr(self, name, cx.accumulator(0))

    def khours(self):
        """Return the ``session_seconds`` total expressed in thousands of hours."""
        hours = self.session_seconds.value / 3600.0
        return hours / 1000
    

# One accumulator set per cohort (non-e10s, e10s) plus one that both
# process_main() and process_crash() also update for the overall totals.
non_e10s, e10s, counts = Accumulators(sc), Accumulators(sc), Accumulators(sc)

def process_main(d):
    """Add one main ping's crash counts and session length to the accumulators.

    ``d`` is a mapping of property path -> value for a single main ping. The ping
    is attributed to ``e10s`` when its e10sEnabled value equals True and to
    ``non_e10s`` otherwise (including a None value). Every value that is not None
    is added both to that cohort and to the overall ``counts``.
    """
    cohort = e10s if d["environment/settings/e10sEnabled"] == True else non_e10s

    # (property path in the ping, accumulator attribute it feeds), in processing order.
    sources = (
        ("payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content", "content_crashes"),
        ("payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin", "plugin_crashes"),
        ("payload/info/subsessionLength", "session_seconds"),
    )
    for path, attribute in sources:
        amount = d[path]
        if amount is None:
            continue
        getattr(cohort, attribute).add(amount)
        getattr(counts, attribute).add(amount)

# Run process_main over every main ping; this is what fills the content/plugin
# crash and session-length accumulators of e10s, non_e10s and counts.
main_ping_data.foreach(process_main)

def process_crash(d):
    """Count one crash ping as a single main crash.

    ``d`` is a mapping of property path -> value for a single crash ping. The crash
    is attributed to ``e10s`` when the ping's e10sEnabled value equals True and to
    ``non_e10s`` otherwise (including a None value); the overall ``counts`` is
    incremented as well.
    """
    e10s_flag = d["environment/settings/e10sEnabled"]
    cohort = e10s if e10s_flag == True else non_e10s

    cohort.main_crashes.add(1)
    counts.main_crashes.add(1)

# Run process_crash over every crash ping; this is what fills the main_crashes
# accumulators of e10s, non_e10s and counts.
crash_ping_data.foreach(process_crash)

In [11]:
def crash_totals(acc):
    """Return (main + content, main, content, plugin) crash counts read from ``acc``.

    The leading total adds main and content crashes only; plugin crashes are
    reported separately and are not part of it.
    """
    main = acc.main_crashes.value
    content = acc.content_crashes.value
    plugin = acc.plugin_crashes.value
    return (main + content, main, content, plugin)


def summary_fields(acc):
    """Return the crash_totals() of ``acc`` followed by ``acc.khours()``."""
    return crash_totals(acc) + (acc.khours(),)


def rates_per_khour(acc):
    """Return every crash_totals() entry of ``acc`` divided by ``acc.khours()``."""
    return tuple(n / acc.khours() for n in crash_totals(acc))


# Absolute numbers: overall (with the main-ping installation count), then per cohort.
print("crashes: {0} - m: {1}, c: {2}, p: {3} - kh: {4:.2f} - i: {5}".format(
        *(summary_fields(counts) + (main_ping_installations,))))
print("non-e10s: {0} - m: {1}, c: {2}, p: {3} - kh: {4:.2f}".format(
        *summary_fields(non_e10s)))
print("e10s: {0} - m: {1}, c: {2}, p: {3} - kh: {4:.2f}".format(
        *summary_fields(e10s)))

# Crashes per 100 installations that sent a main ping.
print("irate: {0:.2f} - m: {1:.2f}, c: {2:.2f}, p: {3:.2f}".format(
        *tuple(100 * n / main_ping_installations for n in crash_totals(counts))))

# Crashes per thousand hours: overall, then per cohort.
print("hrate: {0:.2f} - m: {1:.2f}, c: {2:.2f}, p: {3:.2f}".format(
        *rates_per_khour(counts)))
print("hrate(non-e10s): {0:.2f} - m: {1:.2f}, c: {2:.2f}, p: {3:.2f}".format(
        *rates_per_khour(non_e10s)))
print("hrate(e10s): {0:.2f} - m: {1:.2f}, c: {2:.2f}, p: {3:.2f}".format(
        *rates_per_khour(e10s)))


crashes: 92530 - m: 76034, c: 16496, p: 33710 - kh: 5263.75 - i: 1219772
non-e10s: 80740 - m: 71968, c: 8772, p: 26926 - kh: 4803.79
e10s: 11790 - m: 4066, c: 7724, p: 6784 - kh: 459.95
irate: 7.59 - m: 6.23, c: 1.35, p: 2.76
hrate: 17.58 - m: 14.44, c: 3.13, p: 6.40
hrate(non-e10s): 16.81 - m: 14.98, c: 1.83, p: 5.61
hrate(e10s): 25.63 - m: 8.84, c: 16.79, p: 14.75
