The following notebook contains various scripts to obtain the static analysis characteristics of the WatchDog data.
Make sure that you have downloaded the two bson files from the server: `users.bson` and `events.bson` and that they exist in the directory this notebook is in.

The first step is to load all required packages. You should rarely need to rerun this cell.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as md
import plotly
import pymongo
import bson
import json
import operator
import re
import copy
import pylab
import scipy.stats
from datetime import datetime
from collections import Counter,defaultdict,OrderedDict

Read the bson files as well as the eclipse messages dictionary obtained from [the internal Eclipse compiler messages.properties](https://github.com/eclipse/eclipse.jdt.core/blob/efc9b650d8590a5670b5897ab6f8c0fb0db2799d/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/problem/messages.properties)

In [None]:
def _read_bson_documents(path):
    """Return every document decoded from the BSON file at *path*."""
    with open(path, 'rb') as dump:
        return bson.decode_all(dump.read())


# Raw user and event documents, exactly as stored in the two BSON files.
users = _read_bson_documents('users.bson')
events = _read_bson_documents('events.bson')

# Mapping used to turn numeric Eclipse warning ids into message patterns.
with open('eclipse-messages.json', 'r') as messages_file:
    eclipse_messages = json.load(messages_file)

The following two functions are available for general visualization. The first one generates a histogram (drawn as a horizontal bar chart) of the top `top_n_items` items (25 in the calls below), followed by the full histogram. The second function prints a dictionary as the rows of a two-column LaTeX table.

In [None]:
def _show_and_save_barh(positions, values, tick_labels, value_caption, category_caption, figsize, path):
    """Draw one horizontal bar chart, show it and write it to *path*."""
    fig = plt.figure(figsize=figsize)
    plt.barh(positions, values)
    plt.yticks(positions, tick_labels)
    # Bars are horizontal: the value axis is x, the category axis is y.
    plt.xlabel(value_caption)
    plt.ylabel(category_caption)
    plt.tight_layout()
    plt.show()
    fig.savefig(path)


def plot_counts(ylabel, xlabel, count_list, top_n_items = 0, should_sort = True, print_index=False):
    """Plot counts as horizontal bar charts and save them under ``img/``.

    Parameters
    ----------
    ylabel : str
        Caption of the value axis. Because the bars are horizontal it is
        drawn along the x-axis of the figure.
    xlabel : str
        Caption of the category axis (drawn along the y-axis).
    count_list : iterable
        With ``should_sort=True``: string items that are tallied with
        ``Counter`` and plotted from most to least frequent.
        With ``should_sort=False``: ``(label, value)`` pairs that are plotted
        in the given order.
    top_n_items : int
        When non-zero, the first ``top_n_items`` entries are additionally
        printed through ``print_dictionary_as_table`` and plotted in their
        own figure. Values larger than the number of entries are clamped.
    should_sort : bool
        See ``count_list``.
    print_index : bool
        Forwarded to ``print_dictionary_as_table``.

    Notes
    -----
    Figures are written to ``img/top-<ylabel><xlabel>.png`` and
    ``img/<ylabel><xlabel>.png`` with spaces replaced by dashes; the ``img``
    directory must already exist. Nothing is drawn for empty input.
    """
    if should_sort:
        # most_common() orders by descending count and keeps first-seen order
        # among equal counts, like a stable reverse sort on the count.
        pairs = Counter(count_list).most_common()
    else:
        pairs = list(count_list)
    if not pairs:
        # zip(*[]) cannot be unpacked into labels/values; there is nothing to plot.
        return

    labels, values = zip(*pairs)
    indexes = np.arange(len(labels))

    if top_n_items != 0:
        # Slicing clamps to the available entries, so asking for more items
        # than exist no longer raises IndexError.
        first_n_indexes = indexes[:top_n_items]
        first_n_values = values[:top_n_items]
        first_n_labels = labels[:top_n_items]

        top_dictionary = dict(zip(first_n_labels, first_n_values))
        ordered_top = OrderedDict(sorted(top_dictionary.items(), key=operator.itemgetter(1), reverse=True))
        print_dictionary_as_table('Warning category', 'Frequency', ordered_top, print_index=print_index)

        _show_and_save_barh(
            first_n_indexes,
            first_n_values,
            [label[:75] for label in first_n_labels],
            ylabel,
            xlabel,
            (15, 15),
            ('img/top-' + ylabel + xlabel + '.png').replace(' ', '-'),
        )

    _show_and_save_barh(
        indexes,
        values,
        [label[:50] for label in labels],
        ylabel,
        xlabel,
        (13, 10),
        ('img/' + ylabel + xlabel + '.png').replace(' ', '-'),
    )

def print_dictionary_as_table(header1, header2, dictionary, anonymize=False, print_index=False):
    """Print *dictionary* as the rows of a two-column LaTeX table.

    Parameters
    ----------
    header1, header2 : str
        Column titles. The first column is left-aligned and padded to the
        width of ``header1``.
    dictionary : mapping
        Maps a string key to either a number, which is printed as is, or a
        sized container, whose length is printed.
    anonymize : bool
        When true, the zero-based row number is printed instead of the key.
    print_index : bool
        When true, an extra leading ``Index`` column with the one-based row
        number is printed.
    """
    import numbers

    index_header = 'Index & ' if print_index else ''
    # '\\hline' spells the backslash explicitly; '\h' is an invalid escape sequence.
    print(index_header + header1 + ' & ' + header2 + ' \\\\ \\hline')

    name_width = len(header1)
    for index, (key, value) in enumerate(dictionary.items()):
        index_cell = str(index + 1) + ' & ' if print_index else ''
        # Braces are escaped because they are special characters in LaTeX.
        name = index if anonymize else key.replace('{', '\\{').replace('}', '\\}')
        # numbers.Number also covers bool and numpy scalars, which the exact
        # type comparison sent to len() and crashed on.
        shown_value = value if isinstance(value, numbers.Number) else len(value)
        print(index_cell + '{:<{width}}'.format(name, width=name_width) + ' & ' + str(shown_value) + ' \\\\')

# -*- coding: utf-8 -*-
"""
Created on Thu Jun 15 17:09:21 2017

@author: apbar
Copied source code from https://github.com/BarataAP/dunn
"""

def _is_iterable(obj):
    """Return True when ``iter(obj)`` succeeds."""
    try:
        iter(obj)
    except TypeError:
        return False
    return True


def _average_ranks(sample):
    """Return the tie-averaged 1-based ranks of *sample* in ascending value order."""
    ordered = sorted(sample)
    ranks = []
    start = 0
    while start < len(ordered):
        stop = start + 1
        while stop < len(ordered) and ordered[stop] == ordered[start]:
            stop += 1
        # Positions start+1 .. stop hold equal values and share their mean rank.
        ranks.extend([(start + 1 + stop) / 2.0] * (stop - start))
        start = stop
    return ranks


def makeRanks(*args):
    """
    Converts tuple of arrays of values into ranks.

    Each sample is ranked on its own. Equal values receive the mean of the
    ranks they occupy. The input samples are not modified.

    Parameters
    ----------
    sample1, sample2, ... : tuple_array_like
        The sample data, possibly with different lengths. The samples may be
        passed as separate arguments or as one collection of samples.

    Returns
    -------
    sample1, sample2, ... : tuple_array_like
        The ranked sample data, one list per sample in the order the samples
        were given. Within a list the ranks are in ascending value order.

    """
    if not args:
        return ()
    first = args[0]
    # A single argument whose elements are themselves iterable is taken as a
    # collection of samples; otherwise every positional argument is a sample.
    if _is_iterable(first) and all(_is_iterable(item) for item in first):
        samples = first
    else:
        samples = args
    return tuple(_average_ranks(sample) for sample in samples)
    
def _is_missing(value):
    """Return True for ``None`` and for any floating point NaN."""
    if value is None:
        return True
    return isinstance(value, (float, np.floating)) and bool(np.isnan(value))


def _format_dunn_matrix(labels, values, width):
    """Return the text lines of one upper-triangular comparison matrix.

    ``values`` holds one number per pair in row-major order. The header line
    is repeated below the rows.
    """
    header = "  " + " " * width + "".join(label.ljust(width) + "    " for label in labels[1:])
    if width < 4:
        negative_format, positive_format = "{0:.4f}", "{0:.5f}"
    else:
        # Negative numbers get one decimal less to make room for the sign.
        negative_format = "{0:." + str(width) + "f}"
        positive_format = "{0:." + str(width + 1) + "f}"
    lines = [header]
    position = 0
    for i in range(len(labels) - 1):
        row = labels[i].rjust(width) + "  " + "-".ljust(width + 4) * i
        for _ in range(i + 1, len(labels)):
            value = values[position]
            chosen = negative_format if value < 0 else positive_format
            row = row + chosen.format(value) + " "
            position += 1
        lines.append(row + labels[i])
    lines.append(header)
    return lines


def _dunn_csv_section(title, labels, values):
    """Return the CSV lines of one upper-triangular comparison matrix."""
    lines = [",".join([title] + labels[1:]) + "\n"]
    position = 0
    for i in range(len(labels) - 1):
        pair_count = len(labels) - 1 - i
        cells = [labels[i]] + [""] * i + [str(value) for value in values[position:position + pair_count]]
        lines.append(",".join(cells) + "\n")
        position += pair_count
    return lines


def dunn(*args, **kwargs):
    """
    Performs a two-tailed Dunn's test for stochastic dominance.

    Dunn’s test (1964) tests for stochastic dominance and reports the results
    among multiple pairwise comparisons after a rejected null hypothesis for a 
    Kruskal-Wallis test for stochastic dominance among k groups.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        The sample data, possibly with different lengths. The groups may be
        passed as separate arguments or as a single list/tuple of groups.
        ``None`` and NaN entries are dropped; the inputs are not modified.
    correction : "none", "bonferroni" or "fdr"
        Type of correction to use.
        Default is correction="none",
        "bonferroni" -> bonferroni correction,
        "fdr" -> (Benjamini-Hochberg false discovery rate method)
    labels : sequence of str
        Group labels to use when displaying or saving results
        Default is labels=("0", "1", ..., "k-1")
        k = len(groups)
    display : bool
        Prints results on screen when True
        Default is display=True
    save : False, True or str
        Saves results onto csv file
        Default is save=False
        True -> labels will be used as filename
        "myFile" -> myFile.csv will be created
    
    Returns
    -------
    dunn: hash_like
        Dunn's multiple pairwise test statistics, p-values, and q-values (corrections).
        Keys are 0, 1, ... in pair order; "q-value" is present only when a
        correction was requested.

    Raises
    ------
    ValueError
        If no group is given, a group has no proper values, the labels do
        not match the groups or are not strings, or ``correction``/``save``
        has an unsupported value.

    References
    ----------
    .. [1]  https://stats.stackexchange.com/tags/dunn-test/info
    .. [2]  Dunn, O. J. (1961). Multiple comparisons among means.
            Journal of the American Statistical Association, 56(293):52–64.
    .. [3]  Dunn, O. J. (1964). Multiple comparisons using rank sums.
            Technometrics, 6(3):241–252.
    
    Examples
    --------
    >>> a = [0.28551035, 0.338524035, 0.088631321, 0.205930807, 0.363240102]
    >>> b = [0.52173913, 0.763358779, 0.325436786, 0.425305688, 0.378071834]
    >>> c = [0.98911968, 1.192718142, 0.788288288, 0.549176236, 0.544588155]
    >>> d = [1.26705653, 1.625320787, 1.266108976, 1.154187629, 1.268489431]
    >>> e = [1.25697569, 1.265897356, 1.237814561, 0.954612564, 2.365415457]
    >>> f = dunn(a,b,c,d,e)
    
          1       2       3       4       
       0  -0.9882 -2.1054 -3.8241 -3.3944 0
       1  -       -1.1171 -2.8358 -2.4061 1
       2  -       -       -1.7187 -1.2890 2
       3  -       -       -       0.42967 3
          1       2       3       4       
    
    Dunn test H0 z-statistic
    
    
          1       2       3       4       
       0  0.32304 0.03526 0.00013 0.00069 0
       1  -       0.26393 0.00457 0.01612 1
       2  -       -       0.08567 0.19740 2
       3  -       -       -       0.66744 3
          1       2       3       4       
    
    Adjustment method for p-value: none
    
    >>> groups = a,b,c,d,e
    >>> g = dunn(groups,correction="fdr",labels=("a","b","c","d","e"),display=True,save=False)
    
          b       c       d       e       
       a  -0.9882 -2.1054 -3.8241 -3.3944 a
       b  -       -1.1171 -2.8358 -2.4061 b
       c  -       -       -1.7187 -1.2890 c
       d  -       -       -       0.42967 d
          b       c       d       e       
    
    Dunn test H0 z-statistic
    
    
          b       c       d       e       
       a  0.35893 0.07052 0.00131 0.00344 a
       b  -       0.32992 0.01524 0.04030 b
       c  -       -       0.14279 0.28199 c
       d  -       -       -       0.66744 d
          b       c       d       e       
    
    Adjustment method for p-value: fdr
    
    >>> g
    {0: {'ID': 'a-b',
      'p-value': 0.32303584413413144,
      'q-value': 0.35892871570459051,
      'statistic': -0.98823852617441732},
     1: {'ID': 'a-c',
      'p-value': 0.035258440790219898,
      'q-value': 0.070516881580439797,
      'statistic': -2.1053777296759324},
     2: {'ID': 'a-d',
      'p-value': 0.00013127544861251964,
      'q-value': 0.0013127544861251965,
      'statistic': -3.8240534273705715},
     3: {'ID': 'a-e',
      'p-value': 0.0006878304609215692,
      'q-value': 0.0034391523046078459,
      'statistic': -3.3943845029469117},
     4: {'ID': 'b-c',
      'p-value': 0.26393481049044942,
      'q-value': 0.32991851311306175,
      'statistic': -1.1171392035015151},
     5: {'ID': 'b-d',
      'p-value': 0.0045708928878404912,
      'q-value': 0.015236309626134972,
      'statistic': -2.8358149011961538},
     6: {'ID': 'b-e',
      'p-value': 0.016121821274057219,
      'q-value': 0.040304553185143047,
      'statistic': -2.4061459767724944},
     7: {'ID': 'c-d',
      'p-value': 0.085673439552316863,
      'q-value': 0.14278906592052812,
      'statistic': -1.7186756976946389},
     8: {'ID': 'c-e',
      'p-value': 0.19739573184449921,
      'q-value': 0.28199390263499891,
      'statistic': -1.2890067732709791},
     9: {'ID': 'd-e',
      'p-value': 0.66743649170988251,
      'q-value': 0.66743649170988251,
      'statistic': 0.42966892442365973}}
     
    """  
    if not args:
        raise ValueError("at least one group is required")

    # Accept the groups either as separate arguments or as one collection.
    # isinstance works on Python 3, unlike comparing against "<type 'list'>".
    groups = args
    first = groups[0]
    if len(first) > 0 and isinstance(first[0], (list, tuple, np.ndarray)):
        groups = first

    if "labels" not in kwargs:
        labels = [str(i) for i in range(len(groups))]
    else:
        labels = list(kwargs["labels"])
        if len(labels) != len(groups):
            raise ValueError("length of groups and length of labels must be the same")
        if not all(isinstance(label, str) for label in labels):
            raise ValueError("each label must be a string")

    # Work on cleaned copies so the caller's data is left untouched.
    groups = [[value for value in group if not _is_missing(value)] for group in groups]
    for label, group in zip(labels, groups):
        if len(group) < 5:
            print("WARNING: at least one group has fewer than 5 proper elements")
            print(label, group)
        if len(group) == 0:
            raise ValueError("at least one group has no proper values")

    # Rank the pooled observations; rankdata returns tie-averaged ranks in
    # the order of its input, so each group's ranks are a contiguous slice.
    pooled = [value for group in groups for value in group]
    pooled_ranks = scipy.stats.rankdata(pooled)
    n = len(pooled)
    # Tie term: sum of (t**3 - t) over every set of t tied observations.
    ties = float(sum(count ** 3 - count for count in Counter(pooled_ranks.tolist()).values()))

    mean_ranks = []
    sizes = []
    offset = 0
    for group in groups:
        mean_ranks.append(np.mean(pooled_ranks[offset:offset + len(group)]))
        sizes.append(float(len(group)))
        offset += len(group)

    # n == 1 is only possible with a single one-element group, which has no
    # pairs to compare, so the placeholder is never used.
    if n > 1:
        pooled_variance = ((n * (n + 1)) / 12.0) - (ties / (12.0 * (n - 1)))
    else:
        pooled_variance = 0.0

    results = {}
    key = 0
    for i in range(len(groups) - 1):
        for j in range(i + 1, len(groups)):
            spread = (pooled_variance * (1.0 / sizes[i] + 1.0 / sizes[j])) ** 0.5
            stat = (mean_ranks[i] - mean_ranks[j]) / spread
            cdf = scipy.stats.norm.cdf(stat)
            # Two-tailed p-value: double the smaller tail.
            p = 2 * (1 - cdf) if cdf > 0.5 else 2 * cdf
            results[key] = {}
            results[key]["ID"] = labels[i] + "-" + labels[j]
            results[key]["statistic"] = stat
            results[key]["p-value"] = p
            key = key + 1

    pair_keys = sorted(results.keys())
    correction = kwargs.get("correction", "none")
    if correction != "none":
        m = float(len(results))
        if correction == "bonferroni":
            for pair in pair_keys:
                q = results[pair]["p-value"] * m
                if q > 1:
                    q = 1.0
                results[pair]["q-value"] = q
        elif correction == "fdr":
            descending = sorted((results[pair]["p-value"] for pair in pair_keys), reverse=True)
            for pair in pair_keys:
                position = descending.index(results[pair]["p-value"]) + 1
                q = results[pair]["p-value"] * (m / (m + 1 - position))
                # No adjusted value may exceed the largest raw p-value.
                if q > descending[0]:
                    q = descending[0]
                results[pair]["q-value"] = q
        else:
            raise ValueError("correction keyword must be 'bonferroni' or 'fdr'")

    # The second matrix shows adjusted values whenever a correction is active.
    shown_field = "p-value" if correction == "none" else "q-value"
    statistics = [results[pair]["statistic"] for pair in pair_keys]
    shown_values = [results[pair][shown_field] for pair in pair_keys]

    if kwargs.get("display", True):
        width = max(len(label) for label in labels)
        if width < 3:
            width = 4
        print("")
        print("\n".join(_format_dunn_matrix(labels, statistics, width)))
        print("\nDunn test H0 z-statistic\n")
        print("")
        print("\n".join(_format_dunn_matrix(labels, shown_values, width)))
        print("\nAdjustment method for p-value:", correction, "\n")

    save = kwargs.get("save", False)
    if isinstance(save, str):
        file_name = save if save.endswith(".csv") else save + ".csv"
    elif save == True:
        file_name = "_vs_".join(labels) + ".csv"
    elif save == False:
        file_name = None
    else:
        raise ValueError("save arg must be either True, or string")
    if file_name is not None:
        csv_lines = _dunn_csv_section("statistic", labels, statistics)
        csv_lines.append("\n")
        csv_lines.extend(_dunn_csv_section("p-value", labels, shown_values))
        # The context manager closes the file even if writing fails.
        with open(file_name, 'w') as handle:
            handle.writelines(csv_lines)
    return results

First obtain the static analysis events we are interested in. The types are `sa-wc` and `sa-wr`. A previous version of WatchDog was deployed for IntelliJ, but this version did not include the full data characteristics that we needed. Therefore, we have to filter for `'warning' in event`, as this version of WatchDog does not have this field in the event. Later versions of WatchDog do.

The events can be filtered by `userId`. This is needed because in previous analyses a single user generated a significant portion of the warnings, which would result in a misrepresentation of the full developer population. The cell below excludes two such user IDs.

In [None]:
# Users whose events are left out of every statistic computed from all_sa_events.
excluded_user_ids = {
    '5a08e78c0e305bfcd5865a105ca44fc9f042b1d7',
    'd407d447189a3aa9f047339934db73bb48b80054',
}
all_sa_events = [event for event in events if event['userId'] not in excluded_user_ids]

# Warning creation/removal events; events without a 'warning' field are skipped.
sa_events = [
    event for event in all_sa_events
    if event['et'] in ('sa-wc', 'sa-wr') and 'warning' in event
]
sa_snapshot_events = [event for event in all_sa_events if event['et'] == 'sa-snap']

num_snapshot_events = len(sa_snapshot_events)
num_warnings_in_snapshot_events = sum(len(event['warnings']) for event in sa_snapshot_events)
# A snapshot is empty when it lists no warnings. The previous `> 0` test
# counted the non-empty snapshots instead, which inverted the three
# statistics derived from this number.
num_empty_snapshot_events = sum(1 for event in sa_snapshot_events if len(event['warnings']) == 0)
num_non_empty_snapshot_events = num_snapshot_events - num_empty_snapshot_events
sa_event_type_counts = Counter(event['et'] for event in sa_events)

print('Number of static analysis events: ' + str(len(sa_events)))
print('Number of warning creation events: ' + str(sa_event_type_counts['sa-wc']))
print('Number of warning removal events: ' + str(sa_event_type_counts['sa-wr']))
print('Number of warning snapshot events: ' + str(num_snapshot_events))
print('Number of warnings in warning snapshot events: ' + str(num_warnings_in_snapshot_events))
print('Number of warning snapshot events with zero warnings: ' + str(num_empty_snapshot_events))
# The ratios are skipped when their denominator is zero.
if num_snapshot_events:
    print('Percentage of non-empty snapshot events: ' + str(round(num_non_empty_snapshot_events / num_snapshot_events * 100, 2)))
if num_non_empty_snapshot_events:
    print('Average number of warnings per snapshot: ' + str(round(num_warnings_in_snapshot_events / num_non_empty_snapshot_events, 2)))

Run the following query to obtain the total number of development hours generated by the developers:
```
db.intervals.aggregate([
  {
    $match: {
      it: "eo",
      ss: {
        $in: db.events.distinct("ss", {et: {$in: ["sa-wc", "sa-wr", "sa-snap"]}})
      }
    }
  },
  {
    $project: {
      duration: {
        $divide: [
          {
            $subtract: ["$te", "$ts"]
          },
          60 * 60 * 1000
        ]
      }
    }
  },
  {
    $group: {
      _id: null,
      timeTotal: {
        $sum: "$duration"
      }
    }
  }
])

```

The very first analysis we do is plotting a histogram of the warning categories. The y-axis shows the warning that is being generated. Since Eclipse normally uses integers to represent a category, use the previously loaded `eclipse-messages.json` data to map back to the full message pattern. This makes reading the graph significantly easier.

In [None]:
# Keep only the events whose warning type is not 'unknown'.
sa_events_with_classifications = [
    event for event in sa_events if event['warning']['type'] != 'unknown'
]
num_classified_events = len(sa_events_with_classifications)
len_no_classifications = len(sa_events) - num_classified_events

print('Number of classified warnings: ' + str(num_classified_events))
unclassified_percentage = round(len_no_classifications / len(sa_events) * 100, 2)
print('Fraction of events with unknown classification: ' + str(unclassified_percentage))

def stringify_warning(warning):
    """Translate a warning type into a readable label.

    A type made up of digits only is looked up in ``eclipse_messages`` under
    the key one below its numeric value. Any other type is returned unchanged.
    """
    if not warning.isdigit():
        return warning
    message_key = str(int(warning) - 1)
    return eclipse_messages[message_key]

def stringify_warnings(warnings):
    """Lazily apply ``stringify_warning`` to every entry of *warnings*."""
    return (stringify_warning(warning) for warning in warnings)

# Readable label of every classified event, in event order.
classified_types = [event['warning']['type'] for event in sa_events_with_classifications]
sa_events_labeled_classifications = list(stringify_warnings(classified_types))

num_checkstyle_warnings = sum(
    1 for warning in sa_events_labeled_classifications if warning.startswith('checkstyle')
)
print('Number of CheckStyle warnings: ' + str(num_checkstyle_warnings))

print()

plot_counts('Number of events in category', 'Warning category', sa_events_labeled_classifications, 25, print_index=True)

# Per-category tallies of removal ('sa-wr') and creation ('sa-wc') events.
sa_events_removed = Counter(stringify_warnings(
    event['warning']['type'] for event in sa_events_with_classifications if event['et'] == 'sa-wr'
))
sa_events_created = Counter(stringify_warnings(
    event['warning']['type'] for event in sa_events_with_classifications if event['et'] == 'sa-wc'
))

# Categories ordered from most to least frequent, with their event counts.
top_warnings, values = zip(*Counter(sa_events_labeled_classifications).most_common())
indexes_top_warnings = np.arange(len(top_warnings))
top_warning_names = list(top_warnings[:25])

# Built least-frequent first, so the most frequent category ends up last.
percentage_removed = []
for key in reversed(top_warning_names):
    if key not in sa_events_created:
        continue
    # Capped at 1.0 because a category can have more removals than creations.
    percentage = min(1.0, sa_events_removed[key] / sa_events_created[key])
    percentage_removed.append((key, percentage))

plot_counts('Fraction of warnings resolved', 'Warning category', percentage_removed, should_sort = False)
num_majority_resolved = sum(1 for _, fraction in percentage_removed if fraction >= 0.5)
print('Number of warning categories with majority resolved: ' + str(num_majority_resolved))

The second analysis is regarding the location of the warnings in a file. The location is relative, meaning that we take the line as percentage of the full file length. Since some files do not have the file length information, disregard these values. There is also a heatmap for the warning snapshots, which thus includes the same information but then for unresolved warnings.

In [None]:
def showHeatMap(name, typetext, warnings):
    """Show relative positions as a one-row heat map and save the figure.

    The values in *warnings* are binned with edges ``np.arange(0, 1.01, 0.01)``
    and the bin counts are drawn as a single colour strip. *typetext* is
    inserted into the x-axis caption. The figure is written to
    ``img/file-heatmap-<name>``.
    """
    bin_edges = np.arange(0, 1.01, 0.01)
    counts = np.histogram(warnings, bin_edges)[0]
    # imshow needs a 2-D array: present the counts as one row.
    strip = counts.reshape(1, -1)
    plt.imshow(strip, aspect = "auto", cmap="viridis", extent=[0,1,0,100])
    plt.gca().set_yticks([])
    plt.ylabel('Frequency of occurrence')
    plt.xlabel('Position of ' + typetext + ' relative to total file length')
    # Keep a handle on the figure before showing it, so it can be saved afterwards.
    figure = plt.gcf()
    plt.show()
    figure.savefig('img/file-heatmap-' + name)

def get_relative_line(event):
    """Return the warning's line divided by the file length, rounded to 2 decimals.

    The length is ``event['warning']['doctotal']``, unless that equals -1, in
    which case ``event['doc']['sloc']`` is used instead.
    """
    warning = event['warning']
    file_length = warning['doctotal']
    if file_length == -1:
        file_length = event['doc']['sloc']
    return round(warning['line'] / file_length, 2)

print('Warnings added/removed relative to file')
# Skip events without usable length or line information.
sa_events_doctotal = [
    event for event in sa_events
    if 'doctotal' in event['warning']
    and abs(event['doc']['sloc']) != 1
    and event['doc']['sloc'] != 0
    and event['warning']['line'] != -1
]
showHeatMap('created-removed', 'warning', [get_relative_line(event) for event in sa_events_doctotal])

print('Warning snapshots of all warnings')
# NOTE(review): snapshots are taken from the unfiltered `events`, so users
# excluded from `all_sa_events` are included here - confirm this is intended.
sa_snapshots = [event for event in events if event['et'] == 'sa-snap']
snapshots_relative_loc = []
for event in sa_snapshots:
    if not event['warnings']:
        continue
    sloc = event['doc']['sloc']
    # Same length check as for the created/removed events above. Previously
    # abs() wrapped the whole condition, so files with sloc == 1 slipped through.
    if abs(sloc) == 1 or sloc == 0:
        continue
    for warning in event['warnings']:
        percentage = round(warning['line'] / sloc, 2)
        if (percentage < 1):
            snapshots_relative_loc.append(percentage)
showHeatMap('snapshots', 'warning', snapshots_relative_loc)

relative_positions_projects = np.genfromtxt('positions_relative.csv', delimiter=',')
print('Relative positions of class declaration in ' + str(len(relative_positions_projects)) + ' java files')
showHeatMap('open-source-projects', 'class declaration', relative_positions_projects)

To get a quick overview of our developer population, plot the number of events per developer. Use this data to spot potential data skews and act accordingly.

In [None]:
# One user id per static analysis event; plot_counts tallies them per user.
event_user_ids = [event['userId'] for event in sa_events]
plot_counts('Number of events per user', 'User ID', event_user_ids)

Next we are tracking warning time. We only have this data for warning removals, as these events have a previous creation timestamp to compare to. We split the data into two: for the removals that have a creation time and for those that do not. Print the population percentage of time-calculated warnings, to get a sense of how many warnings are actually resolved without previous information.

In [None]:
# NOTE: despite its name, this list holds the warning removal ('sa-wr') events.
created_warning_events = [event for event in sa_events if event['et'] == 'sa-wr']
life_time_events = [event['warning']['diff'] for event in created_warning_events]
# A diff of -1 is kept apart from the other values.
has_time_diff = [time for time in life_time_events if time != -1]
has_no_time_diff = [time for time in life_time_events if time == -1]

number_of_time_diff = len(has_time_diff)
number_of_no_time_diff = len(has_no_time_diff)
time_diff_percentage = round(number_of_time_diff / (number_of_time_diff + number_of_no_time_diff) * 100, 2)

print('Number of warnings which have a time diff: ' + str(number_of_time_diff))
print('Number of warnings which do not have a time diff: ' + str(number_of_no_time_diff))
print('Relative percentage of time diff of no time diff: ' + str(time_diff_percentage) + ' %')
print('Maximum recorded resolution time: ' + str(max(has_time_diff)))
print('Lowest recorded resolution time: ' + str(min(has_time_diff)))

In [None]:
fig = plt.figure(figsize=(10,5))
ax = plt.axes()
# Plot only the real resolution times. `life_time_events` still contains the
# -1 entries that were split off into `has_no_time_diff`, and they would be
# treated as durations in the distribution.
bp = plt.boxplot(has_time_diff, sym='+', vert=False, showfliers=False,whis=[5, 95])
plt.ylabel('Distribution of resolution time')
plt.xlabel('Time in seconds to resolve a warning')
plt.xlim([-5, 400])
# A single box needs no tick label.
ax.set_yticklabels('')
plt.show()
fig.savefig('img/life-time-warning-events.png')

# Resolution times of the top warning categories, grouped by readable label.
top_warning_lifetime = defaultdict(list)

for event in created_warning_events:
    resolution_time = event['warning']['diff']
    if resolution_time == -1:
        continue
    category = stringify_warning(event['warning']['type'])
    if category in top_warning_names:
        top_warning_lifetime[category].append(resolution_time)

# Least frequent category first, so the most frequent one is the last box.
ordered_lifetimes = sorted(
    top_warning_lifetime.items(),
    key=lambda item: top_warnings.index(item[0]),
    reverse=True,
)
category_time_diff_labels, category_time_diff_values = zip(*ordered_lifetimes)
category_time_diff_labels = [label[:75] for label in category_time_diff_labels]

fig = plt.figure(figsize=(10,10))
ax = plt.axes()
bp = plt.boxplot(category_time_diff_values, sym='+', vert=False, showfliers=False,whis=[5, 95])
plt.xlabel('Time in seconds to resolve a warning')
plt.ylabel('Distribution of resolution time')
plt.xlim([-25, 1000])
ax.set_yticklabels(category_time_diff_labels)
plt.tight_layout()
plt.show()
fig.savefig('img/life-time-per-category.png')

# Each high-level category is a (title, numbers) pair. The numbers are matched
# against `25 - index` by filter_from_category_indices.
type_resolution_categories = (
    'Type resolution',
    [1, 6, 7, 8, 11, 12, 16, 19, 21, 22, 23, 24, 25],
)
unused_declaration_categories = (
    'Unused declarations/tokens',
    [9, 14, 15],
)
import_management_categories = (
    'Import management',
    [4, 5, 13],
)

def filter_from_category_indices(indices, category_values, number_of_categories=25):
    """Flatten the value lists of the categories whose rank is in ``indices``.

    ``category_values`` is read in reverse rank order: the list at position 0
    has rank ``number_of_categories`` and every following position has a rank
    that is one lower.

    Args:
        indices: Collection of ranks to keep.
        category_values: Sequence of value lists, one list per category.
        number_of_categories: Rank assigned to position 0. Defaults to 25,
            the value that was previously hard-coded.

    Returns:
        A single flat list with the values of all selected categories, in the
        order in which the categories appear in ``category_values``.
    """
    return [
        value
        for position, values in enumerate(category_values)
        if (number_of_categories - position) in indices
        for value in values
    ]

# The categories in the order in which their boxes are drawn.
high_level_categories = [import_management_categories, unused_declaration_categories, type_resolution_categories]

# For every category, keep its name as the label and merge the 'diff' values
# of all per-warning groups selected by the category into one list.
high_level_category_labels = []
high_level_category_values = []
for name, categories in high_level_categories:
    high_level_category_labels.append(name)
    high_level_category_values.append(filter_from_category_indices(categories, category_time_diff_values))

fig, ax = plt.subplots(figsize=(10, 5))
bp = ax.boxplot(high_level_category_values, sym='+', vert=False, showfliers=False, whis=[5, 95])
ax.set_ylabel('Distribution of resolution time')
ax.set_xlabel('Time in seconds to resolve a warning')
ax.set_xlim([-25, 500])
ax.set_yticklabels(high_level_category_labels)
fig.tight_layout()
plt.show()
fig.savefig('img/life-time-per-high-level-category.png')

Next, we print the number of events per user, as well as the distribution of resolution times per user. Users who do not have enough data yet (25 events or fewer) are left out of the plot, to obtain a fair representation of their activity.

In [None]:
# Collect the 'diff' value of every created warning event per user, skipping
# the events whose diff is -1.
life_time_per_user = defaultdict(list)
for event in created_warning_events:
    if event['warning']['diff'] == -1:
        continue
    life_time_per_user[event['userId']].append(event['warning']['diff'])

# Users ordered from the fewest to the most collected values.
ordered_life_time_per_user = OrderedDict(
    sorted(life_time_per_user.items(), key=lambda item: len(item[1]))
)
# Only users with more than 25 collected values are plotted.
life_time_per_user_values = [
    values for values in ordered_life_time_per_user.values() if len(values) > 25
]

user_id_header = '{:<40}'.format('User ID')
print_dictionary_as_table(user_id_header, 'Number of events', ordered_life_time_per_user)
print_dictionary_as_table(user_id_header, 'Number of events', ordered_life_time_per_user, True)

fig, ax = plt.subplots(figsize=(10, 10))
bp = ax.boxplot(life_time_per_user_values, sym='+', vert=False, showfliers=False, notch=False)
ax.set_ylabel('Distribution of resolution time for each user')
ax.set_xlabel('Time in seconds to resolve a warning')
ax.set_xlim([-10, 800])
# Label every box with the number of values of the corresponding user.
ax.set_yticklabels([str(len(values)) + ' events' for values in life_time_per_user_values])
plt.show()
fig.savefig('img/life-time-for-number-of-events.png')

Similarly to the time distribution per user, we also plot the time distribution per level of programming experience.

In [None]:
# unique_users maps the id of every user with at least one usable event to
# that user's programming experience; life_time_per_programming_experience
# groups the 'diff' values of those events by programming experience.
unique_users = defaultdict(str)
life_time_per_programming_experience = defaultdict(list)

# Index the users by id once, so that every event is resolved with a
# dictionary lookup instead of a scan over the complete user list.
# setdefault keeps the first user found for an id, which matches taking the
# first match of a linear search.
users_by_id = {}
for user in users:
    users_by_id.setdefault(user['id'], user)

for event in created_warning_events:
    # Events whose diff is -1 are skipped.
    if event['warning']['diff'] == -1:
        continue
    user = users_by_id.get(event['userId'])
    if user is None:
        print('Could not find user with id: ' + str(event['userId']))
        continue
    unique_users[event['userId']] = user['programmingExperience']
    life_time_per_programming_experience[user['programmingExperience']].append(event['warning']['diff'])

# Merge the 'NA' group into the 'N/A' group. The 'N/A' key exists afterwards
# even when neither group had any values.
life_time_per_programming_experience['N/A'] = (
    life_time_per_programming_experience['N/A'] + life_time_per_programming_experience.pop('NA', [])
)

def get_digit_from_string(string):
    """Return the sort key of a programming-experience label.

    The key is the negated number of the label, so labels with a larger
    number come first when sorting in ascending order:

    * a label starting with ``<`` yields ``0``;
    * a label starting with ``>`` yields ``-15``;
    * any other label yields minus the first run of digits it contains;
    * a label without digits, including the empty string, yields ``1``.

    Args:
        string: The label to derive the key from.

    Returns:
        The integer sort key described above.
    """
    # startswith, unlike string[0], does not fail on an empty label.
    if string.startswith("<"):
        return 0
    if string.startswith(">"):
        return -15

    matcher = re.search(r"(\d+)", string)
    if matcher:
        return -int(matcher.group(0))

    # No number available: sort after every numbered label.
    return 1

def get_digit_from_tuple(tup):
    """Return the sort key of ``tup`` based on its first element.

    The first element of ``tup`` is handed to ``get_digit_from_string``; this
    makes the function usable as a sort key for ``(label, value)`` pairs.
    """
    label = tup[0]
    return get_digit_from_string(label)

# Experience labels in the order defined by get_digit_from_string, and the
# lists of 'diff' values of the groups in that same order.
sorted_keys = sorted(life_time_per_programming_experience, key=get_digit_from_string)
counts_per_exp = [life_time_per_programming_experience[key] for key in sorted_keys]

events_per_experience = sorted(life_time_per_programming_experience.items(), key=get_digit_from_tuple)
print_dictionary_as_table('Programming experience', 'Number of events', OrderedDict(events_per_experience))

print()

# Count the users per programming-experience group.
programming_exp_user_count = defaultdict(int)
for exp in unique_users.values():
    programming_exp_user_count[exp] += 1

# Merge the 'NA' count into 'N/A'. The 'N/A' key exists afterwards even when
# no user belongs to either group.
programming_exp_user_count['N/A'] += programming_exp_user_count.pop('NA', 0)
users_per_experience = sorted(programming_exp_user_count.items(), key=get_digit_from_tuple)
print_dictionary_as_table('Programming experience', 'Number of users', OrderedDict(users_per_experience))

print()


# Average number of events per user for every programming-experience group,
# rounded to two decimals.
relative_frequency_per_exp = defaultdict(float)
for exp, num_users in programming_exp_user_count.items():
    # A group can be present with zero users (the 'N/A' group is always
    # created when 'NA' is merged into it); report 0.0 for such a group
    # instead of failing with a ZeroDivisionError.
    if num_users == 0:
        relative_frequency_per_exp[exp] = 0.0
        continue
    relative_frequency_per_exp[exp] = round(len(life_time_per_programming_experience[exp]) / num_users, 2)

print_dictionary_as_table('Programming experience', 'Average number of events per user', OrderedDict(sorted(relative_frequency_per_exp.items(), key=get_digit_from_tuple)))

# Horizontal box plot with one box per programming-experience group, labelled
# with the experience labels in sorted_keys. Outliers are not drawn.
fig, ax = plt.subplots(figsize=(10, 10))
bp = ax.boxplot(counts_per_exp, sym='+', vert=False, showfliers=False, notch=False)
ax.set_ylabel('Distribution of resolution time per user')
ax.set_xlabel('Time in seconds to resolve a warning')
ax.set_yticklabels(sorted_keys)
plt.show()
fig.savefig('img/programming-experience-lifetime.png')

In [None]:
# Exploratory statistics on the resolution times per programming-experience group.
# Draw 150 values from every group (np.random.choice samples with replacement
# by default; no seed is set, so the output differs between runs) and sort them.
sampled = [np.array(sorted(np.random.choice(counts, 150))) for counts in counts_per_exp]
print(len(counts_per_exp))
# Kruskal-Wallis H-test over all groups except the last one in counts_per_exp
# (presumably the 'N/A' group, which sorts last — verify against sorted_keys).
print(scipy.stats.kruskal(*counts_per_exp[:-1]))
# Independent two-sample t-test between the samples of the first two groups.
print(scipy.stats.ttest_ind(sampled[0],sampled[1]))
# Spearman rank correlation between the sampled groups; with axis=1 every row
# (group) is a variable.
# NOTE(review): every sample was sorted above, so the rows are all monotonically
# increasing and the rank correlation will be (close to) 1 by construction —
# confirm that this is intended.
print(scipy.stats.spearmanr(sampled[:-1], axis=1))
# Normality test for every sampled group.
print([scipy.stats.normaltest(counts) for counts in sampled])
# print(p, p < 3.27207e-11)
# NOTE(review): `dunn` is neither imported nor defined in the visible part of
# the notebook — confirm where it comes from before running this cell.
print(dunn(*counts_per_exp[:-1]))
# Median of every group except the last one.
print([np.median(counts) for counts in counts_per_exp[:-1]])

In [None]:
# Labels of the Sankey nodes; a "Yes" node is coloured green, any other node red.
states = [
    "Yes", "No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes", "No",
    "Yes", "No", "Yes", "Yes", "No", "Yes", "No", "No", "Yes", "No",
]
colors = ["green" if state == "Yes" else "red" for state in states]

# Every link goes from node source[i] to node target[i] with weight value[i];
# the numbers in source and target are positions in `states`.
sankey_data = {
    'type': 'sankey',
    'node': {
        'pad': 15,
        'thickness': 58,
        'line': {
            'color': "black",
            'width': 0.5,
        },
        'label': states,
        'color': colors,
    },
    'link': {
        'source': [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6, 6, 7, 7, 8, 9, 10, 11, 12, 13, 13, 14, 14, 15, 16, 17],
        'target': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 17, 17, 17, 17, 18, 19, 18, 19, 19, 19, 19],
        'value': [38, 11, 5, 4, 18, 20, 6, 5, 2, 3, 4, 3, 15, 2, 18, 6, 5, 2, 3, 4, 2, 1, 2, 13, 2, 18, 20],
    },
}

sankey_layout = {
    'title': "Basic Sankey Diagram",
    'font': {
        'size': 10,
    },
}

fig = {'data': [sankey_data], 'layout': sankey_layout}
plotly.plotly.iplot(fig, validate=False)

In [None]:
# Labels of the Sankey nodes; a node whose label starts with "Yes" is coloured
# green, any other node red.
states = [
    "Yes - 1",
    "Yes - 2", "No - 2",
    "Yes - 3", "No - 3", "Yes - 3", "No - 3",
    "Yes - 4", "No - 4", "Yes - 4", "No - 4",
    "Yes - 5", "No - 5",
]
colors = ["green" if state.startswith("Yes") else "red" for state in states]

# Every link goes from node source[i] to node target[i] with weight value[i];
# the numbers in source and target are positions in `states`.
sankey_data = {
    'type': 'sankey',
    'node': {
        'pad': 15,
        'thickness': 58,
        'line': {
            'color': "black",
            'width': 0.5,
        },
        'label': states,
        'color': colors,
    },
    'link': {
        'source': [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6, 7, 7, 8, 8, 9, 10],
        'target': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 11, 12, 11, 12, 12, 12],
        'value': [38, 11, 18, 20, 6, 5, 3, 15, 2, 18, 6, 5, 2, 1, 2, 13, 2, 29],
    },
}

sankey_layout = {
    'title': "Basic Sankey Diagram",
    'font': {
        'size': 10,
    },
    'showlegend': True,
}

fig = {'data': [sankey_data], 'layout': sankey_layout}
plotly.plotly.iplot(fig)

In [None]:
# Number of respondents per answer for each of the three rows of the chart.
# The five positions correspond to the legend entries below, in order:
# 'Always', 'Often', 'Sometimes', 'Seldom', 'Never'.
sourcecode = np.array([3, 11, 18, 21, 8])
project = np.array([3, 9, 14, 16, 19])
ide = np.array([1, 7, 11, 11, 31])

# Two stacked axes: a tall one for the bars and a thin one that only holds the legend.
fig, (ax, lax) = plt.subplots(
    nrows=2,
    figsize=(10, 5),
    gridspec_kw={"height_ratios": [10, 1]},
)
dataframe = pd.DataFrame(np.asmatrix([ide, project, sourcecode]))
# NOTE: DataFrame.plot returns the Axes it drew on, so `fig` refers to an Axes
# (not the Figure) from here on.
fig = dataframe.plot.barh(ax=ax, stacked=True, legend=False)
ax.yaxis.set_ticklabels(['IDE', 'Project', 'Source code'])
ax.set_xlabel('Number of respondents', fontsize=16)

for tick_label in ax.get_yticklabels() + ax.get_xticklabels():
    tick_label.set_fontsize(16)
plt.tight_layout()

# Reuse the bar handles of the chart, but show them in the separate legend
# axes with readable answer names.
handles, _ = ax.get_legend_handles_labels()
lax.legend(
    handles,
    ('Always', 'Often', 'Sometimes', 'Seldom', 'Never'),
    borderaxespad=0,
    mode="expand",
    ncol=5,
    prop={'size': 16},
)
lax.axis("off")

ax.figure.savefig('img/ignore-configuration-options.png')