In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as md
import pymongo
import bson
import json
from datetime import datetime
from collections import Counter,defaultdict

In [None]:
# Load the raw exports that every later cell works on.
# The two BSON dumps are read as bytes and decoded in one go.
with open('users.bson', 'rb') as user_file:
    users = bson.decode_all(user_file.read())
with open('events.bson', 'rb') as events_file:
    events = bson.decode_all(events_file.read())
# Read the JSON explicitly as UTF-8: without an encoding argument, text mode
# uses the platform's locale encoding, so the same file could decode
# differently (or fail) depending on the machine running the notebook.
# eclipse_messages is later indexed with string keys to translate numeric
# warning types into readable category names.
with open('eclipse-messages.json', 'r', encoding='utf-8') as messages_file:
    eclipse_messages = json.load(messages_file)

In [None]:
# Static analysis events: event type 'sa-wc' or 'sa-wr', and only those
# records that actually carry a 'warning' entry.
sa_events = [
    event
    for event in events
    if event['et'] in ('sa-wc', 'sa-wr') and 'warning' in event
]
print('Number of static analysis events: ' + str(len(sa_events)))

def _draw_count_bars(ylabel, xlabel, positions, heights, tick_labels):
    """Draw one bar chart on a new 20x10 figure, label it, and show it."""
    plt.figure(figsize=(20,10))
    plt.bar(positions, heights)
    plt.xticks(positions, tick_labels, rotation=90)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.show()


def plot_counts(ylabel, xlabel, count_list, top_n_items = 0):
    """Plot how often each distinct item occurs in ``count_list``.

    Items are counted and drawn as bars in descending order of frequency.

    Parameters:
        ylabel: label for the y axis.
        xlabel: label for the x axis.
        count_list: iterable of hashable items to count.
        top_n_items: when non-zero, an extra chart limited to the first
            ``top_n_items`` bars is shown before the chart of all items.

    Returns:
        None. If ``count_list`` is empty nothing is plotted; a short notice
        is printed instead of failing while unpacking the (empty) counts.
    """
    # most_common() sorts by count, descending, keeping first-seen order for ties.
    ranked = Counter(count_list).most_common()
    if not ranked:
        print('Nothing to plot: no items to count')
        return

    labels, values = zip(*ranked)
    indexes = np.arange(len(labels))

    if top_n_items != 0:
        _draw_count_bars(
            ylabel,
            xlabel,
            indexes[:top_n_items],
            values[:top_n_items],
            labels[:top_n_items],
        )

    _draw_count_bars(ylabel, xlabel, indexes, values, labels)

def print_dictionary_as_table(header1, header2, dictionary):
    """Print ``dictionary`` as a two-column text table.

    Parameters:
        header1: heading of the key column; its length is also the width
            the keys are left-aligned (padded) to.
        header2: heading of the value column.
        dictionary: mapping to print. A value that has a length (list, str,
            ...) is shown as that length; any other value (int, float, ...)
            is shown as-is.
    """
    print(header1 + ' | ' + header2)
    key_width = len(header1)
    for key, value in dictionary.items():
        # Sized values are summarised by their size; values without a length
        # (ints, but also floats, bools, ...) are printed directly.
        try:
            cell = len(value)
        except TypeError:
            cell = value
        # str(key) first, so keys without format-spec support still print.
        print(str(key).ljust(key_width) + ' | ' + str(cell))

In [None]:
# Leave out events whose warning type is 'unknown'.
sa_events_with_classifications = [
    event for event in sa_events if event['warning']['type'] != 'unknown'
]
# Purely numeric warning types are translated through eclipse_messages
# (looked up under the numeric id minus one, as a string key); every other
# type is used verbatim as its category label. The trailing 25 asks
# plot_counts for an additional chart of the 25 most frequent categories.
plot_counts(
    'Number of events in category',
    'Warning category',
    [
        eclipse_messages[str(int(warning_type) - 1)] if warning_type.isdigit() else warning_type
        for warning_type in (event['warning']['type'] for event in sa_events_with_classifications)
    ],
    25,
)

In [None]:
def showHeatMap(warnings, bins = 100):
    """Show a one-row heat map of relative warning positions.

    Parameters:
        warnings: sequence of relative positions; values are counted into
            equal-width bins covering [0, 1].
        bins: number of bins across [0, 1] (default 100, i.e. one bin per
            0.01 step).
    """
    # Build the edges from integers (i / bins) rather than a float-step
    # arange: repeated float steps can drift by a rounding error, which
    # can shift values lying exactly on an edge (the callers round to two
    # decimals) into the neighbouring bin, or change the number of edges.
    edges = np.arange(bins + 1) / bins
    hist, _ = np.histogram(warnings, edges)
    # imshow needs a 2-D array: turn the counts into a single image row.
    hist = hist[np.newaxis, :]
    plt.imshow(hist, aspect = "auto", cmap="inferno", extent=[0,1,0,100])
    # The vertical axis carries no data (one row), so its ticks are hidden.
    plt.gca().set_yticks([])
    plt.xlabel('Location of warning relative to total file length')
    plt.ylabel('Frequency of occurrence')
    plt.show()

def get_relative_line(event):
    """Return the warning's line divided by the document length, rounded to 2 decimals.

    The length is the warning's 'doctotal'; when that is -1 the event's
    document 'sloc' is used as the denominator instead.
    """
    warning = event['warning']
    total_lines = warning['doctotal']
    line = warning['line']
    if total_lines == -1:
        # -1 is treated as "no doctotal": fall back to the document's sloc.
        total_lines = event['doc']['sloc']
    return round(line / total_lines, 2)

print('Warnings added/removed relative to file')
# Keep events that can yield a relative position: a 'doctotal' entry is
# present, sloc is not 0/1/-1, doctotal is not 0 (it is used as a divisor
# whenever it is not -1) and the warning has a line (-1 is excluded).
# Stored as a list so the cell can be re-run or the result inspected
# without the data having been consumed.
sa_events_doctotal = [
    event
    for event in sa_events
    if 'doctotal' in event['warning']
    and abs(event['doc']['sloc']) != 1
    and event['doc']['sloc'] != 0
    and event['warning']['doctotal'] != 0
    and event['warning']['line'] != -1
]
# NOTE(review): positions equal to exactly 0.5 are dropped here - presumably
# an artefact of the data; confirm the reason for this exclusion.
showHeatMap([position for position in map(get_relative_line, sa_events_doctotal) if position != 0.5])

print('Warning snapshots of all warnings')
sa_snapshots = list(filter(lambda event: event['et'] == 'sa-snap', events))
snapshots_relative_loc = []
for event in sa_snapshots:
    for warning in event['warnings']:
        sloc = event['doc']['sloc']
        # Same sloc guard as for the events above. The previous condition,
        # abs(sloc != -1), took abs() of a boolean, so it only rejected -1:
        # a sloc of 0 reached the division below and a sloc of 1 was kept.
        if sloc != 0 and abs(sloc) != 1:
            snapshots_relative_loc.append(round(warning['line'] / sloc, 2))
showHeatMap(snapshots_relative_loc)

In [None]:
# One bar per user id, sized by that user's number of static analysis events.
plot_counts(
    'Number of events per user',
    'User ID',
    [event['userId'] for event in sa_events],
)

In [None]:
# Restrict to 'sa-wr' events and pull out each warning's 'diff' value.
created_warning_events = [event for event in sa_events if event['et'] == 'sa-wr']
life_time_events = [event['warning']['diff'] for event in created_warning_events]

# A diff of -1 is counted as "no time diff"; everything else as a usable one.
has_time_diff = [diff for diff in life_time_events if diff != -1]
has_no_time_diff = [diff for diff in life_time_events if diff == -1]

print('Number of warnings which have a time diff: ' + str(len(has_time_diff)))
print('Number of warnings which do not have a time diff: ' + str(len(has_no_time_diff)))

In [None]:
# Group the usable diffs (anything but -1) by the user id of their event.
life_time_per_user = defaultdict(list)
for event in created_warning_events:
    resolution_time = event['warning']['diff']
    if resolution_time == -1:
        continue
    life_time_per_user[event['userId']].append(resolution_time)

# Only users with more than 25 recorded diffs get a box in the plot.
values = [diffs for diffs in life_time_per_user.values() if len(diffs) > 25]

# The table lists every user (header padded to 40 columns), not just the plotted ones.
print_dictionary_as_table('{:<40}'.format('User ID'), 'Number of events', life_time_per_user)

plt.figure(figsize=(10,10))
ax = plt.axes()
bp = plt.boxplot(values, sym='+', vert=False, showfliers=False,notch=False)
plt.ylabel('Distribution of resolution time per user')
plt.xlabel('Time in seconds to resolve a warning')
# Tick labels are the user ids that passed the same > 25 threshold, in the
# same dictionary order as the plotted series.
ax.set_yticklabels([user for user, diffs in life_time_per_user.items() if len(diffs) > 25])
plt.show()

In [None]:
# Index the users by id once instead of rescanning the whole user list for
# every event. setdefault keeps the first user seen for an id, matching a
# first-match lookup over the list.
users_by_id = {}
for user in users:
    users_by_id.setdefault(user['id'], user)

# unique_users: user id -> programming experience, for users with at least one usable diff.
# life_time_per_programming_experience: programming experience -> list of diffs.
unique_users = defaultdict(str)
life_time_per_programming_experience = defaultdict(list)
for event in created_warning_events:
    if event['warning']['diff'] != -1:
        user = users_by_id.get(event['userId'])
        if user is not None:
            experience = user['programmingExperience']
            unique_users[event['userId']] = experience
            life_time_per_programming_experience[experience].append(event['warning']['diff'])
        else:
            print('Could not find user with id: ' + str(event['userId']))

counts_per_exp = list(life_time_per_programming_experience.values())
# Derive the plotted series and their tick labels from one selection so the
# two can never fall out of step; only groups with more than 25 diffs are shown.
plotted_experience_levels = [
    exp for exp, diffs in life_time_per_programming_experience.items() if len(diffs) > 25
]
values = [life_time_per_programming_experience[exp] for exp in plotted_experience_levels]

print_dictionary_as_table('Programming experience', 'Number of events', life_time_per_programming_experience)

print()

# Number of distinct users per programming experience level.
programming_exp_user_count = defaultdict(int)
for user, exp in unique_users.items():
    programming_exp_user_count[exp] += 1

print_dictionary_as_table('Programming experience', 'Number of users', programming_exp_user_count)

plt.figure(figsize=(10,10))
ax = plt.axes()
bp = plt.boxplot(values, sym='+', vert=False, showfliers=False,notch=False)
# This plot groups by programming experience, so the axis label says so
# (it previously repeated the per-user label from the preceding plot).
plt.ylabel('Distribution of resolution time per programming experience')
plt.xlabel('Time in seconds to resolve a warning')
ax.set_yticklabels(plotted_experience_levels)
plt.show()