In [1]:
import glob
import pickle
import tqdm
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [2]:
import collect_data as colda

In [3]:
# Make sure the figure output folder exists before any plot is saved into it.
Path("plots").mkdir(exist_ok=True, parents=True)

# read data from cache

In [4]:
# Load the cached profile-coverage results of every tool. A tool without a
# cache is reported and left out of the dict; later cells check for membership
# before using a tool's results.
results_profcov = {}
for tool in colda.TOOLS:
    try:
        results_profcov[tool] = colda.read_from_cache_profcov(tool)
    except colda.CacheNotFoundError:
        # Best-effort load: name the missing tool instead of skipping silently.
        print(f"no profile-coverage cache for {tool}, skipping")

In [5]:
# Load the cached acvtool results of every tool. A tool without a cache is
# reported and left out of the dict.
results_acvtool = {}
for tool in colda.TOOLS:
    try:
        results_acvtool[tool] = colda.read_from_cache_acvtool(tool)
    except colda.CacheNotFoundError:
        # Best-effort load: name the missing tool instead of skipping silently.
        print(f"no acvtool cache for {tool}, skipping")

# Profile coverage over time - all runs separate

In [6]:
# see https://seaborn.pydata.org/generated/seaborn.lineplot.html#seaborn.lineplot

# Base font size for every figure drawn in this notebook.
plt.rcParams['font.size'] = 12

def lineplot_relative(results, title, xlim=(0, 600), ylim=(0, 1)):
    """Plot profile coverage over time with one line per run and save the figure.

    Lines are coloured by 'app id' and drawn separately for every 'run id'
    (``units='run id'`` together with ``estimator=None``), so the runs are not
    aggregated.

    Parameters
    ----------
    results
        Data handed to seaborn's ``data=`` argument; it must provide the
        columns 'seconds since first probe hit', 'profile coverage',
        'app id' and 'run id'.
    title : str
        Used only as the suffix of the output file names; the figure itself
        gets no title.
    xlim, ylim : tuple
        Axis limits. The defaults are the values that used to be hard-coded.

    The figure is written to ``plots/`` as PDF and PNG and then shown.
    """
    # Draw on a dedicated figure instead of whatever axes pyplot considers
    # current, so a figure left open by an earlier cell is never drawn over.
    fig, ax = plt.subplots()
    sns.lineplot(
            x='seconds since first probe hit',
            y='profile coverage',
            hue='app id',
            units='run id',
            estimator=None,
            data=results,
            legend=False,
            ax=ax)
    ax.set(xlim=xlim, ylim=ylim)
    # Many lines overlap; transparency keeps dense regions readable.
    for line in ax.lines:
        line.set_alpha(0.6)
    ax.set_xlabel("Seconds since first probe hit")
    ax.set_ylabel("Profile Coverage")
    fig.savefig(f"plots/profile-coverage_lineplot_relative_{title}.pdf", bbox_inches='tight')
    fig.savefig(f"plots/profile-coverage_lineplot_relative_{title}.png", bbox_inches='tight')
    plt.show()
    # Release the figure even on backends where show() does not close it.
    plt.close(fig)

# this takes ~3m to plot

In [7]:
# Per-run plot for the "time" tool, saved under the name "baseline".
if "time" in results_profcov:
    lineplot_relative(title='baseline', results=results_profcov['time'])

In [8]:
# Per-run plot for the "monkey" tool, only if its results were loaded.
if "monkey" in results_profcov:
    lineplot_relative(title='monkey', results=results_profcov['monkey'])

In [9]:
# Per-run plot for the "droidbot" tool, only if its results were loaded.
if "droidbot" in results_profcov:
    lineplot_relative(title='droidbot', results=results_profcov['droidbot'])

In [10]:
# Per-run plot for the "fastbot" tool, only if its results were loaded.
if "fastbot" in results_profcov:
    lineplot_relative(title='fastbot', results=results_profcov['fastbot'])

# Profile coverage over time - maximum precomputed

In [11]:
def precompute_max(results, tools=None):
    """
    For every tool keep, per app, only the samples of that app's best run.

    we precompute the maximum by selecting the run with the highest coverage at any point, not whether it's the last value
    (max can mean highest or longest - we use highest)

    Parameters
    ----------
    results : mapping
        Tool name -> DataFrame with the columns 'app id', 'run id',
        'profile coverage' and 'seconds since first probe hit'.
    tools : iterable of str, optional
        Tool names to process. Defaults to ``colda.TOOLS``.

    Returns
    -------
    dict
        Tool name -> DataFrame with the columns 'app id', 'profile coverage'
        and 'seconds since first probe hit', restricted to each app's best
        run. Rows keep their original index labels. Tools missing from
        ``results`` are reported and skipped.
    """
    if tools is None:
        tools = colda.TOOLS

    run_key = ['app id', 'run id']
    res = {}
    for tool in tools:
        if tool not in results:
            print(f"{tool} not in dataset, skipping")
            continue

        # subframe with only relevant columns
        sf = results[tool][['app id','run id','profile coverage','seconds since first probe hit']].dropna()

        # idxmax returns index *labels*. If the frame's index is not unique
        # (e.g. several runs concatenated without re-indexing), looking such a
        # label up with .loc would also return rows of other runs. Resolve the
        # labels on a uniquely indexed copy instead.
        unique_sf = sf.reset_index(drop=True)
        idx = unique_sf.groupby('app id')['profile coverage'].idxmax()
        best_runs = unique_sf.loc[idx, run_key]

        # Positional boolean mask: True for every sample of an app's best run.
        in_best_run = sf.set_index(run_key).index.isin(best_runs.set_index(run_key).index)
        selection = sf[in_best_run]

        res[tool] = selection[['app id', 'profile coverage', 'seconds since first probe hit']].copy()
    return res

# Per tool, the result of precompute_max on the loaded profile-coverage data.
results_profcov_precomputed_max = precompute_max(results_profcov)

In [12]:
def lineplot_max(results, title, xlim=(0, 600), ylim=(0, 1)):
    """Plot profile coverage over time with one line per app and save the figure.

    With ``estimator=None`` and no ``units`` seaborn draws all observations of
    an 'app id' as one line, so ``results`` is expected to hold a single run
    per app (as produced by ``precompute_max``).

    Parameters
    ----------
    results
        Data handed to seaborn's ``data=`` argument; it must provide the
        columns 'seconds since first probe hit', 'profile coverage' and
        'app id'.
    title : str
        Used only as the suffix of the output file names; the figure itself
        gets no title.
    xlim, ylim : tuple
        Axis limits. The defaults are the values that used to be hard-coded.

    The figure is written to ``plots/`` as PDF and PNG and then shown.
    """
    # Draw on a dedicated figure instead of whatever axes pyplot considers
    # current, so a figure left open by an earlier cell is never drawn over.
    fig, ax = plt.subplots()
    sns.lineplot(
            x='seconds since first probe hit',
            y='profile coverage',
            hue='app id',
            estimator=None,
            data=results,
            legend=False,
            ax=ax)
    ax.set(xlim=xlim, ylim=ylim)
    # Many lines overlap; transparency keeps dense regions readable.
    for line in ax.lines:
        line.set_alpha(0.6)
    ax.set_xlabel("Seconds since first probe hit")
    ax.set_ylabel("Profile Coverage")
    fig.savefig(f"plots/profile-coverage_lineplot_max_{title}.pdf", bbox_inches='tight')
    fig.savefig(f"plots/profile-coverage_lineplot_max_{title}.png", bbox_inches='tight')
    plt.show()
    # Release the figure even on backends where show() does not close it.
    plt.close(fig)

In [13]:
# Guard on the dict that is actually indexed below, so this cell cannot raise a
# KeyError when the precomputed results are out of sync with results_profcov.
if "time" in results_profcov_precomputed_max:
    lineplot_max(results_profcov_precomputed_max['time'], 'baseline')

In [14]:
# Guard on the dict that is actually indexed below, so this cell cannot raise a
# KeyError when the precomputed results are out of sync with results_profcov.
if "monkey" in results_profcov_precomputed_max:
    lineplot_max(results_profcov_precomputed_max['monkey'], 'monkey')

In [15]:
# Guard on the dict that is actually indexed below, so this cell cannot raise a
# KeyError when the precomputed results are out of sync with results_profcov.
if "droidbot" in results_profcov_precomputed_max:
    lineplot_max(results_profcov_precomputed_max['droidbot'], 'droidbot')

In [16]:
# Guard on the dict that is actually indexed below, so this cell cannot raise a
# KeyError when the precomputed results are out of sync with results_profcov.
if "fastbot" in results_profcov_precomputed_max:
    lineplot_max(results_profcov_precomputed_max['fastbot'], 'fastbot')

# comparative plots with acvtool

In [23]:
def generate_boxplot_data_profcov(results=None, tools=None):
    """Build one row per (tool, app) holding the mean of the per-run maxima.

    For every tool the highest 'profile coverage' of each ('app id', 'run id')
    pair is taken first; those maxima are then averaged per 'app id'.

    Parameters
    ----------
    results : mapping, optional
        Tool name -> DataFrame with the columns 'app id', 'run id' and
        'profile coverage'. Defaults to the notebook-level ``results_profcov``.
    tools : iterable of str, optional
        Tool names to process. Defaults to ``colda.TOOLS``.

    Returns
    -------
    pandas.DataFrame
        Columns 'tool', 'mean max coverage' and 'app id'.

    Raises
    ------
    KeyError
        If a tool's frame lacks one of the required columns. A tool that is
        absent from ``results`` is skipped and does not raise.
    """
    if results is None:
        results = results_profcov
    if tools is None:
        tools = colda.TOOLS

    tool_names = []
    appids = []
    mean_max_coverages = []
    for tool in tools:
        # for partial results, skip missing tool results; an explicit
        # membership test keeps genuine KeyErrors (missing columns) visible
        if tool not in results:
            continue

        # from max values, create average; only the coverage column is
        # aggregated so that 'run id' (possibly non-numeric) is never averaged
        per_run_max = results[tool].groupby(['app id', 'run id'])['profile coverage'].max()
        _mmc = per_run_max.groupby(level='app id').mean()

        mean_max_coverages.extend(list(_mmc))
        appids.extend(list(_mmc.index))
        tool_names.extend([tool] * len(_mmc))

    return pd.DataFrame({'tool': tool_names, 'mean max coverage': mean_max_coverages, "app id": appids})

# One row per (tool, app): mean over runs of the maximum profile coverage.
mean_max_profcov = generate_boxplot_data_profcov()

In [24]:
def generate_boxplot_data_acvtool(results=None, tools=None):
    """Build one row per (tool, app) holding the mean of the per-run maxima.

    For every tool the highest 'code coverage' of each ('app id', 'run id')
    pair is taken first; those maxima are then averaged per 'app id'.

    Parameters
    ----------
    results : mapping, optional
        Tool name -> DataFrame with the columns 'app id', 'run id' and
        'code coverage'. Defaults to the notebook-level ``results_acvtool``.
    tools : iterable of str, optional
        Tool names to process. Defaults to ``colda.TOOLS``.

    Returns
    -------
    pandas.DataFrame
        Columns 'tool', 'mean max coverage' and 'app id'.

    Raises
    ------
    KeyError
        If a tool's frame lacks one of the required columns. A tool that is
        absent from ``results`` is skipped and does not raise.
    """
    if results is None:
        results = results_acvtool
    if tools is None:
        tools = colda.TOOLS

    tool_names = []
    appids = []
    mean_max_coverages = []
    for tool in tools:
        # for partial results, skip missing tool results; an explicit
        # membership test keeps genuine KeyErrors (missing columns) visible
        if tool not in results:
            continue

        # only the coverage column is aggregated so that 'run id' (possibly
        # non-numeric) is never averaged
        per_run_max = results[tool].groupby(['app id', 'run id'])['code coverage'].max()
        _mmc = per_run_max.groupby(level='app id').mean()

        mean_max_coverages.extend(list(_mmc))
        appids.extend(list(_mmc.index))
        tool_names.extend([tool] * len(_mmc))

    return pd.DataFrame({'tool': tool_names, 'mean max coverage': mean_max_coverages, "app id": appids})

# One row per (tool, app): mean over runs of the maximum acvtool code coverage.
mean_max_acvtool = generate_boxplot_data_acvtool()

In [25]:
def generate_boxplot_data_profcov_acvtool_apps_only():
    """Return the rows of ``mean_max_profcov`` whose app also occurs in ``mean_max_acvtool``."""
    shared_apps = mean_max_acvtool['app id'].unique().tolist()
    is_shared = mean_max_profcov['app id'].isin(shared_apps)
    return mean_max_profcov[is_shared]

# Profile-coverage rows restricted to the apps that also appear in mean_max_acvtool.
mean_max_profcov_acvtool_apps_only = generate_boxplot_data_profcov_acvtool_apps_only()

In [26]:
def draw_stripplot(data, name, ylabel="Profile Coverage"):
    """Draw a jittered strip plot of 'mean max coverage' per tool and save it.

    Every row of ``data`` becomes one point. On top of the points a horizontal
    marker shows the per-tool mean, with an error bar spanning the 50 %
    percentile interval (``errorbar=("pi", 50)``).

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns 'tool' and 'mean max coverage'.
    name : str
        Suffix of the output file names ``plots/stripplot_{name}.pdf|png``.
    ylabel : str
        Label of the y axis.

    See https://seaborn.pydata.org/examples/jitter_stripplot.html for the
    example this plot is based on.
    """
    # Display names for the tool ids used in this notebook. The mapping follows
    # the order of the previously fixed label list and assumes colda.TOOLS is
    # time, monkey, droidbot, fastbot - TODO confirm. Unknown ids are shown as-is.
    display_names = {
        "time": "No-Interaction",
        "monkey": "Monkey",
        "droidbot": "DroidBot",
        "fastbot": "Fastbot2",
    }
    # Tools actually present, in order of first appearance. Passing this order
    # to both plots keeps the tick labels correct for partial results; a fixed
    # list of four labels would mislabel or fail when a tool is missing.
    tool_order = list(data["tool"].unique())

    # Initialize the figure
    f, ax = plt.subplots()

    # Show each observation with a scatterplot
    sns.stripplot(
        data=data, x="tool", y="mean max coverage",
        order=tool_order,
        color="#4477AA",
        dodge=True, alpha=.65, zorder=1, legend=False,
        jitter=0.25,
        ax=ax,
    )
    ax.set(ylim=(0,1))

    # Show the conditional means as wide horizontal markers over the strips
    sns.pointplot(
        data=data, x="tool", y="mean max coverage",
        order=tool_order,
        color="#AA3377",
        errorbar=("pi", 50),
        markers="_", markersize=64, linestyle="none",
        ax=ax,
    )

    # Fix the tick positions before relabelling so labels and ticks always match.
    ax.set_xticks(list(range(len(tool_order))))
    ax.set_xticklabels([display_names.get(tool, tool) for tool in tool_order])
    ax.set_ylabel(ylabel)
    ax.set_xlabel("")

    f.savefig(f"plots/stripplot_{name}.pdf", bbox_inches='tight')
    f.savefig(f"plots/stripplot_{name}.png", bbox_inches='tight')
    plt.show()
    # Release the figure even on backends where show() does not close it.
    plt.close(f)

In [27]:
# Strip plot of profile coverage for all apps.
draw_stripplot(data=mean_max_profcov, name="profile_coverage")

In [28]:
# Strip plot of the acvtool code coverage.
draw_stripplot(data=mean_max_acvtool, name="code_coverage_acvtool", ylabel="Code Coverage")

In [29]:
# Strip plot of profile coverage, limited to the apps that also have acvtool results.
draw_stripplot(data=mean_max_profcov_acvtool_apps_only, name="profile_coverage_acvtool_apps_only")

In [30]:
# Summary statistics of the profile-coverage table over all tools combined.
mean_max_profcov.describe()

In [34]:
# Summary statistics of 'mean max coverage' (profile coverage), one row per tool.
mean_max_profcov[['tool','mean max coverage']].groupby('tool').describe()

In [35]:
# Summary statistics of 'mean max coverage' (acvtool code coverage), one row per tool.
mean_max_acvtool[['tool','mean max coverage']].groupby('tool').describe()