Libraries & Database Setup
==========================

Load the required libraries, define the query helper functions, and set up the database connection.

In [None]:
library(ggplot2)
library(DBI)
library(RPostgres)
library(reshape)
library(ggthemes)
library(scales)
library(repr)
library(sm)
library(vioplot)

# Notebook-wide settings: monospace font family and an 8 x 6 size for plots rendered via repr.
# NOTE(review): warn = -1 makes R ignore every warning for the rest of the session, which can
# hide real problems (deprecated calls, dropped rows, unknown plot parameters).
options(repr.plot.family = 'mono', repr.plot.width = 8, repr.plot.height = 6, warn = -1)

#' List the experiments stored in the database.
#'
#' Selects every (experiment_group, experiment_name) pair from the `run`
#' table, excluding the all-zero UUID group, together with the latest
#' `finished` value of its runs (reported as `completed`).
#'
#' @param connection An open DBI connection.
#' @return A data frame with the columns `experiment_group`,
#'   `experiment_name` and `completed`, ordered by `completed`.
get_experiments <- function(connection) {
  # strwrap() collapses the multi-line literal into a single-line statement.
  query <- strwrap(
    "SELECT experiment_group, experiment_name, MAX(finished) as completed
         FROM run
         WHERE NOT experiment_group = '00000000-0000-0000-0000-000000000000'::uuid
         GROUP BY experiment_group, experiment_name ORDER BY completed;",
    width = 10000, simplify = TRUE)
  # dbGetQuery() sends the statement, fetches all rows and always clears the
  # result set, even when fetching fails.
  dbGetQuery(connection, query)
}

#' Fetch the summed metric values of the 'raw' runs of one experiment group.
#'
#' NOTE(review): the statement groups by `value` as well, so SUM() only merges
#' rows whose values are identical -- confirm that this is intended.
#'
#' @param experiment UUID of the experiment group; it is pasted into the SQL
#'   text, so only pass trusted values (e.g. taken from get_experiments()).
#' @param connection An open DBI connection.
#' @return The query result after melt(); when it has rows, `project_name`
#'   is a factor whose levels follow the order of first appearance in the
#'   result (the query sorts by `sumval` descending).
get_raw_runtime <- function(experiment, connection) {
  query <- strwrap(sprintf(
    "SELECT project_name, name, SUM(value) as sumval
     FROM run, metrics WHERE run.id = metrics.run_id
     AND experiment_name = 'raw'
     AND experiment_group = '%s'::uuid
     GROUP BY run_group, project_name, name, value
     ORDER BY sumval DESC;
    ", experiment), width = 10000, simplify = TRUE)
  # dbGetQuery() clears the result set even if fetching fails.
  res <- melt(dbGetQuery(connection, query))
  if (nrow(res) > 0) {
    # A project occurs once per metric name; factor() rejects duplicated
    # levels, so pass each project only once, in order of first appearance.
    res$project_name <- factor(res$project_name,
                               levels = unique(res$project_name))
  }
  res
}

#' Fetch the 'pprof.dyncov' metric with the project domain for one experiment group.
#'
#' @param experiment UUID of the experiment group (pasted into the SQL text).
#' @param connection An open DBI connection.
#' @param order_by Text placed verbatim after ORDER BY, e.g. "project.domain"
#'   or "metrics.value". It is not escaped, so only pass trusted column names.
#' @return The query result after melt(); when it has rows, `project_name`
#'   is a factor whose levels follow the order of first appearance in the
#'   sorted result.
get_papi_dyncov <- function(experiment, connection, order_by) {
  query <- strwrap(sprintf(
    "SELECT run.project_name, metrics.value, project.domain
     FROM run, metrics, project
     WHERE experiment_group = '%s'
     AND run.id = metrics.run_id
     AND run.project_name = project.name
     AND metrics.name = 'pprof.dyncov' ORDER BY %s;
    ", experiment, order_by), width = 10000, simplify = TRUE)
  # dbGetQuery() clears the result set even if fetching fails.
  res <- melt(dbGetQuery(connection, query))
  if (nrow(res) > 0) {
    # A project can have several runs; factor() rejects duplicated levels,
    # so pass each project only once, in order of first appearance.
    res$project_name <- factor(res$project_name,
                               levels = unique(res$project_name))
  }
  res
}

#' Fetch the maximum 'pprof.dyncov' value per project for the 'papi' runs
#' of one experiment group.
#'
#' @param c An open DBI connection.
#' @param exp UUID of the experiment group (pasted into the SQL text).
#' @return The query result after melt(), ordered by project name.
papi_dyncov <- function(c, exp) {
  q <- strwrap(sprintf("SELECT project_name, name, MAX(value)
                              FROM run, metrics 
                              WHERE run.id = metrics.run_id 
                              AND experiment_group = '%s' 
                              AND experiment_name = 'papi' 
                              AND name = 'pprof.dyncov' 
                              GROUP BY project_name, name 
                              ORDER BY project_name;", exp),
               width = 10000, simplify = TRUE)
  # dbGetQuery() sends, fetches and always clears the result set, so nothing
  # stays open on the connection when fetching fails.
  melt(dbGetQuery(c, q))
}

#' Fetch the summed 'pprof.time.total_s' value per project for the 'papi'
#' runs of one experiment group.
#'
#' @param c An open DBI connection.
#' @param exp UUID of the experiment group (pasted into the SQL text).
#' @return The query result after melt(), ordered by project name.
papi_time <- function(c, exp) {
  q <- strwrap(sprintf("SELECT project_name, name, SUM(value) 
                                FROM run, metrics 
                                WHERE run.id = metrics.run_id 
                                AND experiment_group = '%s' 
                                AND experiment_name = 'papi' 
                                AND name = 'pprof.time.total_s' 
                                GROUP BY project_name, name 
                                ORDER BY project_name;", exp),
               width = 10000, simplify = TRUE)
  # dbGetQuery() sends, fetches and always clears the result set, so nothing
  # stays open on the connection when fetching fails.
  melt(dbGetQuery(c, q))
}

#' Fetch the summed 'pprof.time.scops_s' value per project for the 'papi'
#' runs of one experiment group.
#'
#' @param c An open DBI connection.
#' @param exp UUID of the experiment group (pasted into the SQL text).
#' @return The query result after melt(), ordered by project name.
papi_time_scops <- function(c, exp) {
  q <- strwrap(sprintf("SELECT project_name, name, SUM(value) 
                                FROM run, metrics 
                                WHERE run.id = metrics.run_id 
                                AND experiment_group = '%s'::uuid
                                AND experiment_name = 'papi' 
                                AND name = 'pprof.time.scops_s' 
                                GROUP BY project_name, name 
                                ORDER BY project_name;", exp),
               width = 10000, simplify = TRUE)
  # dbGetQuery() sends, fetches and always clears the result set, so nothing
  # stays open on the connection when fetching fails.
  melt(dbGetQuery(c, q))
}

#' Fetch the distinct metric names stored in the `likwid` table.
#'
#' @param c An open DBI connection.
#' @return The query result (one `metric` column) after melt().
likwid.get_metrics <- function(c) {
  q <- "SELECT DISTINCT(metric) FROM likwid;"
  # dbGetQuery() sends, fetches and always clears the result set, so nothing
  # stays open on the connection when fetching fails.
  melt(dbGetQuery(c, q))
}

#' Sum one LIKWID metric per project over the 'main' region for the
#' 'polyjit' runs of one experiment group.
#'
#' @param c An open DBI connection.
#' @param exp UUID of the experiment group (pasted into the SQL text).
#' @param metric Name of the LIKWID metric (pasted into the SQL text).
#' @return The query result after melt(), ordered by project name; the sum
#'   is selected under the alias `Total`.
likwid.total <- function(c, exp, metric) {
  q <- strwrap(sprintf("SELECT project_name, SUM(value) as Total
                                FROM run, likwid 
                                WHERE run.id = likwid.run_id 
                                AND experiment_group = '%s'::uuid
                                AND experiment_name = 'polyjit' 
                                AND metric = '%s'
                                AND region = 'main' 
                                GROUP BY project_name 
                                ORDER BY project_name;", exp, metric),
               width = 10000, simplify = TRUE)
  # dbGetQuery() sends, fetches and always clears the result set, so nothing
  # stays open on the connection when fetching fails.
  melt(dbGetQuery(c, q))
}

#' Sum one LIKWID metric per project over the regions 'JitSelectParams',
#' 'CodeGenJIT' and 'GetOrParsePrototype' for the 'polyjit' runs of one
#' experiment group.
#'
#' @param c An open DBI connection.
#' @param exp UUID of the experiment group (pasted into the SQL text).
#' @param metric Name of the LIKWID metric (pasted into the SQL text).
#' @return The query result after melt(), ordered by project name; the sum
#'   is selected under the alias `Overhead`.
likwid.overhead <- function(c, exp, metric) {
  q <- strwrap(sprintf("SELECT project_name, SUM(value) as Overhead
                                FROM run, likwid 
                                WHERE run.id = likwid.run_id 
                                AND experiment_group = '%s'::uuid
                                AND experiment_name = 'polyjit' 
                                AND metric = '%s'
                                AND (    region = 'JitSelectParams'
                                      OR region = 'CodeGenJIT'
                                      OR region = 'GetOrParsePrototype'
                                )
                                GROUP BY project_name 
                                ORDER BY project_name;", exp, metric),
               width = 10000, simplify = TRUE)
  # dbGetQuery() sends, fetches and always clears the result set, so nothing
  # stays open on the connection when fetching fails.
  melt(dbGetQuery(c, q))
}

#' Sum one LIKWID metric per project over every region except
#' 'JitSelectParams', 'CodeGenJIT', 'GetOrParsePrototype' and 'main' for the
#' 'polyjit' runs of one experiment group.
#'
#' @param c An open DBI connection.
#' @param exp UUID of the experiment group (pasted into the SQL text).
#' @param metric Name of the LIKWID metric (pasted into the SQL text).
#' @return The query result after melt(), ordered by project name; the sum
#'   is selected under the alias `Runtime`.
likwid.runtime <- function(c, exp, metric) {
  q <- strwrap(sprintf("SELECT project_name, SUM(value) as Runtime
                                FROM run, likwid 
                                WHERE run.id = likwid.run_id 
                                AND experiment_group = '%s'::uuid
                                AND experiment_name = 'polyjit' 
                                AND metric = '%s'
                                AND NOT (    region = 'JitSelectParams'
                                          OR region = 'CodeGenJIT'
                                          OR region = 'GetOrParsePrototype'
                                          OR region = 'main'
                                )
                                GROUP BY project_name 
                                ORDER BY project_name;", exp, metric),
               width = 10000, simplify = TRUE)
  # dbGetQuery() sends, fetches and always clears the result set, so nothing
  # stays open on the connection when fetching fails.
  melt(dbGetQuery(c, q))
}

# Open the PostgreSQL connection used by the cells of this notebook.
# Every setting can be overridden through an environment variable
# (PPROF_DB_NAME, PPROF_DB_USER, PPROF_DB_HOST, PPROF_DB_PORT,
# PPROF_DB_PASSWORD) so credentials do not have to be edited into the
# notebook; when a variable is unset the original value is used.
con <- dbConnect(RPostgres::Postgres(),
                 dbname = Sys.getenv("PPROF_DB_NAME", unset = "pprof"),
                 user = Sys.getenv("PPROF_DB_USER", unset = "pprof"),
                 host = Sys.getenv("PPROF_DB_HOST",
                                   unset = "debussy.fim.uni-passau.de"),
                 port = as.integer(Sys.getenv("PPROF_DB_PORT", unset = "32769")),
                 password = Sys.getenv("PPROF_DB_PASSWORD", unset = "pprof"))

Experiment Summary
------------------

A little summary of the data we have.

In [None]:
# Load the experiment list once, then split it into one table per experiment type.
# which() drops rows whose comparison is NA, exactly as subset() does.
exps <- get_experiments(con)
exps.raw <- exps[which(exps$experiment_name == "raw"), , drop = FALSE]
exps.polyjit <- exps[which(exps$experiment_name == "polyjit"), , drop = FALSE]
exps.papi <- exps[which(exps$experiment_name == "papi"), , drop = FALSE]
exps.papi.std <- exps[which(exps$experiment_name == "papi-std"), , drop = FALSE]

# HTML representation of the complete experiment list
repr_html(exps)

Experiment: RAW
---------------

This plots the runtime breakdown (real and user time; system time is currently disabled) for all projects, once per 'raw' experiment.


In [None]:
# Draw one plot per 'raw' experiment: real time (blue) and user time (red) per project.
# seq_len() gives an empty sequence when there is no 'raw' experiment; 1:nrow() would
# iterate over 1 and 0 in that case.
for (exp.index in seq_len(nrow(exps.raw))) {
    exp.name <- exps.raw[exp.index, "experiment_name"]
    exp.date <- exps.raw[exp.index, "completed"]
    exp <- exps.raw[exp.index, "experiment_group"]

    d <- get_raw_runtime(exp, con)
    # Keep only measurements below 100
    d.cast <- subset(d, value < 100)
    if (nrow(d.cast) == 0) {
        # Nothing to pivot or plot for this experiment
        message(sprintf("No 'raw' measurements below 100 for experiment %s; skipping.", exp))
        next
    }
    # One row per project, one column per metric name
    d.cast <- cast(data = d.cast, project_name ~ name, fun.aggregate = sum)

    p <- ggplot(data = d.cast, aes(x = project_name))
    p <- p + theme(axis.ticks.x = element_blank(),
                   axis.title.x = element_blank(),
                   axis.text.x  = element_blank(),
                   axis.title.y = element_blank(),
                   legend.position = "none",
                   plot.title = element_text(size = 8))
    p <- p + ggtitle(sprintf(" Runtime breakdown '%s' @ '%s' (%s)", exp.name, exp.date, exp))
    p <- p + geom_point(aes(y = raw.time.real_s), size = 1, colour = "blue")
    p <- p + geom_point(aes(y = raw.time.user_s), size = 1, colour = "red")
    #p <- p + geom_point(aes(y = raw.time.system_s), size = 1, colour = "green")
    # Values are not auto-printed inside a for loop, so render the plot explicitly.
    print(p)
}

Dynamic SCoP coverage
---------------------

Various plots using dynamic SCoP coverage. The following plots show dynamic SCoP coverage for each
project (aggregated from runs) sorted by domain.

* Timings have been aggregated with $sum$.
* Dynamic SCoP coverage has been aggregated with $max$.

In [None]:
# Draw one scatter plot per 'papi' / 'papi-std' experiment: dynamic SCoP coverage
# per project, coloured by project domain.
papi.exps <- rbind(exps.papi, exps.papi.std)
repr_html(papi.exps)
# seq_len() gives an empty sequence when there is no experiment; 1:nrow() would
# iterate over 1 and 0 in that case.
for (exp.index in seq_len(nrow(papi.exps))) {
    exp.name <- papi.exps[exp.index, "experiment_name"]
    # Take the date from this experiment's own row so the title matches the plotted data.
    exp.date <- papi.exps[exp.index, "completed"]
    exp <- papi.exps[exp.index, "experiment_group"]

    cov.dom <- get_papi_dyncov(exp, con, "project.domain")
    # NOTE(review): cov.value is not used in this cell; kept in case another cell reads it.
    cov.value <- get_papi_dyncov(exp, con, "metrics.value")

    # Only projects with a coverage above zero are plotted
    cov.dom <- subset(cov.dom, value > 0)
    p <- ggplot(data = cov.dom, aes(x = project_name))
    p <- p + theme(axis.ticks.x = element_blank(),
                   axis.text.x = element_blank(),
                   legend.position = "right",
                   plot.title = element_text(size = 8))
    p <- p + ggtitle(sprintf(" Dynamic SCoP coverage ordered by domain '%s'\n'%s' (%s)", exp.name, exp.date, exp))
    p <- p + labs(y = "Dynamic SCoP coverage [%]", x = "Project")
    p <- p + geom_point(aes(y = value, color = domain), size = 1)
    # Values are not auto-printed inside a for loop, so render the plot explicitly.
    print(p)
}

Dynamic Coverage Boxplots
-------------------------

Aggregate all coverage results in the form of one boxplot per domain.

In [None]:
# Draw one plot per 'papi' experiment: a boxplot of dynamic SCoP coverage per domain.
# seq_len() gives an empty sequence when there is no experiment; 1:nrow() would
# iterate over 1 and 0 in that case.
for (exp.index in seq_len(nrow(exps.papi))) {
    exp.name <- exps.papi[exp.index, "experiment_name"]
    # Take the date from this experiment's own row so the title matches the plotted data.
    exp.date <- exps.papi[exp.index, "completed"]
    exp <- exps.papi[exp.index, "experiment_group"]

    cov.dom <- get_papi_dyncov(exp, con, "project.domain")
    # Only coverage values above 1 enter the boxplots
    cov0 <- cov.dom[cov.dom$value > 1,]

    # ggplot() + geom_point() builds the same base layer that the deprecated
    # qplot(x, y) call produced.
    p <- ggplot(data = cov0, aes(x = domain, y = value)) + geom_point()
    p <- p + ggtitle(sprintf(" Dynamic SCoP coverage by domain '%s' @ '%s' (%s)", exp.name, exp.date, exp))
    p <- p + theme(plot.title = element_text(size = 8))
    p <- p + geom_boxplot(outlier.size = 1, fill = "white")
    # Values are not auto-printed inside a for loop, so render the plot explicitly.
    print(p)
}

In [None]:
# For every 'papi' experiment: relate the time spent in SCoPs to the total time per
# project (scatter plot with fitted line, correlation tests, t-test, normality tests,
# histograms and covariance).
# seq_len() gives an empty sequence when there is no experiment; 1:nrow() would
# iterate over 1 and 0 in that case.
for (exp.index in seq_len(nrow(exps.papi))) {
    exp.name <- exps.papi[exp.index, "experiment_name"]
    exp <- exps.papi[exp.index, "experiment_group"]

    # NOTE(review): papi.cov is not used in this cell; kept in case another cell reads it.
    papi.cov <- papi_dyncov(con, exp)
    papi.time <- papi_time(con, exp)
    papi.t_scops <- papi_time_scops(con, exp)

    if (nrow(papi.time) == 0 || nrow(papi.t_scops) == 0) {
        # Without both metrics the pivoted table lacks a column used below
        message(sprintf("Skipping experiment %s: no SCoP or total timings recorded.", exp))
        next
    }

    # One row per project with the columns pprof.time.scops_s and pprof.time.total_s
    papi.combined <- rbind(papi.t_scops, papi.time)
    papi.combined.cast <- cast(papi.combined, project_name ~ name)
    papi.combined.cast <- papi.combined.cast[with(papi.combined.cast, order(-pprof.time.total_s)), ]
    papi.combined.cast <- subset(papi.combined.cast, pprof.time.scops_s > 0)
    papi.combined.cast <- subset(papi.combined.cast, pprof.time.total_s < 2000)

    if (nrow(papi.combined.cast) < 3) {
        # cor.test() and shapiro.test() need at least three observations
        message(sprintf("Skipping experiment %s: fewer than 3 projects left after filtering.", exp))
        next
    }

    time.total <- papi.combined.cast$pprof.time.total_s
    time.scops <- papi.combined.cast$pprof.time.scops_s

    # Intercept and slope of the linear fit.
    # NOTE(review): the variable name `c` masks base::c as a value; calls to c() still work.
    c <- coef(lm(pprof.time.scops_s ~ pprof.time.total_s, data = papi.combined.cast))
    p <- ggplot(data = papi.combined.cast, aes(x = pprof.time.total_s, y = pprof.time.scops_s)) +
        geom_point(size = 1) +
        geom_abline(intercept = c[1], slope = c[2])
    # Values are not auto-printed inside a for loop, so the plot and every
    # test result below are printed explicitly.
    print(p)

    print(cor.test(x = time.total, y = time.scops, method = "pearson"))
    print(cor.test(x = time.total, y = time.scops, method = "spearman"))
    print(cor.test(x = time.total, y = time.scops, method = "kendall"))

    print(t.test(x = time.total, y = time.scops))

    print(shapiro.test(time.total))
    print(shapiro.test(time.scops))

    hist(time.total, 200)
    hist(time.scops, 200)

    print(var(x = time.scops, y = time.total))
}

In [None]:
# Load the distinct metric names stored in the `likwid` table.
metrics <- likwid.get_metrics(con)

In [None]:
# For every LIKWID metric and every 'polyjit' experiment, draw a dodged bar chart
# per project comparing the summed runtime regions with the summed overhead regions.
metrics <- likwid.get_metrics(con)

# seq_len() gives an empty sequence when a table has no rows; 1:nrow() would
# iterate over 1 and 0 in that case.
for (metric.index in seq_len(nrow(metrics))) {
    # Take the metric name from the first column as a plain string
    metric <- as.character(metrics[metric.index, 1])
    for (exp.index in seq_len(nrow(exps.polyjit))) {
        exp.name <- exps.polyjit[exp.index, "experiment_name"]
        # Take the date from this experiment's own row so the title matches the plotted data.
        exp.date <- exps.polyjit[exp.index, "completed"]
        exp <- exps.polyjit[exp.index, "experiment_group"]

        # NOTE(review): lw.total is not used in this cell; kept in case another cell reads it.
        lw.total <- likwid.total(con, exp, metric)
        lw.runtime <- likwid.runtime(con, exp, metric)
        lw.overhead <- likwid.overhead(con, exp, metric)

        # Stack both series; `variable` tells them apart in the plot
        lw <- rbind(lw.runtime, lw.overhead)

        p <- ggplot(data = lw, aes(x = project_name, y = value, fill = variable))
        p <- p + ggtitle(sprintf("Overhead vs. Runtime of jitte'd functions '%s' @ '%s'\nMetric '%s' (%s)",
                                 exp.name, exp.date, metric, exp))
        p <- p + labs(y = metric, x = "Project")
        p <- p + geom_bar(position = "dodge", stat= "identity")
        p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1), plot.title = element_text(size = 8))
        # Values are not auto-printed inside a for loop, so render the plot explicitly.
        print(p)
    }
}