# Evaluation of the average frequency and age of basic blocks in our subjects
Generates Figure 8 in https://mboehme.github.io/paper/CCS21.pdf.

To cite this data or evaluation, please use
```bibtex
@inproceedings{aflchurn,
 author = {Zhu, Xiaogang and B{\"o}hme, Marcel}, 
 title = {Regression Greybox Fuzzing},
 booktitle = {Proceedings of the 28th ACM Conference on Computer and Communications Security},
 series = {CCS},
 year = {2021},
 numpages = {12},
}
```

In [None]:
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)

In [None]:
# Derive the short project name that is later used as the facet label.
#
# subject:    vector of fuzz-target names (character or factor).
# split_ndpi: if TRUE, subjects containing "ndpi_fuzz_process_packet" or
#             "ndpi_fuzz_ndpi_reader" are labelled "ndpi-1" / "ndpi-2"
#             instead of their first token.
# Returns a character vector of the same length as `subject`: the text before
# the first underscore, with surrounding whitespace trimmed.
# vapply (unlike sapply) always yields a character vector, even when the CSV
# has zero rows.
short_name_of = function(subject, split_ndpi = FALSE) {
  subject = as.character(subject)
  first_token = vapply(str_split(subject, "_"),
                       function(parts) parts[1], character(1))
  short = str_trim(first_token)
  if (split_ndpi) {
    # The reader label is assigned first so that process_packet wins if a
    # name happens to contain both patterns.
    short[grepl("ndpi_fuzz_ndpi_reader", subject)] = "ndpi-2"
    short[grepl("ndpi_fuzz_process_packet", subject)] = "ndpi-1"
  }
  short
}

# Read a basic-block distribution CSV and add, per (subject, type) group,
# the running total of `frequency` (column `cumsum`) and the group total
# (column `tally`), plus the `short_name` facet label.
# NOTE(review): the running total follows the row order of the CSV; this
# presumably assumes rows are sorted by `count` within each group -- confirm.
load_distribution = function(path, split_ndpi = FALSE) {
  d = read.csv(path, header = TRUE) %>%
    group_by(subject, type) %>%
    mutate(cumsum = cumsum(frequency), tally = sum(frequency)) %>%
    ungroup()  # do not leak the grouping into later steps
  d$short_name = short_name_of(d$subject, split_ndpi)
  d
}

# Read a crash-location CSV and add the `short_name` facet label.
load_crashes = function(path, split_ndpi = FALSE) {
  d = read.csv(path, header = TRUE)
  d$short_name = short_name_of(d$subject, split_ndpi)
  d
}

# Subjects with regressions.
dist = load_distribution("BB.regressions.nocrash.csv")
summary(dist)

crashes = load_crashes("BB.regressions.crash.csv")
summary(crashes)

# Subjects without regressions; here the two nDPI fuzz targets get their own
# labels.
ndist = load_distribution("BB.noregressions.nocrash.csv", split_ndpi = TRUE)
summary(ndist)

ncrashes = load_crashes("BB.noregressions.crash.csv", split_ndpi = TRUE)
summary(ncrashes)

In [None]:
# Size of the plots rendered inline in the notebook (repr plot options).
options(
  repr.plot.width = 20,
  repr.plot.height = 10
)

# Plot, per subject, the proportion of basic blocks (BBs) that were changed
# more than X times, mark the crash-related stack locations as vertical
# lines, and save the figure as a PDF.
#
# crashes:  data frame of crash locations; the columns `depth`, `churn` and
#           `short_name` are used. Only rows with depth < 5 are drawn.
# dist:     data frame of BB distributions; the columns `type`, `count`,
#           `cumsum`, `tally` and `short_name` are used. Only rows whose
#           `type` contains "changes" are plotted.
# columns:  number of facet columns.
# pdfname:  output file name passed to ggsave().
# pdfwidth: figure width passed to ggsave() (scaled by 0.7); the height is
#           left to ggsave()'s default.
# Returns the value of ggsave().
generate_changes_plot = function(crashes, dist, columns, pdfname, pdfwidth){
    changesdist = subset(dist, grepl("changes", type))
    shallow_crashes = subset(crashes, depth < 5)
    p = ggplot(changesdist, aes(count, 1-cumsum/tally)) +
      geom_line() +
      # Crash lines are shifted by half a change so they sit between ticks.
      geom_vline(data=shallow_crashes, aes(xintercept=churn-0.5, linetype=as.factor(depth))) +
      expand_limits(x = 0.5, y = 0) +
      scale_y_continuous(labels=scales::percent) +
      scale_x_log10(breaks=c(1,10,100,1000,10000)) +
      scale_linetype_manual(values=c(seq(1,4),6), name="",
                            labels=c("Crash Location (CL0)", "Stack above CL0 (CL1)", "Stack above CL1 (CL2)",
                                     "CL3", "CL4")) +
      ylab("Proportion of BBs changed more than X times") +
      theme(legend.position="top", axis.text.x = element_text(colour="black"), axis.text.y = element_text(colour="black")) +
      # `scales` spelled out instead of relying on partial matching of `scale`.
      facet_wrap(~short_name, scales="free_x", ncol=columns)
    # NOTE(review): the file names passed by the calls in this notebook start
    # with "Figure", so this branch is currently never taken -- confirm whether
    # the no-regression figure is meant to drop its legend and x label.
    if (grepl("^n",pdfname)) {
      # "none" replaces the deprecated guides(linetype=FALSE).
      p = p + guides(linetype="none")
      p = p + xlab("")
    } else {
      p = p + xlab("How often a BB was changed (#changes)")
    }
    # Save exactly this plot instead of whatever last_plot() happens to be.
    ggsave(pdfname, plot=p, width=pdfwidth, scale=0.7)
}

# Keep only the data for projects that are among our subjects: two individual
# fuzz targets and four whole projects are removed from every data set.
excluded_subjects = c("openssl_client", "libgit2_patch_parse_fuzzer")
excluded_projects = c("oniguruma", "serenity", "libsass", "jsoncpp")

# Return the rows of `df` whose subject and short_name are both not excluded.
keep_our_subjects = function(df) {
  keep = !(df[["subject"]] %in% excluded_subjects) &
         !(df[["short_name"]] %in% excluded_projects)
  df[keep, , drop = FALSE]
}

crashes2  = keep_our_subjects(crashes)
dist2     = keep_our_subjects(dist)
ncrashes2 = keep_our_subjects(ncrashes)
ndist2    = keep_our_subjects(ndist)

# Top row of Figure 8: (a) subjects with regressions, (b) without.
generate_changes_plot(crashes2, dist2, columns = 5,
                      pdfname = "Figure.8a.top.pdf", pdfwidth = 10)
generate_changes_plot(ncrashes2, ndist2, columns = 2,
                      pdfname = "Figure.8b.top.pdf", pdfwidth = 4.3)

In [None]:
# Plot, per subject, the cumulative proportion of basic blocks (BBs) by age
# in days, mark the crash-related stack locations as vertical lines, and save
# the figure as a PDF. The linetype legend is always hidden.
#
# crashes:  data frame of crash locations; the columns `depth`, `age` and
#           `short_name` are used. Only rows with depth < 5 are drawn.
# dist:     data frame of BB distributions; the columns `type`, `count`,
#           `cumsum`, `tally` and `short_name` are used. Only rows whose
#           `type` contains "days" are plotted.
# columns:  number of facet columns.
# pdfname:  output file name passed to ggsave().
# pdfwidth: figure width passed to ggsave() (scaled by 0.7); the height is
#           left to ggsave()'s default.
# Returns the value of ggsave().
generate_days_plot = function(crashes, dist, columns, pdfname, pdfwidth){
    daysdist = subset(dist, grepl("days", type))
    shallow_crashes = subset(crashes, depth < 5)
    p = ggplot(daysdist, aes(count, cumsum/tally)) +
      geom_line() +
      # Crash lines are shifted by half a day so they sit between ticks.
      geom_vline(data=shallow_crashes, aes(xintercept=age+0.5, linetype=as.factor(depth))) +
      scale_y_continuous(labels=scales::percent) +
      ylab("Proportion of BBs changed less than X days, ago") +
      theme(legend.position="top", axis.text.x = element_text(colour="black"), axis.text.y = element_text(colour="black")) +
      # `scales` spelled out instead of relying on partial matching of `scale`.
      facet_wrap(~short_name, scales="free_x", ncol=columns) +
      # "none" replaces the deprecated guides(linetype=FALSE).
      guides(linetype="none")
    # NOTE(review): the file names passed by the calls in this notebook start
    # with "Figure", so the first branch is currently never taken -- confirm
    # whether the no-regression figure is meant to drop its x label.
    if (grepl("^n",pdfname)) {
      p = p + xlab("")
    } else {
      p = p + xlab("How recently a BB was changed (#days)")
    }
    # Save exactly this plot instead of whatever last_plot() happens to be.
    ggsave(pdfname, plot=p, width=pdfwidth, scale=0.7)
}

# Bottom row of Figure 8: (a) subjects with regressions, (b) without.
generate_days_plot(crashes2, dist2, columns = 5,
                   pdfname = "Figure.8a.bottom.pdf", pdfwidth = 10)
generate_days_plot(ncrashes2, ndist2, columns = 2,
                   pdfname = "Figure.8b.bottom.pdf", pdfwidth = 4)