In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import pandas as pd
import os

from mpl_toolkits.mplot3d import Axes3D

# Render all figure text through LaTeX: the legend and axis labels defined
# below contain LaTeX macros (\textsc, \textsuperscript, \%).
plt.rcParams['text.usetex'] = True

In [None]:
# Relative path to the results file. Despite the .csv extension it is read
# with a tab delimiter by the plotting functions below.
data_path = "results.csv"

In [None]:
# Numeric source id -> display name. The id is the quotient of the "data"
# column by source_key_M; the lower-cased name is used in exported file names.
sources = {
    1: "Snort",
    2: "Suricata",
    3: "Zeek",
}

# One entry per plotted series: (algo, use_srg, label).
#   algo    -- value matched against the "algo" column
#   use_srg -- matched (as 0/1) against the "sparse" column
#   label   -- LaTeX legend label
# run_sets refers to these entries by list index, so their order matters.
runs = [
    # Indices 0-1: referenced by the "unbounded" run set.
    ("d2fa",  False, r"\textsc{D\textsuperscript{2}FA}"),
    ("mdfak", True,  r"\textsc{Sparse-D\textsuperscript{2}FA}"),
    
    # Indices 2-5: referenced by the "longdelay" run set.
    ("d2fa",  False, r"\textsc{D\textsuperscript{2}FA-LD}"),
    ("mdfak", True,  r"\textsc{Sparse-D\textsuperscript{2}FA-LD}"),
    ("mdfac", False, r"\textsc{D\textsuperscript{2}FA}-LD-Cut"),
    ("mdfac", True,  r"\textsc{Sparse-D\textsuperscript{2}FA}-LD-Cut"),
    
    # Indices 6-7: referenced by the "matchdelay" run set.
    ("adfa",  True,  r"\textsc{Sparse-D\textsuperscript{2}FA}-MD"),
    ("adfa",  False, r"\textsc{D\textsuperscript{2}FA}-MD"),
]

# One entry per figure family: (run name, bound, indices into runs).
# The bound is matched against the "bound" column; the run name and bound
# both appear in exported file names.
run_sets = [
    ("unbounded",  0, [0, 1]),
    ("longdelay",  2, [2, 3, 4, 5]),
    ("matchdelay", 2, [6, 7]),
]

# Per-algorithm (marker, color). plot_entry uses only the color; it picks the
# marker from use_srg instead of the one stored here.
styles = { 
    "d2fa":  ("o", "black"), 
    "mdfak": ("o", "black"),
    "mdfac": ("s", "green"),
    "adfa":  ("^", "red"),
}

# NOTE(review): not referenced by any code visible in this notebook.
single_size = (5, 5)

# Divisor that splits a "data" value into the source id (quotient) and the
# value plotted on the x axis (remainder).
source_key_M = 100000000

def get_results(df, algo, source, use_srg, param):
    """Extract the (x, y) points of one series from the results table.

    Rows are kept when their "algo" column equals ``algo``, their "sparse"
    column equals ``int(use_srg)`` and the quotient of their "data" column by
    ``source_key_M`` equals ``source``.

    Returns:
        A tuple ``(xs, ys)`` of lists: ``xs`` holds the remainder of "data"
        modulo ``source_key_M`` for each kept row, ``ys`` the matching values
        of column ``param``.
    """
    keep = (
        (df["algo"] == algo)
        & (df["sparse"] == int(use_srg))
        & (df["data"] // source_key_M == source)
    )
    selected = df[keep]

    # The low part of the "data" key is the x coordinate of the point.
    xs = [key % source_key_M for key in selected["data"].tolist()]
    ys = selected[param].tolist()
    return (xs, ys)

def plot_entry(ax, df, algo, source, name, use_srg, param):
    """Scatter one series onto ``ax``.

    The points come from ``get_results``; when ``param`` is "comp" the y
    values are multiplied by 100 before plotting. The series is labelled
    ``name`` and coloured with the colour stored for ``algo`` in ``styles``.
    The marker depends only on ``use_srg``: a "+" when it is truthy, a hollow
    "o" otherwise.
    """
    xs, ys = get_results(df, algo, source, use_srg, param)
    if param == "comp":
        # Scale by 100 so the values read as percentages.
        ys = [value * 100.0 for value in ys]

    # Only the colour is taken from the style table; the marker stored there
    # is not used.
    _, color = styles[algo]

    scatter_kwargs = {"label": name, "s": 80}
    if use_srg:
        scatter_kwargs.update(marker="+", color=color)
    else:
        # Hollow circle: coloured outline, no fill.
        scatter_kwargs.update(marker="o", facecolors="none", edgecolors=color)
    ax.scatter(xs, ys, **scatter_kwargs)

def plot_time(in_df, fig, ax, source, run_set_idx):
    """Draw the "Time" panel for one source and run set on ``ax``.

    Rows of ``in_df`` are restricted to the bound of ``run_sets[run_set_idx]``
    and every run listed in that run set is plotted from the "secs" column.
    Both axes use a log scale and the x axis starts at 900. ``fig`` is
    accepted but not used here.
    """
    ax.title.set_text("Time")
    ax.set_ylabel("Seconds")
    ax.set_xlabel("Number of states in DFA")

    _, bound, run_indices = run_sets[run_set_idx]
    bounded = in_df[in_df["bound"] == bound]

    for algo, use_srg, label in (runs[idx] for idx in run_indices):
        plot_entry(ax, bounded, algo, source, label, use_srg, "secs")

    ax.set_yscale("log")
    ax.set_xscale("log", base=10)
    ax.set_xlim(left=900)

    # No per-axes legend: callers build a single figure-level legend.
    ax.grid()
    
def plot_comp(in_df, fig, ax, source, run_set_idx):
    """Draw the "Compression" panel for one source and run set on ``ax``.

    Rows of ``in_df`` are restricted to the bound of ``run_sets[run_set_idx]``
    and every run listed in that run set is plotted from the "comp" column
    (which ``plot_entry`` scales by 100). The x axis is logarithmic and starts
    at 900; the y axis starts at 0. ``fig`` is accepted but not used here.
    """
    ax.title.set_text("Compression")
    # Raw string: "\%" is not a valid Python escape sequence, so a normal
    # string literal triggers an invalid-escape warning. The text handed to
    # LaTeX is unchanged.
    ax.set_ylabel(r"Relative size of D\textsuperscript{2}FA (\%)")
    ax.set_xlabel("Number of states in DFA")

    _, run_bound, run_runs = run_sets[run_set_idx]
    df = in_df[in_df["bound"] == run_bound]

    for run_idx in run_runs:
        algo, use_srg, name = runs[run_idx]
        plot_entry(ax, df, algo, source, name, use_srg, "comp")

    # NOTE(review): plot_time switches to the log scale before calling
    # set_xlim, whereas here the limit is set first. The original order is
    # kept; confirm whether both panels are meant to end up with the same
    # x range.
    ax.set_xlim(left=900)
    ax.set_xscale("log", base=10)
    ax.set_ylim(bottom=0)

    # No per-axes legend: callers build a single figure-level legend.
    ax.grid()
    
def plot_side_by_side(source=1, run_set_idx=1):
    """Show the time and compression panels next to each other.

    Reads the tab-delimited results file at ``data_path``, creates a 1x2
    figure of size (10, 5), fills the left panel with ``plot_time`` and the
    right panel with ``plot_comp``, then adds one figure-level legend built
    from the right panel's handles and labels.
    """
    results = pd.read_csv(data_path, delimiter="\t")

    fig, (time_ax, comp_ax) = plt.subplots(1, 2, figsize=(10, 5))
    plot_time(results, fig, time_ax, source, run_set_idx)
    plot_comp(results, fig, comp_ax, source, run_set_idx)

    # The legend entries are taken from the last (compression) panel only.
    handles, labels = comp_ax.get_legend_handles_labels()
    fig.legend(handles, labels, loc="upper center", ncol=6)

In [None]:
# Preview inline: source 1 ("Snort") with run set 1 ("longdelay").
plot_side_by_side(1, 1)

In [None]:
def export_side_by_side(base_dir, source, run_set_idx):
    """Render the side-by-side figure and save it as a PDF in ``base_dir``.

    Reads the tab-delimited results file at ``data_path``, draws the time and
    compression panels, adds a frameless figure-level legend taken from the
    compression panel, and saves the figure to
    ``<base_dir>/result_<source name, lower-cased>_<run name>_bound-<bound>.pdf``.

    NOTE(review): the figure is not closed after saving.
    """
    results = pd.read_csv(data_path, delimiter="\t")

    fig, (time_ax, comp_ax) = plt.subplots(1, 2, figsize=(10, 5))
    plot_time(results, fig, time_ax, source, run_set_idx)
    plot_comp(results, fig, comp_ax, source, run_set_idx)

    handles, labels = comp_ax.get_legend_handles_labels()
    fig.legend(handles, labels, loc="upper center", ncol=6, frameon=False)

    # The file name encodes the source, the run-set name and its bound.
    run_name, run_bound, _ = run_sets[run_set_idx]
    source_name = sources[source].lower()
    target = "{}/result_{}_{}_bound-{}.pdf".format(
        base_dir, source_name, run_name, run_bound)
    fig.savefig(target)
    
def export_all(base_dir):
    """Export the side-by-side figure for every run set and every source.

    ``base_dir`` is created first if it does not exist, including any missing
    parent directories. ``export_side_by_side`` is then called once per
    (run set, source) pair, run sets in the outer loop.
    """
    # makedirs handles nested paths and an already-existing directory in one
    # call, without the check-then-create gap of isdir() followed by mkdir().
    os.makedirs(base_dir, exist_ok=True)

    for run_set_idx in range(len(run_sets)):
        # Iterate the source table itself so the exported set always matches
        # the ids export_side_by_side can resolve to a name.
        for source in sources:
            export_side_by_side(base_dir, source, run_set_idx)

In [None]:
# Export one PDF per (run set, source) pair into the "img" directory.
export_all("img")