In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import wispy.theme as theme

# Apply seaborn's default theme first; the project theme below is applied after it.
sns.set()
# NOTE(review): `theme.paper()` comes from the project-local `wispy.theme` module;
# presumably it applies the paper plotting style and returns its palette -- confirm.
pal = theme.paper()
# Figure dimensions. TWO_COL and HEIGHT are passed to `figsize`, which matplotlib
# interprets in inches. 25.4 is the number of millimetres per inch, so the widths
# look like 180 mm and 88 mm column widths -- TODO confirm against the journal spec.
TWO_COL = 180 / 25.4
HEIGHT = 3.5
ONE_COL = 88 / 25.4

def get_results(log_file):
    """Extract the wall clock time and maximum RSS from a GNU time log file.

    Parameters
    ----------
    log_file : str or pathlib.Path
        Path to the log. The file name (not the directory) is split on ``_``:
        the first field is the tool name, the third field is the number of
        PSMs, and the last field (up to its first ``.``) is the replicate
        number, e.g. ``percolator_psms_1000_rep_2.log``.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with the columns ``psms``, ``tool``, ``rep``,
        ``time`` (seconds) and ``mem`` (the reported maximum resident set
        size divided by 1000). ``time`` and ``mem`` are NaN when the
        corresponding line is absent from the log.

    Raises
    ------
    IndexError, ValueError
        If the file name does not follow the naming scheme above, or if a
        matched line does not end in a parsable number.
    """
    # Accept plain strings as well as Path objects; `.name` below needs a Path.
    log_file = Path(log_file)

    elapsed = np.nan
    max_rss = np.nan
    with log_file.open() as log:
        for line in log:
            if "Elapsed (wall clock) time" in line:
                # The value is the last whitespace-separated token, written
                # as h:mm:ss or m:ss. Folding the fields in base 60 handles
                # both layouts (and a bare seconds value).
                elapsed = 0.0
                for field in line.split()[-1].split(":"):
                    elapsed = elapsed * 60 + float(field)

            if "Maximum resident set size" in line:
                # GNU time documents this value in kbytes; /1000 gives MB.
                max_rss = float(line.split()[-1]) / 1000

    name_fields = log_file.name.split("_")
    tool = name_fields[0]
    psms = int(name_fields[2])
    rep = int(name_fields[-1].split(".")[0])

    return pd.DataFrame(
        {
            "psms": [psms],
            "tool": [tool],
            "rep": [rep],
            "time": [elapsed],
            "mem": [max_rss],
        }
    )
                

In [None]:
# Collect the benchmark logs in a reproducible (sorted) order. Directories and
# hidden entries such as `.ipynb_checkpoints` or `.DS_Store` are skipped because
# they are not logs and would make `get_results` fail.
log_paths = sorted(
    path
    for path in Path("logs").iterdir()
    if path.is_file() and not path.name.startswith(".")
)

# One row per log file; `ignore_index` gives the combined frame a unique index
# instead of repeating 0 for every row.
res = pd.concat([get_results(path) for path in log_paths], ignore_index=True)
res.loc[res["tool"] == "percolator", "tool"] = "Percolator"

# Keep the best (smallest) replicate for each tool at each dataset size.
best_time = res.sort_values("time").groupby(["psms", "tool"]).head(1)
best_mem = res.sort_values("mem").groupby(["psms", "tool"]).head(1)

best_mem.sort_values("psms")

## Plot the Performance

In [None]:
sns.set_context("notebook")
fig, axs = plt.subplots(1, 2, figsize=(TWO_COL, HEIGHT))

# Panel a: fastest replicate per tool and dataset size.
sns.lineplot(x="psms", y="time", hue="tool", ax=axs[0], data=best_time, marker="o")
axs[0].set_ylabel("Wall Clock Time (s)")

# Panel b: lowest peak memory per tool and dataset size. The values are
# kbytes / 1000, i.e. megabytes ("MB"); "Mb" would denote megabits.
sns.lineplot(x="psms", y="mem", hue="tool", ax=axs[1], data=best_mem, marker="o")
axs[1].set_ylabel("Max Memory (MB)")

# Vertical position of the panel letters, in points measured from the bottom
# of each axes. NOTE(review): this is tuned to the current figure HEIGHT.
lab_y = 200
for ax, label in zip(axs, "ab"):
    ax.set_xlabel("Number of PSMs")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.get_legend().set_title(None)

    # Bold panel letter just left of the axes, near its top.
    ax.annotate(
        label,
        (-10, lab_y),
        xycoords="axes points",
        fontweight='bold',
        va='top',
        ha='right'
    )


plt.tight_layout()
# Make sure the output directory exists so saving works in a fresh checkout.
Path("figures").mkdir(parents=True, exist_ok=True)
plt.savefig("figures/benchmark.png", dpi=300)