In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys, copy, os, shutil
from tqdm.notebook import tqdm
from matplotlib.lines import Line2D
from matplotlib.patches import Patch

# Human-readable titles for each dataset identifier.
# NOTE: insertion order matters -- the grid figures place subplots in this order.
dataset_descs = {
    "avazu-app_binary_sparse": "Avazu (App)",
    "avazu-site_binary_sparse": "Avazu (Site)",
    "criteo_binary_sparse": "Criteo",
    "dexter_binary_sparse": "Dexter",
    "dorothea_binary_sparse": "Dorothea",
    "kdd2010-a_binary_sparse": "KDD2010 (Algebra)",
    "mnist8-4+9_binary_sparse": "MNIST8 (4+9)",
    "news20_binary_sparse": "News20",
    "newsgroups_binary_sparse": "Newsgroups (Binary, CS)",
    "pcmac_binary_sparse": "PCMAC",
    "rcv1_binary_sparse": "RCV1",
    "real-sim_binary_sparse": "Real-Sim",
    "sst2_binary_sparse": "SST-2",
    "url_binary_sparse": "URL",
    "w8a_binary_sparse": "W8A",
    "webspam_binary_sparse": "Webspam",
}

# load in our logs: the cells below filter this table on `dataset`, `model`, `K` and
# `weight_scheme`, and read its `L1_*`, `fin_test_acc_*` and `fin_sparsity_*` columns
master = pd.read_csv("logs/master.csv")

# make sure the output directories exist; makedirs also creates "figures" itself when it
# is missing (listing a non-existent "figures" directory used to raise FileNotFoundError)
os.makedirs("figures/errorbars", exist_ok=True)

# Error Bars for L1 ("Relative Oracle Performance")

In [None]:
# L1 ("Relative Oracle Performance") error bars: one 4x4 figure per combination of
# base model + weight_scheme, with one subplot per dataset.
for model in ["PAC", "FSOL"]:

    # the tuned hyperparameters only depend on the base model, so read them once per model
    # rather than once per subplot
    hparams = pd.read_csv(f"base_variants/{model}_hparams.csv")

    for weight_scheme in ["dense", "exp-dense"]:

        # start our figure
        fig, ax = plt.subplots(4, 4, dpi=200, figsize=(14, 12))

        # number of datasets where the K=64 simple average (no zeroing) has a lower mean L1
        # than the base model
        success = 0

        # go thru each of our datasets
        for i, dataset in enumerate(dataset_descs.keys()):

            # beautify our subplot immediately
            panel = ax[i // 4, i % 4]
            panel.grid()
            panel.set_title(dataset_descs[dataset], fontsize=15)

            # best hyperparameters for this dataset + model, rendered as they appear in the
            # per-seed log file names
            best = hparams.query(f"dataset == '{dataset}'")
            if model == "PAC":
                log10Cerr = int(best[["log10Cerr"]].values[0, 0])
                hparam_tag = f"log10Cerr={log10Cerr}"
            elif model == "FSOL":
                log2eta, log10lmbda = best[["log2eta", "log10lmbda"]].values[0]
                hparam_tag = f"log2eta={log2eta}_log10lmbda={log10lmbda}"

            # one value per seed (5 seeds): mean gap between the running maximum and the
            # instantaneous test accuracy of the base model, skipping the first logged row
            base_vals = []
            for seed in range(5):
                logs_inst = pd.read_csv(f"../hparam_tuning/results/{model}/{dataset}/"
                                        f"model={model}_{hparam_tag}_seed={seed}_metrics.csv")
                inst_acc = logs_inst["inst_test-set-acc"][1:]
                base_vals.append((inst_acc.cummax().values - inst_acc.values).mean())

            # light green band from MIN to MAX across seeds + line at the mean
            panel.axhline(y=np.mean(base_vals), color="green")
            panel.fill_between([0-1/2, 2, 3, 6+1/2], [np.min(base_vals)] * 4, [np.max(base_vals)] * 4,
                               alpha=0.5, color="green")

            # horizontal offsets of the four variants drawn around each K tick
            offsets = [-1/2, -1/6, +1/6, +1/2]

            # let's show our variants
            for j, K in enumerate([1, 4, 16, 64]):

                # rows of the master log for this (dataset, model, K, weight_scheme);
                # shared by all four variants below
                rows = master.query((f"dataset == '{dataset}' and model == '{model}' and "
                                     f"K == {K} and weight_scheme == '{weight_scheme}'"))

                # position of the current variant within this K group
                counter = 0

                # plot all four error bars for this value of K
                for AS in ["SA", "WA"]:
                    for VZ in ["", "_VZ"]:

                        # line + color settings
                        color = "blue" if AS == "SA" else "red"

                        # what are the values at play for this variant?
                        vals = rows[f"L1_{AS}{VZ}"].values
                        center = np.mean(vals)
                        x_pos = (2*j) + offsets[counter]

                        # point at the mean, error bars out to the min & max. The half-widths
                        # are clamped at 0: when all values agree, float rounding can make
                        # them slightly negative, which errorbar() rejects.
                        panel.scatter(x=[x_pos], y=[center], color=color, s=12)
                        eb = panel.errorbar(x=[x_pos], y=[center],
                                            yerr=[[np.maximum(center - np.min(vals), 0)],
                                                  [np.maximum(np.max(vals) - center, 0)]],
                                            color=color, capsize=3)

                        # voting-based zeroing variants get a dashed bar
                        if VZ == "_VZ":
                            eb[-1][0].set_linestyle('--')

                        # increment our counter
                        counter += 1

                        # sanity check: does K=64 simple averaging beat the base model?
                        if (K == 64) and (AS == "SA") and (VZ == ""):
                            if center < np.mean(base_vals):
                                success += 1

            # more beautifying
            panel.set_xticks([0, 2, 4, 6])
            panel.set_xticklabels(["K=1", "K=4", "K=16", "K=64"])
            panel.tick_params("both", labelsize=13.5)

        # custom legend
        custom_lines = [Line2D([0], [0], color="blue", linestyle=None,
                               label="Simple Average"),
                        Line2D([0], [0], color="blue", linestyle="--",
                               label='Simple Average + Voting-Based Zeroing'),
                        Line2D([0], [0], color="red", linestyle=None,
                               label="Weighted Average"),
                        Line2D([0], [0], color="red", linestyle='--',
                               label="Weighted Average + Voting-Based Zeroing"),
                        Patch(facecolor="green", alpha=0.5, label=model),
                        Line2D([0], [0], color="green", linestyle=None,
                               label=f"{model} (Mean)")]
        fig.legend(handles=custom_lines, loc="lower center", ncol=3, fontsize=13.5, bbox_to_anchor=(0.5, -0.075))

        # for our logging purposes
        print(success)

        # beautify, save, show
        plt.tight_layout()
        plt.savefig(f"figures/errorbars/model={model}_ws={weight_scheme}_L1-ROP.png",
                    facecolor="white", bbox_inches="tight")
        plt.show()

        # release the figure so the four large figures of this loop do not pile up in memory
        plt.close(fig)

# Error Bars for Final Test Accuracy

In [None]:
# Final test accuracy error bars: one 4x4 figure per combination of base model +
# weight_scheme, with one subplot per dataset.
for model in ["PAC", "FSOL"]:

    # the tuned hyperparameters only depend on the base model, so read them once per model
    # rather than once per subplot
    hparams = pd.read_csv(f"base_variants/{model}_hparams.csv")

    for weight_scheme in ["dense", "exp-dense"]:

        # start our figure
        fig, ax = plt.subplots(4, 4, dpi=200, figsize=(14, 12))

        # counters to see how many times we have mean higher test accuracy than baseline + oracle.
        success, oracle_success = 0, 0

        # go thru each of our datasets
        for i, dataset in enumerate(dataset_descs.keys()):

            # beautify our subplot immediately
            panel = ax[i // 4, i % 4]
            panel.grid()
            panel.set_title(dataset_descs[dataset], fontsize=15)

            # best hyperparameters for this dataset + model, rendered as they appear in the
            # per-seed log file names
            best = hparams.query(f"dataset == '{dataset}'")
            if model == "PAC":
                log10Cerr = int(best[["log10Cerr"]].values[0, 0])
                hparam_tag = f"log10Cerr={log10Cerr}"
            elif model == "FSOL":
                log2eta, log10lmbda = best[["log2eta", "log10lmbda"]].values[0]
                hparam_tag = f"log2eta={log2eta}_log10lmbda={log10lmbda}"

            # per seed (5 seeds): last logged test accuracy of the base model (base_vals)
            # and the best test accuracy it ever logged (oracle_vals)
            base_vals, oracle_vals = [], []
            for seed in range(5):
                logs_inst = pd.read_csv(f"../hparam_tuning/results/{model}/{dataset}/"
                                        f"model={model}_{hparam_tag}_seed={seed}_metrics.csv")
                base_vals.append(logs_inst["inst_test-set-acc"].values[-1])
                oracle_vals.append(logs_inst["inst_test-set-acc"].max())

            # for test accuracy, the oracle is the colored band (min to max) + line at the mean
            panel.axhline(y=np.mean(oracle_vals), color="green")
            panel.fill_between([0-1/2, 2, 3, 8+1/2], [np.min(oracle_vals)] * 4, [np.max(oracle_vals)] * 4,
                               alpha=0.5, color="green")

            # the base model's final accuracy gets its own error bar at x=0. Half-widths are
            # clamped at 0: float rounding can make them slightly negative when all seeds
            # agree, which errorbar() rejects.
            base_mean = np.mean(base_vals)
            panel.scatter(x=[0], y=[base_mean], color="black", s=12)
            eb = panel.errorbar(x=[0], y=[base_mean],
                                yerr=[[np.maximum(base_mean - np.min(base_vals), 0)],
                                      [np.maximum(np.max(base_vals) - base_mean, 0)]],
                                color="black", capsize=3)

            # horizontal offsets of the four variants drawn around each K tick
            offsets = [-1/2, -1/6, +1/6, +1/2]

            # let's iterate thru our variants
            for j, K in enumerate([1, 4, 16, 64]):

                # rows of the master log for this (dataset, model, K, weight_scheme);
                # shared by all four variants below
                rows = master.query((f"dataset == '{dataset}' and model == '{model}' and "
                                     f"K == {K} and weight_scheme == '{weight_scheme}'"))

                # position of the current variant within this K group
                counter = 0

                # plot all four error bars for this value of K
                for AS in ["SA", "WA"]:
                    for VZ in ["", "_VZ"]:

                        # line + color settings
                        color = "blue" if AS == "SA" else "red"

                        # what are the values at play for this variant?
                        vals = rows[f"fin_test_acc_{AS}{VZ}"].values
                        center = np.mean(vals)
                        x_pos = (2*(j+1)) + offsets[counter]

                        # point at the mean, error bars out to the min & max (clamped at 0,
                        # see the base-model bar above)
                        panel.scatter(x=[x_pos], y=[center], color=color, s=12)
                        eb = panel.errorbar(x=[x_pos], y=[center],
                                            yerr=[[np.maximum(center - np.min(vals), 0)],
                                                  [np.maximum(np.max(vals) - center, 0)]],
                                            color=color, capsize=3)

                        # voting-based zeroing variants get a dashed bar
                        if VZ == "_VZ":
                            eb[-1][0].set_linestyle('--')

                        # increment our counter
                        counter += 1

                        # sanity check: does K=64 simple averaging beat the base model / oracle?
                        if (K == 64) and (AS == "SA") and (VZ == ""):
                            if center > np.mean(base_vals):
                                success += 1
                            if center > np.mean(oracle_vals):
                                oracle_success += 1

            # more beautifying
            panel.set_xticks([0, 2, 4, 6, 8])
            panel.set_xticklabels([model, "K=1", "K=4", "K=16", "K=64"])
            panel.tick_params("both", labelsize=12)

        # custom legend
        custom_lines = [Line2D([0], [0], color="blue", linestyle=None,
                               label="Simple Average"),
                        Line2D([0], [0], color="blue", linestyle="--",
                               label='Simple Average + Voting-Based Zeroing'),
                        Line2D([0], [0], color="red", linestyle=None,
                               label="Weighted Average"),
                        Line2D([0], [0], color="red", linestyle='--',
                               label="Weighted Average + Voting-Based Zeroing"),
                        Patch(facecolor="green", alpha=0.5, label="Oracle"),
                        Line2D([0], [0], color="green", linestyle=None,
                               label="Oracle (Mean)"),
                        Line2D([0], [0], color="black", linestyle=None,
                               label=model)]
        fig.legend(handles=custom_lines, loc="lower center", ncol=4, fontsize=12, bbox_to_anchor=(0.5, -0.075))

        print(f"{model}, {weight_scheme}: Success={success}, Oracle Success={oracle_success}")

        # beautify, save, show
        plt.tight_layout()
        plt.savefig(f"figures/errorbars/model={model}_ws={weight_scheme}_final-test-acc.png",
                    facecolor="white", bbox_inches="tight")
        plt.show()

        # release the figure so the four large figures of this loop do not pile up in memory
        plt.close(fig)

# Main Text ROP Figure - PAC + 3 Datasets

In [None]:
# Main-text ROP figure: PAC on three datasets, one subplot each.
model, exhibition_datasets = "PAC", ["avazu-app_binary_sparse", "mnist8-4+9_binary_sparse", "news20_binary_sparse"]
weight_scheme = "dense"

# tuned hyperparameters of the base model; read once and filtered per dataset below
hparams = pd.read_csv(f"base_variants/{model}_hparams.csv")

# create our subplots
fig, ax = plt.subplots(1, 3, dpi=200, figsize=(14*(3/4), 12/4))

# number of datasets where the K=64 simple average (no zeroing) has a lower mean L1
# than the base model
success = 0

# horizontal offsets of the four variants drawn around each K tick
offsets = [-1/2, -1/6, +1/6, +1/2]

# go thru each of our datasets
for i, dataset in enumerate(exhibition_datasets):

    # beautify our subplot immediately
    ax[i].grid()
    ax[i].set_title(dataset_descs[dataset], fontsize=15)

    # best hyperparameters for this dataset + model, rendered as they appear in the
    # per-seed log file names
    best = hparams.query(f"dataset == '{dataset}'")
    if model == "PAC":
        log10Cerr = int(best[["log10Cerr"]].values[0, 0])
        hparam_tag = f"log10Cerr={log10Cerr}"
    elif model == "FSOL":
        log2eta, log10lmbda = best[["log2eta", "log10lmbda"]].values[0]
        hparam_tag = f"log2eta={log2eta}_log10lmbda={log10lmbda}"

    # one value per seed (5 seeds): mean gap between the running maximum and the
    # instantaneous test accuracy of the base model, skipping the first logged row
    base_vals = []
    for seed in range(5):
        logs_inst = pd.read_csv(f"../hparam_tuning/results/{model}/{dataset}/"
                                f"model={model}_{hparam_tag}_seed={seed}_metrics.csv")
        inst_acc = logs_inst["inst_test-set-acc"][1:]
        base_vals.append((inst_acc.cummax().values - inst_acc.values).mean())

    # light green band from MIN to MAX across seeds + line at the mean
    ax[i].axhline(y=np.mean(base_vals), color="green")
    ax[i].fill_between([0-1/2, 2, 3, 6+1/2], [np.min(base_vals)] * 4, [np.max(base_vals)] * 4,
                       alpha=0.5, color="green")

    # let's show our variants
    for j, K in enumerate([1, 4, 16, 64]):

        # rows of the master log for this (dataset, model, K, weight_scheme);
        # shared by all four variants below
        rows = master.query((f"dataset == '{dataset}' and model == '{model}' and "
                             f"K == {K} and weight_scheme == '{weight_scheme}'"))

        # position of the current variant within this K group
        counter = 0

        # plot all four error bars for this value of K
        for AS in ["SA", "WA"]:
            for VZ in ["", "_VZ"]:

                # line + color settings
                color = "blue" if AS == "SA" else "red"

                # what are the values at play for this variant?
                vals = rows[f"L1_{AS}{VZ}"].values
                center = np.mean(vals)
                x_pos = (2*j) + offsets[counter]

                # point at the mean, error bars out to the min & max. The half-widths are
                # clamped at 0: when all values agree, float rounding can make them
                # slightly negative, which errorbar() rejects.
                ax[i].scatter(x=[x_pos], y=[center], color=color, s=12)
                eb = ax[i].errorbar(x=[x_pos], y=[center],
                                    yerr=[[np.maximum(center - np.min(vals), 0)],
                                          [np.maximum(np.max(vals) - center, 0)]],
                                    color=color, capsize=3)

                # voting-based zeroing variants get a dashed bar
                if VZ == "_VZ":
                    eb[-1][0].set_linestyle('--')

                # increment our counter
                counter += 1

                # sanity check: does K=64 simple averaging beat the base model?
                if (K == 64) and (AS == "SA") and (VZ == ""):
                    if center < np.mean(base_vals):
                        success += 1

    # more beautifying
    ax[i].set_xticks([0, 2, 4, 6])
    ax[i].set_xticklabels(["K=1", "K=4", "K=16", "K=64"])
    ax[i].tick_params("both", labelsize=13.5)

# custom legend
custom_lines = [Line2D([0], [0], color="blue", linestyle=None,
                       label="Simple Average"),
                Line2D([0], [0], color="blue", linestyle="--",
                       label='Simple Average + Voting-Based Zeroing'),
                Line2D([0], [0], color="red", linestyle=None,
                       label="Weighted Average"),
                Line2D([0], [0], color="red", linestyle='--',
                       label="Weighted Average + Voting-Based Zeroing"),
                Patch(facecolor="green", alpha=0.5, label=model),
                Line2D([0], [0], color="green", linestyle=None,
                       label=f"{model} (Mean)")]
fig.legend(handles=custom_lines, loc="lower center", ncol=3, fontsize=10, bbox_to_anchor=(0.5, -0.15))

# for our logging purposes
print(success)

# beautify, save, show
plt.tight_layout()
plt.savefig("figures/exhibition-ROP.png",
            facecolor="white", bbox_inches="tight")
plt.show()

# Main Text Final Test Accuracy Figure - FSOL + 3 Datasets

In [None]:
# Main-text final test accuracy figure: FSOL on three datasets, one subplot each.
model, exhibition_datasets = "FSOL", ["avazu-site_binary_sparse", "kdd2010-a_binary_sparse", "sst2_binary_sparse"]
weight_scheme = "dense"

# tuned hyperparameters of the base model; read once and filtered per dataset below
hparams = pd.read_csv(f"base_variants/{model}_hparams.csv")

# counters to see how many times we have mean higher test accuracy than baseline + oracle.
success, oracle_success = 0, 0

# create our subplots
fig, ax = plt.subplots(1, 3, dpi=200, figsize=(14*(3/4), 12/4))

# horizontal offsets of the four variants drawn around each K tick
offsets = [-1/2, -1/6, +1/6, +1/2]

# go thru each of our datasets
for i, dataset in enumerate(exhibition_datasets):

    # beautify our subplot immediately
    ax[i].grid()
    ax[i].set_title(dataset_descs[dataset], fontsize=15)

    # best hyperparameters for this dataset + model, rendered as they appear in the
    # per-seed log file names
    best = hparams.query(f"dataset == '{dataset}'")
    if model == "PAC":
        log10Cerr = int(best[["log10Cerr"]].values[0, 0])
        hparam_tag = f"log10Cerr={log10Cerr}"
    elif model == "FSOL":
        log2eta, log10lmbda = best[["log2eta", "log10lmbda"]].values[0]
        hparam_tag = f"log2eta={log2eta}_log10lmbda={log10lmbda}"

    # per seed (5 seeds): last logged test accuracy of the base model (base_vals)
    # and the best test accuracy it ever logged (oracle_vals)
    base_vals, oracle_vals = [], []
    for seed in range(5):
        logs_inst = pd.read_csv(f"../hparam_tuning/results/{model}/{dataset}/"
                                f"model={model}_{hparam_tag}_seed={seed}_metrics.csv")
        base_vals.append(logs_inst["inst_test-set-acc"].values[-1])
        oracle_vals.append(logs_inst["inst_test-set-acc"].max())

    # for test accuracy, the oracle is the colored band (min to max) + line at the mean
    ax[i].axhline(y=np.mean(oracle_vals), color="green")
    ax[i].fill_between([0-1/2, 2, 3, 8+1/2], [np.min(oracle_vals)] * 4, [np.max(oracle_vals)] * 4,
                       alpha=0.5, color="green")

    # the base model's final accuracy gets its own error bar at x=0. Half-widths are
    # clamped at 0: float rounding can make them slightly negative when all seeds agree,
    # which errorbar() rejects.
    base_mean = np.mean(base_vals)
    ax[i].scatter(x=[0], y=[base_mean], color="black", s=12)
    eb = ax[i].errorbar(x=[0], y=[base_mean],
                        yerr=[[np.maximum(base_mean - np.min(base_vals), 0)],
                              [np.maximum(np.max(base_vals) - base_mean, 0)]],
                        color="black", capsize=3)

    # let's iterate thru our variants
    for j, K in enumerate([1, 4, 16, 64]):

        # rows of the master log for this (dataset, model, K, weight_scheme);
        # shared by all four variants below
        rows = master.query((f"dataset == '{dataset}' and model == '{model}' and "
                             f"K == {K} and weight_scheme == '{weight_scheme}'"))

        # position of the current variant within this K group
        counter = 0

        # plot all four error bars for this value of K
        for AS in ["SA", "WA"]:
            for VZ in ["", "_VZ"]:

                # line + color settings
                color = "blue" if AS == "SA" else "red"

                # what are the values at play for this variant?
                vals = rows[f"fin_test_acc_{AS}{VZ}"].values
                center = np.mean(vals)
                x_pos = (2*(j+1)) + offsets[counter]

                # point at the mean, error bars out to the min & max (clamped at 0,
                # see the base-model bar above)
                ax[i].scatter(x=[x_pos], y=[center], color=color, s=12)
                eb = ax[i].errorbar(x=[x_pos], y=[center],
                                    yerr=[[np.maximum(center - np.min(vals), 0)],
                                          [np.maximum(np.max(vals) - center, 0)]],
                                    color=color, capsize=3)

                # voting-based zeroing variants get a dashed bar
                if VZ == "_VZ":
                    eb[-1][0].set_linestyle('--')

                # increment our counter
                counter += 1

                # sanity check: does K=64 simple averaging beat the base model / oracle?
                if (K == 64) and (AS == "SA") and (VZ == ""):
                    if center > np.mean(base_vals):
                        success += 1
                    if center > np.mean(oracle_vals):
                        oracle_success += 1

    # more beautifying
    ax[i].set_xticks([0, 2, 4, 6, 8])
    ax[i].set_xticklabels([model, "K=1", "K=4", "K=16", "K=64"])
    ax[i].tick_params("both", labelsize=12)

# custom legend
custom_lines = [Line2D([0], [0], color="blue", linestyle=None,
                       label="Simple Average"),
                Line2D([0], [0], color="blue", linestyle="--",
                       label='Simple Average + Voting-Based Zeroing'),
                Line2D([0], [0], color="red", linestyle=None,
                       label="Weighted Average"),
                Line2D([0], [0], color="red", linestyle='--',
                       label="Weighted Average + Voting-Based Zeroing"),
                Patch(facecolor="green", alpha=0.5, label="Oracle"),
                Line2D([0], [0], color="green", linestyle=None,
                       label="Oracle (Mean)"),
                Line2D([0], [0], color="black", linestyle=None,
                       label=model)]
fig.legend(handles=custom_lines, loc="lower center", ncol=4, fontsize=10, bbox_to_anchor=(0.5, -0.15))

print(f"{model}, {weight_scheme}: Success={success}, Oracle Success={oracle_success}")

# beautify, save, show
plt.tight_layout()
plt.savefig("figures/exhibition_final-test-acc.png",
            facecolor="white", bbox_inches="tight")
plt.show()

# Error Bars for Final Sparsity 

In [None]:
# Final sparsity error bars: one 4x4 figure per combination of base model +
# weight_scheme, with one subplot per dataset.
for model in ["PAC", "FSOL"]:

    # the tuned hyperparameters only depend on the base model, so read them once per model
    # rather than once per subplot
    hparams = pd.read_csv(f"base_variants/{model}_hparams.csv")

    for weight_scheme in ["dense", "exp-dense"]:

        # start our figure
        fig, ax = plt.subplots(4, 4, dpi=200, figsize=(14, 12))

        # go thru each of our datasets
        for i, dataset in enumerate(dataset_descs.keys()):

            # beautify our subplot immediately
            panel = ax[i // 4, i % 4]
            panel.grid()
            panel.set_title(dataset_descs[dataset], fontsize=15)

            # best hyperparameters for this dataset + model, rendered as they appear in the
            # per-seed log file names
            best = hparams.query(f"dataset == '{dataset}'")
            if model == "PAC":
                log10Cerr = int(best[["log10Cerr"]].values[0, 0])
                hparam_tag = f"log10Cerr={log10Cerr}"
            elif model == "FSOL":
                log2eta, log10lmbda = best[["log2eta", "log10lmbda"]].values[0]
                hparam_tag = f"log2eta={log2eta}_log10lmbda={log10lmbda}"

            # per seed (5 seeds): last logged sparsity of the base model.
            # NO CUMMAX CONCEPT FOR SPARSITY!
            base_vals = []
            for seed in range(5):
                logs_inst = pd.read_csv(f"../hparam_tuning/results/{model}/{dataset}/"
                                        f"model={model}_{hparam_tag}_seed={seed}_metrics.csv")
                base_vals.append(logs_inst["inst_sparsity"].values[-1])

            # the base model's final sparsity gets its own error bar at x=0. Half-widths are
            # clamped at 0 so float rounding can never hand errorbar() a negative value.
            base_mean = np.mean(base_vals)
            panel.scatter(x=[0], y=[base_mean], color="black", s=12)
            eb = panel.errorbar(x=[0], y=[base_mean],
                                yerr=[[np.maximum(base_mean - np.min(base_vals), 0)],
                                      [np.maximum(np.max(base_vals) - base_mean, 0)]],
                                color="black", capsize=3)

            # horizontal offsets of the four variants drawn around each K tick
            offsets = [-1/2, -1/6, +1/6, +1/2]

            # let's iterate thru our variants
            for j, K in enumerate([1, 4, 16, 64]):

                # rows of the master log for this (dataset, model, K, weight_scheme);
                # shared by all four variants below
                rows = master.query((f"dataset == '{dataset}' and model == '{model}' and "
                                     f"K == {K} and weight_scheme == '{weight_scheme}'"))

                # position of the current variant within this K group
                counter = 0

                # plot all four error bars for this value of K
                for AS in ["SA", "WA"]:
                    for VZ in ["", "_VZ"]:

                        # line + color settings
                        color = "blue" if AS == "SA" else "red"

                        # what are the values at play for this variant?
                        vals = rows[f"fin_sparsity_{AS}{VZ}"].values
                        center = np.mean(vals)
                        x_pos = (2*(j+1)) + offsets[counter]

                        # point at the mean, error bars out to the min & max (clamped at 0)
                        panel.scatter(x=[x_pos], y=[center], color=color, s=12)
                        eb = panel.errorbar(x=[x_pos], y=[center],
                                            yerr=[[np.maximum(center - np.min(vals), 0)],
                                                  [np.maximum(np.max(vals) - center, 0)]],
                                            color=color, capsize=3)

                        # voting-based zeroing variants get a dashed bar
                        if VZ == "_VZ":
                            eb[-1][0].set_linestyle('--')

                        # increment our counter
                        counter += 1

            # more beautifying
            panel.set_xticks([0, 2, 4, 6, 8])
            panel.set_xticklabels([model, "K=1", "K=4", "K=16", "K=64"])
            panel.tick_params("both", labelsize=12)

        # custom legend
        custom_lines = [Line2D([0], [0], color="blue", linestyle=None,
                               label="Simple Average"),
                        Line2D([0], [0], color="blue", linestyle="--",
                               label='Simple Average + Voting-Based Zeroing'),
                        Line2D([0], [0], color="red", linestyle=None,
                               label="Weighted Average"),
                        Line2D([0], [0], color="red", linestyle='--',
                               label="Weighted Average + Voting-Based Zeroing"),
                        Line2D([0], [0], color="black", linestyle=None,
                               label=model)]
        fig.legend(handles=custom_lines, loc="lower center", ncol=3, fontsize=12, bbox_to_anchor=(0.5, -0.075))

        # beautify, save, show
        plt.tight_layout()
        plt.savefig(f"figures/errorbars/model={model}_ws={weight_scheme}_sparsity.png",
                    facecolor="white", bbox_inches="tight")
        plt.show()

        # release the figure so the four large figures of this loop do not pile up in memory
        plt.close(fig)

# Main Text Final Sparsity Figure - FSOL + 3 Datasets

In [None]:
# Main-text exhibit: final sparsity of FSOL (and its averaged variants) on three datasets.
model, exhibition_datasets = "FSOL", ["avazu-site_binary_sparse", "kdd2010-a_binary_sparse", "sst2_binary_sparse"]
weight_scheme = "dense"


def _minmax_yerr(values):
    """Return [[mean - min], [max - mean]] for `values`, each side clipped at zero."""
    center = np.mean(values)
    below = np.maximum(center - np.min(values), 0)
    above = np.maximum(np.max(values) - center, 0)
    return [[below], [above]]


# horizontal offsets that spread the four variants around each K tick
offsets = [-1/2, -1/6, +1/6, +1/2]

# (averaging scheme, zeroing suffix) pairs, in plotting order
variants = [(AS, VZ) for AS in ["SA", "WA"] for VZ in ["", "_VZ"]]

# a single row of three panels
fig, ax = plt.subplots(1, 3, dpi=200, figsize=(14*(3/4), 12/4))

for i, dataset in enumerate(exhibition_datasets):

    # grid + human-readable title for this panel
    panel = ax[i]
    panel.grid()
    panel.set_title(dataset_descs[dataset], fontsize=15)

    # last logged instantaneous sparsity of the base model, one value per seed
    base_vals = []

    # work out which per-seed metrics files belong to the tuned base model
    if model == "PAC":

        # tuned hyperparameter for this dataset, cast to int for the file name
        log10Cerr = int(pd.read_csv("base_variants/PAC_hparams.csv")\
                        .query(f"dataset == '{dataset}'")[["log10Cerr"]].values[0,0])
        metric_files = [f"../hparam_tuning/results/{model}/{dataset}/model={model}_log10Cerr={log10Cerr}_seed={seed}_metrics.csv"
                        for seed in range(5)]

    elif model == "FSOL":

        # both tuned values come from one two-column `.values` row and are
        # formatted into the file name exactly as read
        log2eta, log10lmbda = pd.read_csv("base_variants/FSOL_hparams.csv")\
        .query(f"dataset == '{dataset}'")[["log2eta", "log10lmbda"]].values[0]
        metric_files = [f"../hparam_tuning/results/{model}/{dataset}/model={model}_log2eta={log2eta}_log10lmbda={log10lmbda}_seed={seed}_metrics.csv"
                        for seed in range(5)]

    else:
        metric_files = []

    for metrics_path in metric_files:
        logs_inst = pd.read_csv(metrics_path)
        base_vals.append(logs_inst["inst_sparsity"].values[-1])

    # the base model gets its own marker + min/max error bar at x=0
    base_mean = np.mean(base_vals)
    panel.scatter(x=[0], y=[base_mean], color="black", s=12)
    panel.errorbar(x=[0], y=[base_mean], yerr=_minmax_yerr(base_vals),
                   color="black", capsize=3)

    # one cluster of four error bars per value of K
    for j, K in enumerate([1, 4, 16, 64]):

        # rows of the master log for this dataset / model / K / weight scheme
        selection = master.query((f"dataset == '{dataset}' and model == '{model}' and "
                                  f"K == {K} and weight_scheme == '{weight_scheme}'"))

        for counter, (AS, VZ) in enumerate(variants):

            # blue for simple averaging, red for weighted averaging
            color = "blue" if AS == "SA" else "red"

            # final sparsity values recorded for this variant
            vals = selection[f"fin_sparsity_{AS}{VZ}"].values

            # mean as a dot, min/max as the error bar
            x_pos = (2*(j+1)) + offsets[counter]
            panel.scatter(x=[x_pos], y=[np.mean(vals)], color=color, s=12)
            bars = panel.errorbar(x=[x_pos], y=[np.mean(vals)], yerr=_minmax_yerr(vals),
                                  color=color, capsize=3)

            # the voting-based-zeroing variants are drawn with dashed bars
            if VZ == "_VZ":
                bars[-1][0].set_linestyle('--')

    # tick positions: base model at 0, then one tick per K
    panel.set_xticks([0, 2, 4, 6, 8])
    panel.set_xticklabels([model, "K=1", "K=4", "K=16", "K=64"])
    panel.tick_params("both", labelsize=12)

# shared legend built from (color, linestyle, label) triples
legend_spec = [("blue", None, "Simple Average"),
               ("blue", "--", 'Simple Average + Voting-Based Zeroing'),
               ("red", None, "Weighted Average"),
               ("red", '--', "Weighted Average + Voting-Based Zeroing"),
               ("black", None, model)]
custom_lines = [Line2D([0], [0], color=line_color, linestyle=line_style, label=line_label)
                for line_color, line_style, line_label in legend_spec]
fig.legend(handles=custom_lines, loc="lower center", ncol=3, fontsize=10, bbox_to_anchor=(0.5, -0.15))

# tidy up, write to disk, and display
plt.tight_layout()
plt.savefig("figures/exhibition-sparsity.png",
            facecolor="white", bbox_inches="tight")
plt.show()

# Process Some Logs on Baseline Methods (allows us to derive the oracle)

In [None]:
# Summarise the tuned base models (PAC, FSOL): one row per dataset + model + seed,
# written to logs/baseline_logs.csv.
baseline_columns = ["dataset", "model", "seed", "fin_test_acc_inst",
                    "fin_test_hinge_inst", "fin_sparsity_inst", "L1_inst"]

# collect plain rows and build the dataframe once at the end
# (avoids growing a DataFrame one .loc row at a time)
baseline_rows = []

# go thru each model + dataset + seed
for model in ["PAC", "FSOL"]:

    # the tuned-hyperparameter table only depends on the model, so read it once
    hparams = pd.read_csv(f"base_variants/{model}_hparams.csv")

    for dataset in dataset_descs.keys():

        # best hyperparameters for this dataset + model (same for every seed)
        best = hparams.query(f"dataset == '{dataset}'")

        if model == "PAC":

            # cast to int so the value matches the metrics file name
            log10Cerr = int(best[["log10Cerr"]].values[0,0])
            run_tag = f"model={model}_log10Cerr={log10Cerr}"

        elif model == "FSOL":

            # both values are taken from one two-column `.values` row and are
            # formatted into the file name exactly as read
            log2eta, log10lmbda = best[["log2eta", "log10lmbda"]].values[0]
            run_tag = f"model={model}_log2eta={log2eta}_log10lmbda={log10lmbda}"

        for seed in range(5):

            # per-seed metrics log of the tuned base model
            logs_inst = pd.read_csv(f"../hparam_tuning/results/{model}/{dataset}/{run_tag}_seed={seed}_metrics.csv")

            test_acc = logs_inst["inst_test-set-acc"]

            # L1_inst: mean gap between the running max of the instantaneous
            # test accuracy and the instantaneous test accuracy, skipping the first row
            l1_inst = (test_acc.cummax().values[1:] - test_acc.values[1:]).mean()

            # final (last-row) metrics for this run
            baseline_rows.append([dataset, model, seed,
                                  test_acc.values[-1],
                                  logs_inst["inst_test-set-hinge"].values[-1],
                                  logs_inst["inst_sparsity"].values[-1],
                                  l1_inst])

# create our dataframe in one shot
baseline_logs = pd.DataFrame(baseline_rows, columns=baseline_columns)

# save our file as a .csv
baseline_logs.to_csv("logs/baseline_logs.csv", index=False)