In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys, copy, os, shutil
from tqdm.notebook import tqdm
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from matplotlib.ticker import MaxNLocator

# make a directory to store figures; exist_ok makes re-running this cell a no-op
# and avoids the separate "list the directory, then create" check
os.makedirs("figures", exist_ok=True)
    
# maps each dataset's identifier (as used in the results/ paths) to the
# human-readable name shown in figure titles
dataset_descs = {
    "avazu-app_binary_sparse": "Avazu (App)",
    "avazu-site_binary_sparse": "Avazu (Site)",
    "criteo_binary_sparse": "Criteo",
    "dexter_binary_sparse": "Dexter",
    "dorothea_binary_sparse": "Dorothea",
    "kdd2010-a_binary_sparse": "KDD2010 (Algebra)",
    "mnist8-4+9_binary_sparse": "MNIST8 (4+9)",
    "news20_binary_sparse": "News20",
    "newsgroups_binary_sparse": "Newsgroups (Binary, CS)",
    "pcmac_binary_sparse": "PCMAC",
    "rcv1_binary_sparse": "RCV1",
    "real-sim_binary_sparse": "Real-Sim",
    "sst2_binary_sparse": "SST-2",
    "url_binary_sparse": "URL",
    "w8a_binary_sparse": "W8A",
    "webspam_binary_sparse": "Webspam",
}

# Generate a .csv summarizing all results, then check relative performance vs. the base models.

In [None]:
# create a directory for logs (no-op when it already exists)
os.makedirs("logs", exist_ok=True)

# column layout of the master table; every row below is assembled in exactly this order
columns = ["dataset", "model", "weight_scheme", "K", "seed",
           "fin_test_acc_WA", "fin_test_hinge_WA", "fin_sparsity_WA", "L1_WA",
           "fin_test_acc_WA_VZ", "fin_test_hinge_WA_VZ", "fin_sparsity_WA_VZ", "L1_WA_VZ",
           "fin_test_acc_SA", "fin_test_hinge_SA", "fin_sparsity_SA", "L1_SA",
           "fin_test_acc_SA_VZ", "fin_test_hinge_SA_VZ", "fin_sparsity_SA_VZ", "L1_SA_VZ",
           "L1_inst"]

# rows are collected in a plain list and converted to a dataframe once at the end,
# instead of enlarging the dataframe one row at a time
rows = []

# master table of results on our finished datasets
for model in ["PAC", "FSOL"]:

    # best hyperparameters for this base model; read once and looked up by dataset below
    hparams = pd.read_csv(f"base_variants/{model}_hparams.csv")

    for dataset in tqdm(list(dataset_descs.keys())):

        # get all filenames that are .csv and correspond to this model + dataset
        fnames = sorted(f for f in os.listdir(f"results/{model}/{dataset}") if f.endswith(".csv"))
        if not fnames:
            continue

        # the instantaneous (base-model) logs are identified by the tuned hyperparameters,
        # which depend only on (model, dataset) -- resolve them once per dataset
        best = hparams.query(f"dataset == '{dataset}'")
        if model == "PAC":
            log10Cerr = int(best[["log10Cerr"]].values[0, 0])
            inst_prefix = f"model={model}_log10Cerr={log10Cerr}"
        else:  # FSOL
            log2eta, log10lmbda = best[["log2eta", "log10lmbda"]].values[0]
            inst_prefix = f"model={model}_log2eta={log2eta}_log10lmbda={log10lmbda}"
        inst_dir = f"../hparam_tuning/results/{model}/{dataset}"

        # several variants (weight scheme, K) share the same seed, so cache the
        # instantaneous accuracy curve and its running maximum per seed
        inst_by_seed = {}

        # iterate thru all these filenames
        for fname in fnames:

            # unpack our settings from "model=..._ws=..._K=..._seed=..._metrics.csv";
            # the model parsed from the filename is kept under its own name so that it
            # does not overwrite the outer loop variable
            fname_model, weight_scheme, K, seed = [s.split("=")[1] for s in fname.split("_")[:-1]]
            K, seed = int(K), int(seed)

            # instantaneous test accuracy + its cumulative max. for this seed
            if seed not in inst_by_seed:
                logs_inst = pd.read_csv(f"{inst_dir}/{inst_prefix}_seed={seed}_metrics.csv")
                inst_test_accs = logs_inst["inst_test-set-acc"]
                inst_by_seed[seed] = (inst_test_accs, inst_test_accs.cummax())
            inst_test_accs, cm_inst_test_accs = inst_by_seed[seed]

            # load in the logs for this variant
            logs = pd.read_csv(f"results/{model}/{dataset}/{fname}")

            # start our row
            row = [dataset, model, weight_scheme, K, seed]

            # get the metrics that we are interested in
            for a_type in ["WA", "SA"]:
                for v_type in ["", "_VZ"]:

                    # final-timestep accuracy, hinge loss and sparsity for this variant
                    row += list(logs[[f"WRS_test-set-acc_{a_type}{v_type}",
                                      f"WRS_test-set-hinge_{a_type}{v_type}",
                                      f"WRS_sparsity_{a_type}{v_type}"]].iloc[-1].values)

                    # L1 metric: mean gap between the cumulative-max instantaneous accuracy
                    # and this variant's accuracy, skipping the first logged entry
                    test_accs = logs[f"WRS_test-set-acc_{a_type}{v_type}"]
                    row.append((cm_inst_test_accs.iloc[1:] - test_accs.iloc[1:]).mean())

            # also need to store the L1 metric of the instantaneous solution
            row.append((cm_inst_test_accs.iloc[1:] - inst_test_accs.iloc[1:]).mean())

            rows.append(row)

# build the dataframe in one go and write it out at the very end
master = pd.DataFrame(rows, columns=columns)
master.to_csv("logs/master.csv", index=False)

In [None]:
# for each model, check how often we can beat the baselines in terms of L1, using K=64

# average every metric over seeds once; the result is reused for every model / weight scheme
seed_avg = master.groupby(["dataset", "model", "weight_scheme", "K"]).mean().reset_index()

# (weight_scheme value in the table, wording used in the printed summary)
weight_schemes = [("exp-dense", "exponential"), ("dense", "standard")]

# (wording used in the printed summary, L1 column of that averaging type)
averaging_types = [("Simple-Average", "L1_SA"), ("Weighted-Average", "L1_WA")]

for model in ["PAC", "FSOL"]:

    # what model are we using?
    print(f"Model: {model} (K=64)")

    # running number of the printed statement (1-4 per model)
    statement_no = 0

    for weight_scheme, scheme_desc in weight_schemes:

        # per-dataset averages for this model + weight scheme at K=64
        q = seed_avg.sort_values(by="L1_SA")\
        .query(f"model == '{model}' and K == 64 and weight_scheme == '{weight_scheme}'")\
        [["dataset", "model", "weight_scheme", "K", "L1_SA", "L1_WA", "L1_inst"]]

        # report against the number of datasets actually compared rather than a fixed count
        num_datasets = len(q)

        # how often did we beat the base model? (smaller L1 than the instantaneous solution)
        for avg_desc, l1_col in averaging_types:
            statement_no += 1
            num_outperform = ((q.L1_inst - q[l1_col]) > 0).sum()
            print(f"{statement_no}. {avg_desc} WRS with {scheme_desc} weights stabilized baseline "
                  f"in {num_outperform} of {num_datasets} datasets.")

# Exhibition figure: 2x2 grid of Avazu (App) and News20 for PAC and FSOL.

In [None]:
# the two datasets highlighted in the exhibition figure
showcase_datasets = ["avazu-app_binary_sparse", "news20_binary_sparse"]

# 2x2 grid at K=64: one row per base model, one column per showcase dataset
fig, ax = plt.subplots(2, 2, dpi=200, figsize=(10, 7.5))

for i, model in enumerate(["PAC", "FSOL"]):
    for j, dataset in enumerate(showcase_datasets):

        # the axes this (model, dataset) pair is drawn on
        panel = ax[i, j]

        # instantaneous (base-model) logs for seed=0, located via the tuned hyperparameters
        if model == "PAC":
            pac_hparams = pd.read_csv("base_variants/PAC_hparams.csv")
            log10Cerr = int(pac_hparams.query(f"dataset == '{dataset}'")[["log10Cerr"]].values[0,0])
            logs_inst = pd.read_csv(f"../hparam_tuning/results/{model}/{dataset}/model={model}_log10Cerr={log10Cerr}_seed=0_metrics.csv")
        elif model == "FSOL":
            fsol_hparams = pd.read_csv("base_variants/FSOL_hparams.csv")
            log2eta, log10lmbda = fsol_hparams.query(f"dataset == '{dataset}'")[["log2eta", "log10lmbda"]].values[0]
            logs_inst = pd.read_csv(f"../hparam_tuning/results/{model}/{dataset}/model={model}_log2eta={log2eta}_log10lmbda={log10lmbda}_seed=0_metrics.csv")

        # base model in faded grey, its running maximum ("Oracle") in black
        timesteps_inst = logs_inst["timestep"]
        inst_acc = logs_inst["inst_test-set-acc"]
        panel.plot(timesteps_inst, inst_acc, alpha=0.4, color="grey", label=model)
        panel.plot(timesteps_inst, inst_acc.cummax(), color="black", label="Oracle")

        # simple-average WRS variant with dense weights, K=64, seed=0
        logs = pd.read_csv(f"results/{model}/{dataset}/model={model}_ws=dense_K=64_seed=0_metrics.csv")
        panel.plot(logs["timestep"], logs["WRS_test-set-acc_SA"], label=f"{model}-WRS", color="blue")

        # cosmetics: model name labels the left column, dataset name titles the top row
        panel.set_ylim(bottom=0.4)
        panel.grid()
        if j == 0:
            panel.set_ylabel(model, fontsize=18)
        if i == 0:
            panel.set_title(dataset_descs[dataset], fontsize=18)
        panel.tick_params("both", labelsize=12)
        panel.legend(loc="lower right")

# tidy the layout, save to disk, then display
plt.tight_layout()
plt.savefig("figures/exhibition.png", facecolor="white", bbox_inches="tight")
plt.show()

# Generate test-accuracy-over-time and sparsity-over-time figures for each dataset, using $K=64$.

In [None]:
# make directory for K=64 figures (no-op if present; also creates "figures" if it is missing)
os.makedirs("figures/K=64", exist_ok=True)

# only K=64 is shown: one 4x4 figure per (model, weight scheme, metric) combination
for model in ["FSOL", "PAC"]:

    # best hyperparameters for this base model; read once and looked up by dataset below
    hparams = pd.read_csv(f"base_variants/{model}_hparams.csv")

    # the instantaneous (base-model) logs for seed=0 depend only on (model, dataset),
    # so load them once here and reuse them for every weight scheme and metric
    inst_logs = {}
    for dataset in dataset_descs.keys():
        best = hparams.query(f"dataset == '{dataset}'")
        if model == "PAC":
            log10Cerr = int(best[["log10Cerr"]].values[0, 0])
            inst_fname = f"model={model}_log10Cerr={log10Cerr}_seed=0_metrics.csv"
        else:  # FSOL
            log2eta, log10lmbda = best[["log2eta", "log10lmbda"]].values[0]
            inst_fname = f"model={model}_log2eta={log2eta}_log10lmbda={log10lmbda}_seed=0_metrics.csv"
        inst_logs[dataset] = pd.read_csv(f"../hparam_tuning/results/{model}/{dataset}/{inst_fname}")

    for weight_scheme in ["dense", "exp-dense"]:
        for metric in ["test-set-acc", "sparsity"]:

            # the cumulative-max "Oracle" curve is only drawn for accuracy, not for sparsity
            show_oracle = metric != "sparsity"

            # start our figure
            fig, ax = plt.subplots(4, 4, dpi=200, figsize=(14, 12))

            # one panel per dataset, filled row by row
            for i, dataset in enumerate(dataset_descs.keys()):
                panel = ax[i // 4, i % 4]
                logs_inst = inst_logs[dataset]

                # load + plot the various WRS variants
                logs = pd.read_csv(f"results/{model}/{dataset}/model={model}_ws={weight_scheme}_K=64_seed=0_metrics.csv")
                panel.plot(logs["timestep"], logs[f"WRS_{metric}_SA"],
                           label=f"{model}-WRS (Simple Average)", color="blue", linewidth=0.75)
                panel.plot(logs["timestep"], logs[f"WRS_{metric}_SA_VZ"],
                           label=f"{model}-WRS (Simple Average + Voting-Based Zeroing)", color="blue", linestyle="--", linewidth=0.75)
                panel.plot(logs["timestep"], logs[f"WRS_{metric}_WA"],
                           label=f"{model}-WRS (Weighted Average)", color="red", linewidth=0.75)
                panel.plot(logs["timestep"], logs[f"WRS_{metric}_WA_VZ"],
                           label=f"{model}-WRS (Weighted Average + Voting-Based Zeroing)", color="red", linestyle="--", linewidth=0.75)

                # plot the instantaneous metric + (for accuracy only) its cumulative max. (Oracle)
                panel.plot(logs_inst["timestep"], logs_inst[f"inst_{metric}"], alpha=0.4, color="grey", label=model)
                if show_oracle:
                    panel.plot(logs_inst["timestep"], logs_inst[f"inst_{metric}"].cummax(),
                               color="black", label="Oracle", linewidth=1.0)

                # do some beautifying
                panel.grid()
                panel.tick_params("both", labelsize=10)
                if show_oracle:
                    panel.set_ylim(bottom=0.6)
                panel.set_title(dataset_descs[dataset], fontsize=13.5)
                panel.xaxis.set_major_locator(MaxNLocator(nbins=4))

            # custom figure-level legend; it lists only the curves that are actually drawn,
            # so "Oracle" is left out of the sparsity figures
            custom_lines = [Line2D([0], [0], color="blue", linestyle=None,
                                   label=f"{model}-WRS (Simple Average)"),
                            Line2D([0], [0], color="blue", linestyle="--",
                                   label=f"{model}-WRS (Simple Average + Voting-Based Zeroing)"),
                            Line2D([0], [0], color="red", linestyle=None,
                                   label=f"{model}-WRS (Weighted Average)"),
                            Line2D([0], [0], color="red", linestyle='--',
                                   label=f"{model}-WRS (Weighted Average + Voting-Based Zeroing)"),
                            Line2D([0], [0], color="grey", linestyle=None,
                                   label=model)]
            if show_oracle:
                custom_lines.append(Line2D([0], [0], color="black", linestyle=None,
                                           label="Oracle"))
            fig.legend(handles=custom_lines, loc="lower center", ncol=3, fontsize=13.5, bbox_to_anchor=(0.5, -0.05))

            # beautify at the end
            plt.tight_layout()
            plt.savefig(f"figures/K=64/{model}_ws={weight_scheme}_metric={metric}.png", facecolor="white",
                        bbox_inches="tight")
            plt.show()

            # release the figure explicitly; eight 4x4 figures are produced by this loop
            plt.close(fig)