# NAG Experiments

Getting the data

In [None]:
import datautils
import pandas as pd
import modelutils as mu
import nesterov
import numpy as np
import matplotlib.pyplot as plt
import time
import pickle
import itertools

In [None]:
# Path of the training-set CSV that is loaded into `df_cup`.
DATASET = "Dataset/ML-CUP23-TR.csv"
# Output locations. They are used as plain string prefixes when file names
# are built, hence the trailing slash.
RESULTS = "Results/"  # CSV tables with the experiment results
PLOT = "Plots/"  # loss-curve images
RUNS = "FullRuns/"  # pickled models and full loss histories

In [None]:
# Load the dataset, skipping the first 6 lines of the file, and give the
# identifier column the short name "ID". The rename is chained on the freshly
# read frame instead of being applied in place; `df_cup` ends up the same.
df_cup = pd.read_csv(DATASET, skiprows=6).rename(
    columns={"# Training set: ID": "ID"}
)

In [None]:
# Print a summary of the loaded frame (columns, non-null counts, dtypes).
df_cup.info()

In [None]:
# Build the test, training and validation sets with two successive hold-outs.
# Both constants are forwarded as the second argument of datautils.hold_out.
DESIGN_SIZE = 0.8
TRAIN_SIZE = 0.8


def _features_and_targets(frame):
    """Return the (features, targets) pair of *frame* and print both shapes."""
    features, targets = datautils.obtain_features_targets(frame)
    print(features.shape, targets.shape)
    return features, targets


# first split: design set vs. test set
df_design, df_test = datautils.hold_out(df_cup, DESIGN_SIZE)

# sanity check: no ID may appear in both the design and the test set
assert set(df_design["ID"]).isdisjoint(df_test["ID"])

# TODO: decide whether to use k-fold
# second split: training set vs. validation set, both taken from the design set
df_train, df_val = datautils.hold_out(df_design, TRAIN_SIZE)

df_train.info()

X_train, y_train = _features_and_targets(df_train)
X_val, y_val = _features_and_targets(df_val)
X_test, y_test = _features_and_targets(df_test)

# model dimensions: size of the second axis of the feature and target arrays
input_size, output_size = X_train.shape[1], y_train.shape[1]

## General experiments for algorithm comparison

In [None]:
# Results accumulator: one empty list per recorded column, in the order the
# columns are listed here. The comprehension creates a separate list per key.
results_dict = {
    column: []
    for column in (
        "Hidden size",
        "Mu",
        "Beta",
        "Epsilon",
        "Alpha",
        "Seed",
        "Initialization type",
        "Train",
        "Validation",
        "Time",
        "Epochs",
    )
}

In [None]:
# hyperparameters
# Grid explored by the comparison experiment: the training loop iterates over
# the Cartesian product of the five lists below.

# hidden-layer sizes given to mu.ELM
hidden_size_list = [50, 100]
# values passed as `lr` to nesterov.nag; the string modes are interpreted
# there -- NOTE(review): their exact meaning is not visible in this notebook
lr_list = ["auto", "col"]
# values passed as `beta` to nesterov.nag ("schedule" is likewise a mode
# handled by nag -- TODO confirm)
beta_list = ["schedule", 0, 0.3, 0.6, 0.9, 0.99]
# values passed as `alpha` to nesterov.nag
alpha_list = [1e-8]
# values passed as `eps` to nesterov.nag
epsilon_list = [1e-2]

In [None]:
# Grid search: for every hyperparameter combination, train one ELM per seed
# with nesterov.nag and write one CSV with the per-seed results. Runs that
# keep their full loss history are additionally pickled and plotted.

# Learning rate of the most recent run that finished without problems
# (None until a run succeeds).
worked = None
for HIDDEN_SIZE, LEARNING_RATE, BETA, ALPHA, EPSILON in itertools.product(
    hidden_size_list, lr_list, beta_list, alpha_list, epsilon_list
):

    # initialize dictionary for results (one list per CSV column)
    results_dict = {
        "Hidden size": [],
        "Mu": [],
        "Beta": [],
        "Epsilon": [],
        "Alpha": [],
        "Seed": [],
        "Initialization type": [],
        "Train": [],
        "Validation": [],
        "Time": [],
        "Epochs": [],
    }

    # A higher learning rate has already been tested and it converged: skip
    # the lower one. The ordering only makes sense between numbers, so BOTH
    # sides are checked; comparing a number with a string mode such as
    # "auto" stored in `worked` would raise TypeError.
    if (
        worked is not None
        and not isinstance(LEARNING_RATE, str)
        and not isinstance(worked, str)
        and LEARNING_RATE < worked
    ):
        continue

    # common stem of every file written for this combination
    run_tag = f"ELM_{HIDDEN_SIZE}_{LEARNING_RATE}_{BETA}_{ALPHA}_{EPSILON}"

    # loop over seeds
    for seed in range(5):

        # initialize model
        model = mu.ELM(input_size, HIDDEN_SIZE, output_size, seed=seed)

        # measure time (process CPU time, not wall-clock time)
        start = time.process_time()

        # train model
        (
            final_model,
            loss_train_history,
            loss_val_history,
            epochs,
            has_problem,
        ) = nesterov.nag(
            model,
            X_train,
            y_train,
            X_val,
            y_val,
            lr=LEARNING_RATE,
            alpha=ALPHA,
            beta=BETA,
            max_epochs=2000000,
            eps=EPSILON,
            # fast_mode=True,
            # verbose=True,
        )

        end = time.process_time()

        # a run flagged as problematic is reported and not recorded
        if has_problem:
            print(
                f"Problem with ELM {HIDDEN_SIZE} {LEARNING_RATE} {BETA} {ALPHA} {EPSILON} {seed}"
            )
            continue

        worked = LEARNING_RATE

        # save results (final losses are the last entries of the histories)
        results_dict["Hidden size"].append(HIDDEN_SIZE)
        results_dict["Mu"].append(LEARNING_RATE)
        results_dict["Beta"].append(BETA)
        results_dict["Alpha"].append(ALPHA)
        results_dict["Epsilon"].append(EPSILON)
        results_dict["Seed"].append(seed)
        results_dict["Initialization type"].append("fan-in")
        results_dict["Train"].append(loss_train_history[-1])
        results_dict["Validation"].append(loss_val_history[-1])
        results_dict["Time"].append(end - start)
        results_dict["Epochs"].append(epochs)

        # guard is true if not using fast_mode (i.e. all iterations are saved)
        if len(loss_train_history) > 1:
            # save all iterations using pickle
            with open(f"{RUNS}{run_tag}_{seed}.pkl", "wb") as f:
                pickle.dump(
                    {
                        "model": final_model,
                        "loss_train_history": loss_train_history,
                        "loss_val_history": loss_val_history,
                    },
                    f,
                )

            # create plots (x axis: 1-based iteration index)
            iterations = np.arange(1, len(loss_train_history) + 1)
            plt.plot(iterations, loss_train_history, label="Train")
            plt.plot(iterations, loss_val_history, label="Validation")
            plt.legend()
            # set axis scale
            plt.xscale("log")
            plt.yscale("log")
            # set title and axis labels
            plt.title(
                f"ELM {HIDDEN_SIZE} {LEARNING_RATE} {BETA} {ALPHA} {EPSILON} {seed}"
            )
            plt.xlabel("Epochs")
            plt.ylabel("Loss")
            plt.savefig(f"{PLOT}{run_tag}_{seed}.png")
            plt.show()
            # Release the figure explicitly: with a non-interactive backend
            # plt.show() does not close it, and the next run's curves would
            # be drawn on top of this one.
            plt.close()

    # no results obtained because every seed reported a problem
    if not results_dict["Seed"]:
        continue

    df_results = pd.DataFrame(results_dict)
    df_results.to_csv(f"{RESULTS}{run_tag}.csv", index=False)

## Experiments for initialization comparisons

In [None]:
# Results accumulator for the initialization comparison: every column name
# below maps to its own, initially empty, list.
results_dict = {
    column: []
    for column in (
        "Hidden size",
        "Mu",
        "Beta",
        "Epsilon",
        "Alpha",
        "Seed",
        "Initialization type",
        "Train",
        "Validation",
        "Time",
        "Epochs",
    )
}

In [None]:
# hyperparameters
# Single fixed configuration shared by every run of the initialization
# comparison. HIDDEN_SIZE is given to mu.ELM; the others are passed to
# nesterov.nag as `lr`, `beta`, `alpha` and `eps` respectively.
HIDDEN_SIZE = 1000
LEARNING_RATE = "auto"
BETA = 0
ALPHA = 1e-8
EPSILON = 1e-10

In [None]:
# Initialization comparison: for each initialization type, train one ELM per
# seed with the fixed hyperparameters, record the final losses and CPU time,
# pickle the full run, plot the loss curves, and finally write every row to a
# single CSV.

for initialization in ["fan-in", "std"]:
    for seed in range(10):
        # initialize the model
        model = mu.ELM(
            input_size,
            HIDDEN_SIZE,
            output_size,
            seed=seed,
            init=initialization,
        )

        # measure the time (process CPU time, not wall-clock time)
        start_time = time.process_time()

        # train model
        (
            final_model,
            loss_train_history,
            loss_val_history,
            epochs,
            has_problem,
        ) = nesterov.nag(
            model,
            X_train,
            y_train,
            X_val,
            y_val,
            lr=LEARNING_RATE,
            alpha=ALPHA,
            beta=BETA,
            max_epochs=1000000,
            eps=EPSILON,
            # fast_mode=True,
            # verbose=True,
        )

        end_time = time.process_time()

        # Do not discard the problem flag silently: report it so that a
        # suspicious row in the results can be traced back to its run. The
        # run is still recorded, so the contents of the CSV are unchanged.
        if has_problem:
            print(
                f"Problem with ELM {initialization} {HIDDEN_SIZE} {LEARNING_RATE} {BETA} {ALPHA} {EPSILON} {seed}"
            )

        # save the results (final losses are the last entries of the histories)
        results_dict["Hidden size"].append(HIDDEN_SIZE)
        results_dict["Mu"].append(LEARNING_RATE)
        results_dict["Beta"].append(BETA)
        results_dict["Epsilon"].append(EPSILON)
        results_dict["Alpha"].append(ALPHA)
        results_dict["Seed"].append(seed)
        results_dict["Initialization type"].append(initialization)
        results_dict["Train"].append(loss_train_history[-1])
        results_dict["Validation"].append(loss_val_history[-1])
        results_dict["Time"].append(end_time - start_time)
        results_dict["Epochs"].append(epochs)

        # save all iterations using pickle
        # NOTE(review): this file is written to the current working directory,
        # not under RUNS like the pickles of the comparison experiment --
        # confirm whether that is intended before changing the path.
        with open(
            f"{initialization}_{seed}.pkl",
            "wb",
        ) as f:
            pickle.dump(
                {
                    "model": final_model,
                    "loss_train_history": loss_train_history,
                    "loss_val_history": loss_val_history,
                },
                f,
            )

        # plot results (x axis: 1-based iteration index)
        iterations = np.arange(1, len(loss_train_history) + 1)
        plt.plot(iterations, loss_train_history, label="Train")
        plt.plot(iterations, loss_val_history, label="Validation")
        plt.legend()
        # set title and labels
        plt.title(f"Loss {initialization} {seed}")
        plt.xlabel("Epoch")
        plt.ylabel("Loss")
        # set axis scale
        plt.xscale("log")
        plt.yscale("log")
        # fix y axis to have comparable plots
        plt.ylim(0.001, 1000)

        # save the plot
        plt.savefig(PLOT + f"loss_{initialization}_{seed}.png")
        plt.show()
        # Release the figure explicitly: with a non-interactive backend
        # plt.show() does not close it, and the next run's curves would be
        # drawn on top of this one.
        plt.close()


results = pd.DataFrame(results_dict)
results.to_csv(
    RESULTS + f"results_initialization_{HIDDEN_SIZE}.csv", index=False
)

In [None]:
# Per-initialization summary: mean and standard deviation of the final
# training loss, the final validation loss and the measured training time.
summary_columns = ["Train", "Validation", "Time"]
results.groupby("Initialization type")[summary_columns].agg(["mean", "std"])