In [None]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd
import plotly.express as px

# Make the working directory the one that contains `src/` so the `src.*`,
# `config.*` and `scripts.*` imports below resolve. If `src` is not listed in the
# current directory, step up exactly one level.
# NOTE(review): presumably the notebook lives one level below the repo root — confirm.
if 'src' not in os.listdir('.'):
    os.chdir('../')

import src.utils.models as M
import src.utils.constants as C
import src.utils.evaluation as E
from src.utils.runner import Runner
from config.debugger_config import debugger_experiments
from config.aggregate_config import aggregate_experiments
from config.increment_config import increment_experiments
from scripts.run_experiment import run_experiment
from src.utils.dataframe import build_metadata_df, build_df

In [None]:
import optuna


def tune_hyperparameters(config: M.ExperimentConfig, n_trials: int = 50, seed=None):
    """Tune hyperparameters for ``config`` with Optuna by minimising RMSE.

    Every trial samples one hyperparameter set, passes it to
    ``run_experiment(config, hyperparams)`` and is scored with the ``C.RMSE``
    entry of the returned result.

    Args:
        config: Experiment configuration, forwarded unchanged to
            ``run_experiment`` on every trial.
        n_trials: Number of Optuna trials to run. Defaults to 50.
        seed: Optional integer seed for the Optuna sampler. ``None`` (the
            default) leaves Optuna's default, unseeded sampler in place. Only
            the hyperparameter search is seeded; randomness inside
            ``run_experiment`` is not controlled here.

    Returns:
        A tuple ``(best_hyperparams, trials)`` where ``best_hyperparams`` is
        ``study.best_params`` and ``trials`` is ``study.trials``.
    """
    # Only override the sampler when a seed is requested, so an unseeded call
    # keeps using whatever sampler Optuna picks by default.
    sampler = optuna.samplers.TPESampler(seed=seed) if seed is not None else None

    def objective(trial):
        # Search space for one trial.
        # NOTE(review): the parameter names look like scikit-learn SGD estimator
        # arguments — confirm how run_experiment consumes them.
        hyperparams = {
            'learning_rate': trial.suggest_categorical('learning_rate', ['constant', 'optimal', 'invscaling', 'adaptive']),
            'eta0': trial.suggest_float('eta0', 0.001, 0.1, log=True),
            'alpha': trial.suggest_float('alpha', 0.0001, 0.01, log=True),
            'penalty': trial.suggest_categorical('penalty', ['l2', 'l1', 'elasticnet']),
            'tol': trial.suggest_categorical('tol', [1e-3, 1e-4]),
            'max_iter': trial.suggest_int('max_iter', 1, 10000, log=True),
            'warm_start': trial.suggest_categorical('warm_start', [True, False]),
        }
        result = run_experiment(config, hyperparams)
        # The study minimises this value.
        return result[C.RMSE]

    study = optuna.create_study(direction='minimize', sampler=sampler)
    study.optimize(objective, n_trials=n_trials)

    best_hyperparams = study.best_params
    best_score = study.best_value
    print(f'Best hyperparameters: {best_hyperparams} with loss {best_score}')

    return best_hyperparams, study.trials

In [None]:
# Sequential runner instance.
# NOTE(review): `runner` is only referenced by the commented-out batch run below;
# no other visible cell uses it.
runner = Runner(parallel=False)
# runner = Runner(parallel=True)

# Alternative workflow (disabled): run every configured experiment through the runner.
# runner.run(run_experiment, [
# *debugger_experiments,
# *aggregate_experiments,
# *increment_experiments,
# ])

# Tune only the first debugger experiment.
config = debugger_experiments[0]
# run_experiment(config)
best_hyperparams, tuning_results = tune_hyperparameters(config)
# Re-run the experiment once with the best hyperparameters found by the search
# (this always runs; it is not conditional).
print("Running final experiment with best hyperparameters...")
final_result = run_experiment(config, best_hyperparams)
print(f"Final experiment result: {final_result}")

In [None]:
# Display the best hyperparameter dict returned by tune_hyperparameters.
best_hyperparams

In [None]:
# NOTE(review): stray scratch expression — it only echoes the float 1e-4 (one of
# the `tol` candidates in the search space). Looks safe to delete.
1e-4

In [None]:
# Build the metadata frame from the config.json files matched by a recursive
# glob under results/.
# NOTE(review): presumably one row per stored run — confirm in src.utils.dataframe.
metadata_df = build_metadata_df('results/**/config.json')
# Optional filter (disabled): keep only experiments whose name contains one of these families.
# metadata_df = metadata_df[metadata_df[C.EXPERIMENT_NAME].str.contains('Debugger|Aggregate|Increment')]
metadata_df

In [None]:
# Load the C.EXPERIMENT_LOGGER records for the runs listed in metadata_df and
# order them by C.OBSERVATION_TIME. sort_values returns a new frame, so the
# object handed back by build_df is never mutated in place.
experiments_df = build_df(metadata_df, C.EXPERIMENT_LOGGER).sort_values(by=C.OBSERVATION_TIME)
experiments_df

In [None]:
# Scatter of the C.Y_PRED column against C.ITERATION, coloured by experiment
# name; hovering a point lists every column of experiments_df.
px.scatter(
    data_frame=experiments_df,
    x=C.ITERATION,
    y=C.Y_PRED,
    color=C.EXPERIMENT_NAME,
    hover_data=experiments_df.columns,
    title='Predictions over time',
).show()

In [None]:
# Scatter of the C.MAE column against C.ITERATION, coloured by experiment name;
# hovering a point lists every column of experiments_df.
px.scatter(
    data_frame=experiments_df,
    x=C.ITERATION,
    y=C.MAE,
    color=C.EXPERIMENT_NAME,
    hover_data=experiments_df.columns,
    title='MAE over time',
)

In [None]:
# Same MAE scatter, but with C.OBSERVATION_TIME on the x-axis instead of the
# iteration counter; colour and hover columns are unchanged.
px.scatter(
    data_frame=experiments_df,
    x=C.OBSERVATION_TIME,
    y=C.MAE,
    color=C.EXPERIMENT_NAME,
    hover_data=experiments_df.columns,
    title='MAE on observation time',
)

In [None]:
# Box plot of the C.MAE column, one box per experiment (colour = C.EXPERIMENT_NAME).
# No `labels` override is passed: Plotly Express keys `labels` by column name,
# not by axis, and the y-axis title already defaults to the y column's name.
# boxmean=True additionally marks each box's mean.
px.box(
    experiments_df,
    y=C.MAE,
    title=C.MAE,
    color=C.EXPERIMENT_NAME,
    hover_data=experiments_df.columns
).update_traces(boxmean=True).show()

In [None]:
# Scatter of the C.HITS column against C.ITERATION, coloured by experiment name;
# hovering a point lists every column of experiments_df.
px.scatter(
    data_frame=experiments_df,
    x=C.ITERATION,
    y=C.HITS,
    color=C.EXPERIMENT_NAME,
    hover_data=experiments_df.columns,
    title='Hits over time',
)

In [None]:
# Compute the loss summary from experiments_df via E.evaluate_experiment_loss_group.
# NOTE(review): presumably one row per experiment — confirm in src.utils.evaluation.
experiments_eval_df = E.evaluate_experiment_loss_group(experiments_df)
# Display-only ordering by C.MAE; experiments_eval_df itself is not reordered.
experiments_eval_df.sort_values(by=C.MAE)

In [None]:
# Grouped bars comparing C.RMSE and C.MAE per experiment. The summary is sorted
# by C.MAE and melted to long form ('variable' / 'value' columns) so that each
# metric gets its own bar colour.
px.bar(
    data_frame=experiments_eval_df.sort_values(by=C.MAE).melt(
        value_vars=[C.RMSE, C.MAE],
        id_vars=C.EXPERIMENT_NAME,
    ),
    x=C.EXPERIMENT_NAME,
    y='value',
    color='variable',
    barmode='group',
    title='Loss Metrics (RMSE and MAE)',
    labels={'value': 'Loss Metrics', 'variable': 'Metric'},
)

In [None]:
# Bar chart of C.MBE per experiment; rows are sorted by ascending C.MBE before plotting.
px.bar(
    data_frame=experiments_eval_df.sort_values(by=C.MBE),
    y=C.MBE,
    x=C.EXPERIMENT_NAME,
    title='Loss Metrics (MBE)',
)

In [None]:
# Load the records stored under C.CACHAI_LOGGER for the runs listed in
# metadata_df (same loader as experiments_df, different logger key) and display them.
cachais_df = build_df(metadata_df, C.CACHAI_LOGGER)
cachais_df

In [None]:
# Compute the cache-metric summary from cachais_df via
# E.evaluate_experiment_cache_metrics_group and display it.
# NOTE(review): presumably one row per experiment — confirm in src.utils.evaluation.
cachais_eval_df = E.evaluate_experiment_cache_metrics_group(cachais_df)
cachais_eval_df

In [None]:
# Grouped bars of the five cache rate metrics per experiment. The summary is
# sorted by C.CACHE_HIT_PRECISION and melted to long form ('variable' / 'value'
# columns) so that each metric gets its own bar colour.
px.bar(
    data_frame=cachais_eval_df.sort_values(by=C.CACHE_HIT_PRECISION).melt(
        value_vars=[
            C.CACHE_SERVE_RATE,
            C.CACHE_HIT_PRECISION,
            C.CACHE_HIT_ACCURACY,
            C.CACHE_STALE_RATE,
            C.CACHE_MISS_RATE,
        ],
        id_vars=C.EXPERIMENT_NAME,
    ),
    x=C.EXPERIMENT_NAME,
    y='value',
    color='variable',
    barmode='group',
    title='Cache Metrics',
    labels={'value': 'Rate', 'variable': 'Metric'},
)

In [None]:
# Bar chart of C.CACHE_HIT_TOTAL per experiment; rows are sorted by ascending
# C.CACHE_HIT_TOTAL before plotting.
px.bar(
    data_frame=cachais_eval_df.sort_values(by=C.CACHE_HIT_TOTAL),
    y=C.CACHE_HIT_TOTAL,
    x=C.EXPERIMENT_NAME,
    title='Cache Hit Total',
)