In [None]:
import os
import sys

import plotly.graph_objects as go

from ray import tune
from ray.tune.schedulers import ASHAScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Make the repository root (the parent of the notebook's working directory)
# importable so that `from src.train import train_model` resolves.
# PYTHONPATH is exported in addition to sys.path because sys.path only affects
# this kernel, whereas environment variables are inherited by child processes.
# NOTE(review): presumably this is what lets the Ray Tune worker processes
# import `src` as well - confirm.
module_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
if module_path not in sys.path:
    sys.path.append(module_path)

# Prepend rather than overwrite so an already configured PYTHONPATH survives,
# and check it independently of sys.path so it is set even when the path was
# already importable in this kernel.
_pythonpath_entries = [
    entry for entry in os.environ.get("PYTHONPATH", "").split(os.pathsep) if entry
]
if module_path not in _pythonpath_entries:
    os.environ["PYTHONPATH"] = os.pathsep.join([module_path] + _pythonpath_entries)

from src.train import train_model

In [None]:
# --- Data source ------------------------------------------------------------
# NOTE(review): data_dir is not referenced by any cell visible here - confirm
# whether it is still needed.
data_dir = "../best_model"
file_name = "cleaned_data_1.csv"  # forwarded as config["data_file"]
datetime_variable = "Datetime"  # forwarded as config["datetime"]
target_variable = "Flow_Kalltveit"  # column the models are trained to predict

# --- Model architectures to grid-search --------------------------------------
# Can be: "FCN", "FCNTemporalAttention", "LSTMTemporalAttention", "LSTM", "LSTMSpatialAttention", "LSTMSpatialTemporalAttention"
models = ["LSTM", "LSTMTemporalAttention", "LSTMSpatialTemporalAttention"]

# --- Input-variable sets to grid-search --------------------------------------
# Each inner list is one candidate set of extra input columns; the empty list
# means "no additional variables".
variables = [
    [],
    [
        # Nilsebu / Fister weather readings
        "Wind_Speed_Nilsebu",
        "Air_Temperature_Nilsebu",
        "Wind_Direction_Nilsebu",
        "Relative_Humidity_Nilsebu",
        "Air_Temperature_Fister",
        "Precipitation_Fister",
        # Flow, level and water temperature
        "Flow_Lyngsvatn_Overflow",
        "Flow_Tapping",
        "Water_Level_Kalltveit",
        "Water_Temperature_Kalltveit_Kum",
        "Precipitation_Nilsebu",
        # HBV columns
        "Flow_HBV",
        "Precipitation_HBV",
        "Temperature_HBV",
        # Remaining flow / temperature series
        "Flow_Without_Tapping_Kalltveit",
        "Flow_Lyngsaana",
        "Water_Temperature_Lyngsaana",
    ],
]

In [None]:
def main(
    exp_name,
    n_samples,
    max_num_epochs,
    min_num_epochs,
    local_dir="../ray_results/",
    use_asha_scheduler=False,
):
    """Run the Ray Tune hyper-parameter search and return its result object.

    The search space grid-searches over the module-level ``models`` and
    ``variables`` lists and samples the remaining hyper-parameters (batch
    size, hidden size, number of layers, learning rate, weight decay).

    Args:
        exp_name: Experiment name passed to ``tune.run``.
        n_samples: Passed to ``tune.run`` as ``num_samples``.
        max_num_epochs: Written to ``config["num_epochs"]`` and used as the
            ``training_iteration`` stop threshold (and as ASHA ``max_t``).
        min_num_epochs: ASHA ``grace_period``. Only used when
            ``use_asha_scheduler`` is true.
        local_dir: Directory handed to ``tune.run`` for results; created if
            it does not exist.
        use_asha_scheduler: When true, trials are scheduled with an
            ``ASHAScheduler``. Defaults to ``False``, which matches the
            previous behaviour where the scheduler was commented out.

    Returns:
        The object returned by ``tune.run``.
    """
    config = {
        "data_file": file_name,
        "datetime": datetime_variable,
        "data": {
            "target_variable": target_variable,
            "sequence_length": tune.choice([25]),
            "batch_size": tune.choice([128, 256]),
            "variables": tune.grid_search(variables),
            "split_size": {"train_size": 0.7, "val_size": 0.2, "test_size": 0.1},
        },
        "model": tune.grid_search(models),
        "model_arch": {
            # Derived from the sampled variable set.
            # NOTE(review): the +1 presumably accounts for the target series
            # being fed as an input too - confirm against train_model.
            "input_size": tune.sample_from(
                lambda spec: len(spec.config.data["variables"]) + 1
            ),
            "hidden_size": tune.choice([32, 64]),
            "num_layers": tune.choice([1, 2, 3]),
            "output_size": 1,
        },
        "training": {
            "learning_rate": tune.loguniform(1e-5, 1e-1),
            "weight_decay": tune.loguniform(1e-5, 1e-1),
        },
        "num_epochs": max_num_epochs,
    }

    reporter = tune.JupyterNotebookReporter(
        metric_columns=["train_loss", "val_loss", "test_loss", "training_iteration"]
    )

    # scheduler=None makes tune.run fall back to its default scheduler, i.e.
    # the same behaviour as not passing the argument at all.
    scheduler = None
    if use_asha_scheduler:
        scheduler = ASHAScheduler(
            max_t=max_num_epochs, grace_period=min_num_epochs, reduction_factor=2
        )

    stop = {
        "training_iteration": max_num_epochs,
    }

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(local_dir, exist_ok=True)

    results = tune.run(
        train_model,
        resources_per_trial={"cpu": 12, "gpu": 1},
        config=config,
        num_samples=n_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        name=exp_name,
        local_dir=local_dir,
        metric="val_loss",
        mode="min",
        stop=stop,
        keep_checkpoints_num=1,
        # Checkpoints are ranked "higher is better" unless the attribute is
        # prefixed with "min-". Without the prefix the single retained
        # checkpoint would be the one with the HIGHEST validation loss.
        checkpoint_score_attr="min-val_loss",
    )

    return results

In [None]:
def print_best_model(all_trials):
    """Print, for every model type, the trial with the lowest validation loss.

    Args:
        all_trials: Iterable of trial objects exposing ``metric_analysis``
            (a mapping containing ``["val_loss"]["min"]``), ``config`` (with
            ``"model"`` and ``["data"]["variables"]``) and ``trial_id``.

    Trials that never reported ``val_loss`` (for example because they failed
    before their first report) or whose minimum is NaN are skipped instead of
    raising. Model types are printed in ascending order of their best
    validation loss. Nothing is returned.
    """
    scored_trials = []
    for trial in all_trials:
        best_loss = trial.metric_analysis.get("val_loss", {}).get("min")
        # `best_loss != best_loss` is only true for NaN, which cannot be ranked.
        if best_loss is None or best_loss != best_loss:
            continue
        scored_trials.append((best_loss, trial))

    # The sort is stable, so ties keep the order in which trials were given.
    scored_trials.sort(key=lambda pair: pair[0])

    # The first trial seen per model type is its best one, because the list
    # is ordered by ascending loss.
    best_by_model = {}
    for best_loss, trial in scored_trials:
        best_by_model.setdefault(trial.config["model"], (best_loss, trial))

    print("Best models and their validation losses:")
    for model_type, (best_loss, trial) in best_by_model.items():
        trial_variables = trial.config["data"]["variables"]
        print(
            f"{model_type}: Trial {trial.trial_id} - val_loss: {best_loss:.4f} - variables: {trial_variables}"
        )

In [None]:
def plot_best_mode(df):
    """Draw the training and validation loss curves held in ``df``.

    Args:
        df: Table with ``train_loss`` and ``val_loss`` columns; its index is
            used as the x axis (labelled "Iteration").

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # (column in df, legend label) for each curve, in drawing order.
    loss_curves = (
        ("train_loss", "Training Loss"),
        ("val_loss", "Validation Loss"),
    )

    fig = go.Figure()
    for column, legend_label in loss_curves:
        curve = go.Scatter(x=df.index, y=df[column], mode="lines", name=legend_label)
        fig.add_trace(curve)

    layout_options = {
        "title": "Training and Validation Loss",
        "xaxis_title": "Iteration",
        "yaxis_title": "Loss",
        "legend_title": "Loss Types",
    }
    fig.update_layout(**layout_options)

    print("Plot of the best model:")
    fig.show()

In [None]:
exp_name = "inflow_forecasting"

analysis = main(exp_name, n_samples=2, max_num_epochs=200, min_num_epochs=100)

# Models trained and last reported loss.
# The original selection listed "train_loss" twice; the third loss column is
# taken to be "test_loss", which the progress reporter in main() also tracks.
# reindex (instead of plain [] selection) shows a metric that was never
# reported as a NaN column rather than raising KeyError.
summary_columns = [
    "train_loss",
    "val_loss",
    "test_loss",
    "config/model",
    "time_total_s",
    "config/data/variables",
]
analysis.dataframe().reindex(columns=summary_columns)

In [None]:
# Text summary: lowest validation loss found for each model type.
finished_trials = analysis.trials
print_best_model(finished_trials)

# Loss curves of the best trial, as selected by `analysis.best_dataframe`
# (presumably using the metric/mode passed to tune.run - confirm).
best_trial_history = analysis.best_dataframe
plot_best_mode(best_trial_history)