In [None]:
import json
import os
import sys
from operator import itemgetter
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.subplots as sp
import torch
from plotly.offline import init_notebook_mode, plot, iplot
from plotly.subplots import make_subplots
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Put the parent of the current working directory on sys.path so that the
# `src.*` imports that follow can be resolved. os.path.join with a single
# argument returns it unchanged, so this is simply abspath("../").
module_path = os.path.abspath(os.path.join("../"))
if module_path not in sys.path:
    sys.path.append(module_path)

from src.data import *
from src.train import create_model
from src.evaluate import *

In [None]:
# Substring used to select run directories: a directory is processed only when
# this text occurs in its path.
experiment = "test"
# Upper bound on the number of checkpoints requested per model directory
# (passed as `num_best` to the checkpoint finders and on to the metric helpers).
best = 100

In [None]:
# List all model directories under the Ray results root.
# Path.iterdir() yields entries in arbitrary order, and later cells depend on
# which matching directory comes first, so the listing is sorted to make every
# run of the notebook visit the directories in the same order.
ray_results = Path("../ray_results/")
model_dirs = sorted(d for d in ray_results.iterdir() if d.is_dir())

In [None]:
# Project helper (star-imported above). Presumably reports how many runs under
# `model_dirs` belong to `experiment` — confirm in the src package.
count_models(model_dirs, experiment)

## Get data points and time series region

In [None]:
# Placeholders only; both names are rebound by calculate_model_metrics further down.
model_dfs, parameters = {}, []

# Inspect a single run: the first checkpoint returned for the first directory
# whose path contains `experiment`. Both loops are left after one pass.
for model_dir in model_dirs:
    if experiment in str(model_dir):
        rows = []
        best_checkpoints = find_best_checkpoints(model_dir, num_best=best)
        for i, (checkpoint, val_loss, params) in enumerate(best_checkpoints):
            # Rebuild the network, then restore its stored weights
            model = load_model_from_checkpoint(create_model(params), checkpoint)

            # Size of the test split
            data_loader, scalers = get_dataloader(params)
            test_loader = data_loader["test"]
            test_loader_length = len(test_loader.dataset)
            print("Number of values in test_loader:", test_loader_length)

            # First and last timestamp covered by the test split
            datetime_test = test_loader.datetime_index
            time_series_region = (datetime_test[0], datetime_test[-1])
            print("Time series region:", time_series_region)
            break
        break

# Plotting the loss

In [None]:
# For every directory matching `experiment`, plot the loss history of the
# single checkpoint returned with num_best=1.
for model_dir in model_dirs:
    if experiment in str(model_dir):
        best_checkpoints = find_best_checkpoints(model_dir, num_best=1)
        for i, (checkpoint, val_loss, params) in enumerate(best_checkpoints):
            print(f"Best model from {model_dir}")
            # The run directory sits two levels above the checkpoint file
            run_dir = checkpoint.parents[1]
            plot_losses(get_losses(run_dir))

# Hour-ahead forecast

In [None]:
# Project helper (star-imported). Presumably plots predictions against actual
# values for the runs matching `experiment` — confirm in the src package.
plot_pred_actual(model_dirs, experiment)

In [None]:
# Rebinds `model_dfs` and `parameters` with the results of the project helper.
# Later cells index `model_dfs` by directory name and read its "testing (s)",
# "model", "variables" and "test_mae" columns.
model_dfs, parameters = calculate_model_metrics(model_dirs, experiment, best)

In [None]:
# Create the data frame
df = pd.DataFrame(parameters)
df.sort_values("val_mea").head(20)

In [None]:
# Ask PyTorch to release cached GPU memory that is currently unused.
torch.cuda.empty_cache()

In [None]:
# concatenate the dataframes
df_concat_avg = pd.concat([model_dfs[k] for k in model_dfs.keys() if experiment in k])

df_concat_avg = df_concat_avg.drop(columns=["variables"])

# calculate the mean of each evaluation metric
df_avg = df_concat_avg.groupby(["model"]).mean()
df_avg.sort_values("test_mae")

In [None]:
# Project helper; its result feeds the statistics and plot cells that follow.
# NOTE(review): judging by the name it keeps the per-variable breakdown — confirm.
df_concat_avg_w_var = average_with_var(model_dfs, experiment)

In [None]:
# Project helper applied to the frame built by average_with_var
# (presumably summary statistics per model — confirm in the src package).
descriptive_statistics(df_concat_avg_w_var)

In [None]:
# Project helper applied to the frame built by average_with_var
# (presumably a box plot of the metrics — confirm in the src package).
box_plot(df_concat_avg_w_var)

In [None]:
# Project helper applied to the frame built by average_with_var
# (presumably median and inter-quartile range — confirm in the src package).
median_iqr(df_concat_avg_w_var)

# Time and space consumption

In [None]:
import pandas as pd
import json
from operator import itemgetter


def find_best_checkpoints_with_time(model_dir, num_best=5):
    """Rank the training runs inside ``model_dir`` by their best validation loss.

    Each sub-directory of ``model_dir`` is treated as one training run. A run
    is considered only when it contains both ``progress.csv`` and
    ``params.json`` and its ``val_loss`` column holds at least one non-NaN
    value; anything else (stray files, runs that never reported a result) is
    skipped instead of aborting the whole scan.

    Args:
        model_dir: ``pathlib.Path`` of the directory holding the run folders.
        num_best: Maximum number of runs to return.

    Returns:
        A list of at most ``num_best`` tuples
        ``(checkpoint_path, best_val_loss, training_time, params)`` sorted by
        ascending ``best_val_loss``. ``checkpoint_path`` is
        ``run_dir / "my_model" / "checkpoint.pt"`` (its existence is not
        checked), ``training_time`` is the ``time_total_s`` value of the row
        with the lowest ``val_loss``, and ``params`` is the parsed
        ``params.json``.

    Raises:
        KeyError: If a non-empty ``progress.csv`` lacks the ``val_loss`` or
            ``time_total_s`` column.
    """
    checkpoints = []

    for run_dir in model_dir.iterdir():
        if not run_dir.is_dir():
            continue

        progress_file = run_dir / "progress.csv"
        params_file = run_dir / "params.json"
        if not (progress_file.exists() and params_file.exists()):
            continue

        try:
            progress_data = pd.read_csv(progress_file)
        except pd.errors.EmptyDataError:
            # Zero-byte progress file: the run never logged anything.
            continue

        # A header-only file or a column of NaNs has no minimum to pick.
        valid_losses = progress_data["val_loss"].dropna()
        if valid_losses.empty:
            continue

        with params_file.open("r", encoding="utf-8") as f:
            params = json.load(f)

        best_val_idx = valid_losses.idxmin()
        best_val_loss = progress_data.loc[best_val_idx, "val_loss"]
        # Elapsed time recorded on the row that achieved the best loss.
        training_time = progress_data.loc[best_val_idx, "time_total_s"]

        checkpoint_path = run_dir / "my_model" / "checkpoint.pt"
        checkpoints.append((checkpoint_path, best_val_loss, training_time, params))

    # Lowest validation loss first
    checkpoints.sort(key=itemgetter(1))

    return checkpoints[:num_best]

In [None]:
import torch
import numpy as np
import pandas as pd

# Per-architecture lists of validation loss, training time, testing time and
# weight size; one entry is appended for every selected checkpoint.
results = {}

for model_dir in model_dirs:
    if experiment not in str(model_dir):
        continue

    best_checkpoints = find_best_checkpoints_with_time(model_dir, num_best=best)

    for checkpoint, val_loss, training_time, params in best_checkpoints:
        # Only the architecture is needed to count parameters, so the stored
        # weights are not loaded here.
        model = create_model(params)

        # Space consumption: 4 bytes per parameter (float32)
        num_parameters = sum(p.numel() for p in model.parameters())
        space_consumption = num_parameters * 4

        model_stats = results.setdefault(
            params["model"],
            {
                "val_loss": [],
                "training_time": [],
                "testing_time": [],
                "space_consumption": [],
            },
        )

        model_stats["val_loss"].append(val_loss)
        model_stats["training_time"].append(training_time)
        model_stats["space_consumption"].append(space_consumption)

        # Testing time is the mean over the whole metrics frame of this
        # directory, recorded once per checkpoint.
        testing_time = np.mean(model_dfs[model_dir.name]["testing (s)"])
        model_stats["testing_time"].append(testing_time)

# Build the summary in one go rather than concatenating row by row; passing
# `columns` keeps the expected header even when no run matched.
df = pd.DataFrame(
    [
        {
            "Model": model_name,
            "Avg val_loss": np.mean(data["val_loss"]),
            "Avg training_time": np.mean(data["training_time"]),
            "Avg testing_time": np.mean(data["testing_time"]),
            # Space consumption in KB (bytes / 1024)
            "Avg space_consumption": np.mean(data["space_consumption"]) / 1024,
        }
        for model_name, data in results.items()
    ],
    columns=[
        "Model",
        "Avg val_loss",
        "Avg training_time",
        "Avg testing_time",
        "Avg space_consumption",
    ],
)

df.sort_values("Avg training_time")

# Understanding the attention weights

In [None]:
from plotly.subplots import make_subplots


def visualize_attention(
    attention_weights_spatial, attention_weights_temporal, batch_idx, features
):
    """Show spatial and temporal attention weights of one sample as heatmaps.

    Args:
        attention_weights_spatial: Tensor indexed by batch element on its first
            axis; the selected slice must be 2-D (rows are labelled as input
            time steps, columns as features).
        attention_weights_temporal: Tensor with the same layout requirements.
        batch_idx: Index of the batch element to display.
        features: Feature names. They label the columns of a heatmap when they
            are unique and their count equals the number of columns; otherwise
            generic ``f1 … fN`` labels are used.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Extract the attention weights of the requested batch element
    attention_matrix_spatial = (
        attention_weights_spatial[batch_idx].detach().cpu().numpy()
    )
    attention_matrix_temporal = (
        attention_weights_temporal[batch_idx].detach().cpu().numpy()
    )

    # Cap the colour scale at the 95th percentile so a few large weights do not
    # wash out the rest of the map.
    zmax_spatial = np.percentile(attention_matrix_spatial, 95)
    zmax_temporal = np.percentile(attention_matrix_temporal, 95)

    def _column_labels(matrix):
        # Real names only when they map one-to-one onto the columns; duplicate
        # category labels would be merged by the heatmap.
        n_cols = matrix.shape[1]
        if (
            features is not None
            and len(features) == n_cols
            and len(set(features)) == n_cols
        ):
            return list(features)
        return [f"f{i}" for i in range(1, n_cols + 1)]

    def _row_labels(matrix):
        return [f"t-{i}" for i in range(1, matrix.shape[0] + 1)]

    # One row, two columns: spatial on the left, temporal on the right
    fig = make_subplots(
        rows=1,
        cols=2,
        subplot_titles=("Spatial Attention Weights", "Temporal Attention Weights"),
        horizontal_spacing=0.15,
    )

    fig.add_trace(
        go.Heatmap(
            z=attention_matrix_spatial,
            x=_column_labels(attention_matrix_spatial),
            y=_row_labels(attention_matrix_spatial),
            colorscale="Viridis",
            zmin=0,
            zmax=zmax_spatial,
            name="Spatial Weights",
            colorbar_x=0.45,  # place this colour bar between the two panels
        ),
        row=1,
        col=1,
    )

    fig.add_trace(
        go.Heatmap(
            z=attention_matrix_temporal,
            x=_column_labels(attention_matrix_temporal),
            y=_row_labels(attention_matrix_temporal),
            colorscale="Viridis",
            zmin=0,
            zmax=zmax_temporal,
            name="Temporal Weights",
        ),
        row=1,
        col=2,
    )

    fig.update_layout(width=1800, height=800)
    # update_layout(xaxis_title=...) only reaches the first subplot; the
    # update_*axes helpers apply the titles to every panel.
    fig.update_xaxes(title_text="Features")
    fig.update_yaxes(title_text="Input Time Step")
    fig.show()


def plot_attention(params, spatial_weights=None, temporal_weights=None, batch_idx=20):
    """Plot the attention heatmaps of one batch element, if weights are available.

    Args:
        params: Run configuration; ``params["data"]["target_variable"]`` and
            ``params["data"]["variables"]`` supply the feature names (target
            first).
        spatial_weights: Spatial attention weights as a ``torch.Tensor``.
        temporal_weights: Temporal attention weights as a ``torch.Tensor``.
        batch_idx: Index of the batch element to display. Defaults to 20, so
            the batch must hold at least 21 elements unless another index is
            given.

    Returns:
        None. Nothing is drawn unless both weight arguments are tensors.
    """
    features = [params["data"]["target_variable"]] + params["data"]["variables"]
    # Models that do not expose both kinds of weights are silently skipped
    both_are_tensors = isinstance(spatial_weights, torch.Tensor) and isinstance(
        temporal_weights, torch.Tensor
    )
    if both_are_tensors:
        visualize_attention(
            spatial_weights, temporal_weights, batch_idx=batch_idx, features=features
        )


In [None]:
# NOTE(review): the `experiment` filter used by the other cells is disabled
# here, so every directory under the results root is visited — confirm that
# this is intended.
# These architectures are skipped; presumably they do not return both a
# spatial and a temporal weight set — confirm in the model definitions.
skipped_models = ("LSTM", "FCN", "LSTMTemporalAttention")

for model_dir in model_dirs:
    for checkpoint, val_loss, params in find_best_checkpoints(model_dir, num_best=1):
        if params["model"] in skipped_models:
            continue

        # Load model and weights
        model = load_model_from_checkpoint(create_model(params), checkpoint)
        # Inference only: switch off dropout/batch-norm updates (no effect if
        # the model is already in eval mode).
        model.eval()

        data_loader, _ = get_dataloader(params)
        test_dataloader = data_loader["test"]

        # One batch of input sequences is enough for the visualisation
        inputs, _ = next(iter(test_dataloader))

        # No gradients are needed; the second positional argument asks the
        # model to return its attention weights alongside the output.
        with torch.no_grad():
            _, spatial_attention_weights, temporal_attention_weights = model(
                inputs, True
            )

        plot_attention(params, spatial_attention_weights, temporal_attention_weights)

# Multi-step-ahead forecasting

In [None]:
# Forecast horizon handed to evaluate_multi_step_models and
# plot_attention_multi_step_models below.
steps_ahead = 12

In [None]:
# Ask PyTorch to release cached GPU memory that is currently unused before the
# multi-step evaluation.
torch.cuda.empty_cache()

In [None]:
# Rebinds `model_dfs`: from here on it holds the multi-step results and the
# frames produced earlier by calculate_model_metrics are no longer reachable
# under this name.
model_dfs = evaluate_multi_step_models(model_dirs, experiment, steps_ahead, best)

In [None]:
# Frame stored under the "data_4-<experiment>-lstm" key, lowest test MAE first.
# Direct indexing raises a KeyError naming the missing key, instead of the
# opaque AttributeError that `.get()` returning None would cause.
model_dfs[f"data_4-{experiment}-lstm"].sort_values("test_mae")

In [None]:
# Frame stored under the "data_4-<experiment>-temp" key, lowest test MAE first.
# Direct indexing raises a KeyError naming the missing key, instead of the
# opaque AttributeError that `.get()` returning None would cause.
model_dfs[f"data_4-{experiment}-temp"].sort_values("test_mae")

In [None]:
# Frame stored under the "data_4-<experiment>-spa_temp" key, lowest test MAE first.
# Direct indexing raises a KeyError naming the missing key, instead of the
# opaque AttributeError that `.get()` returning None would cause.
model_dfs[f"data_4-{experiment}-spa_temp"].sort_values("test_mae")

In [None]:
# concatenate the dataframes
df_concat_avg = pd.concat([model_dfs[k] for k in model_dfs.keys() if experiment in k])

df_concat_avg = df_concat_avg.drop(columns=["variables"])

# calculate the mean of each evaluation metric
df_avg = df_concat_avg.groupby(["model"]).mean()
df_avg.sort_values("test_mae")

In [None]:
# Same project helper as in the single-step section, now fed with the
# multi-step `model_dfs`; rebinds `df_concat_avg_w_var`.
df_concat_avg_w_var = average_with_var(model_dfs, experiment)

In [None]:
# concatenate the dataframes
descriptive_statistics(df_concat_avg_w_var)

In [None]:
# Project helper applied to the multi-step frame built by average_with_var
# (presumably a box plot of the metrics — confirm in the src package).
box_plot(df_concat_avg_w_var)

In [None]:
# Project helper applied to the multi-step frame built by average_with_var
# (presumably median and inter-quartile range — confirm in the src package).
median_iqr(df_concat_avg_w_var)

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

def visualize_attention_dual(
    attention_weights_spatial1, attention_weights_temporal1,
    attention_weights_spatial2, attention_weights_temporal2,
    batch_idx, features, step_labels=("1", "12")
):
    """Show spatial/temporal attention heatmaps of two forecast steps side by side.

    Args:
        attention_weights_spatial1: Spatial weights of the first step; a tensor
            indexed by batch element on its first axis whose selected slice is
            2-D (rows are labelled as input time steps).
        attention_weights_temporal1: Temporal weights of the first step, same
            layout requirements.
        attention_weights_spatial2: Spatial weights of the second step.
        attention_weights_temporal2: Temporal weights of the second step.
        batch_idx: Index of the batch element to display.
        features: Labels used for the x axis of every heatmap.
        step_labels: Pair of labels appended to the panel titles and trace
            names for the first and the second step.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    first_step, second_step = step_labels

    # (panel title, trace name, weights, colour-bar x position), left to right.
    # The colour-bar positions place each bar beside its own panel.
    panels = [
        (f"Spatial Attention Weights - {first_step}",
         f"Spatial Weights - {first_step}", attention_weights_spatial1, 0.20),
        (f"Temporal Attention Weights - {first_step}",
         f"Temporal Weights - {first_step}", attention_weights_temporal1, 0.48),
        (f"Spatial Attention Weights - {second_step}",
         f"Spatial Weights - {second_step}", attention_weights_spatial2, 0.75),
        (f"Temporal Attention Weights - {second_step}",
         f"Temporal Weights - {second_step}", attention_weights_temporal2, 1.02),
    ]

    # Pull out the requested batch element of every weight tensor up front so
    # that indexing problems surface before any figure is built.
    matrices = [
        weights[batch_idx].detach().cpu().numpy() for _, _, weights, _ in panels
    ]

    fig = make_subplots(
        rows=1, cols=len(panels),
        subplot_titles=[title for title, _, _, _ in panels],
        horizontal_spacing=0.10
    )

    for col, ((_, trace_name, _, colorbar_x), matrix) in enumerate(
        zip(panels, matrices), start=1
    ):
        fig.add_trace(
            go.Heatmap(
                z=matrix,
                x=features,
                y=[f"t-{i}" for i in range(1, matrix.shape[0] + 1)],
                colorscale="Viridis",
                zmin=0,
                # Cap the colour scale at the 95th percentile of this panel
                zmax=np.percentile(matrix, 95),
                name=trace_name, colorbar_x=colorbar_x
            ),
            row=1, col=col,
        )

    fig.update_layout(width=1800, height=800)
    # update_layout(xaxis_title=...) only reaches the first subplot; the
    # update_*axes helpers apply the titles to all four panels.
    fig.update_xaxes(title_text="Features")
    fig.update_yaxes(title_text="Input Time Step")
    fig.show()

def plot_attention_dual(params, spatial_weights1=None, temporal_weights1=None, spatial_weights2=None, temporal_weights2=None, batch_idx=7):
    """Plot the attention heatmaps of two forecast steps for one batch element.

    Args:
        params: Run configuration; ``params["data"]["target_variable"]`` and
            ``params["data"]["variables"]`` supply the feature names (target
            first).
        spatial_weights1: Spatial attention weights of the first step.
        temporal_weights1: Temporal attention weights of the first step.
        spatial_weights2: Spatial attention weights of the second step.
        temporal_weights2: Temporal attention weights of the second step.
        batch_idx: Index of the batch element to display. Defaults to 7.

    Returns:
        None. The weights are forwarded unchecked to visualize_attention_dual,
        so all four must be provided despite the ``None`` defaults.
    """
    features = [params["data"]["target_variable"]] + params["data"]["variables"]
    visualize_attention_dual(
        spatial_weights1,
        temporal_weights1,
        spatial_weights2,
        temporal_weights2,
        batch_idx=batch_idx,
        features=features,
    )


In [None]:
def plot_attention_multi_step_models(model_dirs, experiment, steps_ahead, best):
    """Plot multi-step attention heatmaps for the selected checkpoints.

    Args:
        model_dirs: Iterable of run directories to scan.
        experiment: Substring a directory path must contain to be processed.
        steps_ahead: Forecast horizon forwarded as ``forecast_steps``.
        best: Number of checkpoints requested per directory (``num_best``).

    Returns:
        None. One figure is shown per checkpoint that is not skipped.
    """
    # These architectures are skipped; presumably they do not provide both a
    # spatial and a temporal weight set — confirm in the model definitions.
    skipped_models = ("LSTM", "FCN", "LSTMTemporalAttention")

    for model_dir in model_dirs:
        if experiment not in str(model_dir):
            continue

        for checkpoint, _val_loss, params in find_best_checkpoints(
            model_dir, num_best=best
        ):
            if params["model"] in skipped_models:
                continue

            # Load model and weights
            model = load_model_from_checkpoint(create_model(params), checkpoint)

            data_loader, _ = get_dataloader(params)
            test_loader = data_loader["test"]

            # Predictions and targets are returned as well but only the
            # attention weights are used here.
            with torch.no_grad():
                _, _, attention_weights_all = get_multi_step_preds_actuals(
                    model, test_loader, forecast_steps=steps_ahead, return_weights=True
                )

            # NOTE(review): entry 7 is hard-coded and must unpack into exactly
            # two (spatial, temporal) pairs, treated as step 1 and step 12 —
            # confirm what the index selects in get_multi_step_preds_actuals.
            attention_weights_1, attention_weights_12 = attention_weights_all[7]
            alpha_list_1, beta_t_1 = attention_weights_1
            alpha_list_12, beta_t_12 = attention_weights_12

            plot_attention_dual(
                params,
                spatial_weights1=alpha_list_1,
                temporal_weights1=beta_t_1,
                spatial_weights2=alpha_list_12,
                temporal_weights2=beta_t_12,
            )

plot_attention_multi_step_models(model_dirs, experiment, steps_ahead, 1)