In [None]:
%load_ext autoreload
%autoreload 2

import os

import torch
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from mpl_toolkits.axes_grid1 import make_axes_locatable

from src.utils.visualization import animate_predictions, animate_results, plot_noise_effects, split_and_save_chunks, plot_real_sensor_partitioning
from src.datasets.utils import get_noise, read_real_observation_files

In [None]:
from src.utils.evaluation import *

from src.datasets.real_obs_dataset import load_data as load_real_data
from src.datasets.vitae_dataset import unscale, scale

In [None]:
from datetime import datetime, timedelta

def add_hours(date_str: str, hours: int, date_format: str = "%Y-%m-%d %H:%M:%S") -> str:
    """
    Shift a formatted timestamp by a whole number of hours.

    Args:
        date_str (str): Timestamp to shift, written according to ``date_format``
            (e.g., "2025-09-05 13:30:00").
        hours (int): Offset in hours; negative values move back in time.
        date_format (str): ``strptime``/``strftime`` pattern used both to parse
            the input and to render the result.

    Returns:
        str: The shifted timestamp, rendered with ``date_format``.
    """
    parsed = datetime.strptime(date_str, date_format)
    return (parsed + timedelta(hours=hours)).strftime(date_format)

# Example usage: shift the same start time by increasingly large offsets
for offset_hours in (1, 100, 5000):
    print(add_hours("2014-01-01 00:0:00", offset_hours))

In [None]:
# Inspect which arrays one saved prediction archive contains
res = np.load(
    'results/predictions/vunet/sparse_real_random_time_gaussian_full_0_predictions.npz'
)

print([key for key in res.keys()])

In [None]:
def add_extra_metrics(files: list[str], model_type: str) -> None:
    """
    Compute extra metrics for saved prediction archives and store them in place.

    For every ``.npz`` archive in ``files`` the arrays ``predictions``,
    ``ground_truth`` and ``errors`` are read. Predictions and targets are
    multiplied by the target masks of the real-observation dataset, then RMSE,
    mean fractional bias, mean fractional error and the L2 relative error are
    computed and printed. Each archive is rewritten with its original arrays
    plus the keys ``rmse``, ``mfe``, ``mfb`` and ``unscaled_errors``.

    Args:
        files (list[str]): Paths of the ``.npz`` prediction archives to update.
        model_type (str): Model type forwarded to ``load_real_data``.

    Returns:
        None: Results are printed and written back to the archives on disk.
    """
    if not files:
        # Nothing to update, so do not load the dataset at all.
        return

    # The dataset arguments never depend on the archive being processed, so the
    # mask is built once up front instead of once per file.
    # NOTE(review): assumes load_real_data returns the same masks on every call - confirm.
    dataset, _ = load_real_data(model_type=model_type, sensor_type="real-random", timesteps=8, val_set=False)
    mask = torch.stack([target_mask for _, _, target_mask in dataset], dim=0)

    for file in files:
        # Load existing arrays into a dictionary; the archive is closed right
        # away so it can be safely overwritten below.
        with np.load(file) as res:
            data = {k: res[k] for k in res.keys()}

        preds = torch.from_numpy(data['predictions'])
        targets = torch.from_numpy(data['ground_truth'])
        errors = data['errors']

        # Both fields are masked before the metrics are computed.
        unscaled_l2_relative_errors = compute_relative_error(targets * mask, preds * mask)
        rmse, mfb, mfe = compute_extra_metrics(preds * mask, targets * mask, mask)

        unscaled_error = np.mean(unscaled_l2_relative_errors)

        print("RMSE:", round(rmse, 3))
        print("Mean Fractional Error:", round(mfe, 3))
        print("Mean Fractional Bias:", round(mfb, 3))
        print("Unscaled L2 Relative Error:", round(unscaled_error, 3))
        print("Scaled Error:", round(np.mean(errors), 3))
        print("-" * 30)

        # Add new keys
        data.update({
            "rmse": float(rmse),
            "mfe": float(mfe),
            "mfb": float(mfb),
            "unscaled_errors": unscaled_l2_relative_errors
        })

        # Save everything back (overwrites the original archive)
        np.savez_compressed(file, **data)

In [None]:
# Numeric tags found in the archive names: random_random_<tag>_predictions.npz
archive_tags = (5, 10, 15, 20, 25, 30)

# (results sub-directory, model_type passed to add_extra_metrics), in run order.
# The kriging archives are processed with model_type='vitae'.
# NOTE(review): confirm that 'vitae' is the intended dataset variant for kriging.
metric_runs = [
    ('vunet', 'vunet'),
    ('vitae', 'vitae'),
    ('vcnn', 'vcnn'),
    ('kriging', 'vitae'),
]

# A run over the clstm archives is deliberately left out (it was commented out):
#   results/predictions/clstm/sparse_real_random[_<noise>_full][_0|_100]_predictions.npz
#   with <noise> in {gaussian, time_gaussian, perlin, correlated} and model_type='clstm'.

for run_idx, (run_subdir, run_model_type) in enumerate(metric_runs):
    if run_idx > 0:
        # Separator between consecutive runs (none after the last one)
        print("-" * 50)

    add_extra_metrics(
        [
            f'results/predictions/{run_subdir}/random_random_{tag}_predictions.npz'
            for tag in archive_tags
        ],
        model_type=run_model_type,
    )

In [None]:
def to_table_string(files: list[list[str]]) -> None:
    """
    Print LaTeX table rows summarising the metrics stored in prediction archives.

    One row is printed per metric (L2 MRE, SSIM, RMSE, MFE, MFB). Each inner
    list of ``files`` contributes one cell to every row: the metric is read from
    each archive in the list, reduced to a scalar, rounded to three decimals,
    and the rounded values are averaged across the list.

    Args:
        files (list[list[str]]): Groups of ``.npz`` archive paths; each group
            becomes one table column. Every archive must contain the keys
            ``errors``, ``ssim``, ``rmse``, ``mfe`` and ``mfb``.

    Returns:
        None: The rows are printed, each terminated by a LaTeX line break.
    """
    # (row label, archive key) in the order the rows are printed.
    rows = [
        ("& L2 MRE ", "errors"),
        ("& SSIM ", "ssim"),
        ("& RMSE ", "rmse"),
        ("& MFE ", "mfe"),
        ("& MFB ", "mfb"),
    ]

    def to_scalar(x):
        """Return ``x`` as a float, averaging it first when it is not 0-dimensional."""
        arr = np.asarray(x)
        return float(arr) if arr.shape == () else float(np.mean(arr))

    lines = [label for label, _ in rows]

    for noise_batch_files in files:
        per_metric_vals = [[] for _ in rows]

        for seed_file in noise_batch_files:
            # Use a context manager so the archive is closed as soon as its
            # metrics have been read.
            with np.load(seed_file) as res:
                for vals, (_, key) in zip(per_metric_vals, rows):
                    # NOTE(review): values are rounded per archive before
                    # averaging, as in the original implementation.
                    vals.append(round(to_scalar(res[key]), 3))

        for row_idx, vals in enumerate(per_metric_vals):
            lines[row_idx] += f"& {np.mean(vals):.3f} "

    # end each row
    result = "\n".join(line + "\\\\" for line in lines)
    print(result)

In [None]:
# Numeric tags found in the archive names: random_random_<tag>_predictions.npz
table_tags = (5, 10, 15, 20, 25, 30)

# One table per results sub-directory, printed in this order
for table_idx, table_subdir in enumerate(('vcnn', 'vunet', 'vitae', 'kriging')):
    if table_idx > 0:
        # Separator between consecutive tables (none after the last one)
        print("-" * 50)

    # Every column is built from a single archive
    to_table_string([
        [f'results/predictions/{table_subdir}/random_random_{tag}_predictions.npz']
        for tag in table_tags
    ])

In [None]:
def _save_heatmap(field, out_path, vmin, vmax):
    """
    Save a 2-D field as a viridis heat map with a colorbar of matching height.

    Args:
        field: 2-D array-like handed to ``imshow``.
        out_path (str): Destination path of the image file.
        vmin (float): Lower limit of the colour scale.
        vmax (float): Upper limit of the colour scale.
    """
    fig, ax = plt.subplots(figsize=(4, 5))
    im = ax.imshow(field, vmin=vmin, vmax=vmax, cmap='viridis')
    ax.axis('off')

    # Create a colorbar the same height as the image, aligned to the right
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    cbar = fig.colorbar(im, cax=cax)
    cbar.ax.tick_params(labelsize=12)

    fig.tight_layout()
    fig.savefig(out_path, dpi=300, bbox_inches='tight')
    # Close this specific figure so figures do not pile up across the loops
    plt.close(fig)


d_polair_o3 = np.load('data/d_polair_O3.npy')
d_polair_pm10 = np.load('data/d_polair_PM10.npy')
d_polair_pm25 = np.load('data/d_polair_PM25.npy')
d_polair_no2 = np.load('data/d_polair_NO2.npy')

# NOTE(review): assumes each array is (time, 1, height, width) so that axis 1
# becomes the pollutant axis after concatenation - confirm.
all_pollutants = np.concatenate([
    d_polair_o3,
    d_polair_pm10,
    d_polair_pm25,
    d_polair_no2
], axis=1)

noise_types = ['gaussian', 'perlin', 'time_gaussian', 'correlated']
pollutant_names = ['O3', 'PM10', 'PM2.5', 'NO2']

# Only the first entry along axis 0 is plotted; its maximum is the shared upper
# colour limit of the clean images and does not depend on the noise type.
clean_fields = all_pollutants[0]
v_max_clean = clean_fields.max()

for noise_type in noise_types:
    noise = get_noise(
            target_shape=(1000, 4, 75, 110),
            noise_type=noise_type,
            device='cpu'
        ).cpu()

    # Standardize the generated noise (statistics taken separately for every
    # index along dim 1)
    noise_mean = noise.mean(dim=(0, 2, 3), keepdim=True)
    noise_std = noise.std(dim=(0, 2, 3), keepdim=True)
    noise = (noise - noise_mean) / noise_std
    noise = noise * 5  # Scale to have std of 5
    noise = noise.numpy()

    # One output directory per noise type, created once
    out_dir = f'report_images/methodology/noise/{noise_type}'
    os.makedirs(out_dir, exist_ok=True)

    for p_idx, pollutant in enumerate(pollutant_names):
        clean = clean_fields[p_idx]
        noise_field = noise[0][p_idx]

        # Clean field, the noise alone, and their sum - in that order
        _save_heatmap(clean, f'{out_dir}/clean_{pollutant}.png', vmin=0, vmax=v_max_clean)
        _save_heatmap(noise_field, f'{out_dir}/noise_{pollutant}.png', vmin=-10, vmax=10)
        _save_heatmap(clean + noise_field, f'{out_dir}/noised_{pollutant}.png', vmin=0, vmax=v_max_clean + 5)