This notebook generates synthetic indicator data for testing and prototyping purposes.

In [None]:
import json
import os
import shutil

import holoviews as hv
import numpy as np
import pandas as pd
import rioxarray  # noqa: F401
import xarray as xr

# Activate the Bokeh backend so the HoloViews plots below render in the notebook.
hv.extension("bokeh")

In [None]:
# Identifiers used to label the synthetic data set.
SITE_ID = "degero"
EXTENT_ID = "extent-1"

# Every generated file is written below synthetic-indicators/<SITE_ID>-<EXTENT_ID>.
PATH_OUT = os.path.join("synthetic-indicators", "-".join([SITE_ID, EXTENT_ID]))

In [None]:
# Display the absolute location of the output directory as the cell result.
os.path.abspath(PATH_OUT)

In [None]:
# Start from a clean slate: delete whatever a previous run left in PATH_OUT,
# then (re)create the empty output directory.
if os.path.exists(PATH_OUT):
    shutil.rmtree(PATH_OUT)
os.makedirs(PATH_OUT, exist_ok=True)

info.json

In [None]:
# Write the extent-level metadata file.
filename = os.path.join(PATH_OUT, "info.json")
with open(filename, "w", encoding="utf-8") as f:
    json.dump(
        {
            # Derived from EXTENT_ID so the display name cannot drift from the
            # extent the output directory is named after.
            "name": f"synthetic indicator {EXTENT_ID}",
            "description": "This is a synthetic indicator for testing purposes.",
            "site_id": SITE_ID,
            # "loading_001" is the name written to
            # variable_loading/loading_001.json further down in this notebook.
            "default_variable_loading_name": "loading_001",
        },
        f,
        indent=2,
    )

peat_extent.tiff

In [None]:
# Corner coordinates of the synthetic raster, in the order
# (lon_min, lat_min, lon_max, lat_max). The raster cell below uses them as
# x/y coordinates and tags the result with EPSG:4326.
lon_min, lat_min, lon_max, lat_max = (
    19.4342728416868,
    64.0867100842452,
    19.702569859525383,
    64.2372650939366,
)

In [None]:
# Build a random binary "peat / no peat" mask over the bounding box and save
# it as a GeoTIFF.

# Number of cells along each axis.
n_x, n_y = 100, 100

# Cell coordinates are spread evenly between the bounding-box corners.
# NOTE(review): y increases with the row index (lat_min first); confirm that
# consumers of the raster accept rows stored in that order.
x = np.linspace(lon_min, lon_max, n_x)
y = np.linspace(lat_min, lat_max, n_y)

# Each cell is 1 with probability 0.2 and 0 otherwise.
array = np.random.choice([0, 1], size=(n_y, n_x), p=[0.8, 0.2])

ds = xr.DataArray(
    array,
    dims=("y", "x"),
    coords={"y": y, "x": x},
    name="peat_extent",
)
ds.rio.write_crs("EPSG:4326", inplace=True)

filename = os.path.join(PATH_OUT, "peat_extent.tiff")
ds.rio.to_raster(filename, dtype="uint8")

time_series.h5

In [None]:
# Names of the synthetic variables: variable_1 ... variable_<n_vars>.
n_vars = 5
columns = [f"variable_{number}" for number in range(1, n_vars + 1)]

# One timestamp per day from 2015-01-01 through 2019-12-31.
date_index = pd.date_range(start="2015-01-01", end="2019-12-31", freq="D")

In [None]:
# Day counter (0, 1, 2, ...) aligned with date_index; it drives both the trend
# and the seasonal term.
time = np.arange(len(date_index))
data_df = pd.DataFrame(index=date_index)

# Generate synthetic data for each variable as the sum of:
#
# - Gaussian noise with a random mean and standard deviation
# - a linear trend with a random slope
# - a seasonal sine wave (365-day period) with random amplitude and phase
for col in columns:
    mean = np.random.uniform(-2, 2)
    std = np.random.uniform(0.1, 0.4)
    slope = np.random.uniform(-0.01, 0.01)
    phase = np.random.uniform(0, 2 * np.pi)
    amplitude = np.random.uniform(0.5, 2)

    # The noise term also carries the variable's base level (its mean).
    noise = np.random.normal(loc=mean, scale=std, size=len(date_index))
    trend = slope * time
    season = amplitude * np.sin(2 * np.pi * time / 365 + phase)

    data_df[col] = noise + trend + season

In [None]:
# Draw one curve per variable and overlay them on a shared set of axes.
plot_options = dict(
    width=800,
    height=400,
    legend_position="right",
    xlabel="Date",
    ylabel="Value",
    title="Synthetic Indicators",
    show_grid=True,
)
curves = [
    hv.Curve((data_df.index, data_df[name]), label=name)
    for name in data_df.columns
]
df_hv = hv.Overlay(curves)
# The styled overlay is the cell's last expression, so the notebook displays it.
df_hv.opts(**plot_options)

In [None]:
variance_df = pd.DataFrame(index=date_index)

# Generate synthetic variance data for each variable as the sum of:
#
# - a random base variance
# - Gaussian noise
# - a seasonal sine wave (365-day period) with a random phase
for col in columns:
    base_variance = np.random.uniform(0.5, 3.0)
    noise = np.random.normal(0, 0.1, len(date_index))
    seasonal_variance = 0.2 * np.sin(2 * np.pi * time / 365 + np.random.uniform(0, 2 * np.pi))

    # A variance cannot be negative. With a base close to 0.5, the seasonal
    # trough (-0.2) plus a noise sample below -0.3 would push the sum under
    # zero, so clamp at 0 to keep every generated value valid.
    variance_df[col] = np.maximum(base_variance + seasonal_variance + noise, 0.0)

In [None]:
# Draw one variance curve per variable and overlay them on a shared set of axes.
plot_options = dict(
    width=800,
    height=400,
    legend_position="right",
    xlabel="Date",
    ylabel="Value",
    title="Synthetic Indicators - variance",
    show_grid=True,
)
curves = [
    hv.Curve((variance_df.index, variance_df[name]), label=name)
    for name in variance_df.columns
]
df_hv = hv.Overlay(curves)
# The styled overlay is the cell's last expression, so the notebook displays it.
df_hv.opts(**plot_options)

In [None]:
# Store both tables in one HDF5 file under the keys "data" and "variance".
filename = os.path.join(PATH_OUT, "time_series.h5")
# mode="w" makes the first write start from a fresh file, so re-running this
# cell never writes into a file left over from an earlier run.
data_df.to_hdf(filename, key="data", mode="w")
# The second write relies on the default append mode to add its key to the
# file created just above.
variance_df.to_hdf(filename, key="variance")

variable_loading

In [None]:
# Directory that holds one JSON file per variable loading.
variable_loading = os.path.join(PATH_OUT, "variable_loading")
# exist_ok=True keeps this cell re-runnable, matching how PATH_OUT is created.
os.makedirs(variable_loading, exist_ok=True)

In [None]:
# First synthetic loading: an optimal value for variable_1 only and a
# positive loading for each of the five variables.
payload = {
    "name": "loading_001",
    "description": "This is a synthetic variable loading for testing purposes.",
    "optimal_values": {
        "variable_1": 0.5,
    },
    "variable_loadings": {
        "variable_1": 0.1,
        "variable_2": 0.2,
        "variable_3": 0.3,
        "variable_4": 0.4,
        "variable_5": 0.5,
    },
}

filename = os.path.join(variable_loading, "loading_001.json")
with open(filename, "w") as f:
    f.write(json.dumps(payload, indent=2))

In [None]:
# Second synthetic loading: an optimal value for variable_2 only and loadings
# of mixed sign (variable_2 and variable_4 are negative).
payload = {
    "name": "loading_002",
    "description": "This is a synthetic variable loading for testing purposes.",
    "optimal_values": {
        "variable_2": 0.1,
    },
    "variable_loadings": {
        "variable_1": 0.1,
        "variable_2": -0.2,
        "variable_3": 0.3,
        "variable_4": -0.4,
        "variable_5": 0.5,
    },
}

filename = os.path.join(variable_loading, "loading_002.json")
with open(filename, "w") as f:
    f.write(json.dumps(payload, indent=2))

In [None]:
# List the generated files by shelling out to the external `tree` command.
! tree {PATH_OUT}