In [None]:
import sys
from pathlib import Path

# Resolve the current working directory and put its parent on the import path
# so the `endata` import that follows can be resolved
# (presumably the parent is the repository root - confirm against the repo layout).
script_dir = Path().resolve()
root_dir = script_dir.parent
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if str(root_dir) not in sys.path:
    sys.path.append(str(root_dir))

from endata.data_generator import DataGenerator

## Creating a Data Generator and loading a pre-trained model ##

The first step towards creating synthetic time series data is to load a model. In this instance, we will load a pre-trained ACGAN trained on the PecanStreet dataset. In order to do this, you will first need to initialize a data generator.

In [None]:
# Create the data generator for the ACGAN model type. Per the accompanying
# text, this yields an untrained model; pre-trained weights are loaded separately.
generator = DataGenerator(model_name="acgan")

The code snippet above initializes an untrained ACGAN. To use pre-trained weights, you need to explicitly load the model checkpoint provided for the dataset:

In [None]:
# Load the pre-trained model checkpoint that belongs to the PecanStreet dataset
# into the generator created above.
generator.load_model(dataset_name="pecanstreet")

## Generating synthetic data ##

All pre-trained models in EnData are conditional generative models. This means that the user has to specify the conditions under which the generator produces data. To view the required conditioning variables, each `TimeSeriesDataset` instance (which includes the dataset classes supported by EnData) has a function that outputs the available conditioning variables and their corresponding integer codes. The data generator exposes this through `get_conditioning_var_codes()`:

In [None]:
# Fetch the available conditioning variables and their integer codes; these
# are the values used to fill in the condition dictionary for generation.
# NOTE(review): the result is only assigned here, not displayed - confirm
# whether the call itself prints the codes.
conditioning_var_codes = generator.get_conditioning_var_codes()

We will now specify the desired conditions in a Python dictionary and pass them to the data generator, along with the desired number of samples. The `num_samples` argument refers to the number of synthetic time series that the model will generate, each with the dataset's specified sequence length. It does not refer to the number of individual measurement values.

In [None]:
# Conditions for generation: each conditioning variable is mapped to the
# integer code of the desired category.
# NOTE(review): the category labels in the trailing comments are carried over
# as-is - verify them against the output of get_conditioning_var_codes().
cond_vars = dict(
    weekday=1,  # Monday
    month=2,  # January
    building_type=0,  # Apartment
    city=0,  # Austin
    state=2,  # Texas
    house_construction_year=3,  # between 1969 and 2013
    total_square_footage=0,  # between 1070 and 2010 sq feet
    car1=0,  # no ev
    has_solar=0,  # no solar panels
)

# Register the conditions with the generator before sampling.
generator.set_model_conditioning_vars(cond_vars)

# Generate two synthetic time series and show the result as the cell output.
data = generator.generate(num_samples=2)
data

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from eval.utils import get_hourly_ticks

# Generate a larger batch of synthetic series for plotting.
syn_data = generator.generate(num_samples=100)

# Remove singleton dimensions from each generated series and collect them
# into a single array whose first axis indexes the individual series.
syn_data_array = np.array(
    [series.squeeze() for series in syn_data["timeseries"]]
)

In [None]:
# Overlay all synthetic series on one figure, with the x-axis labelled by
# time of day at 15-minute resolution.
timestamps = pd.date_range(start="00:00", end="23:45", freq="15min")
# Tick positions/labels come from the project helper; they are expected to be
# expressed as integer positions along the series - TODO confirm in eval.utils.
hourly_positions, hourly_labels = get_hourly_ticks(timestamps)
# Integer x positions, computed once rather than on every loop iteration.
x_positions = range(len(timestamps))

# Explicit figure/axes handles instead of the implicit pyplot state machine.
# The figure keeps the name `f` in case later cells refer to it.
f, ax = plt.subplots(figsize=(15, 7))
for series in syn_data_array:
    ax.plot(
        x_positions,
        series,
        color="blue",
        marker="o",
        markersize=2,
        linestyle="-",
        alpha=0.6,  # translucent so overlapping series remain distinguishable
    )

# Title and axis labels so the figure can be read on its own.
ax.set_title(f"Synthetic time series ({len(syn_data_array)} samples)")
ax.set_xlabel("Time of day")
ax.set_ylabel("Electric load in kWh")
ax.set_xticks(hourly_positions)
ax.set_xticklabels(hourly_labels, rotation=45)
ax.grid(True, which="both", linestyle="--", linewidth=0.5, alpha=0.7)
f.tight_layout()
plt.show()