In [1]:
import sys
from pathlib import Path

# Put the parent of the working directory on the import path; the imports that
# follow (datasets.*, generator.*) are presumably resolved from there.
# NOTE: Path() is the kernel's current working directory, which is not
# necessarily the directory that contains this notebook.
script_dir = Path().resolve()
root_dir = script_dir.parent
# Guard the append so re-running this cell does not stack duplicate entries.
if str(root_dir) not in sys.path:
    sys.path.append(str(root_dir))

from datasets.pecanstreet import PecanStreet
from datasets.openpower import OpenPower
from generator.data_generator import DataGenerator

2024-11-18 06:00:25.877593: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-18 06:00:25.891657: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-18 06:00:25.896101: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-18 06:00:25.906770: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_

## Creating a Data Generator and loading a pre-trained model ##

The first step towards creating synthetic time series data is to load a model. In this instance, we will load a pre-trained ACGAN that was trained on the PecanStreet dataset. To do this, you first need to initialize a data generator and set the data generator's dataset to the PecanStreet dataset.

In [2]:
# Build a data generator configured for the "acgan" model.
generator = DataGenerator(model_name="acgan")
# Keep the PecanStreet object around as dataset_manager: a later cell queries it
# for the conditioning-variable mapping. create_dataset() yields the dataset
# object that is handed to the generator.
dataset_manager = PecanStreet()
dataset = dataset_manager.create_dataset()
generator.set_dataset(dataset)

The code snippet above initializes an untrained ACGAN. To use the pre-trained weights, you need to explicitly load the provided model checkpoint:

In [3]:
# Restore the pre-trained checkpoint into the generator created above.
generator.load_model()

Loaded generator state.
Loaded discriminator state.
Loaded generator optimizer state.
Loaded discriminator optimizer state.
Loaded epoch number: 200
ACGAN models moved to 0.


  checkpoint = torch.load(path)


## Generating synthetic data ##

All pre-trained models in EnData are conditional generative models. This means that the user has to specify the conditions for which the generator generates data. To view the required conditioning variables, each TimeSeriesDataset instance (which includes the dataset classes supported by EnData) has a method, `get_conditioning_variables_integer_mapping()`, that returns the available conditioning variables and the corresponding integer codes. Below, it is called on `dataset_manager`:

In [4]:
# Leave the mapping as the cell's last expression so the notebook renders it with
# its pretty-printer (wrapped across lines) instead of print()'s single long line.
dataset_manager.get_conditioning_variables_integer_mapping()

{'weekday': {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}, 'month': {0: 'January', 1: 'February', 2: 'March', 3: 'April', 4: 'May', 5: 'June', 6: 'July', 7: 'August', 8: 'September', 9: 'October', 10: 'November', 11: 'December'}, 'building_type': {0: 'Apartment', 1: 'Single-Family Home 001 (Master)', 2: 'Town Home'}, 'city': {0: 'Austin', 1: 'Brooktondale', 2: 'Groton', 3: 'Ithaca', 4: 'Lansing', 5: 'San Diego', 6: 'Trumansburg'}, 'state': {0: 'California', 1: 'New York', 2: 'Texas'}, 'house_construction_year': {0: '(1796.7830000000001, 1840.2]', 1: '(1840.2, 1883.4]', 2: '(1883.4, 1926.6]', 3: '(1926.6, 1969.8]', 4: '(1969.8, 2013.0]'}, 'total_square_footage': {0: '(1070.324, 2010.0]', 1: '(2010.0, 2945.0]', 2: '(2945.0, 3880.0]', 3: '(3880.0, 4815.0]', 4: '(4815.0, 5750.0]'}, 'car1': {0: 'no', 1: 'yes'}, 'has_solar': {0: 'no', 1: 'yes'}}


We will now specify the desired conditions in a Python dictionary and pass them to the data generator, along with the desired number of samples. The `num_samples` argument refers to the number of synthetic time series, each with the dataset's specified sequence length, that the model will generate. It does not refer to the number of individual measurement values.

In [5]:
# Integer codes for every conditioning variable; the trailing comments give the
# label each code stands for in the mapping printed earlier in this notebook.
cond_vars = {
    "weekday": 0, # Monday
    "month": 0, # January
    "building_type": 0, # Apartment
    "city": 0, # Austin
    "state": 2, # Texas
    "house_construction_year": 4, # bin (1969.8, 2013.0]
    "total_square_footage": 0, # bin (1070.324, 2010.0] sq feet
    "car1": 0, # no ev
    "has_solar": 0, # no solar panels
}

# NOTE(review): the saved output of this cell is an AttributeError
# ('PecanStreetDataset' object has no attribute
# 'get_conditioning_variables_integer_mapping'). The traceback is not shown, so
# which of the two calls below raises is unconfirmed -- presumably the generator
# looks the method up on the dataset passed to set_dataset(); verify and fix
# before publishing.
generator.set_model_conditioning_vars(cond_vars)
# Request two synthetic time series for the conditions above.
data = generator.generate(num_samples=2)
# Bare last expression so the notebook displays the result.
data

AttributeError: 'PecanStreetDataset' object has no attribute 'get_conditioning_variables_integer_mapping'