In [1]:
import pickle
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
import datetime
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import seaborn as sns
import scipy.stats as stats
from gretel_synthetics.timeseries_dgan.dgan import DGAN
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from timeVAE.vae_dense_model import VariationalAutoencoderDense as VAE_Dense
from timeVAE.vae_conv_model import VariationalAutoencoderConv as VAE_Conv
from timeVAE.vae_conv_I_model import VariationalAutoencoderConvInterpretable as TimeVAE
from timeVAE import utils
from sklearn.preprocessing import MinMaxScaler

2024-04-03 17:34:30.375738: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-03 17:34:30.375955: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-03 17:34:30.535164: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-03 17:34:30.996563: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [20]:
MODEL = 1

In [21]:
index = [0,3] if MODEL==0 else [1,2]
model_name = 'th_v_air' if MODEL==0 else 'el_v_sky'

base_data_train, base_data_test = np.load('../../data/training_data/training_data_1month.npy', allow_pickle=True)

base_data_train, base_data_test = base_data_train[:,:,index], base_data_test[:,:,index]
print(base_data_train.shape)

(108, 730, 2)


In [22]:
from sklearn.preprocessing import MinMaxScaler

scalers = {var_name: MinMaxScaler(feature_range=(-1,1)) for var_name in ['G.air.T', 'G.E_th_I']}

temp_var, energy_var = base_data_train[:,:,0], base_data_train[:,:,1]
temp_var_test, energy_var_test = base_data_test[:,:,0], base_data_test[:,:,1]

temp_var, temp_var_test = scalers['G.air.T'].fit_transform(temp_var), scalers['G.air.T'].fit_transform(temp_var_test)
energy_var, energy_var_test = scalers['G.E_th_I'].fit_transform(energy_var), scalers['G.E_th_I'].fit_transform(energy_var_test)

base_data_train_scaled, base_data_test_scaled = np.stack((temp_var, energy_var), axis=-1), np.stack((temp_var_test, energy_var_test), axis=-1)
print(base_data_train_scaled.shape, base_data_test_scaled.shape)

(108, 730, 2) (12, 730, 2)


Load in tVAE models

In [23]:
batches=[4,8,16,20,24,32]
latent_dims=[3,5,10,15,20,25,30,50]

vae_list = []
for b in batches:
    latent_dim_list = []
    for l in latent_dims:
        samples = TimeVAE.load('../../data/models/model_data/',f'tVAE_{model_name}_b{b}l{l}')
        sample = samples.get_prior_samples(num_samples=1000)
        latent_dim_list.append(sample)
    vae_list.append(latent_dim_list)



In [9]:
print(len(vae_list), len(vae_list[0]), vae_list[0][0].shape)

6 8 (1000, 730, 2)


Load in DGAN models

In [None]:
import tensorflow as tf

# Hide GPU from visible devices
tf.config.set_visible_devices([], 'GPU')
tf.config.get_visible_devices()

In [24]:
batches=[2,4,6,8,10,12,16,20,24,32]
epochs=[100,500,1000]


gan_list = []
for b in batches:
    epoch_list = []
    for e in epochs:
        model = DGAN.load(f'../../data/models/model_data/DGAN_{model_name}_b{b}_e{e}.zip')
        attributes, samples = model.generate_numpy(1000)
        epoch_list.append(samples)
    gan_list.append(epoch_list)

OutOfMemoryError: CUDA out of memory. Tried to allocate 142.00 MiB. GPU 0 has a total capacty of 7.92 GiB of which 156.56 MiB is free. Including non-PyTorch memory, this process has 7.42 GiB memory in use. Of the allocated memory 56.46 MiB is allocated by PyTorch, and 51.54 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [36]:
print(len(gan_list), len(gan_list[0]), gan_list[0][0].shape)

10 3 (1000, 730, 2)


In [12]:
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import grid_search_forecaster 
from sklearn.linear_model import Ridge

model_params = {'lag':24, 'max_depth':5, 'n_est':50} if MODEL==0 else {'lag':2, 'max_depth':3, 'n_est':10}

def find_best_model_regression(synthetic_data):
    
    forecaster = ForecasterAutoreg(
                     regressor = RandomForestRegressor(max_depth=model_params['max_depth'], n_estimators =model_params['n_est']),
                     lags      = model_params['lag']
                 )

    forecaster.fit(y=synthetic_data['G.E_th_I'], exog=synthetic_data['G.air.T'])

    y_test = pd.Series(base_data_test_scaled[:,:,1].reshape(-1))
    exog_var = pd.Series(base_data_test_scaled[:,:,0].reshape(-1))
    exog_var.index = exog_var.index + synthetic_data['G.air.T'].index.max() + 1
    
    predictions = forecaster.predict(exog=exog_var, steps=len(y_test))
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2  = r2_score(y_test, predictions)

    return {'mse':mse, 'mae':mae, 'r2':r2}

In [5]:
def find_best_model_ridge_regression(synthetic_data):
    
    forecaster = ForecasterAutoreg(
                     regressor = Ridge(),
                     lags      = 2 if MODEL==0 else 12 
                 )

    forecaster.fit(y=synthetic_data['G.E_th_I'], exog=synthetic_data['G.air.T'])

    y_test = pd.Series(base_data_test_scaled[:,:,1].reshape(-1))
    exog_var = pd.Series(base_data_test_scaled[:,:,0].reshape(-1))
    exog_var.index = exog_var.index + synthetic_data['G.air.T'].index.max() + 1
    
    predictions = forecaster.predict(exog=exog_var, steps=len(y_test))
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2  = r2_score(y_test, predictions)

    return {'mse':mse, 'mae':mae, 'r2':r2}

<h2>VAE data</h2>

Gather results for RF regression

In [75]:
vae_results = {}
for i,b in enumerate(vae_list):
    for j,e in enumerate(b):
        result = find_best_model_regression(pd.DataFrame(e[0:216].reshape(-1,2), columns=['G.air.T', 'G.E_th_I']))
        vae_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 0.8029748180153373, 'mae': 0.8089149700537648, 'r2': -0.9489413294624292} 0 0
{'mse': 1.2395994901113387, 'mae': 0.9979147635773803, 'r2': -2.008695446053697} 0 1
{'mse': 1.5415108312346475, 'mae': 1.1383057942164303, 'r2': -2.741479933620787} 0 2
{'mse': 0.43492744148959744, 'mae': 0.49357281482899423, 'r2': -0.05563468121143278} 0 3
{'mse': 0.9372238106448142, 'mae': 0.8940626052316817, 'r2': -1.2747839390986466} 0 4
{'mse': 0.3818239268564768, 'mae': 0.49796396683829147, 'r2': 0.0732555804582985} 0 5
{'mse': 0.6336647982950371, 'mae': 0.7438056335728762, 'r2': -0.5380003042624466} 0 6
{'mse': 0.4720159037985335, 'mae': 0.43337996135987444, 'r2': -0.14565398868953405} 0 7
{'mse': 0.7837121391594251, 'mae': 0.8193672824720428, 'r2': -0.9021878944901605} 1 0
{'mse': 1.634160748509586, 'mae': 1.1854178758724971, 'r2': -2.966355295708361} 1 1
{'mse': 0.6474381505815288, 'mae': 0.7324929791268362, 'r2': -0.5714303134160796} 1 2
{'mse': 1.270956227252239, 'mae': 1.0401291864553006,

In [6]:
sorted_models_mae = sorted(vae_results.items(), key=lambda x: x[1]['mae'])
sorted_models_mse = sorted(vae_results.items(), key=lambda x: x[1]['mse'])
sorted_models_r2 = sorted(vae_results.items(), key=lambda x: x[1]['r2'], reverse=True)

print(sorted_models_mae[0:3], '\n', sorted_models_mse[0:3], '\n', sorted_models_r2[0:3])

NameError: name 'vae_results' is not defined

In [76]:
results_df = pd.DataFrame.from_dict(vae_results, orient='index')
#results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_model_results.csv')
print(results_df)

            mse       mae        r2
b0 l0  0.802975  0.808915 -0.948941
b0 l1  1.239599  0.997915 -2.008695
b0 l2  1.541511  1.138306 -2.741480
b0 l3  0.434927  0.493573 -0.055635
b0 l4  0.937224  0.894063 -1.274784
b0 l5  0.381824  0.497964  0.073256
b0 l6  0.633665  0.743806 -0.538000
b0 l7  0.472016  0.433380 -0.145654
b1 l0  0.783712  0.819367 -0.902188
b1 l1  1.634161  1.185418 -2.966355
b1 l2  0.647438  0.732493 -0.571430
b1 l3  1.270956  1.040129 -2.084803
b1 l4  1.694646  1.205789 -3.113162
b1 l5  0.677872  0.754491 -0.645299
b1 l6  0.933418  0.905278 -1.265547
b1 l7  2.935560  1.531046 -6.125047
b2 l0  1.452638  1.054577 -2.525773
b2 l1  0.862700  0.862726 -1.093903
b2 l2  0.593291  0.717492 -0.440008
b2 l3  0.395335  0.534608  0.040462
b2 l4  0.629510  0.487633 -0.527917
b2 l5  0.523060  0.546032 -0.269545
b2 l6  0.407328  0.433120  0.011353
b2 l7  0.940584  0.884544 -1.282939
b3 l0  0.959533  0.856917 -1.328932
b3 l1  0.779043  0.806840 -0.890856
b3 l2  0.567704  0.668728 -0

Gather results for Ridge Regression

In [25]:
vae_ridge_results = {}
for i,b in enumerate(vae_list):
    for j,e in enumerate(b):
        result = find_best_model_ridge_regression(pd.DataFrame(e[0:216].reshape(-1,2), columns=['G.air.T', 'G.E_th_I']))
        vae_ridge_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 0.5407413647237419, 'mae': 0.6590199677038575, 'r2': 0.2082591552897618} 0 0
{'mse': 2.3112815879729265, 'mae': 1.3567335107281469, 'r2': -2.3841243821984976} 0 1
{'mse': 0.7892710936925861, 'mae': 0.766803012228737, 'r2': -0.1556322545156028} 0 2
{'mse': 1.1223403908209968, 'mae': 0.9340238389214284, 'r2': -0.6433045205169592} 0 3
{'mse': 0.6560717059046214, 'mae': 0.7358643979620892, 'r2': 0.039395170205285046} 0 4
{'mse': 0.6893710687011471, 'mae': 0.6504506606964443, 'r2': -0.009360977093161749} 0 5
{'mse': 0.8731572393290519, 'mae': 0.899564937593076, 'r2': -0.27845638475323065} 0 6
{'mse': 0.9627858155836333, 'mae': 0.7813386611748793, 'r2': -0.4096884474422615} 0 7
{'mse': 0.640402513151653, 'mae': 0.7605145351820523, 'r2': 0.06233763533831749} 1 0
{'mse': 1.4353080883277387, 'mae': 0.9956410969078928, 'r2': -1.101544495033425} 1 1
{'mse': 0.5562339045094374, 'mae': 0.6725723826874458, 'r2': 0.1855753412950618} 1 2
{'mse': 1.3590704139816925, 'mae': 1.082605283656311, 'r

In [26]:
results_df = pd.DataFrame.from_dict(vae_ridge_results, orient='index')
results_df.to_csv(f'../../data/models/model_history/VAE_ridge_{model_name}_model_results.csv')
print(results_df)

            mse       mae        r2
b0 l0  0.540741  0.659020  0.208259
b0 l1  2.311282  1.356734 -2.384124
b0 l2  0.789271  0.766803 -0.155632
b0 l3  1.122340  0.934024 -0.643305
b0 l4  0.656072  0.735864  0.039395
b0 l5  0.689371  0.650451 -0.009361
b0 l6  0.873157  0.899565 -0.278456
b0 l7  0.962786  0.781339 -0.409688
b1 l0  0.640403  0.760515  0.062338
b1 l1  1.435308  0.995641 -1.101544
b1 l2  0.556234  0.672572  0.185575
b1 l3  1.359070  1.082605 -0.989919
b1 l4  1.120469  0.955329 -0.640565
b1 l5  1.189247  1.029054 -0.741267
b1 l6  0.861819  0.887502 -0.261855
b1 l7  3.048193  1.500644 -3.463093
b2 l0  0.734541  0.671848 -0.075498
b2 l1  0.868698  0.897998 -0.271927
b2 l2  0.839879  0.879158 -0.229730
b2 l3  0.734373  0.803328 -0.075252
b2 l4  1.083259  0.744311 -0.586082
b2 l5  1.183713  0.898657 -0.733164
b2 l6  0.894916  0.697116 -0.310315
b2 l7  0.618512  0.702104  0.094390
b3 l0  0.517624  0.575607  0.242107
b3 l1  1.266980  1.057885 -0.855083
b3 l2  0.566007  0.613633  0

<h2> GAN data </h2>

Gather results for RF Regression

In [77]:
gan_results = {}
for i,b in enumerate(gan_list):
    for j,e in enumerate(b):
        result = find_best_model_regression(pd.DataFrame(e[0:216].reshape(-1,2), columns=['G.air.T', 'G.E_th_I']))
        gan_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 1.1318198142763778, 'mae': 0.9437059766275813, 'r2': -1.7470978716366052} 0 0
{'mse': 0.6857347900629832, 'mae': 0.6353890132143729, 'r2': -0.6643820496229598} 0 1
{'mse': 2.135001345769634, 'mae': 1.2822319213842108, 'r2': -4.181971175027394} 0 2
{'mse': 1.1953543942876674, 'mae': 0.9622225242992981, 'r2': -1.9013059066284002} 1 0
{'mse': 1.4572759213966302, 'mae': 1.059243806013446, 'r2': -2.5370290673126514} 1 1
{'mse': 0.8992265473138538, 'mae': 0.821995399716797, 'r2': -1.182558834088241} 1 2
{'mse': 0.48121222192262386, 'mae': 0.6198428634935955, 'r2': -0.1679748436762738} 2 0
{'mse': 1.1985155053202392, 'mae': 0.9625005054531665, 'r2': -1.9089784012075284} 2 1
{'mse': 1.041904074324046, 'mae': 0.889671339312508, 'r2': -1.5288587714423714} 2 2
{'mse': 0.41815702255284454, 'mae': 0.5334626027377778, 'r2': -0.01493033800547594} 3 0
{'mse': 0.9087023823888308, 'mae': 0.8305913709267757, 'r2': -1.2055581189903979} 3 1
{'mse': 1.5829689512784477, 'mae': 1.100511864177276, 'r2'

In [65]:
sorted_models_mae = sorted(gan_results.items(), key=lambda x: x[1]['mae'])
sorted_models_mse = sorted(gan_results.items(), key=lambda x: x[1]['mse'])
sorted_models_r2 = sorted(gan_results.items(), key=lambda x: x[1]['r2'], reverse=True)

print(sorted_models_mae[0:3], '\n', sorted_models_mse[0:3], '\n', sorted_models_r2[0:3])

[('b6 l1', {'mse': 0.49415553969179815, 'mae': 0.5661241864354905, 'r2': 0.2764690294893397}), ('b1 l2', {'mse': 0.5338222166580122, 'mae': 0.5689453766290705, 'r2': 0.21839000987499346}), ('b8 l2', {'mse': 0.53719189977815, 'mae': 0.5709294706902118, 'r2': 0.21345619875198718})] 
 [('b4 l0', {'mse': 0.49240435120523374, 'mae': 0.5728619570504203, 'r2': 0.27903307866709715}), ('b6 l1', {'mse': 0.49415553969179815, 'mae': 0.5661241864354905, 'r2': 0.2764690294893397}), ('b3 l1', {'mse': 0.4960254090267697, 'mae': 0.586887935097929, 'r2': 0.27373121059226946})] 
 [('b4 l0', {'mse': 0.49240435120523374, 'mae': 0.5728619570504203, 'r2': 0.27903307866709715}), ('b6 l1', {'mse': 0.49415553969179815, 'mae': 0.5661241864354905, 'r2': 0.2764690294893397}), ('b3 l1', {'mse': 0.4960254090267697, 'mae': 0.586887935097929, 'r2': 0.27373121059226946})]


In [78]:
results_df = pd.DataFrame.from_dict(gan_results, orient='index')
#results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_model_results.csv')
print(results_df)

             mse       mae         r2
b0 l0   1.131820  0.943706  -1.747098
b0 l1   0.685735  0.635389  -0.664382
b0 l2   2.135001  1.282232  -4.181971
b1 l0   1.195354  0.962223  -1.901306
b1 l1   1.457276  1.059244  -2.537029
b1 l2   0.899227  0.821995  -1.182559
b2 l0   0.481212  0.619843  -0.167975
b2 l1   1.198516  0.962501  -1.908978
b2 l2   1.041904  0.889671  -1.528859
b3 l0   0.418157  0.533463  -0.014930
b3 l1   0.908702  0.830591  -1.205558
b3 l2   1.582969  1.100512  -2.842105
b4 l0   1.007881  0.876441  -1.446280
b4 l1   1.593801  1.109767  -2.868396
b4 l2   1.362691  1.026822  -2.307458
b5 l0   0.510711  0.635904  -0.239574
b5 l1   3.842008  1.708054  -8.325135
b5 l2   0.520490  0.491918  -0.263307
b6 l0   0.566191  0.458497  -0.374232
b6 l1   0.914098  0.832097  -1.218655
b6 l2   2.623453  1.484936  -5.367518
b7 l0   0.981027  0.782465  -1.381102
b7 l1   1.715567  1.161289  -3.163940
b7 l2   1.888939  1.211300  -3.584741
b8 l0   0.399780  0.458615   0.029673
b8 l1   2.02

Gather results for Ridge Regression

In [27]:
gan_ridge_results = {}
for i,b in enumerate(gan_list):
    for j,e in enumerate(b):
        result = find_best_model_ridge_regression(pd.DataFrame(e[0:216].reshape(-1,2), columns=['G.air.T', 'G.E_th_I']))
        gan_ridge_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 0.5429744644307168, 'mae': 0.5942072511196403, 'r2': 0.20498950298708318} 0 0
{'mse': 1.186148675302721, 'mae': 0.8512699109533637, 'r2': -0.736731116577132} 0 1
{'mse': 0.8370787441651047, 'mae': 0.7194490980530537, 'r2': -0.22563109691609395} 0 2
{'mse': 0.5364311179391574, 'mae': 0.645696388013433, 'r2': 0.21457011770684986} 1 0
{'mse': 0.5485580389476666, 'mae': 0.5783102618179943, 'r2': 0.196814163919374} 1 1
{'mse': 0.5345797214690907, 'mae': 0.5703152357096903, 'r2': 0.2172808890676705} 1 2
{'mse': 0.6067980022027685, 'mae': 0.6926310180904819, 'r2': 0.11154057341637069} 2 0
{'mse': 0.5544603024981496, 'mae': 0.5812216685501985, 'r2': 0.18817220783090416} 2 1
{'mse': 0.528070145601985, 'mae': 0.5683424367583029, 'r2': 0.226812057629855} 2 2
{'mse': 0.6709816507068812, 'mae': 0.6799519544916907, 'r2': 0.017564378143806403} 3 0
{'mse': 0.5201345010446601, 'mae': 0.6328440698145821, 'r2': 0.2384312425767039} 3 1
{'mse': 0.5751485204626049, 'mae': 0.5861606746752865, 'r2': 0

In [19]:
results_df = pd.DataFrame.from_dict(gan_ridge_results, orient='index')
results_df.to_csv(f'../../data/models/model_history/DGAN_ridge_{model_name}_model_results.csv')
print(results_df)

            mse       mae        r2
b0 l0  0.467223  0.452656 -0.134021
b0 l1  0.501082  0.498549 -0.216201
b0 l2  0.455763  0.477406 -0.106205
b1 l0  0.449349  0.475630 -0.090637
b1 l1  0.552621  0.473596 -0.341295
b1 l2  0.509562  0.497752 -0.236785
b2 l0  0.489568  0.522209 -0.188256
b2 l1  0.465364  0.460927 -0.129510
b2 l2  0.602372  0.599383 -0.462048
b3 l0  0.496831  0.487412 -0.205884
b3 l1  0.439555  0.481184 -0.066867
b3 l2  0.415492  0.438122 -0.008462
b4 l0  0.472939  0.472346 -0.147896
b4 l1  1.028677  0.792600 -1.496754
b4 l2  0.479079  0.469114 -0.162796
b5 l0  0.537194  0.521559 -0.303850
b5 l1  0.517196  0.488157 -0.255314
b5 l2  0.534466  0.538554 -0.297229
b6 l0  0.466080  0.470974 -0.131247
b6 l1  0.454217  0.448634 -0.102453
b6 l2  0.589917  0.555308 -0.431817
b7 l0  0.463720  0.477772 -0.125518
b7 l1  0.459804  0.453324 -0.116013
b7 l2  0.896421  0.861765 -1.175751


In [96]:
vae_results = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_model_results.csv', index_col=0)
gan_results = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_model_results.csv', index_col=0)

#print(vae_results, '\n', gan_results)

In [97]:
best_vaes = vae_results['mse'].nsmallest(5)
best_gans = gan_results['mse'].nsmallest(5)

print(best_vaes)
print(best_gans)

b5 l1    0.530135
b4 l5    0.532272
b5 l4    0.536739
b0 l5    0.538737
b5 l0    0.541052
Name: mse, dtype: float64
b4 e0    0.537730
b9 e2    0.579713
b5 e0    0.582607
b0 e0    0.586430
b8 e2    0.587330
Name: mse, dtype: float64


In [105]:
import re
batches=[4,8,16,20,24,32]
latent_dims=[3,5,10,15,20,25,30,50]

for i, scores in enumerate(best_vaes):
    print(scores, best_vaes.index[i])
    
    b, l = map(int, re.findall(r'\d+', best_vaes.index[i]))
    print(batches[b], latent_dims[l])
    
    vae = TimeVAE.load('../../data/models/model_data/',f'tVAE_{model_name}_b{batches[b]}l{latent_dims[l]}')
    samples = vae.get_prior_samples(num_samples=1000)
    
    np.save(f'../../data/vae_synthetic_data/{model_name}_b{batches[b]}l{latent_dims[l]}_generated_samples.npy', samples)

    temp, energy = scalers['G.air.T'].inverse_transform(samples[:,:,0]), scalers['G.E_th_I'].inverse_transform(samples[:,:,1])
    np.save(f'../../data/vae_synthetic_data/{model_name}_b{batches[b]}l{latent_dims[l]}_rescaled_samples.npy', np.stack((temp, energy),axis=-1))

0.5301347479816934 b5 l1
32 5
0.5322723807857386 b4 l5
24 25
0.5367392105418513 b5 l4
32 20
0.5387367736137983 b0 l5
4 25
0.5410523136136406 b5 l0
32 3


In [74]:
batches=[2,4,6,8,10,12,16,20,24,32]
epochs=[100,500,1000]

for i, scores in enumerate(best_gans):
    print(scores, best_gans.index[i])
    
    b, e = map(int, re.findall(r'\d+', best_gans.index[i]))
    print(batches[b], epochs[e])
    
    dgan = DGAN.load(f'../../data/models/model_data/DGAN_{model_name}_b{batches[b]}_e{epochs[e]}.zip')
    attributes, samples = dgan.generate_numpy(1000)
    
    np.save(f'../../data/gan_synthetic_data/{model_name}_b{batches[b]}e{epochs[e]}_generated_samples.npy', samples)

    temp, energy = scalers['G.air.T'].inverse_transform(samples[:,:,0]), scalers['G.E_th_I'].inverse_transform(samples[:,:,1])
    np.save(f'../../data/gan_synthetic_data/{model_name}_b{batches[b]}e{epochs[e]}_rescaled_samples.npy', np.stack((temp, energy),axis=-1))

0.4127741324562375 b9 e2
32 1000
0.4145561139925519 b7 e0
20 100
0.4220499112934464 b9 e1
32 500
0.4250164590716146 b4 e2
10 1000
0.4274020755166289 b4 e0
10 100


In [91]:
unscaled = (np.load('../../data/models/model_data/th_v_air_l15b8_samples.npy')[:1000,:,:])
rescaled = (np.load('../../data/models/model_data/th_v_air_l15b8_scaled_samples.npy')[:1000,:,:])

print(unscaled.shape, rescaled.shape)
np.save('../../data/vae_synthetic_data/th_v_air_b8l15_generated_samples.npy', unscaled)
np.save('../../data/vae_synthetic_data/th_v_air_b8l15_rescaled_samples.npy', rescaled)

(1000, 730, 2) (1000, 730, 2)


In [20]:
print(np.concatenate((base_data_train_scaled, base_data_test_scaled[0:216]), axis=0).shape)

(120, 730, 2)


Blended datasets

In [37]:
gan_blended_results = {}
for i,b in enumerate(gan_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = np.concatenate((e[indices], base_data_train_scaled), axis=0)
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        
        result = find_best_model_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['G.air.T', 'G.E_th_I']))
        gan_blended_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 1.1729881693802657, 'mae': 1.0180147229334828, 'r2': -0.7174618119601601} 0 0
{'mse': 1.0324203616713927, 'mae': 0.6114323760344877, 'r2': -0.5116457193234374} 0 1
{'mse': 0.6517351403218136, 'mae': 0.5733436953824708, 'r2': 0.045744667990474386} 0 2
{'mse': 0.7975964015063317, 'mae': 0.6651258788956244, 'r2': -0.16782197527849507} 1 0
{'mse': 0.7744893268889329, 'mae': 0.8023321650766966, 'r2': -0.13398913767838216} 1 1
{'mse': 0.6747063042453426, 'mae': 0.7475115823449245, 'r2': 0.012110827645961608} 1 2
{'mse': 0.618881366692569, 'mae': 0.6858990760462009, 'r2': 0.09384839406370737} 2 0
{'mse': 0.6161636148084032, 'mae': 0.5951320466097083, 'r2': 0.09782766273604493} 2 1
{'mse': 0.7321677701918994, 'mae': 0.7945904834259836, 'r2': -0.0720229053264696} 2 2
{'mse': 0.6771373161856715, 'mae': 0.6902162249645797, 'r2': 0.008551396885342899} 3 0
{'mse': 0.5877356693097961, 'mae': 0.567438474253315, 'r2': 0.1394511948916487} 3 1
{'mse': 0.6349692982137977, 'mae': 0.540113203389762

In [38]:
results_df = pd.DataFrame.from_dict(gan_blended_results, orient='index')
results_df.to_csv(f'../../data/models/model_history/DGAN_blended_{model_name}_model_results.csv')
print(results_df)

            mse       mae        r2
b0 l0  1.172988  1.018015 -0.717462
b0 l1  1.032420  0.611432 -0.511646
b0 l2  0.651735  0.573344  0.045745
b1 l0  0.797596  0.665126 -0.167822
b1 l1  0.774489  0.802332 -0.133989
b1 l2  0.674706  0.747512  0.012111
b2 l0  0.618881  0.685899  0.093848
b2 l1  0.616164  0.595132  0.097828
b2 l2  0.732168  0.794590 -0.072023
b3 l0  0.677137  0.690216  0.008551
b3 l1  0.587736  0.567438  0.139451
b3 l2  0.634969  0.540113  0.070293
b4 l0  0.544956  0.589113  0.202088
b4 l1  0.645865  0.570175  0.054339
b4 l2  0.640623  0.721513  0.062014
b5 l0  0.683648  0.721462 -0.000982
b5 l1  1.815005  1.194672 -1.657487
b5 l2  0.732444  0.662778 -0.072428
b6 l0  1.230147  1.010555 -0.801152
b6 l1  0.565534  0.594718  0.171959
b6 l2  3.853646  1.780066 -4.642419
b7 l0  1.142624  0.677972 -0.673003
b7 l1  0.762847  0.717744 -0.116942
b7 l2  0.658412  0.711098  0.035969
b8 l0  1.117689  0.966410 -0.636495
b8 l1  0.636254  0.566717  0.068411
b8 l2  0.569608  0.530820  0

In [54]:
vae_blended_results = {}
for i,b in enumerate(vae_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = np.concatenate((e[indices], base_data_train_scaled), axis=0)
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        
        result = find_best_model_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['G.air.T', 'G.E_th_I']))
        vae_blended_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 0.7572197536812194, 'mae': 0.8344414357052574, 'r2': -0.10870343295659568} 0 0
{'mse': 10.646028456685217, 'mae': 3.1564175107114005, 'r2': -14.587665588356472} 0 1
{'mse': 0.8554545520142225, 'mae': 0.892906332547314, 'r2': -0.2525365244972193} 0 2
{'mse': 0.7017839368849638, 'mae': 0.6860350591273817, 'r2': -0.0275356080392386} 0 3
{'mse': 0.6947126487089319, 'mae': 0.7702733056545327, 'r2': -0.01718199346687954} 0 4
{'mse': 0.7307868377690802, 'mae': 0.66834370740485, 'r2': -0.07000097640766167} 0 5
{'mse': 0.7002226664500458, 'mae': 0.6873909588946637, 'r2': -0.025249632425749846} 0 6
{'mse': 0.7085066846432759, 'mae': 0.6810222531274811, 'r2': -0.03737889789308424} 0 7
{'mse': 0.7872948509844427, 'mae': 0.8552083275120448, 'r2': -0.15273868621627873} 1 0
{'mse': 0.6842861174701119, 'mae': 0.717636091551701, 'r2': -0.001915710565369988} 1 1
{'mse': 0.6821218843925435, 'mae': 0.7891688413348992, 'r2': 0.0012531088310512306} 1 2
{'mse': 0.731722733568559, 'mae': 0.81352044930

In [56]:
results_df = pd.DataFrame.from_dict(vae_blended_results, orient='index')
results_df.to_csv(f'../../data/models/model_history/VAE_blended_{model_name}_model_results.csv')
print(results_df)

             mse       mae         r2
b0 l0   0.757220  0.834441  -0.108703
b0 l1  10.646028  3.156418 -14.587666
b0 l2   0.855455  0.892906  -0.252537
b0 l3   0.701784  0.686035  -0.027536
b0 l4   0.694713  0.770273  -0.017182
b0 l5   0.730787  0.668344  -0.070001
b0 l6   0.700223  0.687391  -0.025250
b0 l7   0.708507  0.681022  -0.037379
b1 l0   0.787295  0.855208  -0.152739
b1 l1   0.684286  0.717636  -0.001916
b1 l2   0.682122  0.789169   0.001253
b1 l3   0.731723  0.813520  -0.071371
b1 l4   0.765917  0.840180  -0.121438
b1 l5   1.424066  1.095152  -1.085084
b1 l6   0.735191  0.816548  -0.076449
b1 l7   0.727240  0.809457  -0.064808
b2 l0   1.362755  1.046308  -0.995314
b2 l1   3.536141  1.689131  -4.177535
b2 l2   0.696891  0.690495  -0.020371
b2 l3   0.693827  0.768706  -0.015886
b2 l4   0.713818  0.677494  -0.045155
b2 l5   0.960525  0.618117  -0.406379
b2 l6   0.685523  0.748828  -0.003727
b2 l7   0.697374  0.690076  -0.021079
b3 l0   0.794436  0.858813  -0.163194
b3 l1   0.75