In [119]:
import pickle
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
import datetime
import re
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import seaborn as sns
import scipy.stats as stats
from gretel_synthetics.timeseries_dgan.dgan import DGAN
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from timeVAE.vae_dense_model import VariationalAutoencoderDense as VAE_Dense
from timeVAE.vae_conv_model import VariationalAutoencoderConv as VAE_Conv
from timeVAE.vae_conv_I_model import VariationalAutoencoderConvInterpretable as TimeVAE
from timeVAE import utils
from sklearn.preprocessing import MinMaxScaler

In [13]:
MODEL = 0

In [154]:
index = [0,3] if MODEL==0 else [1,2]
model_name = 'th_v_air' if MODEL==0 else 'el_v_sky'

base_data_train, base_data_test = np.load('../../data/training_data/training_data_1month.npy', allow_pickle=True)

base_data_train, base_data_test = base_data_train[:,:,index], base_data_test[:,:,index]
print(base_data_train.shape)

(108, 730, 2)


In [138]:
from sklearn.preprocessing import MinMaxScaler

scalers = {var_name: MinMaxScaler(feature_range=(-1,1)) for var_name in ['temp', 'energy']}

temp_var, energy_var = base_data_train[:,:,0], base_data_train[:,:,1]
temp_var_test, energy_var_test = base_data_test[:,:,0], base_data_test[:,:,1]

temp_var, temp_var_test = scalers['temp'].fit_transform(temp_var), scalers['temp'].fit_transform(temp_var_test)
energy_var, energy_var_test = scalers['energy'].fit_transform(energy_var), scalers['energy'].fit_transform(energy_var_test)

base_data_train_scaled, base_data_test_scaled = np.stack((temp_var, energy_var), axis=-1), np.stack((temp_var_test, energy_var_test), axis=-1)
print(base_data_train_scaled.shape, base_data_test_scaled.shape)

(108, 730, 2) (12, 730, 2)


Load in tVAE models

In [16]:
batches=[4,8,16,20,24,32]
latent_dims=[3,5,10,15,20,25,30,50]

vae_list = []
for b in batches:
    latent_dim_list = []
    for l in latent_dims:
        samples = TimeVAE.load('../../data/models/model_data/',f'tVAE_{model_name}_b{b}l{l}')
        sample = samples.get_prior_samples(num_samples=1000)
        latent_dim_list.append(sample)
    vae_list.append(latent_dim_list)



In [9]:
print(len(vae_list), len(vae_list[0]), vae_list[0][0].shape)

6 8 (1000, 730, 2)


Load in DGAN models

In [None]:
import tensorflow as tf

# Hide GPU from visible devices
tf.config.set_visible_devices([], 'GPU')
tf.config.get_visible_devices()

In [29]:
batches=[2,4,6,8,10,12,16,20,24,32]
epochs=[100,500,1000]


gan_list = []
for b in batches:
    epoch_list = []
    for e in epochs:
        model = DGAN.load(f'../../data/models/model_data/DGAN_{model_name}_b{b}_e{e}')
        attributes, samples = model.generate_numpy(1000)
        epoch_list.append(samples)
    gan_list.append(epoch_list)

In [30]:
print(len(gan_list), len(gan_list[0]), gan_list[0][0].shape)

10 3 (1000, 730, 2)


<h2> Regression Models </h2>

In [43]:
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import grid_search_forecaster 
from sklearn.linear_model import Ridge

model_params = {'lag':24, 'max_depth':5, 'n_est':50} if MODEL==0 else {'lag':2, 'max_depth':3, 'n_est':10}

def find_best_model_regression(synthetic_data):
    
    forecaster = ForecasterAutoreg(
                     regressor = RandomForestRegressor(max_depth=model_params['max_depth'], n_estimators =model_params['n_est']),
                     lags      = model_params['lag']
                 )

    forecaster.fit(y=synthetic_data['energy'], exog=synthetic_data['temp'])

    y_test = pd.Series(base_data_test_scaled[:,:,1].reshape(-1))
    exog_var = pd.Series(base_data_test_scaled[:,:,0].reshape(-1))
    exog_var.index = exog_var.index + synthetic_data['temp'].index.max() + 1
    
    predictions = forecaster.predict(exog=exog_var, steps=len(y_test))
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2  = r2_score(y_test, predictions)

    return {'mse':mse, 'mae':mae, 'r2':r2}

In [24]:
def find_best_model_ridge_regression(synthetic_data):
    
    forecaster = ForecasterAutoreg(
                     regressor = Ridge(),
                     lags      = 2 if MODEL==0 else 12 
                 )

    forecaster.fit(y=synthetic_data['energy'], exog=synthetic_data['temp'])

    y_test = pd.Series(base_data_test_scaled[:,:,1].reshape(-1))
    exog_var = pd.Series(base_data_test_scaled[:,:,0].reshape(-1))
    exog_var.index = exog_var.index + synthetic_data['temp'].index.max() + 1
    
    predictions = forecaster.predict(exog=exog_var, steps=len(y_test))
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2  = r2_score(y_test, predictions)

    return {'mse':mse, 'mae':mae, 'r2':r2}

In [25]:
gb_params = {'lag':24, 'max_depth':5, 'n_est':10} if MODEL==0 else {'lag':12, 'max_depth':5, 'n_est':10}

def find_best_model_gb_regression(synthetic_data):
    
    forecaster = ForecasterAutoreg(
                     regressor = GradientBoostingRegressor(n_estimators=10, max_depth=5),
                     lags      = 24
                 )

    forecaster.fit(y=synthetic_data['energy'], exog=synthetic_data['temp'])

    y_test = pd.Series(base_data_test_scaled[:,:,1].reshape(-1))
    exog_var = pd.Series(base_data_test_scaled[:,:,0].reshape(-1))
    exog_var.index = exog_var.index + synthetic_data['temp'].index.max() + 1
    
    predictions = forecaster.predict(exog=exog_var, steps=len(y_test))
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2  = r2_score(y_test, predictions)

    return {'mse':mse, 'mae':mae, 'r2':r2}

<h2>VAE data</h2>

Gather results for RF regression

In [42]:
vae_results = {}
for i,b in enumerate(vae_list):
    for j,e in enumerate(b):
        result = find_best_model_regression(pd.DataFrame(e[0:1].reshape(-1,2), columns=['temp', 'energy']))
        vae_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

AttributeError: 'numpy.dtype[float64]' object has no attribute 'base_dtype'

In [6]:
sorted_models_mae = sorted(vae_results.items(), key=lambda x: x[1]['mae'])
sorted_models_mse = sorted(vae_results.items(), key=lambda x: x[1]['mse'])
sorted_models_r2 = sorted(vae_results.items(), key=lambda x: x[1]['r2'], reverse=True)

print(sorted_models_mae[0:3], '\n', sorted_models_mse[0:3], '\n', sorted_models_r2[0:3])

NameError: name 'vae_results' is not defined

In [76]:
results_df = pd.DataFrame.from_dict(vae_results, orient='index')
#results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_model_results.csv')
print(results_df)

            mse       mae        r2
b0 l0  0.802975  0.808915 -0.948941
b0 l1  1.239599  0.997915 -2.008695
b0 l2  1.541511  1.138306 -2.741480
b0 l3  0.434927  0.493573 -0.055635
b0 l4  0.937224  0.894063 -1.274784
b0 l5  0.381824  0.497964  0.073256
b0 l6  0.633665  0.743806 -0.538000
b0 l7  0.472016  0.433380 -0.145654
b1 l0  0.783712  0.819367 -0.902188
b1 l1  1.634161  1.185418 -2.966355
b1 l2  0.647438  0.732493 -0.571430
b1 l3  1.270956  1.040129 -2.084803
b1 l4  1.694646  1.205789 -3.113162
b1 l5  0.677872  0.754491 -0.645299
b1 l6  0.933418  0.905278 -1.265547
b1 l7  2.935560  1.531046 -6.125047
b2 l0  1.452638  1.054577 -2.525773
b2 l1  0.862700  0.862726 -1.093903
b2 l2  0.593291  0.717492 -0.440008
b2 l3  0.395335  0.534608  0.040462
b2 l4  0.629510  0.487633 -0.527917
b2 l5  0.523060  0.546032 -0.269545
b2 l6  0.407328  0.433120  0.011353
b2 l7  0.940584  0.884544 -1.282939
b3 l0  0.959533  0.856917 -1.328932
b3 l1  0.779043  0.806840 -0.890856
b3 l2  0.567704  0.668728 -0

Gather results for Ridge Regression

In [14]:
vae_ridge_results = {}
for i,b in enumerate(vae_list):
    for j,e in enumerate(b):
        result = find_best_model_ridge_regression(pd.DataFrame(e[0:216].reshape(-1,2), columns=['temp', 'energy']))
        vae_ridge_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 0.5316823816200129, 'mae': 0.6469317759379111, 'r2': 0.2215231062331604} 0 0
{'mse': 2.1508902426263625, 'mae': 1.3157294808106783, 'r2': -2.1492831299229724} 0 1
{'mse': 0.7627888909846742, 'mae': 0.7704627019450743, 'r2': -0.11685763339435429} 0 2
{'mse': 1.1546448592036584, 'mae': 0.947147059825273, 'r2': -0.690603966710188} 0 3
{'mse': 0.6451415894816627, 'mae': 0.7221206168146458, 'r2': 0.055398790741908344} 0 4
{'mse': 0.6844222083709699, 'mae': 0.5965670281198887, 'r2': -0.002114971675822863} 0 5
{'mse': 0.9098431987813336, 'mae': 0.9192763757119392, 'r2': -0.3321711075775038} 0 6
{'mse': 1.089185555845522, 'mae': 0.8547652769244078, 'r2': -0.5947599874699603} 0 7
{'mse': 0.6479996249687214, 'mae': 0.7663653139732038, 'r2': 0.05121412210297083} 1 0
{'mse': 1.2043867424337915, 'mae': 0.8919423584871022, 'r2': -0.7634348674241065} 1 1
{'mse': 0.5574151851753598, 'mae': 0.6716919384351415, 'r2': 0.1838457378038344} 1 2
{'mse': 1.4878888552344076, 'mae': 1.1211231915635809, 

In [26]:
results_df = pd.DataFrame.from_dict(vae_ridge_results, orient='index')
results_df.to_csv(f'../../data/models/model_history/VAE_ridge_{model_name}_model_results.csv')
print(results_df)

            mse       mae        r2
b0 l0  0.540741  0.659020  0.208259
b0 l1  2.311282  1.356734 -2.384124
b0 l2  0.789271  0.766803 -0.155632
b0 l3  1.122340  0.934024 -0.643305
b0 l4  0.656072  0.735864  0.039395
b0 l5  0.689371  0.650451 -0.009361
b0 l6  0.873157  0.899565 -0.278456
b0 l7  0.962786  0.781339 -0.409688
b1 l0  0.640403  0.760515  0.062338
b1 l1  1.435308  0.995641 -1.101544
b1 l2  0.556234  0.672572  0.185575
b1 l3  1.359070  1.082605 -0.989919
b1 l4  1.120469  0.955329 -0.640565
b1 l5  1.189247  1.029054 -0.741267
b1 l6  0.861819  0.887502 -0.261855
b1 l7  3.048193  1.500644 -3.463093
b2 l0  0.734541  0.671848 -0.075498
b2 l1  0.868698  0.897998 -0.271927
b2 l2  0.839879  0.879158 -0.229730
b2 l3  0.734373  0.803328 -0.075252
b2 l4  1.083259  0.744311 -0.586082
b2 l5  1.183713  0.898657 -0.733164
b2 l6  0.894916  0.697116 -0.310315
b2 l7  0.618512  0.702104  0.094390
b3 l0  0.517624  0.575607  0.242107
b3 l1  1.266980  1.057885 -0.855083
b3 l2  0.566007  0.613633  0

Gradient Boosting

In [21]:
vae_gb_results = {}
for i,b in enumerate(vae_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = e[indices]
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        
        result = find_best_model_gb_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        vae_gb_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 0.5033965193037758, 'mae': 0.6473534717375019, 'r2': -0.22181948869026757} 0 0
{'mse': 0.41680632466649126, 'mae': 0.5255730302890624, 'r2': -0.011651989948636476} 0 1
{'mse': 0.41717105550651484, 'mae': 0.4838263223220936, 'r2': -0.012537246861186402} 0 2
{'mse': 0.8665178563018816, 'mae': 0.8750140704500521, 'r2': -1.103169893967542} 0 3
{'mse': 0.5218320371281189, 'mae': 0.4425946813144205, 'r2': -0.2665652787347286} 0 4
{'mse': 0.5116300698030287, 'mae': 0.6529354357271275, 'r2': -0.24180356103748224} 0 5
{'mse': 0.672078521780003, 'mae': 0.7682576427989324, 'r2': -0.6312362210542504} 0 6
{'mse': 1.2974974064723035, 'mae': 0.9408152260155207, 'r2': -2.1492224458474665} 0 7
{'mse': 0.6775141364434528, 'mae': 0.7715963726468882, 'r2': -0.6444292799534237} 1 0
{'mse': 0.8675312118039241, 'mae': 0.6746685410533722, 'r2': -1.1056294610362207} 1 1
{'mse': 1.2225294994970795, 'mae': 1.0345745114873974, 'r2': -1.9672639970776342} 1 2
{'mse': 0.4139977883832842, 'mae': 0.52540803264

KeyboardInterrupt: 

<h2> GAN data </h2>

Gather results for RF Regression

In [77]:
gan_results = {}
for i,b in enumerate(gan_list):
    for j,e in enumerate(b):
        result = find_best_model_regression(pd.DataFrame(e[0:216].reshape(-1,2), columns=['temp', 'energy']))
        gan_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 1.1318198142763778, 'mae': 0.9437059766275813, 'r2': -1.7470978716366052} 0 0
{'mse': 0.6857347900629832, 'mae': 0.6353890132143729, 'r2': -0.6643820496229598} 0 1
{'mse': 2.135001345769634, 'mae': 1.2822319213842108, 'r2': -4.181971175027394} 0 2
{'mse': 1.1953543942876674, 'mae': 0.9622225242992981, 'r2': -1.9013059066284002} 1 0
{'mse': 1.4572759213966302, 'mae': 1.059243806013446, 'r2': -2.5370290673126514} 1 1
{'mse': 0.8992265473138538, 'mae': 0.821995399716797, 'r2': -1.182558834088241} 1 2
{'mse': 0.48121222192262386, 'mae': 0.6198428634935955, 'r2': -0.1679748436762738} 2 0
{'mse': 1.1985155053202392, 'mae': 0.9625005054531665, 'r2': -1.9089784012075284} 2 1
{'mse': 1.041904074324046, 'mae': 0.889671339312508, 'r2': -1.5288587714423714} 2 2
{'mse': 0.41815702255284454, 'mae': 0.5334626027377778, 'r2': -0.01493033800547594} 3 0
{'mse': 0.9087023823888308, 'mae': 0.8305913709267757, 'r2': -1.2055581189903979} 3 1
{'mse': 1.5829689512784477, 'mae': 1.100511864177276, 'r2'

In [65]:
sorted_models_mae = sorted(gan_results.items(), key=lambda x: x[1]['mae'])
sorted_models_mse = sorted(gan_results.items(), key=lambda x: x[1]['mse'])
sorted_models_r2 = sorted(gan_results.items(), key=lambda x: x[1]['r2'], reverse=True)

print(sorted_models_mae[0:3], '\n', sorted_models_mse[0:3], '\n', sorted_models_r2[0:3])

[('b6 l1', {'mse': 0.49415553969179815, 'mae': 0.5661241864354905, 'r2': 0.2764690294893397}), ('b1 l2', {'mse': 0.5338222166580122, 'mae': 0.5689453766290705, 'r2': 0.21839000987499346}), ('b8 l2', {'mse': 0.53719189977815, 'mae': 0.5709294706902118, 'r2': 0.21345619875198718})] 
 [('b4 l0', {'mse': 0.49240435120523374, 'mae': 0.5728619570504203, 'r2': 0.27903307866709715}), ('b6 l1', {'mse': 0.49415553969179815, 'mae': 0.5661241864354905, 'r2': 0.2764690294893397}), ('b3 l1', {'mse': 0.4960254090267697, 'mae': 0.586887935097929, 'r2': 0.27373121059226946})] 
 [('b4 l0', {'mse': 0.49240435120523374, 'mae': 0.5728619570504203, 'r2': 0.27903307866709715}), ('b6 l1', {'mse': 0.49415553969179815, 'mae': 0.5661241864354905, 'r2': 0.2764690294893397}), ('b3 l1', {'mse': 0.4960254090267697, 'mae': 0.586887935097929, 'r2': 0.27373121059226946})]


In [78]:
results_df = pd.DataFrame.from_dict(gan_results, orient='index')
#results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_model_results.csv')
print(results_df)

             mse       mae         r2
b0 l0   1.131820  0.943706  -1.747098
b0 l1   0.685735  0.635389  -0.664382
b0 l2   2.135001  1.282232  -4.181971
b1 l0   1.195354  0.962223  -1.901306
b1 l1   1.457276  1.059244  -2.537029
b1 l2   0.899227  0.821995  -1.182559
b2 l0   0.481212  0.619843  -0.167975
b2 l1   1.198516  0.962501  -1.908978
b2 l2   1.041904  0.889671  -1.528859
b3 l0   0.418157  0.533463  -0.014930
b3 l1   0.908702  0.830591  -1.205558
b3 l2   1.582969  1.100512  -2.842105
b4 l0   1.007881  0.876441  -1.446280
b4 l1   1.593801  1.109767  -2.868396
b4 l2   1.362691  1.026822  -2.307458
b5 l0   0.510711  0.635904  -0.239574
b5 l1   3.842008  1.708054  -8.325135
b5 l2   0.520490  0.491918  -0.263307
b6 l0   0.566191  0.458497  -0.374232
b6 l1   0.914098  0.832097  -1.218655
b6 l2   2.623453  1.484936  -5.367518
b7 l0   0.981027  0.782465  -1.381102
b7 l1   1.715567  1.161289  -3.163940
b7 l2   1.888939  1.211300  -3.584741
b8 l0   0.399780  0.458615   0.029673
b8 l1   2.02

In [96]:
vae_results = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_model_results.csv', index_col=0)
gan_results = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_model_results.csv', index_col=0)

#print(vae_results, '\n', gan_results)

In [97]:
best_vaes = vae_results['mse'].nsmallest(5)
best_gans = gan_results['mse'].nsmallest(5)

print(best_vaes)
print(best_gans)

b5 l1    0.530135
b4 l5    0.532272
b5 l4    0.536739
b0 l5    0.538737
b5 l0    0.541052
Name: mse, dtype: float64
b4 e0    0.537730
b9 e2    0.579713
b5 e0    0.582607
b0 e0    0.586430
b8 e2    0.587330
Name: mse, dtype: float64


In [105]:
batches=[4,8,16,20,24,32]
latent_dims=[3,5,10,15,20,25,30,50]

for i, scores in enumerate(best_vaes):
    print(scores, best_vaes.index[i])
    
    b, l = map(int, re.findall(r'\d+', best_vaes.index[i]))
    print(batches[b], latent_dims[l])
    
    vae = TimeVAE.load('../../data/models/model_data/',f'tVAE_{model_name}_b{batches[b]}l{latent_dims[l]}')
    samples = vae.get_prior_samples(num_samples=1000)
    
    np.save(f'../../data/vae_synthetic_data/{model_name}_b{batches[b]}l{latent_dims[l]}_generated_samples.npy', samples)

    temp, energy = scalers['temp'].inverse_transform(samples[:,:,0]), scalers['energy'].inverse_transform(samples[:,:,1])
    np.save(f'../../data/vae_synthetic_data/{model_name}_b{batches[b]}l{latent_dims[l]}_rescaled_samples.npy', np.stack((temp, energy),axis=-1))

0.5301347479816934 b5 l1
32 5
0.5322723807857386 b4 l5
24 25
0.5367392105418513 b5 l4
32 20
0.5387367736137983 b0 l5
4 25
0.5410523136136406 b5 l0
32 3


In [74]:
batches=[2,4,6,8,10,12,16,20,24,32]
epochs=[100,500,1000]

for i, scores in enumerate(best_gans):
    print(scores, best_gans.index[i])
    
    b, e = map(int, re.findall(r'\d+', best_gans.index[i]))
    print(batches[b], epochs[e])
    
    dgan = DGAN.load(f'../../data/models/model_data/DGAN_{model_name}_b{batches[b]}_e{epochs[e]}.zip')
    attributes, samples = dgan.generate_numpy(1000)
    
    np.save(f'../../data/gan_synthetic_data/{model_name}_b{batches[b]}e{epochs[e]}_generated_samples.npy', samples)

    temp, energy = scalers['temp'].inverse_transform(samples[:,:,0]), scalers['energy'].inverse_transform(samples[:,:,1])
    np.save(f'../../data/gan_synthetic_data/{model_name}_b{batches[b]}e{epochs[e]}_rescaled_samples.npy', np.stack((temp, energy),axis=-1))

0.4127741324562375 b9 e2
32 1000
0.4145561139925519 b7 e0
20 100
0.4220499112934464 b9 e1
32 500
0.4250164590716146 b4 e2
10 1000
0.4274020755166289 b4 e0
10 100


In [91]:
unscaled = (np.load('../../data/models/model_data/th_v_air_l15b8_samples.npy')[:1000,:,:])
rescaled = (np.load('../../data/models/model_data/th_v_air_l15b8_scaled_samples.npy')[:1000,:,:])

print(unscaled.shape, rescaled.shape)
np.save('../../data/vae_synthetic_data/th_v_air_b8l15_generated_samples.npy', unscaled)
np.save('../../data/vae_synthetic_data/th_v_air_b8l15_rescaled_samples.npy', rescaled)

(1000, 730, 2) (1000, 730, 2)


In [20]:
print(np.concatenate((base_data_train_scaled, base_data_test_scaled[0:216]), axis=0).shape)

(120, 730, 2)


<h1> Blended datasets </h1>
<h2> VAE data </h2>

RF regression

In [54]:
vae_blended_results = {}
for i,b in enumerate(vae_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = np.concatenate((e[indices], base_data_train_scaled), axis=0)
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        
        result = find_best_model_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        vae_blended_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 0.7572197536812194, 'mae': 0.8344414357052574, 'r2': -0.10870343295659568} 0 0
{'mse': 10.646028456685217, 'mae': 3.1564175107114005, 'r2': -14.587665588356472} 0 1
{'mse': 0.8554545520142225, 'mae': 0.892906332547314, 'r2': -0.2525365244972193} 0 2
{'mse': 0.7017839368849638, 'mae': 0.6860350591273817, 'r2': -0.0275356080392386} 0 3
{'mse': 0.6947126487089319, 'mae': 0.7702733056545327, 'r2': -0.01718199346687954} 0 4
{'mse': 0.7307868377690802, 'mae': 0.66834370740485, 'r2': -0.07000097640766167} 0 5
{'mse': 0.7002226664500458, 'mae': 0.6873909588946637, 'r2': -0.025249632425749846} 0 6
{'mse': 0.7085066846432759, 'mae': 0.6810222531274811, 'r2': -0.03737889789308424} 0 7
{'mse': 0.7872948509844427, 'mae': 0.8552083275120448, 'r2': -0.15273868621627873} 1 0
{'mse': 0.6842861174701119, 'mae': 0.717636091551701, 'r2': -0.001915710565369988} 1 1
{'mse': 0.6821218843925435, 'mae': 0.7891688413348992, 'r2': 0.0012531088310512306} 1 2
{'mse': 0.731722733568559, 'mae': 0.81352044930

In [56]:
results_df = pd.DataFrame.from_dict(vae_blended_results, orient='index')
results_df.to_csv(f'../../data/models/model_history/VAE_blended_{model_name}_model_results.csv')
print(results_df)

             mse       mae         r2
b0 l0   0.757220  0.834441  -0.108703
b0 l1  10.646028  3.156418 -14.587666
b0 l2   0.855455  0.892906  -0.252537
b0 l3   0.701784  0.686035  -0.027536
b0 l4   0.694713  0.770273  -0.017182
b0 l5   0.730787  0.668344  -0.070001
b0 l6   0.700223  0.687391  -0.025250
b0 l7   0.708507  0.681022  -0.037379
b1 l0   0.787295  0.855208  -0.152739
b1 l1   0.684286  0.717636  -0.001916
b1 l2   0.682122  0.789169   0.001253
b1 l3   0.731723  0.813520  -0.071371
b1 l4   0.765917  0.840180  -0.121438
b1 l5   1.424066  1.095152  -1.085084
b1 l6   0.735191  0.816548  -0.076449
b1 l7   0.727240  0.809457  -0.064808
b2 l0   1.362755  1.046308  -0.995314
b2 l1   3.536141  1.689131  -4.177535
b2 l2   0.696891  0.690495  -0.020371
b2 l3   0.693827  0.768706  -0.015886
b2 l4   0.713818  0.677494  -0.045155
b2 l5   0.960525  0.618117  -0.406379
b2 l6   0.685523  0.748828  -0.003727
b2 l7   0.697374  0.690076  -0.021079
b3 l0   0.794436  0.858813  -0.163194
b3 l1   0.75

Ridge Regression

In [13]:
vae_blended_ridge_results = {}
for i,b in enumerate(vae_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = np.concatenate((e[indices], base_data_train_scaled), axis=0)
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        
        result = find_best_model_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        vae_blended_ridge_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 0.6076678900631831, 'mae': 0.6444437762093875, 'r2': 0.1102669039796711} 0 0
{'mse': 1.592182926767665, 'mae': 1.1436222688579112, 'r2': -1.3312369602356453} 0 1
{'mse': 7.172314410972206, 'mae': 2.5474176495335463, 'r2': -9.501534820018222} 0 2
{'mse': 0.7207652066639981, 'mae': 0.8032258650635606, 'r2': -0.05532753880119046} 0 3
{'mse': 1.733621349994804, 'mae': 1.183038609078647, 'r2': -1.5383277877286554} 0 4
{'mse': 0.8426561088830234, 'mae': 0.8859102718030022, 'r2': -0.23379734374146488} 0 5
{'mse': 1.1423746370665273, 'mae': 1.0092681367539702, 'r2': -0.6726381947655957} 0 6
{'mse': 0.773568217737622, 'mae': 0.6522940676259624, 'r2': -0.13264047122690426} 0 7
{'mse': 0.793628002327389, 'mae': 0.8595393306206024, 'r2': -0.1620115381211884} 1 0
{'mse': 1.5427813446869616, 'mae': 1.1292643231380721, 'r2': -1.2589043204965331} 1 1
{'mse': 0.6090720587587108, 'mae': 0.6407971156235748, 'r2': 0.10821095305444317} 1 2
{'mse': 0.9141732838964529, 'mae': 0.9211418126504651, 'r2'

In [None]:
results_df = pd.DataFrame.from_dict(vae_blended_ridge_results, orient='index')
results_df.to_csv(f'../../data/models/model_history/VAE_blended_ridge_{model_name}_model_results.csv')
print(results_df)

<h2> GAN data </h2>

In [37]:
gan_blended_results = {}
for i,b in enumerate(gan_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = np.concatenate((e[indices], base_data_train_scaled), axis=0)
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        
        result = find_best_model_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        gan_blended_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 1.1729881693802657, 'mae': 1.0180147229334828, 'r2': -0.7174618119601601} 0 0
{'mse': 1.0324203616713927, 'mae': 0.6114323760344877, 'r2': -0.5116457193234374} 0 1
{'mse': 0.6517351403218136, 'mae': 0.5733436953824708, 'r2': 0.045744667990474386} 0 2
{'mse': 0.7975964015063317, 'mae': 0.6651258788956244, 'r2': -0.16782197527849507} 1 0
{'mse': 0.7744893268889329, 'mae': 0.8023321650766966, 'r2': -0.13398913767838216} 1 1
{'mse': 0.6747063042453426, 'mae': 0.7475115823449245, 'r2': 0.012110827645961608} 1 2
{'mse': 0.618881366692569, 'mae': 0.6858990760462009, 'r2': 0.09384839406370737} 2 0
{'mse': 0.6161636148084032, 'mae': 0.5951320466097083, 'r2': 0.09782766273604493} 2 1
{'mse': 0.7321677701918994, 'mae': 0.7945904834259836, 'r2': -0.0720229053264696} 2 2
{'mse': 0.6771373161856715, 'mae': 0.6902162249645797, 'r2': 0.008551396885342899} 3 0
{'mse': 0.5877356693097961, 'mae': 0.567438474253315, 'r2': 0.1394511948916487} 3 1
{'mse': 0.6349692982137977, 'mae': 0.540113203389762

In [38]:
results_df = pd.DataFrame.from_dict(gan_blended_results, orient='index')
results_df.to_csv(f'../../data/models/model_history/DGAN_blended_{model_name}_model_results.csv')
print(results_df)

            mse       mae        r2
b0 l0  1.172988  1.018015 -0.717462
b0 l1  1.032420  0.611432 -0.511646
b0 l2  0.651735  0.573344  0.045745
b1 l0  0.797596  0.665126 -0.167822
b1 l1  0.774489  0.802332 -0.133989
b1 l2  0.674706  0.747512  0.012111
b2 l0  0.618881  0.685899  0.093848
b2 l1  0.616164  0.595132  0.097828
b2 l2  0.732168  0.794590 -0.072023
b3 l0  0.677137  0.690216  0.008551
b3 l1  0.587736  0.567438  0.139451
b3 l2  0.634969  0.540113  0.070293
b4 l0  0.544956  0.589113  0.202088
b4 l1  0.645865  0.570175  0.054339
b4 l2  0.640623  0.721513  0.062014
b5 l0  0.683648  0.721462 -0.000982
b5 l1  1.815005  1.194672 -1.657487
b5 l2  0.732444  0.662778 -0.072428
b6 l0  1.230147  1.010555 -0.801152
b6 l1  0.565534  0.594718  0.171959
b6 l2  3.853646  1.780066 -4.642419
b7 l0  1.142624  0.677972 -0.673003
b7 l1  0.762847  0.717744 -0.116942
b7 l2  0.658412  0.711098  0.035969
b8 l0  1.117689  0.966410 -0.636495
b8 l1  0.636254  0.566717  0.068411
b8 l2  0.569608  0.530820  0

Ridge Regression

In [15]:
gan_blended_ridge_results = {}
for i,b in enumerate(gan_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = np.concatenate((e[indices], base_data_train_scaled), axis=0)
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        
        result = find_best_model_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        gan_blended_ridge_results[f'b{i} l{j}'] = result
        print(result, f'{i} {j}')

{'mse': 1.1517205294797828, 'mae': 1.0124321591763965, 'r2': -0.6863222316019899} 0 0
{'mse': 1.0280635867724421, 'mae': 0.611764284844543, 'r2': -0.5052666315307555} 0 1
{'mse': 0.6689791707097759, 'mae': 0.5988740490006627, 'r2': 0.020496362467279594} 0 2
{'mse': 0.7172774327015325, 'mae': 0.7396269522567561, 'r2': -0.05022082183195731} 1 0
{'mse': 0.8269622665482889, 'mae': 0.8262025863936181, 'r2': -0.21081878726798342} 1 1
{'mse': 0.7678227290435488, 'mae': 0.8298105939344292, 'r2': -0.12422806121228924} 1 2
{'mse': 0.6616312392407856, 'mae': 0.7399278140925395, 'r2': 0.0312550316715553} 2 0
{'mse': 0.6162025365747856, 'mae': 0.5954199965645762, 'r2': 0.09777067439706522} 2 1
{'mse': 0.8996113563964886, 'mae': 0.8740264810836498, 'r2': -0.3171898835373237} 2 2
{'mse': 1.03204477857213, 'mae': 0.9326751636190002, 'r2': -0.5110957993438165} 3 0
{'mse': 1.1498197099186733, 'mae': 0.9585173162581292, 'r2': -0.6835390961085117} 3 1
{'mse': 0.6806001901016873, 'mae': 0.5470581188647348,

In [None]:
results_df = pd.DataFrame.from_dict(gan_blended_ridge_results, orient='index')
results_df.to_csv(f'../../data/models/model_history/DGAN_blended_ridge_{model_name}_model_results.csv')
print(results_df)

<h1> Run all 3 regressors on all datasets </h1>

In [62]:
vae_rf_results = {}
vae_ridge_results = {}
vae_gb_results = {}
vae_rf_blended_results = {}
vae_ridge_blended_results = {}
vae_gb_blended_results = {}

for i,b in enumerate(vae_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = e[indices]
        current_df_blended = np.concatenate((e[indices], base_data_train_scaled), axis=0)
        
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        current_df_blended = current_df_blended[np.random.permutation(current_df_blended.shape[0])]
        
        
        rf_result = find_best_model_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        ridge_result = find_best_model_ridge_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        gb_result = find_best_model_gb_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))

        rf_blended_result = find_best_model_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))
        ridge_blended_result = find_best_model_ridge_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))
        gb_blended_result = find_best_model_gb_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))

        vae_rf_results[f'b{i} l{j}'] = rf_result
        vae_ridge_results[f'b{i} l{j}'] = ridge_result
        vae_gb_results[f'b{i} l{j}'] = gb_result
        vae_rf_blended_results[f'b{i} l{j}'] = rf_blended_result
        vae_ridge_blended_results[f'b{i} l{j}'] = ridge_blended_result
        vae_gb_blended_results[f'b{i} l{j}'] = gb_blended_result
        
        print(rf_result, f'{i} {j}')
        print(ridge_result, f'{i} {j}')
        print(gb_result, f'{i} {j}')
        print(rf_blended_result, f'{i} {j}')
        print(ridge_blended_result, f'{i} {j}')
        print(gb_blended_result, f'{i} {j}')

rf_results_df = pd.DataFrame.from_dict(vae_rf_results, orient='index')
rf_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_rf_model_results.csv')
ridge_results_df = pd.DataFrame.from_dict(vae_ridge_results, orient='index')
ridge_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_ridge_model_results.csv')
gb_results_df = pd.DataFrame.from_dict(vae_gb_results, orient='index')
gb_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_gb_model_results.csv')
rf_blended_results_df = pd.DataFrame.from_dict(vae_rf_blended_results, orient='index')
rf_blended_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_rf_blended_model_results.csv')
ridge_blended_results_df = pd.DataFrame.from_dict(vae_ridge_blended_results, orient='index')
ridge_blended_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_ridge_blended_model_results.csv')
gb_blended_results_df = pd.DataFrame.from_dict(vae_gb_blended_results, orient='index')
gb_blended_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_gb_blended_model_results.csv')

gan_rf_results = {}
gan_ridge_results = {}
gan_gb_results = {}
gan_rf_blended_results = {}
gan_ridge_blended_results = {}
gan_gb_blended_results = {}

for i,b in enumerate(gan_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = e[indices]
        current_df_blended = np.concatenate((e[indices], base_data_train_scaled), axis=0)
        
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        current_df_blended = current_df_blended[np.random.permutation(current_df_blended.shape[0])]
        
        
        rf_result = find_best_model_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        ridge_result = find_best_model_ridge_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        gb_result = find_best_model_gb_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))

        rf_blended_result = find_best_model_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))
        ridge_blended_result = find_best_model_ridge_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))
        gb_blended_result = find_best_model_gb_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))

        gan_rf_results[f'b{i} l{j}'] = rf_result
        gan_ridge_results[f'b{i} l{j}'] = ridge_result
        gan_gb_results[f'b{i} l{j}'] = gb_result
        gan_rf_blended_results[f'b{i} l{j}'] = rf_blended_result
        gan_ridge_blended_results[f'b{i} l{j}'] = ridge_blended_result
        gan_gb_blended_results[f'b{i} l{j}'] = gb_blended_result
        
        print(rf_result, f'{i} {j}')
        print(ridge_result, f'{i} {j}')
        print(gb_result, f'{i} {j}')
        print(rf_blended_result, f'{i} {j}')
        print(ridge_blended_result, f'{i} {j}')
        print(gb_blended_result, f'{i} {j}')

rf_results_df = pd.DataFrame.from_dict(gan_rf_results, orient='index')
rf_results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_rf_model_results.csv')
ridge_results_df = pd.DataFrame.from_dict(gan_ridge_results, orient='index')
ridge_results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_ridge_model_results.csv')
gb_results_df = pd.DataFrame.from_dict(gan_gb_results, orient='index')
gb_results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_gb_model_results.csv')
rf_blended_results_df = pd.DataFrame.from_dict(gan_rf_blended_results, orient='index')
rf_blended_results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_rf_blended_model_results.csv')
ridge_blended_results_df = pd.DataFrame.from_dict(gan_ridge_blended_results, orient='index')
ridge_blended_results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_ridge_blended_model_results.csv')
gb_blended_results_df = pd.DataFrame.from_dict(gan_gb_blended_results, orient='index')
gb_blended_results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_gb_blended_model_results.csv')


{'mse': 0.4278005065555019, 'mae': 0.4705598059579195, 'r2': -0.038336532211219465} 0 0
{'mse': 0.5909687825414841, 'mae': 0.6797059269881689, 'r2': -0.43437061645835295} 0 0
{'mse': 0.5031241661606956, 'mae': 0.6471650323045395, 'r2': -0.22115844641988858} 0 0
{'mse': 0.413990033615664, 'mae': 0.5266300906016675, 'r2': -0.004816425617592213} 0 0
{'mse': 0.4796396015228001, 'mae': 0.5847320994596024, 'r2': -0.16415785611451228} 0 0
{'mse': 0.4289484540867857, 'mae': 0.5624957399277004, 'r2': -0.041122774491274106} 0 0
{'mse': 0.5263429694521654, 'mae': 0.4452642796628963, 'r2': -0.27751399374234476} 0 1
{'mse': 0.410940995351471, 'mae': 0.510413577107855, 'r2': 0.0025840514022047456} 0 1
{'mse': 0.41288631542962667, 'mae': 0.5201136629250019, 'r2': -0.002137534647929984} 0 1
{'mse': 0.41434973962585187, 'mae': 0.5412771978536624, 'r2': -0.005689486508149155} 0 1
{'mse': 0.40965331239526837, 'mae': 0.47801405609680225, 'r2': 0.0057094526928584965} 0 1
{'mse': 0.41092594113547626, 'mae':

In [99]:
best_vaes_rf, best_gans_rf = {}, {}

df1 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_rf_model_results.csv', index_col=0)
df2 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_rf_blended_model_results.csv', index_col=0)
df3 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_rf_model_results.csv', index_col=0)
df4 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_rf_blended_model_results.csv', index_col=0)

print('VAEs')
print(df1.nsmallest(5, 'mse'))
print(df2.nsmallest(5, 'mse'))

print('GANs')
print(df3.nsmallest(5, 'mse'))
print(df4.nsmallest(5, 'mse'))

best_vaes_rf['b3 l0'] = df1.loc['b3 l0'].to_dict()
best_vaes_rf['b4 l7'] = df1.loc['b4 l7'].to_dict()
best_vaes_rf['b2 l2'] = df1.loc['b2 l2'].to_dict()

best_vaes_rf['b2 l1'] = df2.loc['b2 l1'].to_dict()
best_vaes_rf['b0 l2'] = df2.loc['b0 l2'].to_dict()

best_gans_rf['b7 l0'] = df3.loc['b7 l0'].to_dict()
best_gans_rf['b9 l1'] = df3.loc['b9 l1'].to_dict()

best_gans_rf['b9 l0'] = df4.loc['b9 l0'].to_dict()
best_gans_rf['b9 l1'] = df4.loc['b9 l1'].to_dict()
print(best_vaes_rf, best_gans_rf)

VAEs
            mse       mae        r2
b3 l0  0.389694  0.471982  0.054153
b4 l7  0.401051  0.457907  0.026589
b2 l2  0.408385  0.521762  0.008789
b0 l2  0.413284  0.492527 -0.003103
b2 l1  0.413749  0.495409 -0.004232
            mse       mae        r2
b2 l1  0.413411  0.494864 -0.003411
b0 l2  0.413983  0.529208 -0.004799
b0 l0  0.413990  0.526630 -0.004816
b0 l1  0.414350  0.541277 -0.005689
b4 l6  0.418079  0.489991 -0.014742
GANs
            mse       mae        r2
b7 l0  0.412122  0.491646 -0.000282
b9 l1  0.421365  0.477685 -0.022716
b4 l2  0.421469  0.478430 -0.022969
b3 l2  0.427253  0.466272 -0.037008
b4 l0  0.431227  0.448274 -0.046654
            mse       mae        r2
b9 l1  0.413833  0.485921 -0.004435
b9 l0  0.417204  0.485411 -0.012618
b2 l0  0.417715  0.485491 -0.013858
b4 l0  0.421006  0.476670 -0.021846
b1 l0  0.425723  0.507436 -0.033293
{'b3 l0': {'mse': 0.3896943752146132, 'mae': 0.4719817929730492, 'r2': 0.0541528119250531}, 'b4 l7': {'mse': 0.401050807720536

In [100]:
best_vaes_ridge, best_gans_ridge = {}, {}

df1 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_ridge_model_results.csv', index_col=0)
df2 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_ridge_blended_model_results.csv', index_col=0)
df3 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_ridge_model_results.csv', index_col=0)
df4 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_ridge_blended_model_results.csv', index_col=0)

print('VAEs')
print(df1.nsmallest(5, 'mse'))
print(df2.nsmallest(5, 'mse'))

print('GANs')
print(df3.nsmallest(5, 'mse'))
print(df4.nsmallest(5, 'mse'))

best_vaes_ridge['b4 l3'] = df1.loc['b4 l3'].to_dict()
best_vaes_ridge['b3 l1'] = df1.loc['b3 l1'].to_dict()
best_vaes_ridge['b3 l0'] = df1.loc['b3 l0'].to_dict()
best_vaes_ridge['b0 l1'] = df1.loc['b0 l1'].to_dict()

best_gans_ridge['b9 l1'] = df3.loc['b9 l1'].to_dict()
best_gans_ridge['b3 l2'] = df3.loc['b3 l2'].to_dict()
best_gans_ridge['b3 l1'] = df3.loc['b3 l1'].to_dict()
print(best_vaes_ridge, best_gans_ridge)

VAEs
            mse       mae        r2
b4 l3  0.383215  0.454014  0.069878
b3 l1  0.392434  0.442947  0.047504
b3 l0  0.395214  0.496027  0.040757
b0 l1  0.410941  0.510414  0.002584
b2 l1  0.415346  0.487349 -0.008107
            mse       mae        r2
b4 l3  0.398085  0.450817  0.033786
b3 l0  0.398478  0.474786  0.032833
b3 l1  0.400564  0.436736  0.027770
b0 l1  0.409653  0.478014  0.005709
b2 l1  0.421660  0.468380 -0.023434
GANs
            mse       mae        r2
b9 l1  0.404208  0.454003  0.018927
b3 l2  0.418689  0.435116 -0.016222
b3 l1  0.440414  0.480317 -0.068951
b8 l1  0.447845  0.452056 -0.086987
b1 l0  0.448544  0.477123 -0.088685
            mse       mae        r2
b9 l1  0.417073  0.455209 -0.012299
b9 l0  0.424017  0.441309 -0.029153
b2 l0  0.432736  0.485952 -0.050317
b6 l0  0.434442  0.444671 -0.054456
b3 l2  0.435375  0.449144 -0.056721
{'b4 l3': {'mse': 0.3832154464349572, 'mae': 0.45401444687402, 'r2': 0.0698781519805789}, 'b3 l1': {'mse': 0.3924337086081557,

In [102]:
best_vaes_gb, best_gans_gb = {}, {}

df1 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_gb_model_results.csv', index_col=0)
df2 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_gb_blended_model_results.csv', index_col=0)
df3 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_gb_model_results.csv', index_col=0)
df4 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_gb_blended_model_results.csv', index_col=0)

print('VAEs')
print(df1.nsmallest(5, 'mse'))
print(df2.nsmallest(5, 'mse'))

print('GANs')
print(df3.nsmallest(5, 'mse'))
print(df4.nsmallest(5, 'mse'))

best_vaes_gb['b1 l4'] = df1.loc['b1 l4'].to_dict()
best_vaes_gb['b1 l3'] = df1.loc['b1 l3'].to_dict()

best_vaes_gb['b0 l1'] = df2.loc['b0 l1'].to_dict()

best_gans_gb['b7 l0'] = df3.loc['b7 l0'].to_dict()
best_gans_gb['b2 l0'] = df3.loc['b2 l0'].to_dict()

best_gans_gb['b9 l1'] = df4.loc['b9 l1'].to_dict()
print(best_vaes_gb, best_gans_gb)

VAEs
            mse       mae        r2
b1 l4  0.412233  0.505275 -0.000551
b1 l3  0.412636  0.519228 -0.001529
b3 l0  0.412697  0.519343 -0.001677
b0 l1  0.412886  0.520114 -0.002138
b3 l6  0.413757  0.496415 -0.004252
            mse       mae        r2
b0 l1  0.410926  0.502542  0.002621
b3 l0  0.413277  0.497512 -0.003087
b1 l4  0.413711  0.495347 -0.004138
b1 l3  0.414003  0.493890 -0.004847
b2 l1  0.417815  0.484520 -0.014101
GANs
            mse       mae        r2
b7 l0  0.418620  0.478271 -0.016053
b2 l0  0.419903  0.491859 -0.019168
b1 l0  0.427780  0.475232 -0.038287
b9 l1  0.428881  0.476591 -0.040960
b9 l2  0.432763  0.467903 -0.050380
            mse       mae        r2
b2 l0  0.416775  0.488148 -0.011577
b9 l1  0.419293  0.475526 -0.017689
b0 l2  0.423377  0.488698 -0.027601
b8 l1  0.428707  0.469714 -0.040537
b7 l0  0.429885  0.469077 -0.043395
{'b1 l4': {'mse': 0.4122325403543621, 'mae': 0.5052750487861299, 'r2': -0.0005507236598782}, 'b1 l3': {'mse': 0.41263555893537

In [166]:
batches=[4,8,16,20,24,32]
latent_dims=[3,5,10,15,20,25,30,50]
vaes = [best_vaes_rf, best_vaes_ridge, best_vaes_gb]

best_vaes = set()
for vae in vaes:
    for name, scores in vae.items():
        b, l = map(int, re.findall(r'\d+', name))
        best_vaes.add((batches[b], latent_dims[l]))
        print(scores['mse'], batches[b], latent_dims[l])
        
print(best_vaes)

0.3896943752146132 20 3
0.4010508077205366 24 50
0.4083846614588541 16 10
0.4134110183424701 16 5
0.4139828193218255 4 10
0.3832154464349572 24 15
0.3924337086081557 20 5
0.3952135133108125 20 3
0.410940995351471 4 5
0.4122325403543621 8 20
0.412635558935374 8 15
0.4109259411354762 4 5
{(20, 5), (4, 10), (16, 10), (8, 20), (4, 5), (16, 5), (20, 3), (24, 15), (8, 15), (24, 50)}


In [157]:
for b, l in best_vaes:
    vae = TimeVAE.load('../../data/models/model_data/',f'tVAE_{model_name}_b{b}l{l}')
    samples = vae.get_prior_samples(num_samples=1000)
    
    temp, energy = scalers['temp'].inverse_transform(samples[:,:,0]), scalers['energy'].inverse_transform(samples[:,:,1])
    rescaled_samples = np.stack((temp, energy), axis=-1)

    print(b, l)
    #np.save(f'../../data/vae_synthetic_data/{model_name}_b{b}l{l}_generated_samples.npy', samples)
    #np.save(f'../../data/vae_synthetic_data/{model_name}_b{b}l{l}_rescaled_samples.npy', np.stack((temp, energy),axis=-1))

20 5
4 10
16 10
8 20
4 5
16 5
20 3
24 15
8 15
24 50


In [165]:
batches=[2,4,6,8,10,12,16,20,24,32]
epochs=[100,500,1000]
gans = [best_gans_rf, best_gans_ridge, best_gans_gb]

best_gans = set()
for gan in gans:
    for name, scores in gan.items():
        b, e = map(int, re.findall(r'\d+', name))
        best_gans.add((batches[b], epochs[e]))
        print(scores['mse'], batches[b], epochs[e])
print(best_gans)

0.4121220081949123 20 100
0.4138326980328993 32 500
0.4172043901724792 32 100
0.4042075756887816 32 500
0.4186890486734469 8 1000
0.4404139007071863 8 500
0.4186195987380705 20 100
0.4199031249726431 6 100
0.4192934185623982 32 500
{(32, 500), (8, 1000), (20, 100), (32, 100), (6, 100), (8, 500)}


In [160]:
for b, e in best_gans:
    dgan = DGAN.load(f'../../data/models/model_data/DGAN_{model_name}_b{b}_e{e}')
    attributes, samples = dgan.generate_numpy(1000)
    
    temp, energy = scalers['temp'].inverse_transform(samples[:,:,0]), scalers['energy'].inverse_transform(samples[:,:,1])
    rescaled_samples = np.stack((temp, energy),axis=-1)

    print(b, e)
    #np.save(f'../../data/gan_synthetic_data/{model_name}_b{b}e{e}_generated_samples.npy', samples)
    #np.save(f'../../data/gan_synthetic_data/{model_name}_b{b}e{e}_rescaled_samples.npy', rescaled_samples)