In [26]:
import pickle
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
import datetime
import re
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import seaborn as sns
import scipy.stats as stats
from gretel_synthetics.timeseries_dgan.dgan import DGAN
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from timeVAE.vae_dense_model import VariationalAutoencoderDense as VAE_Dense
from timeVAE.vae_conv_model import VariationalAutoencoderConv as VAE_Conv
from timeVAE.vae_conv_I_model import VariationalAutoencoderConvInterpretable as TimeVAE
from timeVAE import utils
from sklearn.preprocessing import MinMaxScaler

pd.set_option('display.precision', 4)

In [32]:
MODEL = 1

In [33]:
index = [0,3] if MODEL==0 else [1,2]
model_name = 'th_v_air' if MODEL==0 else 'el_v_sky'

base_data_train, base_data_test = np.load('../../data/training_data/training_data_1month.npy', allow_pickle=True)

base_data_train, base_data_test = base_data_train[:,:,index], base_data_test[:,:,index]
print(base_data_train.shape)
print(model_name)

(108, 730, 2)
el_v_sky


In [24]:
from sklearn.preprocessing import MinMaxScaler

scalers = {var_name: MinMaxScaler(feature_range=(-1,1)) for var_name in ['temp', 'energy']}

temp_var, energy_var = base_data_train[:,:,0], base_data_train[:,:,1]
temp_var_test, energy_var_test = base_data_test[:,:,0], base_data_test[:,:,1]

temp_var, temp_var_test = scalers['temp'].fit_transform(temp_var), scalers['temp'].fit_transform(temp_var_test)
energy_var, energy_var_test = scalers['energy'].fit_transform(energy_var), scalers['energy'].fit_transform(energy_var_test)

base_data_train_scaled, base_data_test_scaled = np.stack((temp_var, energy_var), axis=-1), np.stack((temp_var_test, energy_var_test), axis=-1)
print(base_data_train_scaled.shape, base_data_test_scaled.shape)

(108, 730, 2) (12, 730, 2)


<h3> Load Models </h3>

Load in tVAE models

In [19]:
batches=[4,8,16,20,24,32]
latent_dims=[3,5,10,15,20,25,30,50]

vae_list = []
for b in batches:
    latent_dim_list = []
    for l in latent_dims:
        samples = TimeVAE.load('../../data/models/model_data/',f'tVAE_{model_name}_b{b}l{l}')
        sample = samples.get_prior_samples(num_samples=1000)
        latent_dim_list.append(sample)
    vae_list.append(latent_dim_list)



In [22]:
print(len(vae_list), len(vae_list[0]), vae_list[0][0].shape)

6 8 (1000, 730, 2)


Load in DGAN models

In [10]:
import tensorflow as tf
import torch
# Hide GPU from visible devices
#tf.config.set_visible_devices([], 'GPU')
tf.config.get_visible_devices()

device(type='cpu')

In [25]:
batches=[2,4,6,8,10,12,16,20,24,32]
epochs=[100,500,1000]


gan_list = []
for b in batches:
    epoch_list = []
    for e in epochs:
        try:
            model = DGAN.load(f'../../data/models/model_data/DGAN_{model_name}_b{b}_e{e}.zip')
        except:
            model = DGAN.load(f'../../data/models/model_data/DGAN_{model_name}_b{b}_e{e}', map_location=torch('cpu'))
            
        attributes, samples = model.generate_numpy(1000)
        epoch_list.append(samples)
    gan_list.append(epoch_list)

In [26]:
print(len(gan_list), len(gan_list[0]), gan_list[0][0].shape)

10 3 (1000, 730, 2)


<h2> Regression Models </h2>

In [27]:
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import grid_search_forecaster 
from sklearn.linear_model import Ridge

model_params = {'lag':24, 'max_depth':5, 'n_est':50} if MODEL==0 else {'lag':2, 'max_depth':3, 'n_est':10}

def find_best_model_regression(synthetic_data):
    
    forecaster = ForecasterAutoreg(
                     regressor = RandomForestRegressor(max_depth=model_params['max_depth'], n_estimators =model_params['n_est']),
                     lags      = model_params['lag']
                 )

    forecaster.fit(y=synthetic_data['energy'], exog=synthetic_data['temp'])

    y_test = pd.Series(base_data_test_scaled[:,:,1].reshape(-1))
    exog_var = pd.Series(base_data_test_scaled[:,:,0].reshape(-1))
    exog_var.index = exog_var.index + synthetic_data['temp'].index.max() + 1
    
    predictions = forecaster.predict(exog=exog_var, steps=len(y_test))
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2  = r2_score(y_test, predictions)

    return {'mse':mse, 'mae':mae, 'r2':r2}

In [28]:
def find_best_model_ridge_regression(synthetic_data):
    
    forecaster = ForecasterAutoreg(
                     regressor = Ridge(),
                     lags      = 2 if MODEL==0 else 12 
                 )

    forecaster.fit(y=synthetic_data['energy'], exog=synthetic_data['temp'])

    y_test = pd.Series(base_data_test_scaled[:,:,1].reshape(-1))
    exog_var = pd.Series(base_data_test_scaled[:,:,0].reshape(-1))
    exog_var.index = exog_var.index + synthetic_data['temp'].index.max() + 1
    
    predictions = forecaster.predict(exog=exog_var, steps=len(y_test))
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2  = r2_score(y_test, predictions)

    return {'mse':mse, 'mae':mae, 'r2':r2}

In [29]:
gb_params = {'lag':24, 'max_depth':5, 'n_est':10} if MODEL==0 else {'lag':12, 'max_depth':5, 'n_est':10}

def find_best_model_gb_regression(synthetic_data):
    
    forecaster = ForecasterAutoreg(
                     regressor = GradientBoostingRegressor(n_estimators=10, max_depth=5),
                     lags      = 24
                 )

    forecaster.fit(y=synthetic_data['energy'], exog=synthetic_data['temp'])

    y_test = pd.Series(base_data_test_scaled[:,:,1].reshape(-1))
    exog_var = pd.Series(base_data_test_scaled[:,:,0].reshape(-1))
    exog_var.index = exog_var.index + synthetic_data['temp'].index.max() + 1
    
    predictions = forecaster.predict(exog=exog_var, steps=len(y_test))
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2  = r2_score(y_test, predictions)

    return {'mse':mse, 'mae':mae, 'r2':r2}

<h2> Run all 3 regressors on all datasets </h2>

<h3> VAEs </h3>

In [30]:
vae_rf_results = {}
vae_ridge_results = {}
vae_gb_results = {}
vae_rf_blended_results = {}
vae_ridge_blended_results = {}
vae_gb_blended_results = {}

for i,b in enumerate(vae_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = e[indices]
        current_df_blended = np.concatenate((e[indices], base_data_train_scaled), axis=0)
        
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        current_df_blended = current_df_blended[np.random.permutation(current_df_blended.shape[0])]
        
        
        rf_result = find_best_model_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        ridge_result = find_best_model_ridge_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        gb_result = find_best_model_gb_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))

        rf_blended_result = find_best_model_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))
        ridge_blended_result = find_best_model_ridge_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))
        gb_blended_result = find_best_model_gb_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))

        vae_rf_results[f'b{i} l{j}'] = rf_result
        vae_ridge_results[f'b{i} l{j}'] = ridge_result
        vae_gb_results[f'b{i} l{j}'] = gb_result
        vae_rf_blended_results[f'b{i} l{j}'] = rf_blended_result
        vae_ridge_blended_results[f'b{i} l{j}'] = ridge_blended_result
        vae_gb_blended_results[f'b{i} l{j}'] = gb_blended_result
        
        print(rf_result, f'{i} {j}')
        print(ridge_result, f'{i} {j}')
        print(gb_result, f'{i} {j}')
        print(rf_blended_result, f'{i} {j}')
        print(ridge_blended_result, f'{i} {j}')
        print(gb_blended_result, f'{i} {j}')

rf_results_df = pd.DataFrame.from_dict(vae_rf_results, orient='index')
rf_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_rf_model_results.csv')
ridge_results_df = pd.DataFrame.from_dict(vae_ridge_results, orient='index')
ridge_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_ridge_model_results.csv')
gb_results_df = pd.DataFrame.from_dict(vae_gb_results, orient='index')
gb_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_gb_model_results.csv')
rf_blended_results_df = pd.DataFrame.from_dict(vae_rf_blended_results, orient='index')
rf_blended_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_rf_blended_model_results.csv')
ridge_blended_results_df = pd.DataFrame.from_dict(vae_ridge_blended_results, orient='index')
ridge_blended_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_ridge_blended_model_results.csv')
gb_blended_results_df = pd.DataFrame.from_dict(vae_gb_blended_results, orient='index')
gb_blended_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_gb_blended_model_results.csv')


{'mse': 0.611810893989884, 'mae': 0.6493479705202254, 'r2': 0.10420081463908626} 0 0
{'mse': 0.5476959653287211, 'mae': 0.6682505916035447, 'r2': 0.1980763919266859} 0 0
{'mse': 0.6090703951613103, 'mae': 0.7193641001852932, 'r2': 0.10821338885480358} 0 0
{'mse': 0.7415221220120998, 'mae': 0.8259487889856266, 'r2': -0.08571932822843498} 0 0
{'mse': 0.530760295180196, 'mae': 0.6425949226630155, 'r2': 0.22287320360758545} 0 0
{'mse': 0.6203172701068354, 'mae': 0.7351307943292931, 'r2': 0.09174597790637429} 0 0
{'mse': 11.591726957733147, 'mae': 3.302839570417546, 'r2': -15.972335189956897} 0 1
{'mse': 2.295460346013451, 'mae': 1.3530603130462817, 'r2': -2.3609592901775467} 0 1
{'mse': 1.7581730559467261, 'mae': 1.1896706367239303, 'r2': -1.5742758207025762} 0 1
{'mse': 10.502750172739525, 'mae': 3.1336516147973965, 'r2': -14.377880879879896} 0 1
{'mse': 1.4288167790356252, 'mae': 1.1097783104211973, 'r2': -1.09204007196263} 0 1
{'mse': 1.3917832175701168, 'mae': 1.086171869741209, 'r2': 

{'mse': 0.7014070348980221, 'mae': 0.7806617473505193, 'r2': -0.026983756975159867} 2 0
{'mse': 0.7377929479082913, 'mae': 0.6689443306283475, 'r2': -0.08025915882465862} 2 0
{'mse': 1.0649247600137968, 'mae': 0.9511838897418529, 'r2': -0.5592378982823394} 2 0
{'mse': 1.53484992424672, 'mae': 1.1269288734457619, 'r2': -1.2472913204030092} 2 0
{'mse': 0.6809930895429472, 'mae': 0.6269872390410931, 'r2': 0.002905863818975063} 2 0
{'mse': 0.8759184805430296, 'mae': 0.8897479743695397, 'r2': -0.2824993294839715} 2 0
{'mse': 1.4666534617919085, 'mae': 1.1068764085708704, 'r2': -1.1474396569043104} 2 1
{'mse': 1.0610457583969186, 'mae': 0.9824687964967895, 'r2': -0.5535583549421539} 2 1
{'mse': 1.0073746938931798, 'mae': 0.9601464918362377, 'r2': -0.47497443900963154} 2 1
{'mse': 0.758517893986591, 'mae': 0.8349458760832686, 'r2': -0.11060413959562343} 2 1
{'mse': 0.6720395109539905, 'mae': 0.7672852505351033, 'r2': 0.016015483939902375} 2 1
{'mse': 0.7816219350332347, 'mae': 0.8507749126193

{'mse': 0.6056043032515883, 'mae': 0.6797709139663232, 'r2': 0.11328835947009697} 4 0
{'mse': 0.5254846447747912, 'mae': 0.5715357192945701, 'r2': 0.2305976874012473} 4 0
{'mse': 0.602195720768269, 'mae': 0.7202816719231551, 'r2': 0.11827912612984115} 4 0
{'mse': 0.7835320673223687, 'mae': 0.8504731160123729, 'r2': -0.14722930648426136} 4 0
{'mse': 0.5192632067878346, 'mae': 0.57061800615184, 'r2': 0.23970697122609752} 4 0
{'mse': 0.5950105087480807, 'mae': 0.7098826422833142, 'r2': 0.12879954532727456} 4 0
{'mse': 0.6831754945704162, 'mae': 0.7348965159050417, 'r2': -0.0002895625209922148} 4 1
{'mse': 0.5991378174257943, 'mae': 0.6761475612583862, 'r2': 0.12275643660275093} 4 1
{'mse': 0.6492348894753571, 'mae': 0.7011580490299588, 'r2': 0.04940547673620854} 4 1
{'mse': 0.7460862734124244, 'mae': 0.8255189300388189, 'r2': -0.09240205183868544} 4 1
{'mse': 0.5664812281807148, 'mae': 0.6449810513608739, 'r2': 0.1705714499177834} 4 1
{'mse': 0.6440984630140574, 'mae': 0.7277198896438274,

KeyboardInterrupt: 

<h3> GANs </h3>

In [None]:
gan_rf_results = {}
gan_ridge_results = {}
gan_gb_results = {}
gan_rf_blended_results = {}
gan_ridge_blended_results = {}
gan_gb_blended_results = {}

for i,b in enumerate(gan_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = e[indices]
        current_df_blended = np.concatenate((e[indices], base_data_train_scaled), axis=0)
        
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        current_df_blended = current_df_blended[np.random.permutation(current_df_blended.shape[0])]
        
        
        rf_result = find_best_model_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        ridge_result = find_best_model_ridge_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        gb_result = find_best_model_gb_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))

        rf_blended_result = find_best_model_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))
        ridge_blended_result = find_best_model_ridge_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))
        gb_blended_result = find_best_model_gb_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))

        gan_rf_results[f'b{i} l{j}'] = rf_result
        gan_ridge_results[f'b{i} l{j}'] = ridge_result
        gan_gb_results[f'b{i} l{j}'] = gb_result
        gan_rf_blended_results[f'b{i} l{j}'] = rf_blended_result
        gan_ridge_blended_results[f'b{i} l{j}'] = ridge_blended_result
        gan_gb_blended_results[f'b{i} l{j}'] = gb_blended_result
        
        print(rf_result, f'{i} {j}')
        print(ridge_result, f'{i} {j}')
        print(gb_result, f'{i} {j}')
        print(rf_blended_result, f'{i} {j}')
        print(ridge_blended_result, f'{i} {j}')
        print(gb_blended_result, f'{i} {j}')

rf_results_df = pd.DataFrame.from_dict(gan_rf_results, orient='index')
rf_results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_rf_model_results.csv')
ridge_results_df = pd.DataFrame.from_dict(gan_ridge_results, orient='index')
ridge_results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_ridge_model_results.csv')
gb_results_df = pd.DataFrame.from_dict(gan_gb_results, orient='index')
gb_results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_gb_model_results.csv')
rf_blended_results_df = pd.DataFrame.from_dict(gan_rf_blended_results, orient='index')
rf_blended_results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_rf_blended_model_results.csv')
ridge_blended_results_df = pd.DataFrame.from_dict(gan_ridge_blended_results, orient='index')
ridge_blended_results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_ridge_blended_model_results.csv')
gb_blended_results_df = pd.DataFrame.from_dict(gan_gb_blended_results, orient='index')
gb_blended_results_df.to_csv(f'../../data/models/model_history/DGAN_{model_name}_gb_blended_model_results.csv')


<h2> Select best Regression models </h2>

<h4> RF regression </h4>

In [17]:
best_vaes_rf, best_gans_rf = {}, {}

df1 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_rf_model_results.csv', index_col=0)
df2 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_rf_blended_model_results.csv', index_col=0)
df3 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_rf_model_results.csv', index_col=0)
df4 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_rf_blended_model_results.csv', index_col=0)

print('VAEs')
print(df1.nsmallest(5, 'mse'))
print(df2.nsmallest(5, 'mse'))

print('GANs')
print(df3.nsmallest(5, 'mse'))
print(df4.nsmallest(5, 'mse'))

VAEs
            mse       mae        r2
b4 l0  0.605604  0.679771  0.113288
b0 l0  0.611811  0.649348  0.104201
b4 l1  0.683175  0.734897 -0.000290
b4 l7  0.691865  0.765003 -0.013013
b3 l7  0.700103  0.801106 -0.025075
            mse       mae        r2
b1 l2  0.678015  0.785796  0.007267
b1 l3  0.696027  0.691361 -0.019106
b3 l7  0.696579  0.690864 -0.019914
b1 l5  0.696820  0.690612 -0.020268
b5 l2  0.697107  0.690273 -0.020688
GANs
            mse       mae        r2
b4 l0  0.542274  0.597414  0.206016
b9 l2  0.545369  0.543727  0.201483
b5 l1  0.571068  0.650214  0.163856
b8 l2  0.582748  0.533028  0.146755
b6 l1  0.600955  0.592479  0.120096
            mse       mae        r2
b4 l0  0.543925  0.612694  0.203598
b9 l2  0.588335  0.574307  0.138573
b8 l2  0.603830  0.552540  0.115887
b2 l1  0.651638  0.599794  0.045887
b9 l1  0.659238  0.540837  0.034759


In [36]:
best_vaes_rf['b4 l0'] = df1.loc['b4 l0'].to_dict()
best_vaes_rf['b0 l0'] = df1.loc['b0 l0'].to_dict()

best_vaes_rf['b1 l2'] = df2.loc['b1 l2'].to_dict()


best_gans_rf['b4 l0'] = df3.loc['b4 l0'].to_dict()
best_gans_rf['b9 l2'] = df3.loc['b9 l2'].to_dict()
best_gans_rf['b5 l1'] = df3.loc['b5 l1'].to_dict()

best_gans_rf['b4 l0'] = df4.loc['b4 l0'].to_dict()
best_gans_rf['b9 l2'] = df4.loc['b9 l2'].to_dict()
print(best_vaes_rf, best_gans_rf)

{'b4 l0': {'mse': 0.6056043032515883, 'mae': 0.6797709139663232, 'r2': 0.1132883594700969}, 'b0 l0': {'mse': 0.611810893989884, 'mae': 0.6493479705202254, 'r2': 0.1042008146390862}, 'b1 l2': {'mse': 0.6780147635160702, 'mae': 0.7857956522483895, 'r2': 0.0072666590496983}} {'b4 l0': {'mse': 0.5439251174583531, 'mae': 0.612693975816244, 'r2': 0.2035975790836628}, 'b9 l2': {'mse': 0.5883354403318565, 'mae': 0.5743070682937852, 'r2': 0.138573024204852}, 'b5 l1': {'mse': 0.5710678684313248, 'mae': 0.650213729558892, 'r2': 0.1638557986595238}}


In [99]:
best_vaes_rf['b3 l0'] = df1.loc['b3 l0'].to_dict()
best_vaes_rf['b4 l7'] = df1.loc['b4 l7'].to_dict()
best_vaes_rf['b2 l2'] = df1.loc['b2 l2'].to_dict()

best_vaes_rf['b2 l1'] = df2.loc['b2 l1'].to_dict()
best_vaes_rf['b0 l2'] = df2.loc['b0 l2'].to_dict()


best_gans_rf['b7 l0'] = df3.loc['b7 l0'].to_dict()
best_gans_rf['b9 l1'] = df3.loc['b9 l1'].to_dict()

best_gans_rf['b9 l0'] = df4.loc['b9 l0'].to_dict()
best_gans_rf['b9 l1'] = df4.loc['b9 l1'].to_dict()
print(best_vaes_rf, best_gans_rf)

VAEs
            mse       mae        r2
b3 l0  0.389694  0.471982  0.054153
b4 l7  0.401051  0.457907  0.026589
b2 l2  0.408385  0.521762  0.008789
b0 l2  0.413284  0.492527 -0.003103
b2 l1  0.413749  0.495409 -0.004232
            mse       mae        r2
b2 l1  0.413411  0.494864 -0.003411
b0 l2  0.413983  0.529208 -0.004799
b0 l0  0.413990  0.526630 -0.004816
b0 l1  0.414350  0.541277 -0.005689
b4 l6  0.418079  0.489991 -0.014742
GANs
            mse       mae        r2
b7 l0  0.412122  0.491646 -0.000282
b9 l1  0.421365  0.477685 -0.022716
b4 l2  0.421469  0.478430 -0.022969
b3 l2  0.427253  0.466272 -0.037008
b4 l0  0.431227  0.448274 -0.046654
            mse       mae        r2
b9 l1  0.413833  0.485921 -0.004435
b9 l0  0.417204  0.485411 -0.012618
b2 l0  0.417715  0.485491 -0.013858
b4 l0  0.421006  0.476670 -0.021846
b1 l0  0.425723  0.507436 -0.033293
{'b3 l0': {'mse': 0.3896943752146132, 'mae': 0.4719817929730492, 'r2': 0.0541528119250531}, 'b4 l7': {'mse': 0.401050807720536

*doesn't matter if we overwrite values here, as we only care about selecting the models for their parameters and will test them all in both non blended and blended again

<h4> Select best Ridge models </h4>

In [28]:
best_vaes_ridge, best_gans_ridge = {}, {}

df1 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_ridge_model_results.csv', index_col=0)
df2 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_ridge_blended_model_results.csv', index_col=0)
df3 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_ridge_model_results.csv', index_col=0)
df4 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_ridge_blended_model_results.csv', index_col=0)

print('VAEs')
print(df1.nsmallest(5, 'mse'))
print(df2.nsmallest(5, 'mse'))

print('GANs')
print(df3.nsmallest(5, 'mse'))
print(df4.nsmallest(5, 'mse'))


VAEs
          mse     mae      r2
b5 l1  0.5170  0.5754  0.2430
b3 l0  0.5193  0.5948  0.2397
b5 l0  0.5240  0.5657  0.2327
b4 l0  0.5255  0.5715  0.2306
b0 l0  0.5477  0.6683  0.1981
          mse     mae      r2
b5 l1  0.5136  0.5717  0.2480
b3 l0  0.5156  0.5752  0.2450
b5 l0  0.5165  0.5659  0.2438
b4 l0  0.5193  0.5706  0.2397
b3 l4  0.5234  0.6066  0.2336
GANs
          mse     mae      r2
b4 l0  0.4932  0.5667  0.2779
b6 l1  0.4953  0.5614  0.2747
b3 l1  0.5091  0.6146  0.2545
b1 l2  0.5230  0.5651  0.2343
b2 l2  0.5328  0.5700  0.2199
          mse     mae      r2
b1 l0  0.5012  0.5761  0.2661
b6 l1  0.5032  0.5623  0.2632
b3 l1  0.5034  0.5784  0.2629
b5 l0  0.5108  0.5759  0.2521
b2 l0  0.5123  0.5669  0.2499


In [1]:
best_vaes_ridge['b5 l1'] = df1.loc['b5 l1'].to_dict()
best_vaes_ridge['b3 l0'] = df1.loc['b3 l0'].to_dict()
best_vaes_ridge['b5 l0'] = df1.loc['b5 l0'].to_dict()
best_vaes_ridge['b4 l0'] = df1.loc['b4 l0'].to_dict()
best_vaes_ridge['b0 l0'] = df1.loc['b0 l0'].to_dict()

best_vaes_ridge['b3 l4'] = df1.loc['b3 l4'].to_dict()


best_gans_ridge['b4 l0'] = df3.loc['b4 l0'].to_dict()
best_gans_ridge['b6 l1'] = df3.loc['b6 l1'].to_dict()
best_gans_ridge['b3 l1'] = df3.loc['b3 l1'].to_dict()
best_gans_ridge['b1 l2'] = df3.loc['b1 l2'].to_dict()
best_gans_ridge['b1 l2'] = df3.loc['b1 l2'].to_dict()

best_gans_ridge['b4 l0'] = df4.loc['b4 l0'].to_dict()
best_gans_ridge['b6 l1'] = df4.loc['b6 l1'].to_dict()
best_gans_ridge['b3 l1'] = df4.loc['b3 l1'].to_dict()
best_gans_ridge['b5 l0'] = df4.loc['b5 l0'].to_dict()
best_gans_ridge['b2 l0'] = df4.loc['b2 l0'].to_dict()
print(best_vaes_ridge, best_gans_ridge)

NameError: name 'df1' is not defined

In [100]:
best_vaes_ridge['b4 l3'] = df1.loc['b4 l3'].to_dict()
best_vaes_ridge['b3 l1'] = df1.loc['b3 l1'].to_dict()
best_vaes_ridge['b3 l0'] = df1.loc['b3 l0'].to_dict()
best_vaes_ridge['b0 l1'] = df1.loc['b0 l1'].to_dict()


best_gans_ridge['b9 l1'] = df3.loc['b9 l1'].to_dict()
best_gans_ridge['b3 l2'] = df3.loc['b3 l2'].to_dict()
best_gans_ridge['b3 l1'] = df3.loc['b3 l1'].to_dict()
print(best_vaes_ridge, best_gans_ridge)

VAEs
            mse       mae        r2
b4 l3  0.383215  0.454014  0.069878
b3 l1  0.392434  0.442947  0.047504
b3 l0  0.395214  0.496027  0.040757
b0 l1  0.410941  0.510414  0.002584
b2 l1  0.415346  0.487349 -0.008107
            mse       mae        r2
b4 l3  0.398085  0.450817  0.033786
b3 l0  0.398478  0.474786  0.032833
b3 l1  0.400564  0.436736  0.027770
b0 l1  0.409653  0.478014  0.005709
b2 l1  0.421660  0.468380 -0.023434
GANs
            mse       mae        r2
b9 l1  0.404208  0.454003  0.018927
b3 l2  0.418689  0.435116 -0.016222
b3 l1  0.440414  0.480317 -0.068951
b8 l1  0.447845  0.452056 -0.086987
b1 l0  0.448544  0.477123 -0.088685
            mse       mae        r2
b9 l1  0.417073  0.455209 -0.012299
b9 l0  0.424017  0.441309 -0.029153
b2 l0  0.432736  0.485952 -0.050317
b6 l0  0.434442  0.444671 -0.054456
b3 l2  0.435375  0.449144 -0.056721
{'b4 l3': {'mse': 0.3832154464349572, 'mae': 0.45401444687402, 'r2': 0.0698781519805789}, 'b3 l1': {'mse': 0.3924337086081557,

<h4> Select best GB models </h4>

In [34]:
best_vaes_gb, best_gans_gb = {}, {}

df1 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_gb_model_results.csv', index_col=0)
df2 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_gb_blended_model_results.csv', index_col=0)
df3 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_gb_model_results.csv', index_col=0)
df4 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_gb_blended_model_results.csv', index_col=0)

print('VAEs')
print(df1.nsmallest(5, 'mse'))
print(df2.nsmallest(5, 'mse'))

print('GANs')
print(df3.nsmallest(5, 'mse'))
print(df4.nsmallest(5, 'mse'))

VAEs
          mse     mae      r2
b4 l0  0.6022  0.7203  0.1183
b5 l0  0.6042  0.6989  0.1153
b0 l0  0.6091  0.7194  0.1082
b1 l2  0.6102  0.6774  0.1066
b5 l3  0.6103  0.6682  0.1065
          mse     mae      r2
b4 l0  0.5950  0.7099  0.1288
b1 l2  0.6077  0.7031  0.1103
b0 l0  0.6203  0.7351  0.0917
b5 l3  0.6237  0.7083  0.0869
b0 l5  0.6318  0.7059  0.0749
GANs
          mse     mae      r2
b8 l2  0.5136  0.5673  0.2480
b7 l1  0.5283  0.5691  0.2265
b3 l2  0.5290  0.5967  0.2254
b6 l1  0.5298  0.6274  0.2243
b9 l2  0.5299  0.6203  0.2242
          mse     mae      r2
b8 l2  0.5070  0.5843  0.2576
b6 l1  0.5163  0.6153  0.2440
b7 l1  0.5172  0.6027  0.2428
b9 l2  0.5233  0.6262  0.2339
b3 l2  0.5316  0.6039  0.2217


In [40]:
best_vaes_gb['b4 l0'] = df1.loc['b4 l0'].to_dict()
best_vaes_gb['b5 l0'] = df1.loc['b5 l0'].to_dict()
best_vaes_gb['b0 l0'] = df1.loc['b0 l0'].to_dict()

best_vaes_gb['b4 l0'] = df2.loc['b4 l0'].to_dict()
best_vaes_gb['b1 l2'] = df2.loc['b1 l2'].to_dict()


best_gans_gb['b8 l2'] = df3.loc['b8 l2'].to_dict()
best_gans_gb['b7 l1'] = df3.loc['b7 l1'].to_dict()
best_gans_gb['b3 l2'] = df3.loc['b3 l2'].to_dict()
best_gans_gb['b6 l1'] = df3.loc['b6 l1'].to_dict()

best_gans_gb['b8 l2'] = df4.loc['b8 l2'].to_dict()
best_gans_gb['b7 l1'] = df4.loc['b7 l1'].to_dict()
best_gans_gb['b9 l2'] = df4.loc['b9 l2'].to_dict()
best_gans_gb['b6 l1'] = df4.loc['b6 l1'].to_dict()
print(best_vaes_gb, best_gans_gb)

{'b4 l0': {'mse': 0.5950105087480807, 'mae': 0.7098826422833142, 'r2': 0.1287995453272745}, 'b5 l0': {'mse': 0.6042453603777782, 'mae': 0.6988714722458035, 'r2': 0.1152780918061334}, 'b0 l0': {'mse': 0.6090703951613103, 'mae': 0.7193641001852932, 'r2': 0.1082133888548035}, 'b1 l2': {'mse': 0.6076647195472598, 'mae': 0.7031185660075746, 'r2': 0.110271546174848}} {'b8 l2': {'mse': 0.5070088939676595, 'mae': 0.5843331444934409, 'r2': 0.2576494491213212}, 'b7 l1': {'mse': 0.5171517751171433, 'mae': 0.6026908200808508, 'r2': 0.2427984800389199}, 'b3 l2': {'mse': 0.5290335829463042, 'mae': 0.5967171920241463, 'r2': 0.2254014152292937}, 'b6 l1': {'mse': 0.5163193902106689, 'mae': 0.6152575733141042, 'r2': 0.2440172385285959}, 'b9 l2': {'mse': 0.5232579295499363, 'mae': 0.6261775430512556, 'r2': 0.2338579916946989}}


In [102]:
best_vaes_gb['b1 l4'] = df1.loc['b1 l4'].to_dict()
best_vaes_gb['b1 l3'] = df1.loc['b1 l3'].to_dict()

best_vaes_gb['b0 l1'] = df2.loc['b0 l1'].to_dict()


best_gans_gb['b7 l0'] = df3.loc['b7 l0'].to_dict()
best_gans_gb['b2 l0'] = df3.loc['b2 l0'].to_dict()

best_gans_gb['b9 l1'] = df4.loc['b9 l1'].to_dict()
print(best_vaes_gb, best_gans_gb)

VAEs
            mse       mae        r2
b1 l4  0.412233  0.505275 -0.000551
b1 l3  0.412636  0.519228 -0.001529
b3 l0  0.412697  0.519343 -0.001677
b0 l1  0.412886  0.520114 -0.002138
b3 l6  0.413757  0.496415 -0.004252
            mse       mae        r2
b0 l1  0.410926  0.502542  0.002621
b3 l0  0.413277  0.497512 -0.003087
b1 l4  0.413711  0.495347 -0.004138
b1 l3  0.414003  0.493890 -0.004847
b2 l1  0.417815  0.484520 -0.014101
GANs
            mse       mae        r2
b7 l0  0.418620  0.478271 -0.016053
b2 l0  0.419903  0.491859 -0.019168
b1 l0  0.427780  0.475232 -0.038287
b9 l1  0.428881  0.476591 -0.040960
b9 l2  0.432763  0.467903 -0.050380
            mse       mae        r2
b2 l0  0.416775  0.488148 -0.011577
b9 l1  0.419293  0.475526 -0.017689
b0 l2  0.423377  0.488698 -0.027601
b8 l1  0.428707  0.469714 -0.040537
b7 l0  0.429885  0.469077 -0.043395
{'b1 l4': {'mse': 0.4122325403543621, 'mae': 0.5052750487861299, 'r2': -0.0005507236598782}, 'b1 l3': {'mse': 0.41263555893537

<h4> Save best VAEs </h4>

In [41]:
batches=[4,8,16,20,24,32]
latent_dims=[3,5,10,15,20,25,30,50]
vaes = [best_vaes_rf, best_vaes_ridge, best_vaes_gb]

best_vaes = set()
for vae in vaes:
    for name, scores in vae.items():
        b, l = map(int, re.findall(r'\d+', name))
        best_vaes.add((batches[b], latent_dims[l]))
        print(scores['mse'], batches[b], latent_dims[l])
        
print(best_vaes)

0.6056043032515883 24 3
0.611810893989884 4 3
0.6780147635160702 8 10
0.5169983828480456 32 5
0.5192887567688758 20 3
0.5240370180863826 32 3
0.5254846447747912 24 3
0.5476959653287211 4 3
0.5480914858763493 20 20
0.5950105087480807 24 3
0.6042453603777782 32 3
0.6090703951613103 4 3
0.6076647195472598 8 10
{(32, 5), (4, 3), (8, 10), (20, 20), (24, 3), (20, 3), (32, 3)}


In [44]:
for b, l in best_vaes:
    vae = TimeVAE.load('../../data/models/model_data/',f'tVAE_{model_name}_b{b}l{l}')
    samples = vae.get_prior_samples(num_samples=1000)
    
    temp, energy = scalers['temp'].inverse_transform(samples[:,:,0]), scalers['energy'].inverse_transform(samples[:,:,1])
    rescaled_samples = np.stack((temp, energy), axis=-1)

    print(b, l)
    np.save(f'../../data/vae_synthetic_data/{model_name}_b{b}l{l}_generated_samples.npy', samples)
    np.save(f'../../data/vae_synthetic_data/{model_name}_b{b}l{l}_rescaled_samples.npy', np.stack((temp, energy),axis=-1))

32 5
4 3
8 10
20 20
24 3
20 3
32 3


<h4> Save best GANs </h4>

In [28]:
batches=[2,4,6,8,10,12,16,20,24,32]
epochs=[100,500,1000]
gans = [best_gans_rf, best_gans_ridge, best_gans_gb]

best_gans = set()
for gan in gans:
    for name, scores in gan.items():
        b, e = map(int, re.findall(r'\d+', name))
        best_gans.add((batches[b], epochs[e]))
        print(scores['mse'], batches[b], epochs[e])
print(best_gans)

0.5373772089984823 10 100
0.5232579295499363 32 1000
0.5565636208788776 12 500
0.5373772089984823 10 100
0.5163193902106689 16 500
0.6418606903914026 8 500
0.607671809879323 4 1000
0.6455071466498753 12 100
0.6368619328750278 6 100
0.5136026413076787 24 1000
0.528252038067138 20 500
0.5290335829463042 8 1000
0.529803679597627 16 500
0.5298659607720224 32 1000
{(20, 500), (4, 1000), (12, 100), (24, 1000), (8, 1000), (10, 100), (32, 1000), (16, 500), (6, 100), (12, 500), (8, 500)}


In [31]:
for b, e in best_gans:
    dgan = DGAN.load(f'../../data/models/model_data/DGAN_{model_name}_b{b}_e{e}',map_location=torch.device('cpu'))
    attributes, samples = dgan.generate_numpy(1000)
    
    temp, energy = scalers['temp'].inverse_transform(samples[:,:,0]), scalers['energy'].inverse_transform(samples[:,:,1])
    rescaled_samples = np.stack((temp, energy),axis=-1)

    print(b, e)
    np.save(f'../../data/gan_synthetic_data/{model_name}_b{b}e{e}_generated_samples.npy', samples)
    np.save(f'../../data/gan_synthetic_data/{model_name}_b{b}e{e}_rescaled_samples.npy', rescaled_samples)

20 500
4 1000
12 100
24 1000
8 1000
10 100
32 1000
16 500
6 100
12 500
8 500
