In [2]:
import pickle
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
import datetime
import re
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import seaborn as sns
import scipy.stats as stats
from gretel_synthetics.timeseries_dgan.dgan import DGAN
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from timeVAE.vae_dense_model import VariationalAutoencoderDense as VAE_Dense
from timeVAE.vae_conv_model import VariationalAutoencoderConv as VAE_Conv
from timeVAE.vae_conv_I_model import VariationalAutoencoderConvInterpretable as TimeVAE
from timeVAE import utils
from sklearn.preprocessing import MinMaxScaler

pd.set_option('display.precision', 4)

2024-04-23 12:35:51.671770: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-23 12:35:51.671799: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-23 12:35:51.672808: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-23 12:35:51.678734: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
MODEL = 1

In [4]:
index = [0,3] if MODEL==0 else [1,2]
model_name = 'th_v_air' if MODEL==0 else 'el_v_sky'

base_data_train, base_data_test = np.load('../../data/training_data/training_data_1month.npy', allow_pickle=True)

base_data_train, base_data_test = base_data_train[:,:,index], base_data_test[:,:,index]
print(base_data_train.shape)
print(model_name)

(108, 730, 2)
el_v_sky


In [5]:
from sklearn.preprocessing import MinMaxScaler

scalers = {var_name: MinMaxScaler(feature_range=(-1,1)) for var_name in ['temp', 'energy']}

temp_var, energy_var = base_data_train[:,:,0], base_data_train[:,:,1]
temp_var_test, energy_var_test = base_data_test[:,:,0], base_data_test[:,:,1]

temp_var, temp_var_test = scalers['temp'].fit_transform(temp_var), scalers['temp'].fit_transform(temp_var_test)
energy_var, energy_var_test = scalers['energy'].fit_transform(energy_var), scalers['energy'].fit_transform(energy_var_test)

base_data_train_scaled, base_data_test_scaled = np.stack((temp_var, energy_var), axis=-1), np.stack((temp_var_test, energy_var_test), axis=-1)
print(base_data_train_scaled.shape, base_data_test_scaled.shape)

(108, 730, 2) (12, 730, 2)


<h3> Load Models </h3>

Load in tVAE models

In [19]:
batches=[4,8,16,20,24,32]
latent_dims=[3,5,10,15,20,25,30,50]

vae_list = []
for b in batches:
    latent_dim_list = []
    for l in latent_dims:
        samples = TimeVAE.load('../../data/models/model_data/',f'tVAE_{model_name}_b{b}l{l}')
        sample = samples.get_prior_samples(num_samples=1000)
        latent_dim_list.append(sample)
    vae_list.append(latent_dim_list)



In [22]:
print(len(vae_list), len(vae_list[0]), vae_list[0][0].shape)

6 8 (1000, 730, 2)


Load in DGAN models

In [6]:
import tensorflow as tf
import torch
# Hide GPU from visible devices
#tf.config.set_visible_devices([], 'GPU')
tf.config.get_visible_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [17]:
batches=[2,4,6,8,10,12,16,20,24,32]
epochs=[2000,5000]


gan_list = []
for b in batches:
    epoch_list = []
    for e in epochs:
        try:
            model = DGAN.load(f'../../data/models/model_data/DGAN_{model_name}_b{b}_e{e}.zip')
        except:
            model = DGAN.load(f'../../data/models/model_data/DGAN_{model_name}_b{b}_e{e}')
            
        attributes, samples = model.generate_numpy(1000)
        epoch_list.append(samples)
    gan_list.append(epoch_list)

In [18]:
print(len(gan_list), len(gan_list[0]), gan_list[0][0].shape)

10 2 (1000, 730, 2)


<h2> Regression Models </h2>

In [10]:
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import grid_search_forecaster 
from sklearn.linear_model import Ridge

model_params = {'lag':24, 'max_depth':5, 'n_est':50} if MODEL==0 else {'lag':2, 'max_depth':3, 'n_est':10}

def find_best_model_regression(synthetic_data):
    
    forecaster = ForecasterAutoreg(
                     regressor = RandomForestRegressor(max_depth=model_params['max_depth'], n_estimators =model_params['n_est']),
                     lags      = model_params['lag']
                 )

    forecaster.fit(y=synthetic_data['energy'], exog=synthetic_data['temp'])

    y_test = pd.Series(base_data_test_scaled[:,:,1].reshape(-1))
    exog_var = pd.Series(base_data_test_scaled[:,:,0].reshape(-1))
    exog_var.index = exog_var.index + synthetic_data['temp'].index.max() + 1
    
    predictions = forecaster.predict(exog=exog_var, steps=len(y_test))
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2  = r2_score(y_test, predictions)

    return {'mse':mse, 'mae':mae, 'r2':r2}

In [11]:
def find_best_model_ridge_regression(synthetic_data):
    
    forecaster = ForecasterAutoreg(
                     regressor = Ridge(),
                     lags      = 2 if MODEL==0 else 12 
                 )

    forecaster.fit(y=synthetic_data['energy'], exog=synthetic_data['temp'])

    y_test = pd.Series(base_data_test_scaled[:,:,1].reshape(-1))
    exog_var = pd.Series(base_data_test_scaled[:,:,0].reshape(-1))
    exog_var.index = exog_var.index + synthetic_data['temp'].index.max() + 1
    
    predictions = forecaster.predict(exog=exog_var, steps=len(y_test))
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2  = r2_score(y_test, predictions)

    return {'mse':mse, 'mae':mae, 'r2':r2}

In [12]:
gb_params = {'lag':24, 'max_depth':5, 'n_est':10} if MODEL==0 else {'lag':12, 'max_depth':5, 'n_est':10}

def find_best_model_gb_regression(synthetic_data):
    
    forecaster = ForecasterAutoreg(
                     regressor = GradientBoostingRegressor(n_estimators=10, max_depth=5),
                     lags      = 24
                 )

    forecaster.fit(y=synthetic_data['energy'], exog=synthetic_data['temp'])

    y_test = pd.Series(base_data_test_scaled[:,:,1].reshape(-1))
    exog_var = pd.Series(base_data_test_scaled[:,:,0].reshape(-1))
    exog_var.index = exog_var.index + synthetic_data['temp'].index.max() + 1
    
    predictions = forecaster.predict(exog=exog_var, steps=len(y_test))
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2  = r2_score(y_test, predictions)

    return {'mse':mse, 'mae':mae, 'r2':r2}

<h2> Run all 3 regressors on all datasets </h2>

<h3> VAEs </h3>

In [30]:
vae_rf_results = {}
vae_ridge_results = {}
vae_gb_results = {}
vae_rf_blended_results = {}
vae_ridge_blended_results = {}
vae_gb_blended_results = {}

for i,b in enumerate(vae_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = e[indices]
        current_df_blended = np.concatenate((e[indices], base_data_train_scaled), axis=0)
        
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        current_df_blended = current_df_blended[np.random.permutation(current_df_blended.shape[0])]
        
        
        rf_result = find_best_model_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        ridge_result = find_best_model_ridge_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        gb_result = find_best_model_gb_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))

        rf_blended_result = find_best_model_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))
        ridge_blended_result = find_best_model_ridge_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))
        gb_blended_result = find_best_model_gb_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))

        vae_rf_results[f'b{i} l{j}'] = rf_result
        vae_ridge_results[f'b{i} l{j}'] = ridge_result
        vae_gb_results[f'b{i} l{j}'] = gb_result
        vae_rf_blended_results[f'b{i} l{j}'] = rf_blended_result
        vae_ridge_blended_results[f'b{i} l{j}'] = ridge_blended_result
        vae_gb_blended_results[f'b{i} l{j}'] = gb_blended_result
        
        print(rf_result, f'{i} {j}')
        print(ridge_result, f'{i} {j}')
        print(gb_result, f'{i} {j}')
        print(rf_blended_result, f'{i} {j}')
        print(ridge_blended_result, f'{i} {j}')
        print(gb_blended_result, f'{i} {j}')

rf_results_df = pd.DataFrame.from_dict(vae_rf_results, orient='index')
rf_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_rf_model_results.csv')
ridge_results_df = pd.DataFrame.from_dict(vae_ridge_results, orient='index')
ridge_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_ridge_model_results.csv')
gb_results_df = pd.DataFrame.from_dict(vae_gb_results, orient='index')
gb_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_gb_model_results.csv')
rf_blended_results_df = pd.DataFrame.from_dict(vae_rf_blended_results, orient='index')
rf_blended_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_rf_blended_model_results.csv')
ridge_blended_results_df = pd.DataFrame.from_dict(vae_ridge_blended_results, orient='index')
ridge_blended_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_ridge_blended_model_results.csv')
gb_blended_results_df = pd.DataFrame.from_dict(vae_gb_blended_results, orient='index')
gb_blended_results_df.to_csv(f'../../data/models/model_history/VAE_{model_name}_gb_blended_model_results.csv')


{'mse': 0.611810893989884, 'mae': 0.6493479705202254, 'r2': 0.10420081463908626} 0 0
{'mse': 0.5476959653287211, 'mae': 0.6682505916035447, 'r2': 0.1980763919266859} 0 0
{'mse': 0.6090703951613103, 'mae': 0.7193641001852932, 'r2': 0.10821338885480358} 0 0
{'mse': 0.7415221220120998, 'mae': 0.8259487889856266, 'r2': -0.08571932822843498} 0 0
{'mse': 0.530760295180196, 'mae': 0.6425949226630155, 'r2': 0.22287320360758545} 0 0
{'mse': 0.6203172701068354, 'mae': 0.7351307943292931, 'r2': 0.09174597790637429} 0 0
{'mse': 11.591726957733147, 'mae': 3.302839570417546, 'r2': -15.972335189956897} 0 1
{'mse': 2.295460346013451, 'mae': 1.3530603130462817, 'r2': -2.3609592901775467} 0 1
{'mse': 1.7581730559467261, 'mae': 1.1896706367239303, 'r2': -1.5742758207025762} 0 1
{'mse': 10.502750172739525, 'mae': 3.1336516147973965, 'r2': -14.377880879879896} 0 1
{'mse': 1.4288167790356252, 'mae': 1.1097783104211973, 'r2': -1.09204007196263} 0 1
{'mse': 1.3917832175701168, 'mae': 1.086171869741209, 'r2': 

{'mse': 0.7014070348980221, 'mae': 0.7806617473505193, 'r2': -0.026983756975159867} 2 0
{'mse': 0.7377929479082913, 'mae': 0.6689443306283475, 'r2': -0.08025915882465862} 2 0
{'mse': 1.0649247600137968, 'mae': 0.9511838897418529, 'r2': -0.5592378982823394} 2 0
{'mse': 1.53484992424672, 'mae': 1.1269288734457619, 'r2': -1.2472913204030092} 2 0
{'mse': 0.6809930895429472, 'mae': 0.6269872390410931, 'r2': 0.002905863818975063} 2 0
{'mse': 0.8759184805430296, 'mae': 0.8897479743695397, 'r2': -0.2824993294839715} 2 0
{'mse': 1.4666534617919085, 'mae': 1.1068764085708704, 'r2': -1.1474396569043104} 2 1
{'mse': 1.0610457583969186, 'mae': 0.9824687964967895, 'r2': -0.5535583549421539} 2 1
{'mse': 1.0073746938931798, 'mae': 0.9601464918362377, 'r2': -0.47497443900963154} 2 1
{'mse': 0.758517893986591, 'mae': 0.8349458760832686, 'r2': -0.11060413959562343} 2 1
{'mse': 0.6720395109539905, 'mae': 0.7672852505351033, 'r2': 0.016015483939902375} 2 1
{'mse': 0.7816219350332347, 'mae': 0.8507749126193

{'mse': 0.6056043032515883, 'mae': 0.6797709139663232, 'r2': 0.11328835947009697} 4 0
{'mse': 0.5254846447747912, 'mae': 0.5715357192945701, 'r2': 0.2305976874012473} 4 0
{'mse': 0.602195720768269, 'mae': 0.7202816719231551, 'r2': 0.11827912612984115} 4 0
{'mse': 0.7835320673223687, 'mae': 0.8504731160123729, 'r2': -0.14722930648426136} 4 0
{'mse': 0.5192632067878346, 'mae': 0.57061800615184, 'r2': 0.23970697122609752} 4 0
{'mse': 0.5950105087480807, 'mae': 0.7098826422833142, 'r2': 0.12879954532727456} 4 0
{'mse': 0.6831754945704162, 'mae': 0.7348965159050417, 'r2': -0.0002895625209922148} 4 1
{'mse': 0.5991378174257943, 'mae': 0.6761475612583862, 'r2': 0.12275643660275093} 4 1
{'mse': 0.6492348894753571, 'mae': 0.7011580490299588, 'r2': 0.04940547673620854} 4 1
{'mse': 0.7460862734124244, 'mae': 0.8255189300388189, 'r2': -0.09240205183868544} 4 1
{'mse': 0.5664812281807148, 'mae': 0.6449810513608739, 'r2': 0.1705714499177834} 4 1
{'mse': 0.6440984630140574, 'mae': 0.7277198896438274,

KeyboardInterrupt: 

<h3> GANs </h3>

In [19]:
gan_rf_results = {}
gan_ridge_results = {}
gan_gb_results = {}
gan_rf_blended_results = {}
gan_ridge_blended_results = {}
gan_gb_blended_results = {}

for i,b in enumerate(gan_list):
    for j,e in enumerate(b):
        indices = np.random.choice(e.shape[0], 216, replace=False)
        current_df = e[indices]
        current_df_blended = np.concatenate((e[indices], base_data_train_scaled), axis=0)
        
        current_df = current_df[np.random.permutation(current_df.shape[0])]
        current_df_blended = current_df_blended[np.random.permutation(current_df_blended.shape[0])]
        
        
        rf_result = find_best_model_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        ridge_result = find_best_model_ridge_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))
        gb_result = find_best_model_gb_regression(pd.DataFrame(current_df.reshape(-1,2), columns=['temp', 'energy']))

        rf_blended_result = find_best_model_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))
        ridge_blended_result = find_best_model_ridge_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))
        gb_blended_result = find_best_model_gb_regression(pd.DataFrame(current_df_blended.reshape(-1,2), columns=['temp', 'energy']))

        gan_rf_results[f'b{i} l{j}'] = rf_result
        gan_ridge_results[f'b{i} l{j}'] = ridge_result
        gan_gb_results[f'b{i} l{j}'] = gb_result
        gan_rf_blended_results[f'b{i} l{j}'] = rf_blended_result
        gan_ridge_blended_results[f'b{i} l{j}'] = ridge_blended_result
        gan_gb_blended_results[f'b{i} l{j}'] = gb_blended_result
        
        print(rf_result, f'{i} {j}')
        print(ridge_result, f'{i} {j}')
        print(gb_result, f'{i} {j}')
        print(rf_blended_result, f'{i} {j}')
        print(ridge_blended_result, f'{i} {j}')
        print(gb_blended_result, f'{i} {j}')

rf_results_df = pd.DataFrame.from_dict(gan_rf_results, orient='index')
rf_results_df.to_csv(f'../../data/models/model_history/testDGAN_{model_name}_rf_model_results.csv')
ridge_results_df = pd.DataFrame.from_dict(gan_ridge_results, orient='index')
ridge_results_df.to_csv(f'../../data/models/model_history/testDGAN_{model_name}_ridge_model_results.csv')
gb_results_df = pd.DataFrame.from_dict(gan_gb_results, orient='index')
gb_results_df.to_csv(f'../../data/models/model_history/testDGAN_{model_name}_gb_model_results.csv')
rf_blended_results_df = pd.DataFrame.from_dict(gan_rf_blended_results, orient='index')
rf_blended_results_df.to_csv(f'../../data/models/model_history/testDGAN_{model_name}_rf_blended_model_results.csv')
ridge_blended_results_df = pd.DataFrame.from_dict(gan_ridge_blended_results, orient='index')
ridge_blended_results_df.to_csv(f'../../data/models/model_history/testDGAN_{model_name}_ridge_blended_model_results.csv')
gb_blended_results_df = pd.DataFrame.from_dict(gan_gb_blended_results, orient='index')
gb_blended_results_df.to_csv(f'../../data/models/model_history/testDGAN_{model_name}_gb_blended_model_results.csv')


{'mse': 0.5912792154631658, 'mae': 0.672451967769303, 'r2': 0.13426281758640624} 0 0
{'mse': 0.5117901863508859, 'mae': 0.5855753847179039, 'r2': 0.2506487927682858} 0 0
{'mse': 0.615117853008804, 'mae': 0.6854373002187645, 'r2': 0.09935884267639106} 0 0
{'mse': 0.5997702288583109, 'mae': 0.6718852440599553, 'r2': 0.12183047459124297} 0 0
{'mse': 0.5080892989192897, 'mae': 0.5747880677813844, 'r2': 0.2560675454889446} 0 0
{'mse': 0.5674859986454305, 'mae': 0.6876043570537858, 'r2': 0.16910028853015713} 0 0
{'mse': 0.6834582005936223, 'mae': 0.7261962670593242, 'r2': -0.0007034940605781248} 0 1
{'mse': 0.5940521600722237, 'mae': 0.6589908996342575, 'r2': 0.13020273701862517} 0 1
{'mse': 0.6832546196633763, 'mae': 0.7270629298025383, 'r2': -0.000405415629381789} 0 1
{'mse': 0.7515767554844025, 'mae': 0.8297148009271604, 'r2': -0.10044108712823707} 0 1
{'mse': 0.5669908952454668, 'mae': 0.6414582829286772, 'r2': 0.16982520733548367} 0 1
{'mse': 0.7468233101242125, 'mae': 0.829934852984418

{'mse': 0.6073009511915867, 'mae': 0.5213134702396006, 'r2': 0.1108041672835487} 8 0
{'mse': 0.5260237383267159, 'mae': 0.571914519787853, 'r2': 0.22980835924545362} 8 0
{'mse': 0.5291837232651373, 'mae': 0.5876749121885018, 'r2': 0.22518158329001037} 8 0
{'mse': 0.5994293370148764, 'mae': 0.5359052313274377, 'r2': 0.12232960044637986} 8 0
{'mse': 0.522775454369682, 'mae': 0.5712765801643453, 'r2': 0.23456442055641813} 8 0
{'mse': 0.5346134149195807, 'mae': 0.6145626909374174, 'r2': 0.21723155590639887} 8 0
{'mse': 0.8928804073645678, 'mae': 0.6564073700004899, 'r2': -0.30733458557069393} 8 1
{'mse': 1.1072631388685334, 'mae': 0.7941329000235896, 'r2': -0.621228761243668} 8 1
{'mse': 0.8689530205484551, 'mae': 0.6390028655923747, 'r2': -0.27230066605692405} 8 1
{'mse': 0.8965147823104537, 'mae': 0.6470691063222667, 'r2': -0.3126559522671728} 8 1
{'mse': 0.7047084580604711, 'mae': 0.6108567727180474, 'r2': -0.03181762346643269} 8 1
{'mse': 0.8411480579386916, 'mae': 0.6512241168881463, 

<h2> Select best Regression models </h2>

<h4> RF regression </h4>

In [6]:
best_vaes_rf, best_gans_rf = {}, {}

df1 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_rf_model_results.csv', index_col=0)
df2 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_rf_blended_model_results.csv', index_col=0)
df3 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_rf_model_results.csv', index_col=0)
df4 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_rf_blended_model_results.csv', index_col=0)

print('VAEs')
print(df1.nsmallest(5, 'mse'))
print(df2.nsmallest(5, 'mse'))

print('GANs')
print(df3.nsmallest(5, 'mse'))
print(df4.nsmallest(5, 'mse'))

VAEs
          mse     mae      r2
b4 l0  0.6056  0.6798  0.1133
b0 l0  0.6118  0.6493  0.1042
b4 l1  0.6832  0.7349 -0.0003
b4 l7  0.6919  0.7650 -0.0130
b3 l7  0.7001  0.8011 -0.0251
          mse     mae      r2
b1 l2  0.6780  0.7858  0.0073
b1 l3  0.6960  0.6914 -0.0191
b3 l7  0.6966  0.6909 -0.0199
b1 l5  0.6968  0.6906 -0.0203
b5 l2  0.6971  0.6903 -0.0207
GANs
               mse     mae      r2
Unnamed: 0                        
b5 l4       0.5420  0.6211  0.2065
b4 l0       0.5423  0.5974  0.2060
b9 l2       0.5454  0.5437  0.2015
b5 l1       0.5711  0.6502  0.1639
b8 l2       0.5827  0.5330  0.1468
               mse     mae      r2
Unnamed: 0                        
b4 l0       0.5439  0.6127  0.2036
b9 l2       0.5883  0.5743  0.1386
b8 l3       0.5994  0.5359  0.1223
b0 l3       0.5998  0.6719  0.1218
b8 l2       0.6038  0.5525  0.1159


In [123]:
if MODEL==0:
    best_vaes_rf['b3 l0'] = df1.loc['b3 l0'].to_dict()
    best_vaes_rf['b4 l7'] = df1.loc['b4 l7'].to_dict()
    #best_vaes_rf['b2 l2'] = df1.loc['b2 l2'].to_dict()

    best_vaes_rf['b2 l1'] = df2.loc['b2 l1'].to_dict()
    best_vaes_rf['b0 l2'] = df2.loc['b0 l2'].to_dict()


    best_gans_rf['b7 l0'] = df3.loc['b7 l0'].to_dict()
    best_gans_rf['b3 l3'] = df3.loc['b3 l3'].to_dict()
    #best_gans_rf['b9 l1'] = df3.loc['b9 l1'].to_dict()

    best_gans_rf['b6 l3'] = df4.loc['b6 l3'].to_dict()
    best_gans_rf['b5 l4'] = df4.loc['b5 l4'].to_dict()
elif MODEL==1:
    best_vaes_rf['b4 l0'] = df1.loc['b4 l0'].to_dict()
    best_vaes_rf['b0 l0'] = df1.loc['b0 l0'].to_dict()

    best_vaes_rf['b1 l2'] = df2.loc['b1 l2'].to_dict()


    best_gans_rf['b4 l0'] = df3.loc['b4 l0'].to_dict()
    best_gans_rf['b9 l2'] = df3.loc['b9 l2'].to_dict()
    #best_gans_rf['b5 l4'] = df3.loc['b5 l4'].to_dict()

    best_gans_rf['b4 l0'] = df4.loc['b4 l0'].to_dict()
    best_gans_rf['b9 l2'] = df4.loc['b9 l2'].to_dict()
    
print(best_vaes_rf, best_gans_rf)

{'b4 l0': {'mse': 0.6056043032515883, 'mae': 0.6797709139663232, 'r2': 0.1132883594700969}, 'b0 l0': {'mse': 0.611810893989884, 'mae': 0.6493479705202254, 'r2': 0.1042008146390862}, 'b1 l2': {'mse': 0.6780147635160702, 'mae': 0.7857956522483895, 'r2': 0.0072666590496983}} {'b4 l0': {'mse': 0.543925117458353, 'mae': 0.612693975816244, 'r2': 0.203597579083663}, 'b9 l2': {'mse': 0.588335440331856, 'mae': 0.574307068293785, 'r2': 0.138573024204852}}


*doesn't matter if we overwrite values here, as we only care about selecting the models for their parameters and will test them all in both non blended and blended again

<h4> Select best Ridge models </h4>

In [1]:
best_vaes_ridge, best_gans_ridge = {}, {}

df1 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_ridge_model_results.csv', index_col=0)
df2 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_ridge_blended_model_results.csv', index_col=0)
df3 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_ridge_model_results.csv', index_col=0)
df4 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_ridge_blended_model_results.csv', index_col=0)

print('VAEs')
print(df1.nsmallest(5, 'mse'))
print(df2.nsmallest(5, 'mse'))

print('GANs')
print(df3.nsmallest(5, 'mse'))
print(df4.nsmallest(5, 'mse'))


NameError: name 'pd' is not defined

In [125]:
if MODEL==0:
    best_vaes_ridge['b4 l3'] = df1.loc['b4 l3'].to_dict()
    best_vaes_ridge['b3 l1'] = df1.loc['b3 l1'].to_dict()
    best_vaes_ridge['b3 l0'] = df1.loc['b3 l0'].to_dict()
    best_vaes_ridge['b0 l1'] = df1.loc['b0 l1'].to_dict()


    best_gans_ridge['b9 l1'] = df3.loc['b9 l1'].to_dict()
    best_gans_ridge['b3 l2'] = df3.loc['b3 l2'].to_dict()
    best_gans_ridge['b9 l0'] = df4.loc['b9 l0'].to_dict()
    
elif MODEL==1:
    best_vaes_ridge['b5 l1'] = df1.loc['b5 l1'].to_dict()
    best_vaes_ridge['b3 l0'] = df1.loc['b3 l0'].to_dict()
    best_vaes_ridge['b5 l0'] = df1.loc['b5 l0'].to_dict()
    #best_vaes_ridge['b4 l0'] = df1.loc['b4 l0'].to_dict()
    #best_vaes_ridge['b0 l0'] = df1.loc['b0 l0'].to_dict()

    best_vaes_ridge['b3 l4'] = df1.loc['b3 l4'].to_dict()


    best_gans_ridge['b4 l0'] = df3.loc['b4 l0'].to_dict()
    best_gans_ridge['b6 l1'] = df3.loc['b6 l1'].to_dict()
    #best_gans_ridge['b3 l1'] = df3.loc['b3 l1'].to_dict()
    #best_gans_ridge['b6 l3'] = df3.loc['b6 l3'].to_dict()
    #best_gans_ridge['b0 l3'] = df3.loc['b0 l3'].to_dict()

    best_gans_ridge['b1 l0'] = df4.loc['b1 l0'].to_dict()
    best_gans_ridge['b6 l1'] = df4.loc['b6 l1'].to_dict()
    #best_gans_ridge['b3 l1'] = df4.loc['b3 l1'].to_dict()
    #best_gans_ridge['b5 l0'] = df4.loc['b5 l0'].to_dict()
    #best_gans_ridge['b0 l3'] = df4.loc['b0 l3'].to_dict()
    
print(best_vaes_ridge, best_gans_ridge)

{'b5 l1': {'mse': 0.5169983828480456, 'mae': 0.575444461627244, 'r2': 0.2430230734076358}, 'b3 l0': {'mse': 0.5192887567688758, 'mae': 0.5947802992677448, 'r2': 0.2396695615421125}, 'b5 l0': {'mse': 0.5240370180863826, 'mae': 0.565681985867529, 'r2': 0.2327172685020776}, 'b3 l4': {'mse': 0.5480914858763493, 'mae': 0.6390083799728222, 'r2': 0.1974972800020057}} {'b4 l0': {'mse': 0.493209381480887, 'mae': 0.566684847127792, 'r2': 0.277854372187358}, 'b6 l1': {'mse': 0.503244884750862, 'mae': 0.562289375561275, 'r2': 0.263160623281869}, 'b1 l0': {'mse': 0.501243324386923, 'mae': 0.576088520607512, 'r2': 0.266091261099993}}


<h4> Select best GB models </h4>

In [126]:
best_vaes_gb, best_gans_gb = {}, {}

df1 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_gb_model_results.csv', index_col=0)
df2 = pd.read_csv(f'../../data/models/model_history/VAE_{model_name}_gb_blended_model_results.csv', index_col=0)
df3 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_gb_model_results.csv', index_col=0)
df4 = pd.read_csv(f'../../data/models/model_history/DGAN_{model_name}_gb_blended_model_results.csv', index_col=0)

print('VAEs')
print(df1.nsmallest(5, 'mse'))
print(df2.nsmallest(5, 'mse'))

print('GANs')
print(df3.nsmallest(5, 'mse'))
print(df4.nsmallest(5, 'mse'))

VAEs
          mse     mae      r2
b4 l0  0.6022  0.7203  0.1183
b5 l0  0.6042  0.6989  0.1153
b0 l0  0.6091  0.7194  0.1082
b1 l2  0.6102  0.6774  0.1066
b5 l3  0.6103  0.6682  0.1065
          mse     mae      r2
b4 l0  0.5950  0.7099  0.1288
b1 l2  0.6077  0.7031  0.1103
b0 l0  0.6203  0.7351  0.0917
b5 l3  0.6237  0.7083  0.0869
b0 l5  0.6318  0.7059  0.0749
GANs
               mse     mae      r2
Unnamed: 0                        
b8 l2       0.5136  0.5673  0.2480
b7 l3       0.5229  0.5866  0.2344
b7 l1       0.5283  0.5691  0.2265
b3 l2       0.5290  0.5967  0.2254
b8 l3       0.5292  0.5877  0.2252
               mse     mae      r2
Unnamed: 0                        
b8 l2       0.5070  0.5843  0.2576
b6 l1       0.5163  0.6153  0.2440
b7 l1       0.5172  0.6027  0.2428
b7 l3       0.5207  0.5958  0.2376
b9 l2       0.5233  0.6262  0.2339


In [127]:
if MODEL==0:
    best_vaes_gb['b1 l4'] = df1.loc['b1 l4'].to_dict()
    best_vaes_gb['b1 l3'] = df1.loc['b1 l3'].to_dict()

    best_vaes_gb['b0 l1'] = df2.loc['b0 l1'].to_dict()


    best_gans_gb['b3 l3'] = df3.loc['b3 l3'].to_dict()
    best_gans_gb['b7 l0'] = df3.loc['b7 l0'].to_dict()

    best_gans_gb['b2 l0'] = df4.loc['b2 l0'].to_dict()
    best_gans_gb['b3 l3'] = df4.loc['b3 l3'].to_dict()
    
elif MODEL==1:
    best_vaes_gb['b4 l0'] = df1.loc['b4 l0'].to_dict()
    best_vaes_gb['b5 l0'] = df1.loc['b5 l0'].to_dict()
    #best_vaes_gb['b0 l0'] = df1.loc['b0 l0'].to_dict()

    best_vaes_gb['b4 l0'] = df2.loc['b4 l0'].to_dict()
    best_vaes_gb['b1 l2'] = df2.loc['b1 l2'].to_dict()


    best_gans_gb['b8 l2'] = df3.loc['b8 l2'].to_dict()
    best_gans_gb['b7 l3'] = df3.loc['b7 l3'].to_dict()
    #best_gans_gb['b7 l1'] = df3.loc['b7 l1'].to_dict()
    #best_gans_gb['b3 l2'] = df3.loc['b3 l2'].to_dict()

    best_gans_gb['b8 l2'] = df4.loc['b8 l2'].to_dict()
    best_gans_gb['b7 l1'] = df4.loc['b7 l1'].to_dict()
    #best_gans_gb['b9 l2'] = df4.loc['b9 l2'].to_dict()
    #best_gans_gb['b6 l1'] = df4.loc['b6 l1'].to_dict()
    
print(best_vaes_gb, best_gans_gb)

{'b4 l0': {'mse': 0.5950105087480807, 'mae': 0.7098826422833142, 'r2': 0.1287995453272745}, 'b5 l0': {'mse': 0.6042453603777782, 'mae': 0.6988714722458035, 'r2': 0.1152780918061334}, 'b1 l2': {'mse': 0.6076647195472598, 'mae': 0.7031185660075746, 'r2': 0.110271546174848}} {'b8 l2': {'mse': 0.507008893967659, 'mae': 0.584333144493441, 'r2': 0.257649449121321}, 'b7 l3': {'mse': 0.522918219977994, 'mae': 0.586586258336415, 'r2': 0.234355386495601}, 'b7 l1': {'mse': 0.517151775117143, 'mae': 0.602690820080851, 'r2': 0.24279848003892}}


<h4> Save best VAEs </h4>

In [128]:
batches=[4,8,16,20,24,32]
latent_dims=[3,5,10,15,20,25,30,50]
vaes = [best_vaes_rf, best_vaes_ridge, best_vaes_gb]

best_vaes = set()
for vae in vaes:
    for name, scores in vae.items():
        b, l = map(int, re.findall(r'\d+', name))
        best_vaes.add((batches[b], latent_dims[l]))
        print(scores['mse'], batches[b], latent_dims[l])
        
print(best_vaes)

0.6056043032515883 24 3
0.611810893989884 4 3
0.6780147635160702 8 10
0.5169983828480456 32 5
0.5192887567688758 20 3
0.5240370180863826 32 3
0.5480914858763493 20 20
0.5950105087480807 24 3
0.6042453603777782 32 3
0.6076647195472598 8 10
{(32, 5), (4, 3), (8, 10), (20, 20), (24, 3), (20, 3), (32, 3)}


In [129]:
for b, l in best_vaes:
    vae = TimeVAE.load('../../data/models/VAE_models/',f'tVAE_{model_name}_b{b}l{l}')
    samples = vae.get_prior_samples(num_samples=1000)
    
    temp, energy = scalers['temp'].inverse_transform(samples[:,:,0]), scalers['energy'].inverse_transform(samples[:,:,1])
    rescaled_samples = np.stack((temp, energy), axis=-1)

    print(b, l)
    np.save(f'../../data/vae_synthetic_data/{model_name}_b{b}l{l}_generated_samples.npy', samples)
    np.save(f'../../data/vae_synthetic_data/{model_name}_b{b}l{l}_rescaled_samples.npy', np.stack((temp, energy),axis=-1))

32 5
4 3
8 10
20 20
24 3
20 3
32 3


<h4> Save best GANs </h4>

In [130]:
batches=[2,4,6,8,10,12,16,20,24,32]
epochs=[100,500,1000,2000,5000]
gans = [best_gans_rf, best_gans_ridge, best_gans_gb]

best_gans = set()
for gan in gans:
    for name, scores in gan.items():
        b, e = map(int, re.findall(r'\d+', name))
        best_gans.add((batches[b], epochs[e]))
        print(scores['mse'], batches[b], epochs[e])
print(best_gans)

0.543925117458353 10 100
0.588335440331856 32 1000
0.493209381480887 10 100
0.503244884750862 16 500
0.501243324386923 4 100
0.507008893967659 24 1000
0.522918219977994 20 2000
0.517151775117143 20 500
{(20, 500), (4, 100), (24, 1000), (20, 2000), (10, 100), (32, 1000), (16, 500)}


In [131]:
for b, e in best_gans:
    try:
        dgan = DGAN.load(f'../../data/models/GAN_models/DGAN_{model_name}_b{b}_e{e}',map_location=torch.device('cpu'))
    except:
        dgan = DGAN.load(f'../../data/models/GAN_models/DGAN_{model_name}_b{b}_e{e}.zip',map_location=torch.device('cpu'))
        
    attributes, samples = dgan.generate_numpy(1000)
    
    temp, energy = scalers['temp'].inverse_transform(samples[:,:,0]), scalers['energy'].inverse_transform(samples[:,:,1])
    rescaled_samples = np.stack((temp, energy),axis=-1)

    print(b, e)
    np.save(f'../../data/gan_synthetic_data/{model_name}_b{b}e{e}_generated_samples.npy', samples)
    np.save(f'../../data/gan_synthetic_data/{model_name}_b{b}e{e}_rescaled_samples.npy', rescaled_samples)

20 500
4 100
24 1000
20 2000
10 100
32 1000
16 500


In [54]:
addr = 'el_v_sky_gb'
df1 = pd.read_csv(f'../../data/models/model_history/DGAN_{addr}_model_results.csv')
df2 = pd.read_csv(f'../../data/models/model_history/testDGAN_{addr}_model_results.csv')

df2.index = df2.index.values + 30

def increm_label(s):
    base, label = s.split(' l')
    label_number = int(label) + 3
    return f"{base} l{label_number}"

df2.iloc[:,0] = df2.iloc[:,0].apply(increm_label)
result = pd.concat([df1, df2])
result.to_csv(f'../../data/models/DGAN_{addr}_model_results.csv')