## Import packages

In [1]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))

if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from helper import series_to_supervised
from tensorflow.keras.models import load_model
from preprocess import water_postprocess
from scipy.stats import mannwhitneyu, wilcoxon, ttest_rel
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from performance import metrics_s1_t1

2024-02-03 15:26:17.696251: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [3]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "7"

## Preprocess data

In [4]:
dataset = pd.read_csv('../data/energy_weather.csv', index_col=0)
# https://www.kaggle.com/datasets/nicholasjhana/energy-consumption-generation-prices-and-weather

dataset.fillna(0, inplace=True)
data = dataset
data.head()

Unnamed: 0_level_0,price,price_dayahead,gen_coal,gen_gas,load_actual,gen_lig,gen_oil,gen_oth_renew,pressure_Barcelona,pressure_Bilbao,...,wind_deg_Bilbao,clouds_all_Bilbao,gen_hyd_river,wind_deg_Seville,wind_speed_Barcelona,wind_speed_Valencia,wind_speed_Bilbao,gen_wind,wind_speed_Madrid,gen_hyd_pump
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-01 00:00:00+00:00,64.92,48.1,4755.0,5196.0,24382.0,328.0,158.0,71.0,1035.0,1035.0,...,229.0,0.0,1009.0,21.0,7.0,1.0,0.0,5890.0,1.0,920.0
2015-01-01 01:00:00+00:00,64.48,47.33,4581.0,4857.0,22734.0,323.0,157.0,73.0,1036.0,1036.0,...,224.0,0.0,973.0,27.0,7.0,0.0,1.0,5461.0,1.0,1164.0
2015-01-01 02:00:00+00:00,59.32,42.27,4131.0,4314.0,21286.0,254.0,160.0,75.0,1036.0,1035.0,...,225.0,0.0,949.0,27.0,7.0,0.0,1.0,5238.0,1.0,1503.0
2015-01-01 03:00:00+00:00,56.04,38.41,3840.0,4130.0,20264.0,187.0,156.0,74.0,1036.0,1035.0,...,221.0,0.0,953.0,27.0,7.0,0.0,1.0,4935.0,1.0,1826.0
2015-01-01 04:00:00+00:00,53.63,35.72,3590.0,4038.0,19905.0,178.0,156.0,74.0,1037.0,1035.0,...,224.0,0.0,952.0,57.0,5.0,2.0,1.0,4618.0,0.0,2109.0


In [5]:
values = data.values

# specify the number of lag hours
n_hours = 24*3
n_features = data.shape[-1]
k = 12
split1 = 0.7
split2 = 0.85

# frame as supervised learning
reframed = series_to_supervised(values, n_hours, k)
print("reframed.shape:", reframed.shape)

reframed.shape: (34980, 2184)


In [6]:
# split into train and test sets
reframed_values = reframed.values
n_train_hours = int(len(reframed_values)*split1)
n_valid_hours = int(len(reframed_values)*split2)

train = reframed_values[:n_train_hours, :]
val = reframed_values[n_train_hours:n_valid_hours, :]
test = reframed_values[n_valid_hours:, :]


# split into input and outputs
n_obs = n_hours * n_features
feature_idx = 0
train_X, train_y = train[:, :n_obs], train[:, [n_obs + feature_idx + n_features * i for i in range(k)]]
val_X, val_y = val[:, :n_obs], val[:, [n_obs + feature_idx + n_features * i for i in range(k)]]
test_X, test_y = test[:, :n_obs], test[:, [n_obs + feature_idx + n_features * i for i in range(k)]]


print("train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape", 
      train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape
     )

train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape (24486, 1872) (24486, 12) (5247, 1872) (5247, 12) (5247, 1872) (5247, 12)


In [7]:
# normalize features
scaler = MinMaxScaler(feature_range=(0, 1))

train_X = scaler.fit_transform(train_X)
train_y = scaler.fit_transform(train_y)

val_X = scaler.fit_transform(val_X)
val_y = scaler.fit_transform(val_y)

test_X = scaler.fit_transform(test_X)
test_y = scaler.fit_transform(test_y)

# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], n_hours, n_features))
val_X = val_X.reshape((val_X.shape[0], n_hours, n_features))
test_X = test_X.reshape((test_X.shape[0], n_hours, n_features))

print("train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape", 
      train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape
     )

train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape (24486, 72, 26) (24486, 12) (5247, 72, 26) (5247, 12) (5247, 72, 26) (5247, 12)


### Price threshold

In [8]:
train_X_pm = train_X[:, 0, feature_idx]
print(train_X_pm.shape)

val_X_pm = val_X[:, 0, feature_idx]
print(val_X_pm.shape)

test_X_pm = test_X[:, 0, feature_idx]
print(test_X_pm.shape)

(24486,)
(5247,)
(5247,)


In [9]:
percentile = 95

merged_array = np.concatenate((train_X_pm, val_X_pm, test_X_pm))

percentile_pm = np.percentile(merged_array, percentile)

print("{}th Percentile of Daily Rain:".format(percentile), percentile_pm)

95th Percentile of Daily Rain: 0.7037105569454176


### train_X_filter

In [10]:
train_X_extreme = train_X[train_X_pm > percentile_pm]
print(train_X_extreme.shape)

train_y_extreme = train_y[train_X_pm > percentile_pm]
print(train_y_extreme.shape)

(696, 72, 26)
(696, 12)


In [11]:
train_X_normal = train_X[train_X_pm <= percentile_pm]
print(train_X_normal.shape)

train_y_normal = train_y[train_X_pm <= percentile_pm]
print(train_y_normal.shape)

(23790, 72, 26)
(23790, 12)


### val_X_filter

In [12]:
val_X_extreme = val_X[val_X_pm > percentile_pm]
print(val_X_extreme.shape)

val_y_extreme = val_y[val_X_pm > percentile_pm]
print(val_y_extreme.shape)

(55, 72, 26)
(55, 12)


In [13]:
val_X_normal = val_X[val_X_pm <= percentile_pm]
print(val_X_normal.shape)

val_y_normal = val_y[val_X_pm <= percentile_pm]
print(val_y_normal.shape)

(5192, 72, 26)
(5192, 12)


### test_X_filter

In [14]:
test_X_extreme = test_X[test_X_pm > percentile_pm]
print(test_X_extreme.shape)

test_y_extreme = test_y[test_X_pm > percentile_pm]
print(test_y_extreme.shape)

(998, 72, 26)
(998, 12)


In [15]:
test_X_normal = test_X[test_X_pm <= percentile_pm]
print(test_X_normal.shape)

test_y_normal = test_y[test_X_pm <= percentile_pm]
print(test_y_normal.shape)

(4249, 72, 26)
(4249, 12)


## Test model

In [16]:
# ws_threshold = 2.58
time_index = 0

#### Extreme

In [25]:
# saved_model = load_model('../saved_models_mlp/price_N.h5') 
# saved_model = load_model('../saved_models_mlp/price_E.h5') 


# saved_model = load_model('../saved_models_mlp/price_all.h5') 
# saved_model = load_model('../saved_models_mlp/price_all_95_ft.h5')


# saved_model = load_model('../saved_models_mlp/price_all_weighted_IPF_95.h5')
# saved_model = load_model('../saved_models_mlp/price_all_weighted_IPF_95_ft.h5')


# saved_model = load_model('../saved_models_mlp/price_all_weighted_EVT_95.h5')
# saved_model = load_model('../saved_models_mlp/price_all_weighted_EVT_95_ft.h5')


saved_model = load_model('../saved_models_mlp/price_all_weighted_META_95.h5')
# saved_model = load_model('../saved_models_mlp/price_all_weighted_META_95_ft.h5')


yhat_extreme = saved_model.predict(test_X_extreme)
inv_yhat_extreme = scaler.inverse_transform(yhat_extreme)
inv_y_extreme = scaler.inverse_transform(test_y_extreme)
test_errors_extreme = inv_yhat_extreme - inv_y_extreme

# print('MAE = {}'.format(float("{:.6f}".format(mae(inv_y_extreme, inv_yhat_extreme)))))
# print('RMSE = {}'.format(float("{:.6f}".format(np.sqrt(mse(inv_y_extreme, inv_yhat_extreme))))))

In [26]:
metrics_s1_t1(inv_y_extreme.min(), time_index, inv_y_extreme, inv_yhat_extreme, test_errors_extreme)

Peformance when water level is over 37.76 ft 

------ MAE & RMSE ------
MAE = 4.412101
RMSE = 5.816636 

------ Max Errors (t+1 at S1) ------
Max Error of Over Estimation: 14.609786682128906
Max Error of Under Estimation: -10.526218872070316
Max Abs Error of Under Estimation: 14.609786682128906 

------ Time # (t+1 at S1) ------
Time# of Over Estimation: 582
Time# of Under Estimation: 416 

------ Area (t+1 at S1) ------
Area of Over Estimation: 2106.7555892944347
Area of Under Estimation: -1025.0178630065923


In [36]:
# metrics_s1_t1(inv_y_extreme.min(), time_index, inv_y_extreme, inv_yhat_extreme, test_errors_extreme)

#### Normal & Extreme hen water level is over threshold 2.58 feet (95 percentile)

In [25]:
# saved_model = load_model('../saved_models_mlp/price_all.h5') 
# saved_model = load_model('../saved_models_mlp/price_all_95_ft.h5')


# saved_model = load_model('../saved_models_mlp/price_all_weighted_IPF_95.h5')
# saved_model = load_model('../saved_models_mlp/price_all_weighted_IPF_95_ft.h5')


saved_model = load_model('../saved_models_mlp/price_all_weighted_EVT_95.h5')
# saved_model = load_model('../saved_models_mlp/price_all_weighted_EVT_95_ft.h5',)


# saved_model = load_model('../saved_models_mlp/price_all_weighted_META_95.h5')
# saved_model = load_model('../saved_models_mlp/price_all_weighted_META_95_ft.h5',)


yhat = saved_model.predict(test_X)


inv_yhat = scaler.inverse_transform(yhat)
inv_y = scaler.inverse_transform(test_y)
test_errors = inv_yhat - inv_y

metrics_s1_t1(inv_y.min(), time_index, inv_y, inv_yhat, test_errors)

Peformance when water level is over 37.76 ft 

------ MAE & RMSE ------
MAE = 3.488987
RMSE = 4.53643 

------ Max Errors (t+1 at S1) ------
Max Error of Over Estimation: 14.690802917480468
Max Error of Under Estimation: -13.1334831237793
Max Abs Error of Under Estimation: 14.690802917480468 

------ Time # (t+1 at S1) ------
Time# of Over Estimation: 3199
Time# of Under Estimation: 2047 

------ Area (t+1 at S1) ------
Area of Over Estimation: 8244.625369720463
Area of Under Estimation: -4237.75571945191


In [246]:
# metrics_s1_t1(ws_threshold, time_index, inv_y, inv_yhat, test_errors)

#### hyperparameter - frozen layers

In [27]:
layers = [1, 5, 9, 13, 17]

for layer in layers:
    print('layer: {}'.format(layer))    
    saved_model = load_model('../saved_models_hyper/price_all_weighted_META_ft_{}.h5'.format(layer))
    yhat_extreme = saved_model.predict(test_X_extreme)

    inv_yhat_extreme = scaler.inverse_transform(yhat_extreme)
    inv_y_extreme = scaler.inverse_transform(test_y_extreme)
    test_errors_extreme = inv_yhat_extreme - inv_y_extreme

    print('MAE = {}'.format(float("{:.6f}".format(mae(inv_y_extreme, inv_yhat_extreme)))))
    print('RMSE = {}'.format(float("{:.6f}".format(np.sqrt(mse(inv_y_extreme, inv_yhat_extreme))))))
    print('-------------------------')

layer: 1
MAE = 3.611298
RMSE = 4.682596
-------------------------
layer: 5
MAE = 3.589507
RMSE = 4.729908
-------------------------
layer: 9
MAE = 3.531611
RMSE = 4.62896
-------------------------
layer: 13
MAE = 3.542176
RMSE = 4.636445
-------------------------
layer: 17
MAE = 3.55321
RMSE = 4.662319
-------------------------


### p-values

In [19]:
saved_model = load_model('../saved_models_mlp/price_all.h5') 

yhat_extreme = saved_model.predict(test_X_extreme)
inv_yhat_extreme = scaler.inverse_transform(yhat_extreme)
inv_y_extreme = scaler.inverse_transform(test_y_extreme)
test_errors_extreme_ori = inv_yhat_extreme - inv_y_extreme
test_errors_extreme_ori.shape

2024-01-31 14:21:06.270627: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2024-01-31 14:21:06.271559: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 1700105000 Hz
2024-01-31 14:21:06.800240: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10


(998, 12)

In [20]:
saved_model = load_model('../saved_models_mlp/price_all_weighted_META_95.h5')

yhat_extreme = saved_model.predict(test_X_extreme)
inv_yhat_extreme = scaler.inverse_transform(yhat_extreme)
inv_y_extreme = scaler.inverse_transform(test_y_extreme)
test_errors_extreme_re_meta = inv_yhat_extreme - inv_y_extreme

In [39]:
t_index = 7

# ========= Mann-Whitney U test =========
stat_mann, p_value_mann = mannwhitneyu(test_errors_extreme_ori[:,t_index], test_errors_extreme_re_meta[:,t_index], alternative='two-sided')
print(f"p_value_mann: {p_value_mann:.4e}")


# ========= wilcoxon U test =========
stat_wilcoxon, p_value_wilcoxon = wilcoxon(test_errors_extreme_ori[:, t_index], test_errors_extreme_re_meta[:, t_index])
print(f"p_value_wilcoxon: {p_value_wilcoxon:.4e}")


# ========= t-test =========
t_statistic, p_value = ttest_rel(test_errors_extreme_ori[:,t_index], test_errors_extreme_re_meta[:,t_index])
print(f"p_value_ttest: {p_value:.4e}")

p_value_mann: 8.2778e-02
p_value_wilcoxon: 7.3846e-06
p_value_ttest: 8.5589e-04
