## Import packages

In [1]:
import os
import sys

# Make the parent directory importable so the project-local modules used by
# this notebook (imported in the next cell) can be resolved.
module_path = os.path.abspath(os.pardir)

already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.manifold import TSNE
from tensorflow.keras.models import load_model
from preprocess import water_postprocess
from scipy import stats
from scipy.stats import mannwhitneyu, wilcoxon, ttest_rel
from performance import metrics_s1_t1

2024-02-03 14:40:57.152637: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [3]:
import os
# Expose only GPU index 7 to CUDA for this kernel.
# NOTE(review): this runs after TensorFlow has already been imported above;
# confirm the variable is still honoured (it must be set before the first GPU
# initialisation to have any effect).
os.environ.update(CUDA_VISIBLE_DEVICES="7")

## Preprocess data

In [4]:
# ====== preprocessing parameters ======
# All five values are forwarded unchanged to water_postprocess().
# NOTE(review): presumably n_hours is the look-back window and k the forecast
# horizon (the resulting windows have 72 + 24 = 96 steps) — confirm in preprocess.py.
n_hours, k = 72, 24
# Placeholder value handed to the preprocessing step for masked entries.
masked_value = 1e-10
# NOTE(review): presumably cumulative split fractions (train up to 0.7,
# validation up to 0.85, test for the rest) — confirm in preprocess.py.
split_1, split_2 = 0.7, 0.85

In [5]:
# Build the train / validation / test inputs and targets plus the two fitted
# scalers; ws_scaler is the one used later to map targets and predictions back
# to their original units.
(
    train_X_mask, val_X_mask, test_X_mask,
    train_ws_y, val_ws_y, test_ws_y,
    scaler, ws_scaler,
) = water_postprocess(n_hours, k, masked_value, split_1, split_2)

Index(['MEAN_RAIN', 'WS_S4', 'GATE_S25A', 'GATE_S25B', 'GATE_S25B2',
       'GATE_S26_1', 'GATE_S26_2', 'PUMP_S25B', 'PUMP_S26', 'HWS_S25A',
       'HWS_S25B', 'HWS_S26', 'WS_S1', 'TWS_S25A', 'TWS_S25B', 'TWS_S26'],
      dtype='object')


In [6]:
# Sanity check: inputs first (train, val, test), then the matching targets.
print(*(
    split.shape
    for split in (train_X_mask, val_X_mask, test_X_mask, train_ws_y, val_ws_y, test_ws_y)
))

(67435, 96, 16) (14451, 96, 16) (14451, 96, 16) (67435, 96) (14451, 96) (14451, 96)


### Rain threshold

In [7]:
# Per-window rain value used to label a sample as "extreme" or "normal":
# feature column 0 at the first time step of every window.
# NOTE(review): column 0 is presumably MEAN_RAIN (first name in the Index printed
# by water_postprocess) — confirm the column order.
#
# An earlier variant, kept for reference, accumulated rain over the last 24 steps:
#   np.sum(train_X_mask[:, -24:, 0], axis=1)
#   np.sum(test_X_mask[:, -24:, 0], axis=1)
train_X_mask_rain_acc = train_X_mask[:, 0, 0]
val_X_mask_rain_acc = val_X_mask[:, 0, 0]
test_X_mask_rain_acc = test_X_mask[:, 0, 0]

# One value per window is expected, i.e. a 1-D array per split.
for rain_series in (train_X_mask_rain_acc, val_X_mask_rain_acc, test_X_mask_rain_acc):
    print(rain_series.shape)

(67435,)
(14451,)
(14451,)


In [8]:
# Percentile rank that separates "extreme" from "normal" rain.
percentile = 95

# The threshold is taken over the rain values of all three splits pooled together.
merged_array = np.concatenate(
    [train_X_mask_rain_acc, val_X_mask_rain_acc, test_X_mask_rain_acc]
)
percentile_rain = np.percentile(merged_array, percentile)

# NOTE(review): the value is in the units of the preprocessed inputs
# (presumably scaled, not raw rainfall) — confirm.
print(f"{percentile}th Percentile of Daily Rain:", percentile_rain)

95th Percentile of Daily Rain: 0.010711610503493287


### train_X_mask_filter

In [9]:
# Training windows whose rain value is strictly above the percentile threshold.
train_extreme_rows = train_X_mask_rain_acc > percentile_rain

train_X_mask_extreme = train_X_mask[train_extreme_rows]
train_ws_y_extreme = train_ws_y[train_extreme_rows]

print(train_X_mask_extreme.shape)
print(train_ws_y_extreme.shape)

(3251, 96, 16)
(3251, 96)


In [10]:
# Training windows at or below the percentile threshold (complement of the extreme set).
train_normal_rows = train_X_mask_rain_acc <= percentile_rain

train_X_mask_normal = train_X_mask[train_normal_rows]
train_ws_y_normal = train_ws_y[train_normal_rows]

print(train_X_mask_normal.shape)
print(train_ws_y_normal.shape)

(64184, 96, 16)
(64184, 96)


### val_X_mask_filter

In [11]:
# Validation windows whose rain value is strictly above the percentile threshold.
val_extreme_rows = val_X_mask_rain_acc > percentile_rain

val_X_mask_extreme = val_X_mask[val_extreme_rows]
val_ws_y_extreme = val_ws_y[val_extreme_rows]

print(val_X_mask_extreme.shape)
print(val_ws_y_extreme.shape)

(536, 96, 16)
(536, 96)


In [12]:
# Validation windows at or below the percentile threshold.
val_normal_rows = val_X_mask_rain_acc <= percentile_rain

val_X_mask_normal = val_X_mask[val_normal_rows]
val_ws_y_normal = val_ws_y[val_normal_rows]

print(val_X_mask_normal.shape)
print(val_ws_y_normal.shape)

(13915, 96, 16)
(13915, 96)


### test_X_mask_filter

In [13]:
# Test windows whose rain value is strictly above the percentile threshold.
test_extreme_rows = test_X_mask_rain_acc > percentile_rain

test_X_mask_extreme = test_X_mask[test_extreme_rows]
test_ws_y_extreme = test_ws_y[test_extreme_rows]

print(test_X_mask_extreme.shape)
print(test_ws_y_extreme.shape)

(1030, 96, 16)
(1030, 96)


In [14]:
# Test windows at or below the percentile threshold.
test_normal_rows = test_X_mask_rain_acc <= percentile_rain

test_X_mask_normal = test_X_mask[test_normal_rows]
test_ws_y_normal = test_ws_y[test_normal_rows]

print(test_X_mask_normal.shape)
print(test_ws_y_normal.shape)

(13421, 96, 16)
(13421, 96)


## Test model

In [15]:
# ws_threshold = 2.58
# Passed as the second argument to metrics_s1_t1 in the evaluation cells.
# NOTE(review): presumably selects which forecast step is scored (0 -> first
# step, matching the "t+1" labels in the printed report) — confirm in performance.py.
time_index = 0

#### Extreme

In [19]:
# Checkpoint under evaluation. Keep exactly one load_model(...) line active; the
# commented lines are the other saved variants that can be swapped in.
# saved_model = load_model('../saved_models_mlp/water_N.h5')
# saved_model = load_model('../saved_models_mlp/water_E.h5')

# saved_model = load_model('../saved_models_mlp/water_all_95.h5')
# saved_model = load_model('../saved_models_mlp/water_all_95_ft.h5')

# saved_model = load_model('../saved_models_mlp/water_all_weighted_IPF_95.h5')
# saved_model = load_model('../saved_models_mlp/water_all_weighted_IPF_95_ft.h5')

# saved_model = load_model('../saved_models_mlp/water_all_weighted_EVT_95.h5')
# saved_model = load_model('../saved_models_mlp/water_all_weighted_EVT_95_ft.h5')

saved_model = load_model('../saved_models_mlp/water_all_weighted_META_95.h5')
# saved_model = load_model('../saved_models_mlp/water_all_weighted_META_95_ft.h5')

# Ground truth for the extreme-rain test windows, mapped back through ws_scaler.
inv_y_extreme = ws_scaler.inverse_transform(test_ws_y_extreme)

# Predictions for the same windows, mapped back through the same scaler.
yhat_extreme = saved_model.predict(test_X_mask_extreme)
inv_yhat_extreme = ws_scaler.inverse_transform(yhat_extreme)

# Signed error: positive where the model predicts above the observation.
test_errors_extreme = inv_yhat_extreme - inv_y_extreme

# The smallest observed value is passed as the first argument.
# NOTE(review): presumably this is the water-level threshold, so using the minimum
# keeps every sample in the report — confirm in performance.py.
lowest_observed = inv_y_extreme.min()
metrics_s1_t1(lowest_observed, time_index, inv_y_extreme, inv_yhat_extreme, test_errors_extreme)

Peformance when water level is over -0.8199999928474426 ft 

------ MAE & RMSE ------
MAE = 0.161459
RMSE = 0.21477 

------ Max Errors (t+1 at S1) ------
Max Error of Over Estimation: 0.2705958
Max Error of Under Estimation: -0.9244937
Max Abs Error of Under Estimation: 0.9244937 

------ Time # (t+1 at S1) ------
Time# of Over Estimation: 44
Time# of Under Estimation: 986 

------ Area (t+1 at S1) ------
Area of Over Estimation: 2.177920534275472
Area of Under Estimation: -187.90659834444523


#### Normal & Extreme when water level is over threshold 2.58 feet (95th percentile)

In [61]:
# Checkpoint under evaluation. Keep exactly one load_model(...) line active; the
# commented lines are the other saved variants that can be swapped in.
# saved_model = load_model('../saved_models_mlp/water_N_E_95.h5')
# saved_model = load_model('../saved_models_mlp/water_E_E_95.h5')

# saved_model = load_model('../saved_models_mlp/water_all_95.h5')
# saved_model = load_model('../saved_models_mlp/water_all_95_ft.h5')

# saved_model = load_model('../saved_models_mlp/water_all_weighted_IPF_95.h5')
# saved_model = load_model('../saved_models_mlp/water_all_weighted_IPF_95_ft.h5')

# saved_model = load_model('../saved_models_mlp/water_all_weighted_EVT_95.h5')
# saved_model = load_model('../saved_models_mlp/water_all_weighted_EVT_95_ft.h5')

saved_model = load_model('../saved_models_mlp/water_all_weighted_META_95.h5')
# saved_model = load_model('../saved_models_mlp/water_all_weighted_META_95_ft.h5')

# Ground truth for the complete test split (normal and extreme windows together),
# mapped back through ws_scaler.
inv_y = ws_scaler.inverse_transform(test_ws_y)

# Predictions for the complete test split, mapped back through the same scaler.
yhat = saved_model.predict(test_X_mask)
inv_yhat = ws_scaler.inverse_transform(yhat)

# Signed error: positive where the model predicts above the observation.
test_errors = inv_yhat - inv_y

# The smallest observed value is passed as the first argument.
# NOTE(review): presumably this is the water-level threshold, so using the minimum
# keeps every sample in the report — confirm in performance.py.
lowest_observed = inv_y.min()
metrics_s1_t1(lowest_observed, time_index, inv_y, inv_yhat, test_errors)

Peformance when water level is over -1.25 ft 

------ MAE & RMSE ------
MAE = 0.08949
RMSE = 0.122161 

------ Max Errors (t+1 at S1) ------
Max Error of Over Estimation: 0.5513456
Max Error of Under Estimation: -0.7480478
Max Abs Error of Under Estimation: 0.7480478 

------ Time # (t+1 at S1) ------
Time# of Over Estimation: 3036
Time# of Under Estimation: 11415 

------ Area (t+1 at S1) ------
Area of Over Estimation: 162.62288455938688
Area of Under Estimation: -926.6571015275549


#### hyperparameter - frozen layers

In [83]:
# Compare the fine-tuned checkpoints of the sweep on the extreme-rain test windows.
# Each value is the suffix of one saved model file.
# NOTE(review): presumably the number of frozen layers used during fine-tuning
# (per the section heading) — confirm against the training notebook.
layers = [1, 5, 9, 13, 17]

# The ground truth does not depend on the checkpoint, so map it back to original
# units once instead of repeating the same transform for every model.
# Assumes ws_scaler.inverse_transform is a pure function of its input (true for
# scikit-learn scalers) — confirm if ws_scaler is a custom object.
inv_y_extreme = ws_scaler.inverse_transform(test_ws_y_extreme)

for layer in layers:
    print('layer: {}'.format(layer))
    saved_model = load_model('../saved_models_hyper/water_all_weighted_META_95_ft_{}.h5'.format(layer))
    yhat_extreme = saved_model.predict(test_X_mask_extreme)

    inv_yhat_extreme = ws_scaler.inverse_transform(yhat_extreme)
    # Not used inside the loop; kept so the notebook global still holds the
    # errors of the last evaluated checkpoint, as before.
    test_errors_extreme = inv_yhat_extreme - inv_y_extreme

    mae_value = mae(inv_y_extreme, inv_yhat_extreme)
    rmse_value = np.sqrt(mse(inv_y_extreme, inv_yhat_extreme))

    # Round-trip through a 6-decimal string so trailing zeros are dropped on print.
    print('MAE = {}'.format(float("{:.6f}".format(mae_value))))
    print('RMSE = {}'.format(float("{:.6f}".format(rmse_value))))
    print('-------------------------')

layer: 1
MAE = 0.096839
RMSE = 0.129209
-------------------------
layer: 5
MAE = 0.101532
RMSE = 0.143779
-------------------------
layer: 9
MAE = 0.101841
RMSE = 0.140407
-------------------------
layer: 13
MAE = 0.103867
RMSE = 0.14259
-------------------------
layer: 17
MAE = 0.102034
RMSE = 0.140531
-------------------------


### P-values

In [182]:
# First of the two checkpoints compared in the significance tests.
saved_model = load_model('../saved_models_mlp/water_all_95.h5')

# Ground truth and predictions for the extreme-rain test windows, both mapped
# back through ws_scaler.
inv_y_extreme = ws_scaler.inverse_transform(test_ws_y_extreme)
yhat_extreme = saved_model.predict(test_X_mask_extreme)
inv_yhat_extreme = ws_scaler.inverse_transform(yhat_extreme)

# Signed errors of this checkpoint (prediction minus observation).
test_errors_extreme_ori = inv_yhat_extreme - inv_y_extreme
test_errors_extreme_ori.shape

(1030, 96)

In [183]:
# Second checkpoint of the comparison (the META-weighted variant).
saved_model = load_model('../saved_models_mlp/water_all_weighted_META_95.h5')

# Same extreme-rain test windows as for the first checkpoint, so the two error
# arrays line up row by row.
inv_y_extreme = ws_scaler.inverse_transform(test_ws_y_extreme)
yhat_extreme = saved_model.predict(test_X_mask_extreme)
inv_yhat_extreme = ws_scaler.inverse_transform(yhat_extreme)

# Signed errors of this checkpoint (prediction minus observation).
test_errors_extreme_re_meta = inv_yhat_extreme - inv_y_extreme

In [186]:
# Column of the error matrices that is compared; -1 selects the last column.
t_index = -1

# Signed errors of both checkpoints at that column. Both arrays come from the
# same extreme-rain test windows, so they are paired row by row.
errors_ori = test_errors_extreme_ori[:, t_index]
errors_meta = test_errors_extreme_re_meta[:, t_index]

# ========= Mann-Whitney U test =========
# NOTE(review): this test treats the two samples as independent, while the errors
# here are paired; the two paired tests below are the better match for this design.
stat_mann, p_value_mann = mannwhitneyu(errors_ori, errors_meta, alternative='two-sided')
print(f"p_value_mann: {p_value_mann:.4e}")


# ========= Wilcoxon signed-rank test (paired, non-parametric) =========
stat_wilcoxon, p_value_wilcoxon = wilcoxon(errors_ori, errors_meta)
print(f"p_value_wilcoxon: {p_value_wilcoxon:.4e}")


# ========= paired t-test =========
t_statistic, p_value = ttest_rel(errors_ori, errors_meta)
print(f"p_value_ttest: {p_value:.4e}")

p_value_mann: 6.8135e-17
p_value_wilcoxon: 6.0727e-20
p_value_ttest: 1.3289e-24
