In [None]:
import pandas as pd
import numpy as np
from src.utils import setup_folders
from src.visualization.evaluate import calculate_metric_horizons, calculate_metric_horizon_windows, calculate_metric_horizons_all_models
from src.visualization.visualize import plot_horizons, plot_scatterplot, plot_test_day, plot_mean_std_error_multiple_models
from src.features.build_features import build_features, build_features_LSTM

In [None]:
# Technology whose results are plotted; change it here ("wind" or "solar"
# are the values the branching cells below handle). It is interpolated into
# the target column name, the prediction file names and the figure names.
tech = "wind"

# Nazeerabad

In [None]:
# Plant analysed in this section; the data-loading cell below branches on it.
plant = "Nazeerabad"

In [None]:
# Load the 30-minute dataset of the selected plant and pick the target and
# feature columns used further down. The target column name depends on
# `tech`; the feature columns do not.
if plant == "Nazeerabad":
    path = "../data/processed/Nazeerabad/Hybrid/SCADA_ATM_FC_30min_Hybrid.csv"
    targets = [f'obs_power_{tech}']
    features = ['fc_ws_101.8m_4km_NIWE_NEW', 'fc_wdir_101.8m_4km_NIWE_NEW']
elif plant == "HPP1":
    path = "../data/processed/HPP1/HPP1_OBS_METEO_30min.csv"
    targets = [f'power_obs_{tech}']
    features = ['WINDSPEED_100m', 'WINDDIR_100m']
else:
    # Fail fast: otherwise `df`, `targets` and `features` would be left
    # undefined, or silently keep values from a previously executed cell.
    raise ValueError(f"Unknown plant {plant!r}; expected 'Nazeerabad' or 'HPP1'")

# Both files are ';'-separated; the first column becomes the index and is
# parsed as dates.
df = pd.read_csv(path, sep=";", parse_dates=True, index_col=[0])

# Available data Nazeerabad

In [None]:
# Plot the observed wind and solar power series and export the figure as SVG.
# `backend="plotly"` is passed explicitly because `update_layout` and
# `write_image` are plotly Figure methods: without it this cell only works
# when the pandas plotting backend has been switched to plotly elsewhere.
fig = df.plot(y=['obs_power_wind', 'obs_power_solar'], backend="plotly")
fig.update_layout(
    title=f"Available power data {plant}",
    xaxis_title="Date",
    yaxis_title="Power (MW)",
)
fig.write_image(f"../reports/figures/Meeting 07_03_2022/available_power_{plant}.svg")

# HPP1

In [None]:
# Plant analysed from here on; it selects the dataset in the loading cell
# below and is interpolated into the prediction and figure file names.
plant = "HPP1"

In [None]:
# Load the 30-minute dataset of the selected plant and pick the target and
# feature columns used further down. The target column name depends on
# `tech`; the feature columns do not.
if plant == "Nazeerabad":
    path = "../data/processed/Nazeerabad/Hybrid/SCADA_ATM_FC_30min_Hybrid.csv"
    targets = [f'obs_power_{tech}']
    features = ['fc_ws_101.8m_4km_NIWE_NEW', 'fc_wdir_101.8m_4km_NIWE_NEW']
elif plant == "HPP1":
    path = "../data/processed/HPP1/HPP1_OBS_METEO_30min.csv"
    targets = [f'power_obs_{tech}']
    features = ['WINDSPEED_100m', 'WINDDIR_100m']
else:
    # Fail fast: otherwise `df`, `targets` and `features` would silently
    # keep the values loaded for the previous plant.
    raise ValueError(f"Unknown plant {plant!r}; expected 'Nazeerabad' or 'HPP1'")

# Both files are ';'-separated; the first column becomes the index and is
# parsed as dates.
df = pd.read_csv(path, sep=";", parse_dates=True, index_col=[0])

In [None]:
# Split the loaded frame into train/test timestamps, scaled inputs/targets
# (with their scalers) and horizon arrays. `horizon=None` is forwarded as-is;
# see build_features for its meaning.
(
    dt_train, dt_test,
    X_train, X_test, Xscaler,
    Y_train, Y_test, Yscaler,
    horizon_train, horizon_test,
) = build_features(df, features, targets, horizon=None)

# Quick sanity check: shapes of the timestamp, input and target splits.
print(*(split.shape for split in (dt_train, dt_test, X_train, X_test, Y_train, Y_test)))

# Available data HPP1

In [None]:
# Plot the observed wind and solar power series and export the figure as SVG.
# `backend="plotly"` is passed explicitly because `update_layout` and
# `write_image` are plotly Figure methods: without it this cell only works
# when the pandas plotting backend has been switched to plotly elsewhere.
fig = df.plot(y=['power_obs_wind', 'power_obs_solar'], backend="plotly")
fig.update_layout(
    title=f"Available power data {plant}",
    xaxis_title="Date",
    yaxis_title="Normalized Power",
)
fig.write_image(f"../reports/figures/Meeting 07_03_2022/available_power_{plant}.svg")

# Train test days

In [None]:
# NOTE(review): `px` is not used in this cell; kept because other cells may
# rely on it. Consider moving it to the imports cell at the top.
import plotly.express as px

# Stack the train and test targets into one frame. Rows coming from one split
# are NaN in the other split's column.
df_merged = pd.concat([
    pd.DataFrame({'Y_train': Y_train}, index=dt_train),
    pd.DataFrame({'Y_test': Y_test}, index=dt_test),
])

# Re-index onto a gap-free 30-minute grid so that periods without data show
# up as NaN rows. The grid bounds use min()/max() instead of the first/last
# position: train rows are concatenated before test rows, so the combined
# index is not guaranteed to be chronologically sorted.
dt_range = pd.date_range(df_merged.index.min(), df_merged.index.max(), freq='30min')
df_padded = pd.DataFrame(index=dt_range).join(df_merged)

In [None]:
# Plot the train and test target series on the padded 30-minute grid and
# export the figure as SVG.
# `backend="plotly"` is passed explicitly because `update_layout` and
# `write_image` are plotly Figure methods: without it this cell only works
# when the pandas plotting backend has been switched to plotly elsewhere.
fig = df_padded.plot(y=['Y_train', 'Y_test'], backend="plotly")
fig.update_layout(
    title=f"Train/test split example {plant}",
    xaxis_title="Date",
    yaxis_title="Normalized Power",
)
fig.write_image(f"../reports/figures/Meeting 07_03_2022/train_test_{plant}.svg")

# We load independent and hybrid predictions

In [None]:
# --- Independent-model predictions ---------------------------------------
hybrid = False
hybrid_bool = {True: 'hybrid', False: 'independent'}[hybrid]
filename = f'../reports/{plant}_{hybrid_bool}_{tech}_predictions.csv'
df_preds_independent = pd.read_csv(
    filename,
    sep=";",
    index_col=[0],
    parse_dates=[0],
)

# --- Hybrid-model predictions ---------------------------------------------
hybrid = True
hybrid_bool = {True: 'hybrid', False: 'independent'}[hybrid]
filename = f'../reports/{plant}_{hybrid_bool}_{tech}_predictions.csv'
# Every column gets a "_hybrid" suffix so it cannot clash with the
# independent columns; the suffixed Horizon column is then dropped, leaving
# only the independent frame's "Horizon" in the combined frame.
df_preds_hybrid = (
    pd.read_csv(filename, sep=";", index_col=[0], parse_dates=[0])
    .add_suffix("_hybrid")
    .drop(columns=['Horizon_hybrid'])
)

# --- Side-by-side frame used by the comparison plots -----------------------
df_preds = pd.concat([df_preds_independent, df_preds_hybrid], axis=1)

# Time series plot of test days independent models

In [None]:
# Plot one test day of the independent models' predictions and export it.
# The second argument differs per technology (100 for wind, 31 for solar);
# presumably it selects which test day is shown - confirm in plot_test_day.
if tech == "wind":
    fig = plot_test_day(df_preds_independent, 100)
elif tech == "solar":
    fig = plot_test_day(df_preds_independent, 31)
else:
    # Without this branch an unsupported `tech` would either raise a
    # NameError or export a stale `fig` left over from an earlier cell.
    raise ValueError(f"Unsupported tech {tech!r}; expected 'wind' or 'solar'")
fig.write_image(f"../reports/figures/Meeting 07_03_2022/test_day_{plant}_{tech}.svg")

# Horizon plot for static models

In [None]:
# RMSE horizon plot restricted to the three static models.
# NOTE(review): vline=12 presumably marks a horizon of interest - confirm
# against plot_horizons.
static_models = ['Y_pred_LR(static)', 'Y_pred_RF(static)', 'Y_pred_LGB(static)']
fig = plot_horizons(df_preds, 'rmse', static_models, vline=12)
fig.write_image(
    f"../reports/figures/Meeting 07_03_2022/RMSE_horizon_static_{plant}_{tech}.svg"
)

# Horizon plot for dynamic models

In [None]:
# RMSE horizon plot for the dynamic models. The first curve is a
# technology-specific reference: 'Y_pred_PST' for wind, the static linear
# regression for solar. The remaining models are the same for both.
dynamic_models = [
    'Y_pred_LR(dynamic)',
    'Y_pred_RF(dynamic)',
    'Y_pred_LGB(dynamic)',
    'Y_pred_LSTM(recursive)',
    'Y_pred_LSTM(full day)',
]
if tech == "wind":
    reference_model = 'Y_pred_PST'
elif tech == "solar":
    reference_model = 'Y_pred_LR(static)'
else:
    # Without this branch an unsupported `tech` would silently export the
    # `fig` left over from the previous cell under a new file name.
    raise ValueError(f"Unsupported tech {tech!r}; expected 'wind' or 'solar'")

fig = plot_horizons(df_preds, 'rmse', [reference_model] + dynamic_models, vline=12)
fig.write_image(f"../reports/figures/Meeting 07_03_2022/RMSE_horizon_dynamic_{plant}_{tech}.svg")

# Horizon plot for dynamic + hybrid models

In [None]:
# RMSE horizon plot comparing each selected model with its hybrid
# counterpart (the columns carrying the "_hybrid" suffix).
hybrid_comparison_models = [
    'Y_pred_RF(dynamic)',
    'Y_pred_RF(dynamic)_hybrid',
    'Y_pred_LSTM(full day)',
    'Y_pred_LSTM(full day)_hybrid',
]
fig = plot_horizons(df_preds, 'rmse', hybrid_comparison_models, vline=12)
fig.write_image(
    f"../reports/figures/Meeting 07_03_2022/RMSE_horizon_hybrid_{plant}_{tech}.svg"
)

# Mean error + std plots

In [None]:
# Mean/std error plot for the full-day LSTM, independent versus hybrid.
lstm_models = ['Y_pred_LSTM(full day)', 'Y_pred_LSTM(full day)_hybrid']
fig = plot_mean_std_error_multiple_models(df_preds, lstm_models, vline=12)
fig.write_image(
    f"../reports/figures/Meeting 07_03_2022/Mean_error_horizon_hybrid_{plant}_{tech}.svg"
)

# Matti scatter plot

In [None]:
# Put the first two test-set input columns (as 'ft1' / 'ft2', indexed by the
# test timestamps) next to the predictions, after dropping prediction rows
# that contain NaN.
test_features = pd.DataFrame(
    {'ft1': X_test[:, 0], 'ft2': X_test[:, 1]},
    index=dt_test,
)
df_scatter = pd.concat([test_features, df_preds.dropna()], axis=1)

In [None]:
# Static random-forest prediction against the first input feature.
df_scatter.plot.scatter(
    x="ft1",
    y="Y_pred_RF(static)",
)

In [None]:
# Static random-forest prediction against the second input feature.
df_scatter.plot.scatter(
    x="ft2",
    y="Y_pred_RF(static)",
)

# Create RMSE window tables

In [None]:
# Reload the hybrid predictions with their original column names. This
# rebinds `df_preds_hybrid`, which an earlier cell filled with the
# "_hybrid"-suffixed copy that had its Horizon column dropped.
hybrid = True
hybrid_bool = {True: 'hybrid', False: 'independent'}[hybrid]
filename = f'../reports/{plant}_{hybrid_bool}_{tech}_predictions.csv'
df_preds_hybrid = pd.read_csv(
    filename,
    sep=";",
    index_col=[0],
    parse_dates=[0],
)

In [None]:
# nRMSE table for the independent models, sorted ascending by its 48 column.
# NOTE(review): [48] is presumably the list of horizon windows - confirm
# against calculate_metric_horizon_windows.
(
    calculate_metric_horizon_windows(df_preds_independent, 'nrmse', [48])
    .sort_values(by=[48])
)

### Hybrid

In [None]:
# nRMSE table for the hybrid models, sorted ascending by its 48 column.
# NOTE(review): [48] is presumably the list of horizon windows - confirm
# against calculate_metric_horizon_windows.
(
    calculate_metric_horizon_windows(df_preds_hybrid, 'nrmse', [48])
    .sort_values(by=[48])
)