In [6]:
import pandas as pd
# Route DataFrame.plot() calls through the plotly plotting backend.
pd.options.plotting.backend = "plotly"
# Disable pandas' chained-assignment warning for this notebook.
pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import sklearn.ensemble
from sklearn.linear_model import LinearRegression
import lightgbm as lgb

import datetime
# NOTE(review): setup_folders is imported again a few lines below; one of the
# two imports is redundant.
from src.utils import setup_folders, shift1

# The local `src` package has to be installed (pip install -e .) before the
# project imports below will resolve.
from src.utils import setup_folders, return_static_features, return_dynamic_features
from src.features.build_features import build_features, build_features_LSTM, build_features_fullday
from src.models.deterministic.models import RF, LR, LGB, Persistence, my_LSTM
from src.visualization.evaluate import calculate_metric_horizons, calculate_metric_horizon_windows, calculate_metric_horizons_all_models
from src.visualization.visualize import plot_horizons, plot_scatterplot, plot_test_day, plot_mean_std_error_multiple_models

import warnings
# Silence only the warnings whose message matches this pattern; every other
# warning is still shown.
warnings.filterwarnings(action="ignore", message=r'.*Use subset.*of np.ndarray is not recommended')

# Select plant + tech

In [7]:
# Notebook-wide selection: `plant` picks the dataset to load and both values
# are passed to return_dynamic_features() further down.
plant, tech = "HPP1", "agr"

In [8]:
### We load the data
# Every supported plant uses the same processed-file layout, so the path is
# derived from `plant` instead of repeating one branch per plant.
supported_plants = ("Nazeerabad", "HPP1")
if plant not in supported_plants:
    # Fail loudly: without this guard an unknown plant would leave `df`
    # undefined (or silently reuse a stale `df` from an earlier kernel run).
    raise ValueError(
        f"Unsupported plant {plant!r}; expected one of {supported_plants}"
    )

path = f"../../data/processed/{plant}/{plant}_OBS_METEO_30min.csv"
# Semicolon-separated file; the first column is parsed as a datetime index.
df = pd.read_csv(path, sep = ";", parse_dates = True, index_col = [0])

# Build features

In [10]:
 ### We build the features
# Feature/target definitions for the selected plant and tech.
features, targets, meteo_features, obs_features = return_dynamic_features(plant, tech)

# Train/test split; note that `features` is rebound to the value returned by
# build_features().
(
    dt_train, dt_test,
    X_train, X_test,
    Y_train, Y_test,
    features,
    horizon_train, horizon_test,
) = build_features(df, features, targets, horizon = None)

# Print the shape of every split on one line as a quick sanity check.
print(*(split.shape for split in (dt_train, dt_test, X_train, X_test, Y_train, Y_test)))

(30425,) (12144,) (30425, 29) (12144, 29) (30425,) (12144,)


In [11]:
#features, targets

In [13]:
# Window sizes handed to build_features_fullday(): number of lagged steps and
# number of output steps per sample.
# NOTE(review): with the 30-min resolution implied by the file name, n_out = 48
# would correspond to one full day - confirm.
n_lag = 144
n_out = 48

# Re-fetch the feature definitions, because `features` was rebound by the
# earlier build_features() call.
features, targets, meteo_features, obs_features = return_dynamic_features(plant, tech)

(
    dt_train_fullday, dt_test_fullday,
    X_train_fullday, X_test_fullday,
    Y_train_fullday, Y_test_fullday,
    features_fullday, targets_fullday,
) = build_features_fullday(
    df,
    obs_features,
    meteo_features,
    targets,
    n_lag = n_lag,
    n_out = n_out,
    horizon = None,
)

# Print the shape of every full-day split on one line.
print(*(
    split.shape
    for split in (
        dt_train_fullday, dt_test_fullday,
        X_train_fullday, X_test_fullday,
        Y_train_fullday, Y_test_fullday,
    )
))

(30235,) (12143,) (30235, 864) (12143, 864) (30235, 48) (12143, 48)


In [14]:
# Keep only the first `n_samples` rows of every full-day split.
# The original names are overwritten, so rerun the feature-building cell to
# get the complete splits back.
n_samples = 1000

dt_train_fullday, dt_test_fullday = dt_train_fullday[:n_samples], dt_test_fullday[:n_samples]
X_train_fullday, X_test_fullday = X_train_fullday[:n_samples, :], X_test_fullday[:n_samples, :]
Y_train_fullday, Y_test_fullday = Y_train_fullday[:n_samples], Y_test_fullday[:n_samples]

In [15]:
# Build the random-forest wrapper on the (truncated) full-day splits and train
# it. Per the logged output below, training ends by saving the model under
# models/RF/<plant>/.
RF_model_fullday = RF(
    dt_train_fullday[:n_samples],
    dt_test_fullday,
    X_train_fullday,
    X_test_fullday,
    Y_train_fullday,
    Y_test_fullday,
    features_fullday,
    targets_fullday,
    static = False,
)
RF_model_fullday.train(plant)

2022-03-21 14:57:50,208 - root - INFO - Starting training of RF model for plant HPP1
2022-03-21 14:57:50,211 - root - INFO - Features: Index(['obs_power_agr(t-99)', 'obs_power_agr(t-98)', 'obs_power_agr(t-97)',
       'obs_power_agr(t-96)', 'obs_power_agr(t-95)', 'obs_power_agr(t-94)',
       'obs_power_agr(t-93)', 'obs_power_agr(t-92)', 'obs_power_agr(t-91)',
       'obs_power_agr(t-90)',
       ...
       'DSWRF(t+17)', 'DSWRF(t+16)', 'DSWRF(t+15)', 'DSWRF(t+14)',
       'DSWRF(t+13)', 'DSWRF(t+12)', 'DSWRF(t+11)', 'DSWRF(t+10)',
       'DSWRF(t+1)', 'DSWRF(t)'],
      dtype='object', length=864)
2022-03-21 14:57:50,213 - root - INFO - Targets: Index(['obs_power_agr(t)', 'obs_power_agr(t+1)', 'obs_power_agr(t+2)',
       'obs_power_agr(t+3)', 'obs_power_agr(t+4)', 'obs_power_agr(t+5)',
       'obs_power_agr(t+6)', 'obs_power_agr(t+7)', 'obs_power_agr(t+8)',
       'obs_power_agr(t+9)', 'obs_power_agr(t+10)', 'obs_power_agr(t+11)',
       'obs_power_agr(t+12)', 'obs_power_agr(t+13)', 

2022-03-21 14:59:52,022 - root - INFO - Model saved to: models/RF/HPP1/RF_HPP1_dynamic_numf864_obs_power_agr(t).sav


In [16]:
### RF
# `static` drives both the model configuration and the name of the saved model
# file, so the two can never disagree.
static = False
static_bool = 'static' if static else 'dynamic'

RF_model = RF(
    dt_train_fullday,
    dt_test_fullday,
    X_train_fullday,
    X_test_fullday,
    Y_train_fullday,
    Y_test_fullday,
    features_fullday,
    targets_fullday,
    static = static,  # was a hard-coded False, independent of the flag above
    fullday = True,
)

# The file name mirrors the path logged when the model was saved during training:
# models/RF/<plant>/RF_<plant>_<static|dynamic>_numf<n features>_<first target>.sav
model_file = f"models/RF/{plant}/RF_{plant}_{static_bool}_numf{len(features_fullday)}_{targets_fullday[0]}.sav"
Y_pred_RF = RF_model.test(filename = model_file)

Model loaded from: models/RF/HPP1/RF_HPP1_dynamic_numf864_obs_power_agr(t).sav
Yessir
Predicting full day sequences for test days:
Day 1 out of 20
Day 2 out of 20
Day 3 out of 20
Day 4 out of 20
Day 5 out of 20
Day 6 out of 20
Day 7 out of 20
Day 8 out of 20
Day 9 out of 20
Day 10 out of 20
Day 11 out of 20
Day 12 out of 20
Day 13 out of 20
Day 14 out of 20
Day 15 out of 20
Day 16 out of 20
Day 17 out of 20
Day 18 out of 20
Day 19 out of 20
Day 20 out of 20


In [18]:
# Check the shape of the predictions returned by RF_model.test().
Y_pred_RF.shape

(1000,)

In [23]:
# Display the test timestamps (previously truncated to the first n_samples entries).
dt_test_fullday

DatetimeIndex(['2019-01-04 00:30:00', '2019-01-04 01:00:00',
               '2019-01-04 01:30:00', '2019-01-04 02:00:00',
               '2019-01-04 02:30:00', '2019-01-04 03:00:00',
               '2019-01-04 03:30:00', '2019-01-04 04:00:00',
               '2019-01-04 04:30:00', '2019-01-04 05:00:00',
               ...
               '2019-03-15 15:30:00', '2019-03-15 16:00:00',
               '2019-03-15 16:30:00', '2019-03-15 17:00:00',
               '2019-03-15 17:30:00', '2019-03-15 18:00:00',
               '2019-03-15 18:30:00', '2019-03-15 19:00:00',
               '2019-03-15 19:30:00', '2019-03-15 20:00:00'],
              dtype='datetime64[ns]', length=1000, freq=None)

In [27]:
# Put the RF predictions next to the observed power at the same test
# timestamps and plot both series (rendered through the plotly backend).
rf_vs_truth = pd.DataFrame(
    {
        'Y_pred_RF(full day)': Y_pred_RF,
        'Y_true': df.loc[dt_test_fullday, 'obs_power_agr'],
    },
    index = dt_test_fullday,
)
rf_vs_truth.plot()