In [None]:
import numpy as np
import matplotlib.pyplot as plt
from config.definitions import ROOT_DIR
from utils import utils_gn, utils_noah, utils_ivc, utils_dgrd, utils_models
import importlib
from xgboost import XGBRegressor
importlib.reload(utils_gn)
importlib.reload(utils_noah)
importlib.reload(utils_ivc)
importlib.reload(utils_models)
importlib.reload(utils_dgrd)

In [None]:
# Read the raw training set "train_1238.pkl" from the project's data folder
train_raw = utils_gn.read_data(path=f"{ROOT_DIR}/data", fname="train_1238.pkl")

In [None]:
# Create sub-sampling time-step codes. Here, for instance, mapping a number
# n to a time step t (in minutes) means that keeping 1/n of the total
# length of the CC voltage vector corresponds to taking a measurement
# at the end of every time interval t. The time steps start at 0.05
# minutes and increase in increments of 0.05, staying below 1 minute.

def create_time_steps(step=0.05, stop=1.0):
    """Build the mapping from sub-sampling codes to time steps.

    The time steps are the positive multiples of ``step`` that are strictly
    smaller than ``stop``. Each value is computed as ``k * step`` and then
    rounded, instead of being accumulated with repeated ``+=``, so binary
    floating-point error does not build up in the values (for example
    ``0.15`` rather than ``0.15000000000000002``).

    Parameters
    ----------
    step : float, default 0.05
        Spacing between consecutive time steps.
    stop : float, default 1.0
        Exclusive upper bound of the time steps.

    Returns
    -------
    dict
        Keys are the integer codes 1, 2, ... (NumPy integers); values are
        the corresponding time steps in ascending order.

    Raises
    ------
    ValueError
        If ``step`` is not strictly positive (the sequence would never
        reach ``stop``).
    """
    if step <= 0:
        raise ValueError("step must be strictly positive")

    time_steps = []
    k = 1
    while True:
        # Multiply-then-round keeps every entry at its clean decimal value.
        t = round(k * step, 10)
        if t >= stop:
            break
        time_steps.append(t)
        k += 1

    return dict(zip(np.arange(len(time_steps)) + 1, time_steps))


In [None]:
# Check the codings: display the code -> time-step mapping returned by
# create_time_steps()
create_time_steps()

In [None]:
# Cross validation with data from voltage sub-sampling
def ccv_sampling_eol_cross_val():
    """Cross-validate an EOL regressor for every voltage sub-sampling code.

    For each code returned by ``create_time_steps``, the features are rebuilt
    from the module-level ``train_raw`` with that code as ``step_size``, an
    ``XGBRegressor`` is scored through ``utils_models.kfold_cross_validation``
    (``cv=3``), and confidence intervals (``alpha=0.05``,
    ``n_bootstraps=10000``) are computed for the raw MAE and RMSE scores.
    The collected results are written to ``ccv_subsample.pkl`` under
    ``{ROOT_DIR}/data`` and also returned.

    Returns
    -------
    tuple
        ``(time_steps, error_metrics, mae_ci, rmse_ci)``: the list of time
        steps followed by three NumPy arrays holding one entry per time step.
    """
    step_lookup = create_time_steps()

    # Settings that stay the same for every sub-sampling code
    xgb_kwargs = {'n_estimators': 100, 'reg_alpha': 0.1, 'max_depth': 2}
    ci_kwargs = {'n_bootstraps': 10000, 'alpha': 0.05}

    scores_per_step = []
    mae_bounds = []
    rmse_bounds = []

    for code, minutes in step_lookup.items():
        # Rebuild the features using the current sub-sampling code
        transformer = utils_gn.FeatureTransformation(n=50, step_size=code)
        features, labels = transformer.fit_transform(
            data=train_raw, targets=['EOL'], with_eol=True
        )

        # Fresh model for this code, scored by k-fold cross-validation
        regressor = XGBRegressor(**xgb_kwargs)
        summary, per_fold = utils_models.kfold_cross_validation(
            X=features, y=labels, model=regressor, cv=3
        )
        scores_per_step.append(list(summary.values()))

        # 95% confidence intervals of the raw MAE and RMSE scores
        mae_bounds.append(
            utils_models.confidence_interval_any(values=per_fold['test_MAE'], **ci_kwargs)
        )
        rmse_bounds.append(
            utils_models.confidence_interval_any(values=per_fold['test_RMSE'], **ci_kwargs)
        )

        print(f'step size: {minutes} done')

    results = (
        list(step_lookup.values()),
        np.array(scores_per_step),
        np.array(mae_bounds),
        np.array(rmse_bounds),
    )

    # Persist the results, then hand the same tuple back to the caller
    utils_gn.dump_data(data=results, path=f"{ROOT_DIR}/data", fname="ccv_subsample.pkl")

    return results

In [None]:
# Run the sub-sampling cross-validation and unpack its four results: the time
# steps, the error metrics, and the MAE / RMSE confidence intervals.
time, error, mae_ci, rmse_ci = ccv_sampling_eol_cross_val()

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(15, 4))
ax[0].plot(time[::2], error[:, 0][::2], label='EOL: MAE', color='blue')
ax[0].fill_between(time[::2], mae_ci[:, 0][::2], mae_ci[:, 1][::2], color='blue', alpha=0.15, label='MAE: 95% CI')

ax[1].plot(time[::2], error[:, 1][::2], label='EOL: RMSE', color='crimson')
ax[1].fill_between(time[::2], rmse_ci[:, 0][::2], rmse_ci[:, 1][::2], color='crimson', alpha=0.15, label='RMSE: 95% CI')

ax[0].legend()
ax[1].legend()

ax[0].set_xlabel("Sub-sampling time steps (mins)", fontsize=16)
ax[0].set_ylabel("Cross-validation errors (cycles)", fontsize=16)
ax[1].set_xlabel("Sub-sampling time steps (mins)", fontsize=16)