In [1]:
import pandas as pd
import seaborn as sns
import openpyxl
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle

In [2]:
from sklearn.model_selection import KFold, train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, RobustScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error
import optuna

In [3]:
from sklearn.ensemble import StackingRegressor, RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
from sklearn.linear_model import ElasticNet, Lasso,  BayesianRidge, LassoLarsIC, RidgeCV, Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.svm import LinearSVR
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from vmdpy import VMD

In [4]:
from Functions.helper_functions import * 

In [5]:
import warnings
# Silence every UserWarning for the rest of the session.
warnings.filterwarnings(action='ignore', category=UserWarning)
# Disable pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None  # default='warn'

# Data import

In [6]:
# Preprocessed training frames, one per dataset (WP1..WP6).
# Paths are relative to the notebook's working directory.
train_wp1 = pd.read_csv('Data/Preprocessing/WP1_train_preprocessed.csv', sep=',')
train_wp2 = pd.read_csv('Data/Preprocessing/WP2_train_preprocessed.csv', sep=',')
train_wp3 = pd.read_csv('Data/Preprocessing/WP3_train_preprocessed.csv', sep=',')
train_wp4 = pd.read_csv('Data/Preprocessing/WP4_train_preprocessed.csv', sep=',')
train_wp5 = pd.read_csv('Data/Preprocessing/WP5_train_preprocessed.csv', sep=',')
train_wp6 = pd.read_csv('Data/Preprocessing/WP6_train_preprocessed.csv', sep=',')

In [7]:
# Preprocessed test frames, one per dataset (WP1..WP6).
test_wp1 = pd.read_csv('Data/Preprocessing/WP1_test_preprocessed.csv', sep=',')
test_wp2 = pd.read_csv('Data/Preprocessing/WP2_test_preprocessed.csv', sep=',')
test_wp3 = pd.read_csv('Data/Preprocessing/WP3_test_preprocessed.csv', sep=',')
test_wp4 = pd.read_csv('Data/Preprocessing/WP4_test_preprocessed.csv', sep=',')
test_wp5 = pd.read_csv('Data/Preprocessing/WP5_test_preprocessed.csv', sep=',')
test_wp6 = pd.read_csv('Data/Preprocessing/WP6_test_preprocessed.csv', sep=',')
# Values of the `date` column of the initial (non-preprocessed) test file.
test_dates = pd.read_csv('Data/Initial/test.csv', sep=',').date.values

In [8]:
# Columns removed from each training frame before the features/target split.
to_drop = ['date','wd','forecast_time', 'forecast', "forecast_dist"]

# Functions

In [9]:
def stacking_scaled_cross_validation(X, y, model):
    """Evaluate ``model`` with a shuffled 10-fold cross-validation.

    For every fold the model is refitted on the training part and scored on
    the held-out part. The per-fold evaluation is printed through
    ``show_evaluation`` and the RMSE / MAE of all folds are summarised with
    ``display_scores``.

    Parameters
    ----------
    X : DataFrame or array-like
        Feature matrix; wrapped in a ``DataFrame`` for positional indexing.
    y : Series or array-like
        Target values, aligned positionally with ``X``.
    model : estimator or None
        Object exposing ``fit`` and ``predict``. Nothing happens when it is
        ``None``.

    Returns
    -------
    None
        All results are printed, nothing is returned.
    """
    # Identity check: `== None` can be hijacked by objects overriding __eq__.
    if model is None:
        return None

    print('-----------STACKING CROSS VALIDATION BEGINNING-----------')
    split = 10
    kf = KFold(n_splits=split, shuffle=True)
    stack_rmse_scores = []
    stack_mae_scores = []

    # Convert once instead of rebuilding the frames in every fold.
    features = pd.DataFrame(X)
    target = pd.DataFrame(y)

    for i, (train_index, test_index) in enumerate(kf.split(features, target), start=1):
        X_train, X_test = features.iloc[train_index], features.iloc[test_index]
        Y_train, Y_test = target.iloc[train_index], target.iloc[test_index]

        # Regressors expect a 1-D target; a single-column frame is flattened
        # to a Series for fitting (multi-column targets are passed unchanged).
        fit_target = Y_train.iloc[:, 0] if Y_train.shape[1] == 1 else Y_train
        model.fit(X_train, fit_target)

        prediction = model.predict(X_test)
        # sqrt(MSE) works on every scikit-learn version, whereas the
        # `squared=False` keyword was removed from recent releases.
        stack_rmse_scores.append(np.sqrt(mean_squared_error(Y_test, prediction)))
        stack_mae_scores.append(mean_absolute_error(Y_test, prediction))

        print(show_evaluation(prediction, Y_test))
        print(f'-------------------FOLD {i}-----------------')

    print('---------------CROSS VALIDATION COMPLETE-------------')
    print('--------------------------RMSE-----------------------')
    display_scores(stack_rmse_scores)
    print('--------------------------MAE------------------------')
    display_scores(stack_mae_scores)

In [10]:
# Stacking ensemble (base learners combined by a meta-learner through StackingRegressor)
def stacking_model(xgb_params_1, xgb_params_2, lgbm_params_1, lgbm_params_2):
    """Build the (unfitted) stacked regressor used for one dataset.

    Seven base learners -- gradient boosting, extra trees, lasso, kernel
    ridge, elastic net, ridge-CV and linear SVR -- feed a ``LinearSVR``
    meta-learner through scikit-learn's ``StackingRegressor``.

    Parameters
    ----------
    xgb_params_1, xgb_params_2, lgbm_params_1, lgbm_params_2 : dict
        Hyper-parameter sets for XGBoost / LightGBM base learners. Those
        learners are currently disabled, so the four arguments are accepted
        but not used.

    Returns
    -------
    StackingRegressor
        The assembled, unfitted ensemble (``verbose=10``, default ``n_jobs``).

    NOTE(review): KernelRidge works on an n_samples x n_samples kernel
    matrix; presumably this is what raised the MemoryError recorded in the
    WP1 cross-validation cell -- confirm before running on the full data.
    """
    def _max_abs_scaled(step_name, regressor):
        # MaxAbsScaler followed by `regressor`, registered under `step_name`.
        return Pipeline([('scaler', MaxAbsScaler()), (step_name, regressor)])

    gradient_boosting = GradientBoostingRegressor(
        n_estimators=100,
        learning_rate=0.05,
        max_depth=7,
        max_features='sqrt',
        min_samples_leaf=15,
        min_samples_split=10,
        loss='huber',
        random_state=5,
    )
    extra_trees = ExtraTreesRegressor(n_estimators=200)
    # The inner step keeps its historical name 'xgb' although it wraps a Lasso.
    lasso = _max_abs_scaled('xgb', Lasso(alpha=0.0005, random_state=1))
    kernel_ridge = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)
    elastic_net = Pipeline([
        ('scaler', RobustScaler()),
        ('Eet', ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3)),
    ])
    ridge_cv = _max_abs_scaled('ridgecv', RidgeCV())
    linear_svr = _max_abs_scaled('svr', LinearSVR())

    base_learners = [
        ('gboost', gradient_boosting),
        ('xtree', extra_trees),
        ('lasso', lasso),
        ('KRR', kernel_ridge),
        ('ENet', elastic_net),
        ('ridgecv', ridge_cv),
        ('linearsvr', linear_svr),
    ]

    return StackingRegressor(
        estimators=base_learners,
        final_estimator=LinearSVR(random_state=42),
        verbose=10,
    )

In [11]:
def vmd(y,k):
    """Split ``y`` into ``k`` modes with Variational Mode Decomposition.

    Parameters
    ----------
    y : array-like
        Signal handed to ``vmdpy.VMD``.
    k : int
        Number of modes to extract.

    Returns
    -------
    DataFrame
        One column per mode, named ``IMFwp1`` .. ``IMFwp<k>``.
    """
    # Fixed VMD settings, passed positionally as
    # (signal, alpha, tau, K, DC, init, tol).
    bandwidth_constraint = 1   # alpha
    noise_tolerance = 0.       # tau: no strict fidelity enforcement
    impose_dc = 0              # no DC part imposed
    omega_init = 1             # initialise the omegas uniformly
    tolerance = 1e-7

    # Only the decomposed modes are used; spectra and centre frequencies are dropped.
    modes, _, _ = VMD(y, bandwidth_constraint, noise_tolerance, k,
                      impose_dc, omega_init, tolerance)

    return pd.DataFrame({
        'IMFwp{0}'.format(position): mode
        for position, mode in enumerate(modes, start=1)
    })

# Data preparation and parameters

## WP1

In [12]:
# WP1: remove the unused columns, move `wp` to the last position, then split
# into features (X1) and target (y1).
wp1_X = train_wp1.drop(columns=to_drop)
wp1_X = wp1_X[[col for col in wp1_X.columns if col != "wp"] + ["wp"]]
X1 = wp1_X.drop(columns="wp")
y1 = wp1_X["wp"]
# Four-mode VMD decomposition of the WP1 target.
vmf_1 = vmd(y1, 4)

In [13]:
# Two LightGBM hyper-parameter sets for WP1 (index 0 and index 1).
params_lgbm_1 =   [{
    'reg_alpha': 0.25020407037516895,
    'reg_lambda': 7.183180037262842,
    'colsample_bytree': 1.0,
    'subsample': 1.0,
    'learning_rate': 0.11751089382716717,
    'max_depth': 84,
    'num_leaves': 596,
    'min_child_samples': 15,
}, {
    'reg_alpha': 0.8314449043001416,
    'reg_lambda': 9.093012403173608,
    'colsample_bytree': 0.9,
    'subsample': 0.4,
    'learning_rate': 0.2033256175102991,
    'max_depth': 55,
    'num_leaves': 964,
    'min_child_samples': 25,
}]

# Two XGBoost hyper-parameter sets for WP1 (index 0 and index 1).
# NOTE(review): every set carries both 'learning_rate' and 'eta'; XGBoost
# documents 'eta' as an alias of 'learning_rate' -- confirm which value is
# meant to apply.
params_xgb_1 = [{
    'lambda': 0.3643806022565838,
    'alpha': 0.003650309466012506,
    'colsample_bytree': 0.9640425007241273,
    'subsample': 0.8,
    'learning_rate': 0.052762727588106954,
    'n_estimators': 700,
    'max_depth': 54,
    'min_child_weight': 96,
    'eta': 3.119364108002744e-05,
    'gamma': 5.177778739056542e-05,
    'grow_policy': 'lossguide',
}, {
    'lambda': 2.1359622347936646,
    'alpha': 0.016202766042783825,
    'colsample_bytree': 0.8075360516891219,
    'subsample': 0.8,
    'learning_rate': 0.06792370224097045,
    'n_estimators': 320,
    'max_depth': 58,
    'min_child_weight': 102,
    'eta': 6.934521001624072e-05,
    'gamma': 4.369012735807193e-06,
    'grow_policy': 'lossguide',
}]

In [14]:
# WP1 stacked regressor, built from its two XGBoost and two LightGBM parameter sets.
model_1 = stacking_model(
    xgb_params_1=params_xgb_1[0],
    xgb_params_2=params_xgb_1[1],
    lgbm_params_1=params_lgbm_1[0],
    lgbm_params_2=params_lgbm_1[1],
)

In [15]:
# Cross-validate the WP1 stack against the first VMD mode of the target.
# NOTE(review): the recorded run of this cell stopped with a MemoryError
# (16.6 GiB for a 47174 x 47174 float64 array) -- presumably the kernel
# matrix of the KernelRidge base learner; confirm before re-running.
stacking_scaled_cross_validation(X1, vmf_1['IMFwp1'], model_1)

-----------STACKING CROSS VALIDATION BEGINNING-----------


MemoryError: Unable to allocate 16.6 GiB for an array with shape (47174, 47174) and data type float64

## WP2

In [14]:
# WP2: remove the unused columns, move `wp` to the last position, then split
# into features (X2) and target (y2).
wp2_X = train_wp2.drop(columns=to_drop)
wp2_X = wp2_X[[col for col in wp2_X.columns if col != "wp"] + ["wp"]]
X2 = wp2_X.drop(columns="wp")
y2 = wp2_X["wp"]

In [15]:
# Two LightGBM hyper-parameter sets for WP2 (index 0 and index 1).
params_lgbm_2 =   [{
    'reg_alpha': 0.18268883436586145,
    'reg_lambda': 0.15916821051528962,
    'colsample_bytree': 1.0,
    'subsample': 0.6,
    'learning_rate': 0.18007000714755378,
    'max_depth': 77,
    'num_leaves': 425,
    'min_child_samples': 10,
    'min_data_per_groups': 19,  # NOTE(review): LightGBM documents 'min_data_per_group' (no trailing "s") -- confirm the key
}, {
    'reg_alpha': 0.34026994469471555,
    'reg_lambda': 1.1032197453137866,
    'colsample_bytree': 0.9,
    'subsample': 0.6,
    'learning_rate': 0.13414826176962302,
    'max_depth': 81,
    'num_leaves': 987,
    'min_child_samples': 39,
}]

# Two XGBoost hyper-parameter sets for WP2 (index 0 and index 1).
# NOTE(review): every set carries both 'learning_rate' and 'eta'; XGBoost
# documents 'eta' as an alias of 'learning_rate' -- confirm which value is
# meant to apply.
params_xgb_2 = [{
    'lambda': 0.005195058020286749,
    'alpha': 0.15427340616771562,
    'colsample_bytree': 0.4794118698886291,
    'subsample': 0.7,
    'learning_rate': 0.13969003989794868,
    'n_estimators': 583,
    'max_depth': 20,
    'min_child_weight': 81,
    'eta': 0.0006994052800675432,
    'gamma': 4.0927842177131904e-08,
    'grow_policy': 'depthwise',
}, {
    'lambda': 4.982427302967441,
    'alpha': 0.023879453147379343,
    'colsample_bytree': 0.29850970311481473,
    'subsample': 0.7,
    'learning_rate': 0.07986759823219342,
    'n_estimators': 634,
    'max_depth': 52,
    'min_child_weight': 142,
    'eta': 0.9698508070965183,
    'gamma': 6.168834828494383e-06,
    'grow_policy': 'depthwise',
}]

In [16]:
# WP2 stacked regressor, built from its two XGBoost and two LightGBM parameter sets.
model_2 = stacking_model(
    xgb_params_1=params_xgb_2[0],
    xgb_params_2=params_xgb_2[1],
    lgbm_params_1=params_lgbm_2[0],
    lgbm_params_2=params_lgbm_2[1],
)

## WP3

In [17]:
# WP3: remove the unused columns, move `wp` to the last position, then split
# into features (X3) and target (y3).
wp3_X = train_wp3.drop(columns=to_drop)
wp3_X = wp3_X[[col for col in wp3_X.columns if col != "wp"] + ["wp"]]
X3 = wp3_X.drop(columns="wp")
y3 = wp3_X["wp"]

In [18]:
# Two LightGBM hyper-parameter sets for WP3 (index 0 and index 1).
# The first set has no 'min_child_samples' entry, unlike the second.
params_lgbm_3 = [{
    'reg_alpha': 0.2380367567801365,
    'reg_lambda': 0.005052844767806766,
    'colsample_bytree': 0.9,
    'subsample': 0.5,
    'learning_rate': 0.11958787026894079,
    'max_depth': 41,
    'num_leaves': 690,
}, {
    'reg_alpha': 0.26013926149282945,
    'reg_lambda': 0.002325658512162904,
    'colsample_bytree': 1.0,
    'subsample': 0.7,
    'learning_rate': 0.10619054458258967,
    'max_depth': 83,
    'num_leaves': 647,
    'min_child_samples': 3,
}]   
    
# Two XGBoost hyper-parameter sets for WP3 (index 0 and index 1).
# NOTE(review): every set carries both 'learning_rate' and 'eta'; XGBoost
# documents 'eta' as an alias of 'learning_rate' -- confirm which value is
# meant to apply.
params_xgb_3 = [{
    'lambda': 0.018191871915246106,
    'alpha': 0.2397827070234125,
    'colsample_bytree': 0.4710946041352672,
    'subsample': 0.8,
    'learning_rate': 0.14812785561924302,
    'n_estimators': 688,
    'max_depth': 32,
    'min_child_weight': 218,
    'eta': 6.950960910550952e-08,
    'gamma': 2.0149702062428016e-07,
    'grow_policy': 'lossguide',
}, {
    'lambda': 0.018191871915246106,
    'alpha': 0.2397827070234125,
    'colsample_bytree': 0.4710946041352672,
    'subsample': 0.9,
    'learning_rate': 0.11812785561924302,
    'n_estimators': 400,
    'max_depth': 28,
    'min_child_weight': 220,
    'eta': 6.950960910550952e-08,
    'gamma': 2.0149702062428016e-07,
    'grow_policy': 'lossguide',
}]

In [19]:
# WP3 stacked regressor, built from its two XGBoost and two LightGBM parameter sets.
model_3 = stacking_model(
    xgb_params_1=params_xgb_3[0],
    xgb_params_2=params_xgb_3[1],
    lgbm_params_1=params_lgbm_3[0],
    lgbm_params_2=params_lgbm_3[1],
)

## WP4

In [20]:
# WP4: remove the unused columns, move `wp` to the last position, then split
# into features (X4) and target (y4).
wp4_X = train_wp4.drop(columns=to_drop)
wp4_X = wp4_X[[col for col in wp4_X.columns if col != "wp"] + ["wp"]]
X4 = wp4_X.drop(columns="wp")
y4 = wp4_X["wp"]

In [21]:
# Two LightGBM hyper-parameter sets for WP4 (index 0 and index 1).
params_lgbm_4 = [{
    'reg_alpha': 0.08714703614419553,
    'reg_lambda': 9.983645262139024,
    'colsample_bytree': 0.9,
    'subsample': 0.8,
    'learning_rate': 0.13413154768816146,
    'max_depth': 41,
    'num_leaves': 613,
    'min_child_samples': 15,
}, {
    'reg_alpha': 0.15331128149569725,
    'reg_lambda': 0.28560184971009756,
    'colsample_bytree': 0.7,
    'subsample': 0.5,
    'learning_rate': 0.11430869527789024,
    'max_depth': 24,
    'num_leaves': 856,
    'min_child_samples': 14,
}]

# Two XGBoost hyper-parameter sets for WP4 (index 0 and index 1).
# NOTE(review): every set carries both 'learning_rate' and 'eta'; XGBoost
# documents 'eta' as an alias of 'learning_rate' -- confirm which value is
# meant to apply.
params_xgb_4 = [{
    'lambda': 0.13763482520556616,
    'alpha': 0.0010077676339636944,
    'colsample_bytree': 0.954734556572597,
    'subsample': 0.8,
    'learning_rate': 0.05499114408834853,
    'n_estimators': 546,
    'max_depth': 43,
    'min_child_weight': 94,
    'eta': 1.2784286267654713e-06,
    'gamma': 1.6935174502873177e-05,
    'grow_policy': 'depthwise',
}, {
    'lambda': 0.001340947773207149,
    'alpha': 0.002479638085657274,
    'colsample_bytree': 0.3030181981060389,
    'subsample': 0.7,
    'learning_rate': 0.07696248319007938,
    'n_estimators': 367,
    'max_depth': 31,
    'min_child_weight': 72,
    'eta': 3.704957186572025e-08,
    'gamma': 8.44315434172209e-05,
    'grow_policy': 'depthwise',
}]

In [22]:
# WP4 stacked regressor, built from its two XGBoost and two LightGBM parameter sets.
model_4 = stacking_model(
    xgb_params_1=params_xgb_4[0],
    xgb_params_2=params_xgb_4[1],
    lgbm_params_1=params_lgbm_4[0],
    lgbm_params_2=params_lgbm_4[1],
)

## WP5

In [23]:
# WP5: remove the unused columns, move `wp` to the last position, then split
# into features (X5) and target (y5).
wp5_X = train_wp5.drop(columns=to_drop)
wp5_X = wp5_X[[col for col in wp5_X.columns if col != "wp"] + ["wp"]]
X5 = wp5_X.drop(columns="wp")
y5 = wp5_X["wp"]

In [24]:
# Two LightGBM hyper-parameter sets for WP5 (index 0 and index 1).
params_lgbm_5 = [{
    'reg_alpha': 0.1420112281892889,
    'reg_lambda': 0.14745955581286027,
    'colsample_bytree': 0.9,
    'subsample': 0.7,
    'learning_rate': 0.15576554024588912,
    'max_depth': 61,
    'num_leaves': 483,
    'min_child_samples': 10,
}, {
    'reg_alpha': 0.04781362061382749,
    'reg_lambda': 9.716980953182604,
    'colsample_bytree': 0.9,
    'subsample': 0.7,
    'learning_rate': 0.14614317149730652,
    'max_depth': 57,
    'num_leaves': 532,
    'min_child_samples': 7,
}]


# Two XGBoost hyper-parameter sets for WP5 (index 0 and index 1).
# NOTE(review): every set carries both 'learning_rate' and 'eta'; XGBoost
# documents 'eta' as an alias of 'learning_rate' -- confirm which value is
# meant to apply.
params_xgb_5 = [{
    'lambda': 4.7653031074423104,
    'alpha': 0.004963619239675007,
    'colsample_bytree': 0.8616303151950829,
    'subsample': 0.8,
    'learning_rate': 0.167247240657064,
    'n_estimators': 509,
    'max_depth': 31,
    'min_child_weight': 73,
    'eta': 0.1392993925005545,
    'gamma': 1.4909263616645174e-07,
    'grow_policy': 'depthwise',
}, {
    'lambda': 4.537995153532639,
    'alpha': 0.15887083612902936,
    'colsample_bytree': 0.35129085402309673,
    'subsample': 0.8,
    'learning_rate': 0.20146110291550628,
    'n_estimators': 354,
    'max_depth': 27,
    'min_child_weight': 91,
    'eta': 0.1963402390178624,
    'gamma': 4.730295821405375e-07,
    'grow_policy': 'lossguide',
}]

In [25]:
# WP5 stacked regressor, built from its two XGBoost and two LightGBM parameter sets.
model_5 = stacking_model(
    xgb_params_1=params_xgb_5[0],
    xgb_params_2=params_xgb_5[1],
    lgbm_params_1=params_lgbm_5[0],
    lgbm_params_2=params_lgbm_5[1],
)

## WP6

In [26]:
# WP6: remove the unused columns, move `wp` to the last position, then split
# into features (X6) and target (y6).
wp6_X = train_wp6.drop(columns=to_drop)
wp6_X = wp6_X[[col for col in wp6_X.columns if col != "wp"] + ["wp"]]
X6 = wp6_X.drop(columns="wp")
y6 = wp6_X["wp"]

In [27]:
# Two LightGBM hyper-parameter sets for WP6 (index 0 and index 1).
params_lgbm_6 = [{
    'reg_alpha': 0.19099691249064502,
    'reg_lambda': 0.3893771552082417,
    'colsample_bytree': 0.6,
    'subsample': 0.7,
    'learning_rate': 0.10214699989265669,
    'max_depth': 70,
    'num_leaves': 903,
    'min_child_samples': 1,
}, {
    'reg_alpha': 0.23451110075396234,
    'reg_lambda': 0.796705483623135,
    'colsample_bytree': 0.9,
    'subsample': 0.4,
    'learning_rate': 0.1561492653707781,
    'max_depth': 67,
    'num_leaves': 998,
    'min_child_samples': 45,
}]

# Two XGBoost hyper-parameter sets for WP6 (index 0 and index 1).
# NOTE(review): every set carries both 'learning_rate' and 'eta'; XGBoost
# documents 'eta' as an alias of 'learning_rate' -- confirm which value is
# meant to apply.
params_xgb_6 = [{
    'lambda': 6.198890709955999,
    'alpha': 0.009212761583335095,
    'colsample_bytree': 0.9364947872025757,
    'subsample': 0.6,
    'learning_rate': 0.0377294321765545,
    'n_estimators': 458,
    'max_depth': 50,
    'min_child_weight': 28,
    'eta': 1.0671149195024988e-08,
    'gamma': 1.4697758952551594e-05,
    'grow_policy': 'depthwise',
}, {
    'lambda': 0.5705269295320163,
    'alpha': 0.06713843687958011,
    'colsample_bytree': 0.8718486759988152,
    'subsample': 0.8,
    'learning_rate': 0.07668854905667996,
    'n_estimators': 582,
    'max_depth': 49,
    'min_child_weight': 143,
    'eta': 9.055710235537663e-07,
    'gamma': 1.111486195598291e-06,
    'grow_policy': 'depthwise',
}]

In [28]:
# WP6 stacked regressor, built from its two XGBoost and two LightGBM parameter sets.
model_6 = stacking_model(
    xgb_params_1=params_xgb_6[0],
    xgb_params_2=params_xgb_6[1],
    lgbm_params_1=params_lgbm_6[0],
    lgbm_params_2=params_lgbm_6[1],
)

# Super learner ensemble

In [29]:
# Stacking of multiple models
class StackingAveragedModels(BaseEstimator, RegressorMixin, TransformerMixin):
    """Super-learner style stacking of several base models.

    Every base model is cloned and fitted once per fold; its out-of-fold
    predictions form one column of the meta-model's training matrix. At
    prediction time the fold clones of each base model are averaged and the
    averages are fed to the fitted meta-model.

    Parameters
    ----------
    base_models : sequence of estimators
        Unfitted regressors; they are cloned, never fitted in place.
    meta_model : estimator
        Regressor trained on the out-of-fold predictions (cloned as well).
    n_folds : int, default=6
        Number of shuffled K-fold splits (fixed ``random_state=156``).

    Attributes set by ``fit``
    -------------------------
    base_models_ : list of lists
        ``base_models_[i]`` holds the fitted fold clones of ``base_models[i]``.
    meta_model_ : estimator
        Fitted clone of ``meta_model``.
    """

    def __init__(self, base_models, meta_model, n_folds=6):
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_folds = n_folds

    @staticmethod
    def _positional(data):
        """Return pandas objects as numpy arrays; leave other inputs untouched."""
        return data.to_numpy() if hasattr(data, "to_numpy") else data

    def fit(self, X, y):
        """Fit fold clones of every base model, then the meta-model.

        ``X`` and ``y`` may be numpy arrays or pandas objects; pandas inputs
        are converted because the folds are selected with positional integer
        indexing (``X[train_index]``), which would index labels on a
        DataFrame/Series.

        Returns
        -------
        self
        """
        X = self._positional(X)
        y = self._positional(y)

        self.base_models_ = [list() for _ in self.base_models]
        self.meta_model_ = clone(self.meta_model)
        kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=156)

        # One column of out-of-fold predictions per base model; these are the
        # features the meta-model is trained on.
        out_of_fold_predictions = np.zeros((X.shape[0], len(self.base_models)))
        for i, model in enumerate(self.base_models):
            print(f"-----Model {i}----")
            for train_index, holdout_index in kfold.split(X, y):
                instance = clone(model)
                self.base_models_[i].append(instance)
                instance.fit(X[train_index], y[train_index])
                # Flatten so a column-shaped prediction still fits the slot.
                y_pred = np.ravel(instance.predict(X[holdout_index]))
                out_of_fold_predictions[holdout_index, i] = y_pred

        self.meta_model_.fit(out_of_fold_predictions, y)
        return self

    def predict(self, X):
        """Predict with the meta-model on the averaged base-model predictions.

        For every base model the predictions of its fold clones are averaged;
        the resulting columns are the meta-features.
        """
        # Same conversion as in `fit`, so the clones see the input type they
        # were trained on.
        X = self._positional(X)
        meta_features = np.column_stack([
            np.column_stack([np.ravel(model.predict(X))
                             for model in fold_models]).mean(axis=1)
            for fold_models in self.base_models_])
        return self.meta_model_.predict(meta_features)

In [None]:
# Stacking of one model
class StackingSL_1Model(BaseEstimator, RegressorMixin, TransformerMixin):
    """Super-learner style stacking of a single base model.

    The base model is cloned and fitted once per fold; its out-of-fold
    predictions are the single feature the meta-model is trained on. At
    prediction time the fold clones are averaged and the average is passed to
    the fitted meta-model.

    Parameters
    ----------
    base_model : estimator
        Unfitted regressor; cloned for every fold.
    meta_model : estimator
        Regressor trained on the out-of-fold predictions (cloned as well).
    n_folds : int, default=6
        Number of shuffled K-fold splits (fixed ``random_state=156``).

    Attributes set by ``fit``
    -------------------------
    base_models_ : list
        Fitted fold clones of ``base_model``.
    meta_model_ : estimator
        Fitted clone of ``meta_model``.
    """

    def __init__(self, base_model, meta_model, n_folds=6):
        self.base_model = base_model
        self.meta_model = meta_model
        self.n_folds = n_folds

    def fit(self, X, y):
        """Fit one clone of the base model per fold, then the meta-model.

        pandas inputs are converted to numpy arrays because the folds are
        selected with positional integer indexing.

        Returns
        -------
        self
        """
        X = X.to_numpy() if hasattr(X, "to_numpy") else X
        y = y.to_numpy() if hasattr(y, "to_numpy") else y

        self.base_models_ = []
        self.meta_model_ = clone(self.meta_model)
        kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=156)

        # Single base model -> a single column of out-of-fold predictions.
        out_of_fold_predictions = np.zeros((X.shape[0], 1))

        for train_index, holdout_index in kfold.split(X, y):
            instance = clone(self.base_model)
            instance.fit(X[train_index], y[train_index])
            y_pred = np.ravel(instance.predict(X[holdout_index]))
            out_of_fold_predictions[holdout_index, 0] = y_pred
            self.base_models_.append(instance)

        self.meta_model_.fit(out_of_fold_predictions, y)
        return self

    def predict(self, X):
        """Average the fold clones' predictions and pass them to the meta-model."""
        X = X.to_numpy() if hasattr(X, "to_numpy") else X
        fold_predictions = np.column_stack(
            [np.ravel(model.predict(X)) for model in self.base_models_])
        # Keep the (n_samples, 1) layout the meta-model was trained on.
        meta_features = fold_predictions.mean(axis=1).reshape(-1, 1)
        return self.meta_model_.predict(meta_features)

## Used model

In [30]:
# # xtratree = ExtraTreesRegressor(n_estimators=100)
# # ridge = make_pipeline(RobustScaler(), RidgeCV())
# # lasso = make_pipeline(RobustScaler(), Lasso(alpha =0.0005, random_state=1))
# # SVR = make_pipeline(RobustScaler(), LinearSVR())
# # KRR = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)
# # ENet = make_pipeline(RobustScaler(), ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3))
# # GBoost = GradientBoostingRegressor(n_estimators=3000, learning_rate=0.05,
# #                                    max_depth=4, max_features='sqrt',
# #                                    min_samples_leaf=15, min_samples_split=10, 
# #                                    loss='huber', random_state =5)

# xtratrees = ExtraTreesRegressor(n_estimators=100) 
# ridgecv = Pipeline([('scaler', MaxAbsScaler()),('ridgecv', RidgeCV())])
# linearsvr = Pipeline([('scaler', MaxAbsScaler()),('svr', LinearSVR())])

In [31]:
# def rmsle_cv(model, X, y):
#     kf = KFold(2, shuffle=True, random_state=42).get_n_splits(X)
#     rmse= np.sqrt(-cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv = kf))
#     return(rmse)

In [32]:
# lgbm_a_1 = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**params_lgbm_1))])
# lgbm_b_1 = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**params_lgbm_1_bis))])
# xgb_a_1 = Pipeline([('scaler', MaxAbsScaler()),('xgb', XGBRegressor(**params_xgb_1))])
# xgb_b_1 =Pipeline([('scaler', MaxAbsScaler()),('xgb', XGBRegressor(**params_xgb_1_bis))])

# lgbm_a_2 = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**params_lgbm_2))])
# lgbm_b_2 = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**params_lgbm_2_bis))])
# xgb_a_2 = Pipeline([('scaler', MaxAbsScaler()),('xgb', XGBRegressor(**params_xgb_2))])
# xgb_b_2 =Pipeline([('scaler', MaxAbsScaler()),('xgb', XGBRegressor(**params_xgb_2_bis))])

# lgbm_a_3 = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**params_lgbm_3))])
# lgbm_b_3 = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**params_lgbm_3_bis))])
# xgb_a_3 = Pipeline([('scaler', MaxAbsScaler()),('xgb', XGBRegressor(**params_xgb_3))])
# xgb_b_3 =Pipeline([('scaler', MaxAbsScaler()),('xgb', XGBRegressor(**params_xgb_3_bis))])

# lgbm_a_4 = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**params_lgbm_4))])
# lgbm_b_4 = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**params_lgbm_4_bis))])
# xgb_a_4 = Pipeline([('scaler', MaxAbsScaler()),('xgb', XGBRegressor(**params_xgb_4))])
# xgb_b_4 =Pipeline([('scaler', MaxAbsScaler()),('xgb', XGBRegressor(**params_xgb_4_bis))])

# lgbm_a_5 = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**params_lgbm_5))])
# lgbm_b_5 = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**params_lgbm_5_bis))])
# xgb_a_5 = Pipeline([('scaler', MaxAbsScaler()),('xgb', XGBRegressor(**params_xgb_5))])
# xgb_b_5 =Pipeline([('scaler', MaxAbsScaler()),('xgb', XGBRegressor(**params_xgb_5_bis))])

# lgbm_a_6 = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**params_lgbm_6))])
# lgbm_b_6 = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**params_lgbm_6_bis))])
# xgb_a_6 = Pipeline([('scaler', MaxAbsScaler()),('xgb', XGBRegressor(**params_xgb_6))])
# xgb_b_6 =Pipeline([('scaler', MaxAbsScaler()),('xgb', XGBRegressor(**params_xgb_6_bis))])

In [33]:
# NOTE(review): `lgbm_a_1` is only defined in a commented-out line of an earlier
# cell, so on a fresh kernel the statement below raises NameError. When it does
# run it rebinds `model_1`, replacing the StackingRegressor built for WP1 earlier
# in the notebook. Either restore the `lgbm_a_1` pipeline definition or remove
# this cell -- confirm which model is intended for WP1.
model_1 = StackingAveragedModels(base_models = (lgbm_a_1, lgbm_a_1, lgbm_a_1, lgbm_a_1, lgbm_a_1), meta_model = RidgeCV(), n_folds=10)
# model_2 = StackingAveragedModels(base_models = (lgbm_a_2, lgbm_b_2, xgb_a_2, xgb_b_2, xtratrees, ridgecv, linearsvr), meta_model = RidgeCV(), n_folds=5)
# model_3 = StackingAveragedModels(base_models = (lgbm_a_3, lgbm_b_3, xgb_a_3, xgb_b_3, xtratrees, ridgecv, linearsvr), meta_model = RidgeCV(), n_folds=5)
# model_4 = StackingAveragedModels(base_models = (lgbm_a_4, lgbm_b_4, xgb_a_4, xgb_b_4, xtratrees, ridgecv, linearsvr), meta_model = RidgeCV(), n_folds=5)
# model_5 = StackingAveragedModels(base_models = (lgbm_a_5, lgbm_b_5, xgb_a_5, xgb_b_5, xtratrees, ridgecv, linearsvr), meta_model = RidgeCV(), n_folds=5)
# model_6 = StackingAveragedModels(base_models = (lgbm_a_6, lgbm_b_6, xgb_a_6, xgb_b_6, xtratrees, ridgecv, linearsvr), meta_model = RidgeCV(), n_folds=5)

# Predictions 

In [34]:
# Columns stripped from a test frame before prediction (includes the target `wp`).
to_drop_test = ['date','wd','forecast_time', 'forecast', "forecast_dist", 'wp']
def make_prediction_dataset(test, to_drop=to_drop_test):
    """Extract from a test frame the rows whose `wp` has to be predicted.

    Steps:
      1. keep rows where `ws`, `u` and `v` are all present;
      2. keep rows whose `wp` is missing;
      3. per `date`, keep a single row -- the one with the largest
         `forecast_time` (rows are sorted by date ascending and
         forecast_time descending, and the first row of each date is kept);
      4. drop the columns listed in `to_drop`.

    Parameters
    ----------
    test : DataFrame
        Test frame containing at least `date`, `forecast_time`, `ws`, `u`,
        `v`, `wp` and the columns of `to_drop`.
    to_drop : list of str
        Columns removed from the result.

    Returns
    -------
    DataFrame
        The filtered frame; the input is not modified.
    """
    with_wind = test.dropna(subset=['ws','u','v'], how='any')
    unlabelled = with_wind.loc[with_wind['wp'].isna()]
    ordered = unlabelled.sort_values(
        by=['date', 'forecast_time'],
        ascending=[True, False],
    )
    one_row_per_date = ordered.drop_duplicates(subset='date')
    return one_row_per_date.drop(columns=to_drop)

In [35]:
def make_submission_file(lst_X_trains, lst_y_trains, lst_tests, lst_models, dates):
    """Fit one model per dataset and assemble the submission frame.

    For each (X, y, test, model) quadruple the model is fitted on the full
    training data, the test rows are predicted and the predictions are
    clipped to the range of the training target. Summary statistics are
    printed before and after clipping.

    Parameters
    ----------
    lst_X_trains, lst_y_trains, lst_tests : sequences of pandas objects
        Training features, training targets and test features per dataset;
        each is converted with ``.to_numpy()`` before use.
    lst_models : sequence of estimators
        Objects exposing ``fit`` / ``predict``; they are fitted in place.
    dates : array-like
        Values of the ``date`` column; must have one entry per predicted row.

    Returns
    -------
    (DataFrame, list)
        The submission frame with columns ``date``, ``wp1`` .. ``wp<n>`` (one
        per dataset, in input order) and the list of fitted models.
    """
    lst_prediction = []
    lst_models_trained = []
    datasets = zip(lst_X_trains, lst_y_trains, lst_tests, lst_models)
    for number, (X, y, test, model) in enumerate(datasets, start=1):
        print(f'--------------Dataset {number}--------------')
        X = X.to_numpy()
        y = y.to_numpy()
        test = test.to_numpy()
        model.fit(X, y)

        # Compute the bounds once; the original re-evaluated max(y) for every
        # single prediction.
        y_min, y_max = y.min(), y.max()
        print(f'True:\n\tMin:{y_min}\n\tMax:{y_max}\n\tMean:{y.mean()}')

        predictions = np.ravel(model.predict(test))
        print(f'Prediction:\n\tMin:{predictions.min()}\n\tMax:{predictions.max()}\n\tMean:{np.mean(predictions)}')

        # Clip to the observed target range. The lower bound now applies to
        # every value below min(y), not only to negative ones.
        predictions = np.clip(predictions, y_min, y_max)
        print(f'Prediction corrected:\n\tMin:{predictions.min()}\n\tMax:{predictions.max()}\n\tMean:{np.mean(predictions)}')

        lst_prediction.append(predictions)
        lst_models_trained.append(model)

    # Use the `dates` argument (the original read the global `test_dates`)
    # and create one `wp<k>` column per dataset instead of exactly six.
    submission = {'date': dates}
    for number, predictions in enumerate(lst_prediction, start=1):
        submission[f'wp{number}'] = predictions
    df_predictions = pd.DataFrame(submission)
    return df_predictions, lst_models_trained

In [36]:
# Per-dataset inputs for the submission run, all ordered WP1..WP6.
lst_models = [model_1, model_2, model_3, model_4, model_5, model_6]
lst_X_trains = [X1, X2, X3, X4, X5, X6]
lst_y_trains = [y1, y2, y3, y4, y5, y6]

In [37]:
# Reduce every raw test frame to its prediction dataset, keeping the WP1..WP6 order.
lst_tests = []
for test in [test_wp1, test_wp2, test_wp3, test_wp4, test_wp5, test_wp6]:
    test = make_prediction_dataset(test)
    lst_tests.append(test)

## Submission

In [38]:
# Fit every model on its training set and predict the matching test frame;
# yields the submission frame and the list of fitted models.
df_predictions, lst_models_trained = make_submission_file(lst_X_trains, lst_y_trains, lst_tests, lst_models, test_dates)

--------------Dataset 1--------------


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed: 17.4min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 34.1min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 50.6min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 67.1min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 83.5min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 83.5min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  6.8min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 13.9min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 20.8min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 27.7min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elap

True:
	Min:0.0
	Max:0.96
	Mean:0.2845981952075702
Prediction:
	Min:-0.030387654738908663
	Max:1.0710022895516644
	Mean:0.2978373449945766
Prediction corrected:
	Min:0.0
	Max:0.96
	Mean:0.2984618040749219
--------------Dataset 2--------------


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed: 16.3min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 32.5min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 48.7min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 65.0min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 81.2min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 81.2min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  7.2min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 14.5min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 21.8min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 29.0min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elap

True:
	Min:0.0
	Max:0.966
	Mean:0.25890153769841273
Prediction:
	Min:-0.051401744488862804
	Max:0.977216853697399
	Mean:0.2415722699522402
Prediction corrected:
	Min:0.0
	Max:0.966
	Mean:0.24212177774338323
--------------Dataset 3--------------


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed: 16.2min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 32.4min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 48.6min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 64.7min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 80.8min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 80.8min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  7.0min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 14.1min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 21.1min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 28.1min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elap

True:
	Min:0.0
	Max:0.989
	Mean:0.2625247252747253
Prediction:
	Min:-0.05402813628325692
	Max:1.1609366763403686
	Mean:0.2827977314362653
Prediction corrected:
	Min:0.0
	Max:0.989
	Mean:0.2834158511642645
--------------Dataset 4--------------


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed: 16.3min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 32.6min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 48.9min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 65.2min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 81.5min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 81.5min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  7.0min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 14.4min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 21.8min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 28.9min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elap

True:
	Min:0.0
	Max:0.992
	Mean:0.2763637820512821
Prediction:
	Min:-0.05196724211457392
	Max:1.0412599681581043
	Mean:0.2834744861676652
Prediction corrected:
	Min:0.0
	Max:0.992
	Mean:0.2851371106669493
--------------Dataset 5--------------


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed: 16.4min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 32.8min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 50.0min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 67.0min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 83.9min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 83.9min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  7.1min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 14.1min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 21.2min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 28.1min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elap

True:
	Min:0.0
	Max:0.978
	Mean:0.32622119200244204
Prediction:
	Min:-0.058406840100628785
	Max:1.0818159999649244
	Mean:0.33969518396240067
Prediction corrected:
	Min:0.0
	Max:0.978
	Mean:0.3393782859311205
--------------Dataset 6--------------


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed: 16.6min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 33.1min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 49.5min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 71.9min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 94.6min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 94.6min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed: 10.2min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 20.6min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 30.8min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 40.9min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elap

True:
	Min:0.0
	Max:0.947
	Mean:0.2425176472832723
Prediction:
	Min:-0.05182612954566682
	Max:1.0326131930789462
	Mean:0.23148321664565236
Prediction corrected:
	Min:0.0
	Max:0.947
	Mean:0.2322190249677836


In [39]:
# Submission metadata. nb_sub, model, lvl0 and lvl1 are interpolated into the
# output file names built by the cells that follow.
nb_sub = 31
model = "blending_stacking"

# Tags naming the level-0 and level-1 estimators of the stack.
lvl0 = "0-GBoost-RidgeCV-LinearSVR-ExtraTrees-KRR-ENet"
lvl1 = "1-LinearSVR"

# Free-text notes on pre-/post-processing.
# NOTE(review): in this part of the notebook these two are only referenced by
# disabled (commented-out) f.write calls.
postpro = "Prediction limited by X_train min&max"
prepro = "MaxAbs for all"

In [40]:
# Export the predictions as a semicolon-separated CSV, without the index column.
submission_csv = f'Predictions/submission_nb_{nb_sub}_{model}_{lvl0}_{lvl1}.csv'
df_predictions.to_csv(submission_csv, sep=';', index=False)

In [43]:
# Write a plain-text description of this submission (level-0 estimators,
# level-1 estimator, model name).
#
# Mode "x" is exclusive creation: open() raises FileExistsError if the notes
# file already exists, so re-running this cell never overwrites earlier notes.
# The `with` block guarantees the handle is closed even if a write fails.
with open(f"Predictions/submission-{nb_sub}_{model}_{lvl0}_{lvl1}.txt", "x") as f:
    # NOTE(review): the disabled writes below reference params_lgbm_* / params_xgb_*
    # variables that are not defined in this part of the notebook; kept as-is.
    # f.write(f"params_lgbm_1 = {str(params_lgbm_1)}\nparams_xgb_1 = {str(params_xgb_1)}\n")
    # f.write(f"params_lgbm_1_bis = {str(params_lgbm_1_bis)}\nparams_xgb_1_bis = {str(params_xgb_1_bis)}\n\n")
    # f.write(f"params_lgbm_2 = {str(params_lgbm_2)}\nparams_xgb_2 = {str(params_xgb_2)}\n")
    # f.write(f"params_lgbm_2_bis = {str(params_lgbm_2_bis)}\nparams_xgb_2_bis = {str(params_xgb_2_bis)}\n\n")
    # f.write(f"params_lgbm_3 = {str(params_lgbm_3)}\nparams_xgb_3 = {str(params_xgb_3)}\n")
    # f.write(f"params_lgbm_3_bis = {str(params_lgbm_3_bis)}\nparams_xgb_3_bis = {str(params_xgb_3_bis)}\n\n")
    # f.write(f"params_lgbm_4 = {str(params_lgbm_4)}\nparams_xgb_4 = {str(params_xgb_4)}\n")
    # f.write(f"params_lgbm_4_bis = {str(params_lgbm_4_bis)}\nparams_xgb_4_bis = {str(params_xgb_4_bis)}\n\n")
    # f.write(f"params_lgbm_5 = {str(params_lgbm_5)}\nparams_xgb_5 = {str(params_xgb_5)}\n")
    # f.write(f"params_lgbm_5_bis = {str(params_lgbm_5_bis)}\nparams_xgb_5_bis = {str(params_xgb_5_bis)}\n\n")
    # f.write(f"params_lgbm_6 = {str(params_lgbm_6)}\nparams_xgb_6 = {str(params_xgb_6)}\n")
    # f.write(f"params_lgbm_6_bis = {str(params_lgbm_6_bis)}\nparams_xgb_6_bis = {str(params_xgb_6_bis)}\n\n")

    # Header gets its own line; previously it ran straight into the first
    # estimator tuple because the trailing newline was missing.
    f.write("Level 0 estimators:\n")

    f.write("('gboost', GradientBoostingRegressor(n_estimators=3000,\nlearning_rate=0.05,\nmax_depth=4, max_features='sqrt',\nmin_samples_leaf=15, \nmin_samples_split=10, \nloss='huber', random_state =5)),\n\n('xtree', ExtraTreesRegressor(n_estimators=100)),\n('lasso', Pipeline([('scaler', MaxAbsScaler()),('xgb', Lasso(alpha =0.0005, random_state=1))])),\n('KRR', KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)),\n('ENet', Pipeline([('scaler', RobustScaler()), ('Eet', ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3))])),\n('ridgecv', Pipeline([('scaler', MaxAbsScaler()),('ridgecv', RidgeCV())])),\n('linearsvr', Pipeline([('scaler', MaxAbsScaler()),('svr', LinearSVR())]))\n")
    # f.write(f"xtratree = ExtraTreesRegressor(n_estimators=100)\n ridge = make_pipeline(RobustScaler(), RidgeCV())\n lasso = make_pipeline(RobustScaler(), Lasso(alpha =0.0005, random_state=1))\n SVR = make_pipeline(RobustScaler(), LinearSVR())\n KRR = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)\n ENet = make_pipeline(RobustScaler(), ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3))\n GBoost = GradientBoostingRegressor(n_estimators=3000, learning_rate=0.05,\nmax_depth=4, max_features='sqrt',\nmin_samples_leaf=15, min_samples_split=10, \nloss='huber', random_state =5)\n")
    # f.write(f"2 lgbm and 2 xgboost with parameters above and MaxAbsScaler.\ntratrees = ExtraTreesRegressor(n_estimators=100)\nridgecv = Pipeline([('scaler', MaxAbsScaler()),('ridgecv', RidgeCV())])\nlinearsvr = Pipeline([('scaler', MaxAbsScaler()),('svr', LinearSVR())])")

    # f.write(f"Preprocessing: {prepro}\n")
    # f.write(f"Postprocessing: {postpro}\n")

    f.write(f"Level1 estimator: {lvl1}\n")
    f.write(f"Models under the name: {model}_{lvl0}_{lvl1}")

In [None]:
# Persist every trained model to its own pickle file under Models/Stacking,
# numbered from 1 in the order the models appear in lst_models_trained
# (model_1-..., model_2-..., ...). One loop replaces six copy-pasted stanzas
# and no longer hard-codes the number of models.
# NOTE(review): assumes lst_models_trained is an ordered sequence (list/tuple)
# holding one fitted model per dataset — confirm against the training cell.

# Create the target directory up front so a missing folder does not fail the
# save after the long training run.
os.makedirs("Models/Stacking", exist_ok=True)

for model_idx, trained_model in enumerate(lst_models_trained, start=1):
    pkl_model = f"Models/Stacking/model_{model_idx}-{model}_{lvl0}_{lvl1}.pkl"
    with open(pkl_model, 'wb') as file:
        pickle.dump(trained_model, file)