# Model development

Ici, on construit nos modèles et on génère les prédictions. Le mieux est sans doute de faire une partie par modèle ?

Il faut aussi qu'on trouve une nomenclature pour les modèles si on les enregistre, afin de garder une trace des différents résultats.

In [1]:
import pandas as pd
import seaborn as sns
import openpyxl
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle

In [14]:
from sklearn.model_selection import KFold, train_test_split
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import PolynomialFeatures

import optuna

from vmdpy import VMD

In [3]:
from Functions.helper_functions import * 

In [4]:
import warnings

# Silence every warning of category UserWarning for the rest of the session.
warnings.filterwarnings('ignore', category=UserWarning)
# Switch off pandas' chained-assignment warning (pandas' default is 'warn').
pd.set_option('mode.chained_assignment', None)

In [5]:
# Seed NumPy's global random generator so repeated runs are reproducible.
np.random.seed(42)

# Data

In [6]:
# Load the preprocessed training table of each of the six datasets (WP1..WP6).
train_wp1, train_wp2, train_wp3, train_wp4, train_wp5, train_wp6 = (
    pd.read_csv(f'Data/Preprocessing/WP{farm}_train_preprocessed.csv', sep=',')
    for farm in range(1, 7)
)

In [7]:
# Load the preprocessed test table of each of the six datasets (WP1..WP6).
test_wp1, test_wp2, test_wp3, test_wp4, test_wp5, test_wp6 = (
    pd.read_csv(f'Data/Preprocessing/WP{farm}_test_preprocessed.csv', sep=',')
    for farm in range(1, 7)
)
# Values of the 'date' column of the initial test file.
test_dates = pd.read_csv('Data/Initial/test.csv', sep=',')['date'].values

In [8]:
# Columns removed from every training table before modelling.
to_drop = ['date','wd','forecast_time', 'forecast', "forecast_dist"]

In [9]:
# Engineered 'u_*' / 'u2_*' feature columns excluded from the models.
# NOTE(review): the name `feature_corr` below suggests these were selected by a
# correlation analysis — confirm against the preprocessing notebook.
u_to_drop = [
    'u_T_1', 'u_T_2', 'u_T_3', 'u_T_4', 'u_T_5', 'u_T_6', 
    'u_T_2_mean', 'u_T_3_mean', 'u_T_4_mean', 'u_T_5_mean', 'u_T_6_mean', 'u_T_7_mean',
    'u_T_8_mean', 'u_T_9_mean', 'u_T_10_mean', 'u_T_11_mean', 'u_T_12_mean','u_T_24_mean',
    'u_T_2_std', 'u_T_4_std', 'u_T_5_std', 'u_T_6_std',
    'u_T_2_median', 'u_T_3_median', 'u_T_4_median', 'u_T_5_median', 'u_T_6_median', 'u_T_12_median','u_T_24_median', 'u_T_36_median',
    'u_T_2_max', 'u_T_3_max', 'u_T_4_max', 'u_T_5_max', 'u_T_6_max', 'u_T_12_max',
    'u_T_2_min', 'u_T_3_min', 'u_T_4_min', 'u_T_5_min', 'u_T_6_min', 'u_T_12_min',
    'u2_T_1', 'u2_T_2', 'u2_T_3', 'u2_T_4', 'u2_T_5', 'u2_T_6', 
    'u2_T_2_mean', 'u2_T_3_mean', 'u2_T_4_mean', 'u2_T_5_mean', 'u2_T_6_mean', 'u2_T_7_mean',
    'u2_T_8_mean', 'u2_T_9_mean', 'u2_T_10_mean', 'u2_T_11_mean', 'u2_T_12_mean','u2_T_24_mean',
    'u2_T_2_std', 'u2_T_4_std', 'u2_T_5_std', 'u2_T_6_std', 'u2_T_24_std',
    'u2_T_2_median', 'u2_T_3_median', 'u2_T_4_median', 'u2_T_5_median', 'u2_T_6_median', 'u2_T_12_median',
    'u2_T_2_max','u2_T_3_max', 'u2_T_4_max','u2_T_5_max', 'u2_T_6_max', 'u2_T_12_max',
    'u2_T_2_min', 'u2_T_3_min', 'u2_T_4_min', 'u2_T_5_min', 'u2_T_6_min',
    'u2_T_12', 'u2_T_36_mean', 'u2_T_36_std', 'u2_T_24_median', 'u2_T_24_max',
    'u_T_36_mean','u_T_12','u_T_24_max','u2_T_36_median','u_T_24_min'
]
# Engineered 'ws_*' feature columns excluded from the models.
ws_to_drop = [
    'ws_T_1', 'ws_T_2', 'ws_T_3', 'ws_T_4', 'ws_T_5', 'ws_T_6', 'ws_T_7', 'ws_T_8', 'ws_T_10','ws_T_11', 'ws_T_12',
    'ws_T_2_mean', 'ws_T_3_mean', 'ws_T_4_mean', 'ws_T_5_mean', 'ws_T_6_mean', 'ws_T_7_mean', 'ws_T_8_mean', 'ws_T_9_mean', 
    'ws_T_10_mean', 'ws_T_11_mean', 'ws_T_12_mean', 'ws_T_24_mean', 
    'ws_T_2_std', 'ws_T_3_std', 'ws_T_4_std', 'ws_T_5_std', 
    'ws_T_2_median', 'ws_T_3_median', 'ws_T_4_median', 'ws_T_5_median', 'ws_T_6_median',
    'ws_T_12_median', 'ws_T_24_median', 'ws_T_36_median',
    'ws_T_2_max', 'ws_T_3_max', 'ws_T_4_max', 'ws_T_5_max','ws_T_6_max', 'ws_T_12_max',
     'ws_T_2_min', 'ws_T_3_min', 'ws_T_4_min', 'ws_T_5_min', 'ws_T_6_min', 'ws_T_12_min','ws_T_24_max','ws_T_24_min'
]

# Engineered 'v_*' feature columns excluded from the models.
v_to_drop = [
    'v_T_1', 'v_T_2', 'v_T_3', 'v_T_4', 'v_T_5', 'v_T_6', 
    'v_T_2_mean', 'v_T_3_mean', 'v_T_4_mean', 'v_T_5_mean', 'v_T_6_mean', 'v_T_7_mean',
    'v_T_8_mean', 'v_T_9_mean', 'v_T_10_mean', 'v_T_11_mean', 'v_T_12_mean', 'v_T_24_mean','v_T_36_mean',
    'v_T_3_std', 'v_T_4_std', 'v_T_5_std','v_T_6_std','v_T_24_std', 'v_T_36_median',
    'v_T_2_median', 'v_T_3_median', 'v_T_4_median', 'v_T_5_median', 'v_T_6_median', 
    'v_T_2_max', 'v_T_3_max', 'v_T_4_max', 'v_T_5_max', 'v_T_6_max', 'v_T_12_max', 
    'v_T_2_min', 'v_T_3_min', 'v_T_4_min', 'v_T_5_min', 'v_T_6_min', 'v_T_12_min', 
    'v_T_36_min', 'v_T_36', 'v_T_24_max',  'v_T_12_median', 'v_T_24_median',
]

# Engineered 'coswd_*' feature columns excluded from the models.
# Note: the last line also lists two 'ws_*' columns ('ws_T_36_max', 'ws_T_36_min').
wd_to_drop = [
    'coswd_1', 'coswd_2', 'coswd_3', 'coswd_4', 'coswd_5', 'coswd_6',
    'coswd_2_mean', 'coswd_3_mean', 'coswd_4_mean', 'coswd_5_mean', 'coswd_6_mean', 'coswd_7_mean', 
    'coswd_8_mean', 'coswd_9_mean', 'coswd_10_mean', 'coswd_11_mean', 'coswd_12_mean', 'coswd_24_mean', 
    'coswd_3_std', 'coswd_4_std','coswd_5_std','coswd_2_median', 'coswd_3_median','coswd_4_median', 
    'coswd_5_median', 'coswd_6_median', 'coswd_36_median', 'coswd_24_median', 'coswd_12_median',
    'coswd_2_max', 'coswd_3_max', 'coswd_4_max', 'coswd_5_max', 'coswd_6_max', 'coswd_12_max', 'coswd_24_max',
    'coswd_2_min', 'coswd_3_min', 'coswd_4_min', 'coswd_5_min', 'coswd_6_min', 'coswd_12_min', 'coswd_24_min',
    'ws_T_36_max', 'ws_T_36_min', 'coswd_12', 'coswd_24'
]

# Remaining individual columns excluded from the models.
other_to_drop = [
    'cos_day', 'u', 'v'
]

# Full list of excluded feature columns: concatenation of the five lists above.
feature_corr = u_to_drop+ws_to_drop+v_to_drop+wd_to_drop+other_to_drop
# Rebinds `to_drop` so it now holds the excluded features followed by its previous content.
to_drop = feature_corr+to_drop

# RidgeCV

In [17]:
def ridge_cross_validation(X, y1,y2,y3,y4,y_true,scaler):
    """Cross-validate a four-component RidgeCV ensemble with shuffled 10-fold CV.

    One ``scaler -> RidgeCV`` pipeline is fitted per component target
    (y1..y4). For each fold the four component predictions are summed,
    clipped to [0, 1] and scored against ``y_true``.

    Args:
        X: feature table, one row per sample.
        y1, y2, y3, y4: component targets aligned row-by-row with ``X``
            (the callers in this file pass the four VMD modes of ``y_true``).
        y_true: target the summed, clipped prediction is scored against.
        scaler: ``'minmax'`` selects ``MinMaxScaler``; any other value
            selects ``StandardScaler``.

    Returns:
        None. Per-fold and aggregate RMSE / MAE are printed.

    NOTE(review): the callers compute the component targets on the full
    series before the folds are drawn, so held-out rows contribute to the
    training targets — confirm this is acceptable for the reported scores.
    """
    scaler_cls = MinMaxScaler if scaler == 'minmax' else StandardScaler
    # Four independent pipelines, one per component target.
    models = [
        Pipeline([('scaler', scaler_cls()), ('ridge', RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]))])
        for _ in range(4)
    ]

    # Convert the inputs once instead of on every fold.
    features = pd.DataFrame(X)
    truth = pd.DataFrame(y_true)
    # 1-D targets make predict() return 1-D arrays, so the summed prediction
    # can be clipped and scored without building a ragged list.
    components = [np.asarray(component).ravel() for component in (y1, y2, y3, y4)]

    print('-----------RIDGECV CROSS VALIDATION BEGINNING-----------')
    split = 10
    # No random_state: the shuffle draws from NumPy's global random generator.
    kf = KFold(n_splits=split, shuffle=True)
    ridge_rmse_scores = []
    ridge_mae_scores = []
    for fold, (train_index, test_index) in enumerate(kf.split(features, truth), start=1):
        X_train, X_test = features.iloc[train_index], features.iloc[test_index]
        Y_test = truth.iloc[test_index]

        # Ensemble prediction = sum of the four component predictions.
        prediction = np.zeros(len(test_index))
        for model, component in zip(models, components):
            model.fit(X_train, component[train_index])
            prediction += np.ravel(model.predict(X_test))

        # Predictions outside [0, 1] are clamped to the nearest bound.
        prediction = np.clip(prediction, 0, 1)

        # RMSE as sqrt(MSE): the `squared=False` shortcut no longer exists in
        # recent scikit-learn releases.
        ridge_rmse_scores.append(float(np.sqrt(mean_squared_error(Y_test, prediction))))
        ridge_mae_scores.append(float(mean_absolute_error(Y_test, prediction)))

        # show_evaluation prints its own report; wrapping it in print() only
        # added a stray "None" line to the output.
        show_evaluation(prediction, Y_test)
        print(f'-------------------FOLD {fold}-----------------')

    print('---------------CROSS VALIDATION COMPLETE-------------')
    print('--------------------------RMSE-----------------------')
    display_scores(ridge_rmse_scores)
    print('--------------------------MAE------------------------')
    display_scores(ridge_mae_scores)

In [12]:
def vmd(y,k):
    """Split a signal into ``k`` modes with vmdpy's VMD and return them as columns.

    Args:
        y: signal passed unchanged to ``VMD``.
        k: number of modes requested.

    Returns:
        DataFrame with one column per returned mode, named ``'IMFwp1'``,
        ``'IMFwp2'``, ... in the order ``VMD`` yields them.

    NOTE(review): vmdpy appears to drop the last sample of odd-length
    signals; confirm the returned frame has as many rows as ``y``.
    """
    # VMD settings, passed positionally as (signal, alpha, tau, K, DC, init, tol).
    bandwidth_penalty = 1      # alpha: bandwidth constraint
    noise_tolerance = 0.       # tau: no strict fidelity enforcement
    impose_dc = 0              # DC: no DC part imposed
    omega_init = 1             # init: initialize omegas uniformly
    convergence_tol = 1e-7     # tol

    # Only the time-domain modes are used; the spectra and centre
    # frequencies returned alongside them are discarded.
    modes, _, _ = VMD(y, bandwidth_penalty, noise_tolerance, k, impose_dc, omega_init, convergence_tol)

    return pd.DataFrame(
        {'IMFwp{0}'.format(position + 1): mode for position, mode in enumerate(modes)}
    )

## WP1 

In [15]:
# Put the target column 'wp' last, then discard the columns listed in to_drop.
wp1_X = train_wp1[[col for col in train_wp1.columns if col != "wp"] + ["wp"]].drop(columns=to_drop)
# Features: every remaining column except the target.
X1 = wp1_X.drop(columns='wp')
# Target column.
y1 = wp1_X.loc[:, 'wp']
# Decompose the target into 4 modes.
vmf_1 = vmd(y1, 4)

In [18]:
# Cross-validate the four-mode ensemble on WP1 using min-max scaling.
ridge_cross_validation(X1, vmf_1['IMFwp1'],vmf_1['IMFwp2'],vmf_1['IMFwp3'],vmf_1['IMFwp4'],y1, 'minmax')

-----------RIDGECV CROSS VALIDATION BEGINNING-----------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.156869813096763
MAE score: 0.11608810483633923
None
-------------------FOLD 1-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.16118133117827085
MAE score: 0.12203139498523614
None
-------------------FOLD 2-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15672993164830373
MAE score: 0.11670645618668161
None
-------------------FOLD 3-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.1573705350848462
MAE score: 0.11786827983217886
None
-------------------FOLD 4-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15733483937076873
MAE score: 0.1177667194365065
None
-------------------FOLD 5-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15599600840951403
MAE score: 0.11761400189133363
None
-------------------FOLD 6-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.16127411722514484
MAE score: 0.1213429146117129
None
-------------------FOLD 7-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.1548399528050556
MAE score: 0.11708690066717398
None
-------------------FOLD 8-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15405538550962988
MAE score: 0.11597452437407083
None
-------------------FOLD 9-----------------
RMSE score: 0.15855570212790557
MAE score: 0.11724811087065845
None
-------------------FOLD 10-----------------
---------------CROSS VALIDATION COMPLETE-------------
--------------------------RMSE-----------------------
Scores: [0.156869813096763, 0.16118133117827085, 0.15672993164830373, 0.1573705350848462, 0.15733483937076873, 0.15599600840951403, 0.16127411722514484, 0.1548399528050556, 0.15405538550962988, 0.15855570212790557]
Mean: 0.15742076164562024
Std: 0.0022602050937008266
--------------------------MAE------------------------
Scores: [0.11608810483633923, 0.12203139498523614, 0.11670645618668161, 0.11786827983217886, 0.1177667194365065, 0.11761400189133363, 0.1213429146117129, 0.11708690066717398, 0.11597452437407083, 0.11724811087065845]
Mean: 0.11797274076918922
Std: 0.0019613973441101034


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


## WP2

In [19]:
# Put the target column 'wp' last, then discard the columns listed in to_drop.
wp2_X = train_wp2[[col for col in train_wp2.columns if col != "wp"] + ["wp"]].drop(columns=to_drop)
# Features: every remaining column except the target.
X2 = wp2_X.drop(columns='wp')
# Target column.
y2 = wp2_X.loc[:, 'wp']
# Decompose the target into 4 modes.
vmf_2 = vmd(y2, 4)

In [20]:
# Cross-validate the four-mode ensemble on WP2 using min-max scaling.
ridge_cross_validation(X2, vmf_2['IMFwp1'],vmf_2['IMFwp2'],vmf_2['IMFwp3'],vmf_2['IMFwp4'],y2, 'minmax')

-----------RIDGECV CROSS VALIDATION BEGINNING-----------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.16635543822616836
MAE score: 0.12215275934952423
None
-------------------FOLD 1-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.16754111071801567
MAE score: 0.12313641745789505
None
-------------------FOLD 2-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.16827246430041365
MAE score: 0.12427657499978796
None
-------------------FOLD 3-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17023605297583322
MAE score: 0.12485798108042968
None
-------------------FOLD 4-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.1711156984073127
MAE score: 0.12708908941946048
None
-------------------FOLD 5-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17002083501292325
MAE score: 0.12537177347744913
None
-------------------FOLD 6-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17126603401760518
MAE score: 0.12701480656282688
None
-------------------FOLD 7-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.1666711901051543
MAE score: 0.12371012010363393
None
-------------------FOLD 8-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.16944986446068372
MAE score: 0.12586849726254826
None
-------------------FOLD 9-----------------
RMSE score: 0.1713116237042328
MAE score: 0.12671487824672453
None
-------------------FOLD 10-----------------
---------------CROSS VALIDATION COMPLETE-------------
--------------------------RMSE-----------------------
Scores: [0.16635543822616836, 0.16754111071801567, 0.16827246430041365, 0.17023605297583322, 0.1711156984073127, 0.17002083501292325, 0.17126603401760518, 0.1666711901051543, 0.16944986446068372, 0.1713116237042328]
Mean: 0.16922403119283427
Std: 0.0017972154034744813
--------------------------MAE------------------------
Scores: [0.12215275934952423, 0.12313641745789505, 0.12427657499978796, 0.12485798108042968, 0.12708908941946048, 0.12537177347744913, 0.12701480656282688, 0.12371012010363393, 0.12586849726254826, 0.12671487824672453]
Mean: 0.125019289796028
Std: 0.0016137023271018262


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


## WP3

In [21]:
# Put the target column 'wp' last, then discard the columns listed in to_drop.
wp3_X = train_wp3[[col for col in train_wp3.columns if col != "wp"] + ["wp"]].drop(columns=to_drop)
# Features: every remaining column except the target.
X3 = wp3_X.drop(columns='wp')
# Target column.
y3 = wp3_X.loc[:, 'wp']
# Decompose the target into 4 modes.
vmf_3 = vmd(y3, 4)

In [22]:
# Cross-validate the four-mode ensemble on WP3 using min-max scaling.
ridge_cross_validation(X3,vmf_3['IMFwp1'],vmf_3['IMFwp2'],vmf_3['IMFwp3'],vmf_3['IMFwp4'], y3, 'minmax')

-----------RIDGECV CROSS VALIDATION BEGINNING-----------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17386146803550923
MAE score: 0.13153033853738744
None
-------------------FOLD 1-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.1699683203052604
MAE score: 0.12878391670369885
None
-------------------FOLD 2-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.1717148689554399
MAE score: 0.1297246200645458
None
-------------------FOLD 3-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17345019292056948
MAE score: 0.13156637560461232
None
-------------------FOLD 4-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17412828264630636
MAE score: 0.13206390143801797
None
-------------------FOLD 5-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17245505000940403
MAE score: 0.13073197901173605
None
-------------------FOLD 6-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.1675763764302087
MAE score: 0.12640589120597653
None
-------------------FOLD 7-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.1721538248645716
MAE score: 0.13045403998662863
None
-------------------FOLD 8-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17289733755818135
MAE score: 0.13073271340543738
None
-------------------FOLD 9-----------------
RMSE score: 0.17257986997892527
MAE score: 0.12960035669653727
None
-------------------FOLD 10-----------------
---------------CROSS VALIDATION COMPLETE-------------
--------------------------RMSE-----------------------
Scores: [0.17386146803550923, 0.1699683203052604, 0.1717148689554399, 0.17345019292056948, 0.17412828264630636, 0.17245505000940403, 0.1675763764302087, 0.1721538248645716, 0.17289733755818135, 0.17257986997892527]
Mean: 0.1720785591704376
Std: 0.0018757840603775724
--------------------------MAE------------------------
Scores: [0.13153033853738744, 0.12878391670369885, 0.1297246200645458, 0.13156637560461232, 0.13206390143801797, 0.13073197901173605, 0.12640589120597653, 0.13045403998662863, 0.13073271340543738, 0.12960035669653727]
Mean: 0.13015941326545782
Std: 0.0015720010124300621


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


## WP4

In [23]:
# Put the target column 'wp' last, then discard the columns listed in to_drop.
wp4_X = train_wp4[[col for col in train_wp4.columns if col != "wp"] + ["wp"]].drop(columns=to_drop)
# Features: every remaining column except the target.
X4 = wp4_X.drop(columns='wp')
# Target column.
y4 = wp4_X.loc[:, 'wp']
# Decompose the target into 4 modes.
vmf_4 = vmd(y4, 4)

In [24]:
# Cross-validate the four-mode ensemble on WP4 using min-max scaling.
ridge_cross_validation(X4,vmf_4['IMFwp1'],vmf_4['IMFwp2'], vmf_4['IMFwp3'],vmf_4['IMFwp4'],y4, 'minmax')

-----------RIDGECV CROSS VALIDATION BEGINNING-----------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15723334227507316
MAE score: 0.11855926931422053
None
-------------------FOLD 1-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15472512014700135
MAE score: 0.11708428139133621
None
-------------------FOLD 2-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15661188948558463
MAE score: 0.11900398346444549
None
-------------------FOLD 3-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15689549101248337
MAE score: 0.11974078215944592
None
-------------------FOLD 4-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.1576652432752629
MAE score: 0.11946265110347414
None
-------------------FOLD 5-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15653434686989517
MAE score: 0.11823888939863664
None
-------------------FOLD 6-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15686553525881988
MAE score: 0.11875018656161541
None
-------------------FOLD 7-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15773068263289655
MAE score: 0.11886742960819868
None
-------------------FOLD 8-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15631863320767042
MAE score: 0.11854058117712177
None
-------------------FOLD 9-----------------
RMSE score: 0.15812964477551297
MAE score: 0.11961445267424153
None
-------------------FOLD 10-----------------
---------------CROSS VALIDATION COMPLETE-------------
--------------------------RMSE-----------------------
Scores: [0.15723334227507316, 0.15472512014700135, 0.15661188948558463, 0.15689549101248337, 0.1576652432752629, 0.15653434686989517, 0.15686553525881988, 0.15773068263289655, 0.15631863320767042, 0.15812964477551297]
Mean: 0.15687099289402004
Std: 0.0009042327069335203
--------------------------MAE------------------------
Scores: [0.11855926931422053, 0.11708428139133621, 0.11900398346444549, 0.11974078215944592, 0.11946265110347414, 0.11823888939863664, 0.11875018656161541, 0.11886742960819868, 0.11854058117712177, 0.11961445267424153]
Mean: 0.11878625068527364
Std: 0.0007360735359175467


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


## WP5

In [25]:
# Put the target column 'wp' last, then discard the columns listed in to_drop.
wp5_X = train_wp5[[col for col in train_wp5.columns if col != "wp"] + ["wp"]].drop(columns=to_drop)
# Features: every remaining column except the target.
X5 = wp5_X.drop(columns='wp')
# Target column.
y5 = wp5_X.loc[:, 'wp']
# Decompose the target into 4 modes.
vmf_5 = vmd(y5, 4)

In [26]:
# Cross-validate the four-mode ensemble on WP5 using min-max scaling.
ridge_cross_validation(X5,vmf_5['IMFwp1'],vmf_5['IMFwp2'],vmf_5['IMFwp3'],vmf_5['IMFwp4'], y5, 'minmax')

-----------RIDGECV CROSS VALIDATION BEGINNING-----------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17091723058983313
MAE score: 0.13078965900505712
None
-------------------FOLD 1-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17006159376505697
MAE score: 0.12844730825558645
None
-------------------FOLD 2-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.169333815550362
MAE score: 0.1284225734082065
None
-------------------FOLD 3-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.1698534338268658
MAE score: 0.12859550324895175
None
-------------------FOLD 4-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.16860089998947939
MAE score: 0.12817365339135448
None
-------------------FOLD 5-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.16965263803966313
MAE score: 0.12801800240786826
None
-------------------FOLD 6-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17608358733904664
MAE score: 0.132888394097698
None
-------------------FOLD 7-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17261477590457505
MAE score: 0.13067638508906226
None
-------------------FOLD 8-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.17383678529962948
MAE score: 0.1313307745684053
None
-------------------FOLD 9-----------------
RMSE score: 0.17124305901478454
MAE score: 0.13029059901739973
None
-------------------FOLD 10-----------------
---------------CROSS VALIDATION COMPLETE-------------
--------------------------RMSE-----------------------
Scores: [0.17091723058983313, 0.17006159376505697, 0.169333815550362, 0.1698534338268658, 0.16860089998947939, 0.16965263803966313, 0.17608358733904664, 0.17261477590457505, 0.17383678529962948, 0.17124305901478454]
Mean: 0.1712197819319296
Std: 0.0022050636217537923
--------------------------MAE------------------------
Scores: [0.13078965900505712, 0.12844730825558645, 0.1284225734082065, 0.12859550324895175, 0.12817365339135448, 0.12801800240786826, 0.132888394097698, 0.13067638508906226, 0.1313307745684053, 0.13029059901739973]
Mean: 0.129763285248959
Std: 0.0015765323473786875


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


## WP6

In [27]:
# Put the target column 'wp' last, then discard the columns listed in to_drop.
wp6_X = train_wp6[[col for col in train_wp6.columns if col != "wp"] + ["wp"]].drop(columns=to_drop)
# Features: every remaining column except the target.
X6 = wp6_X.drop(columns='wp')
# Target column.
y6 = wp6_X.loc[:, 'wp']
# Decompose the target into 4 modes.
vmf_6 = vmd(y6, 4)

In [28]:
# Cross-validate the four-mode ensemble on WP6 using min-max scaling.
ridge_cross_validation(X6,vmf_6['IMFwp1'],vmf_6['IMFwp2'],vmf_6['IMFwp3'],vmf_6['IMFwp4'], y6, 'minmax')

-----------RIDGECV CROSS VALIDATION BEGINNING-----------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15142278814103993
MAE score: 0.11523051321442981
None
-------------------FOLD 1-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15266841010068055
MAE score: 0.1158781051501974
None
-------------------FOLD 2-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15347215261293556
MAE score: 0.1172367644176763
None
-------------------FOLD 3-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.14995164935024133
MAE score: 0.11453988205462666
None
-------------------FOLD 4-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15122498133214804
MAE score: 0.11552924527581616
None
-------------------FOLD 5-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15561967169366375
MAE score: 0.11785251897639994
None
-------------------FOLD 6-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.153611314695583
MAE score: 0.11617186283237874
None
-------------------FOLD 7-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.15043017115148902
MAE score: 0.11387367009830125
None
-------------------FOLD 8-----------------


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


RMSE score: 0.1496877872368281
MAE score: 0.11345396299967458
None
-------------------FOLD 9-----------------
RMSE score: 0.14991611393462229
MAE score: 0.11287493276668836
None
-------------------FOLD 10-----------------
---------------CROSS VALIDATION COMPLETE-------------
--------------------------RMSE-----------------------
Scores: [0.15142278814103993, 0.15266841010068055, 0.15347215261293556, 0.14995164935024133, 0.15122498133214804, 0.15561967169366375, 0.153611314695583, 0.15043017115148902, 0.1496877872368281, 0.14991611393462229]
Mean: 0.15180050402492315
Std: 0.001876092125007092
--------------------------MAE------------------------
Scores: [0.11523051321442981, 0.1158781051501974, 0.1172367644176763, 0.11453988205462666, 0.11552924527581616, 0.11785251897639994, 0.11617186283237874, 0.11387367009830125, 0.11345396299967458, 0.11287493276668836]
Mean: 0.11526414577861895
Std: 0.001526639657141864


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


# RidgeCV Predictions

## Functions

In [29]:
# Columns removed from a test table before prediction: the metadata columns,
# the (empty) target 'wp', and every excluded feature.
# NOTE(review): this rebinds the module-level `to_drop`, which now contains
# 'wp'; re-running an earlier cell that drops `to_drop` and then reads 'wp'
# would fail — confirm cells are only run top to bottom.
to_drop = ['date', 'wd', 'forecast_time', 'forecast', "forecast_dist", 'wp'] + feature_corr


def make_prediction_dataset(test, to_drop=to_drop):
    """Build the feature table of the rows that still need a prediction.

    Args:
        test: preprocessed test table containing at least the columns
            'ws', 'u', 'v', 'wp', 'date', 'forecast_time' and everything
            listed in ``to_drop``.
        to_drop: columns removed from the result (defaults to the
            module-level list as it was when this function was defined).

    Returns:
        DataFrame with one row per distinct 'date', restricted to rows where
        'ws', 'u' and 'v' are all present and 'wp' is missing, without the
        ``to_drop`` columns. When a date occurs several times, the row with
        the largest 'forecast_time' is the one kept.
    """
    # Rows with complete ws / u / v values ...
    has_weather = test[['ws', 'u', 'v']].notna().all(axis=1)
    # ... whose target has not been filled in.
    needs_prediction = test['wp'].isna()
    candidates = test[has_weather & needs_prediction]

    # Sort dates ascending and forecast_time descending, so that the first
    # row of every date is the one drop_duplicates keeps.
    ordered = candidates.sort_values(by=['date', 'forecast_time'], ascending=[True, False])
    one_row_per_date = ordered.drop_duplicates(subset='date')

    return one_row_per_date.drop(columns=to_drop)

In [37]:
def make_submission_file(lst_X_trains, lst_y_trains,lst_y_vmfs,lst_tests, lst_models, dates):
    """Fit every farm's component models and assemble the submission table.

    For each farm, model ``n`` of the farm's model list is fitted on the
    farm's features against component target ``n``. The farm prediction is
    the sum of the component predictions on the farm's test table, with
    values below 0 replaced by 0 and values above 1 replaced by 1.

    Args:
        lst_X_trains: one training feature table per farm.
        lst_y_trains: one original target per farm; only used to print
            its min / max / mean next to those of the predictions.
        lst_y_vmfs: per farm, the sequence of component targets.
        lst_tests: one test feature table per farm.
        lst_models: per farm, the sequence of estimators (``fit`` /
            ``predict``), one per component target.
        dates: values of the 'date' column; must have one entry per
            predicted row.

    Returns:
        Tuple ``(df_predictions, lst_models_trained)``: a DataFrame with a
        'date' column followed by 'wp1', 'wp2', ... (one per farm), and the
        list of fitted model lists in farm order (the same objects that
        were passed in).
    """
    lst_prediction = []
    lst_models_trained = []
    farms = zip(lst_X_trains, lst_y_trains, lst_y_vmfs, lst_tests, lst_models)
    for farm_number, (X, y_true, y, test, model) in enumerate(farms, start=1):
        print(f'--------------Model {farm_number}--------------')
        for n, component_model in enumerate(model):
            component_model.fit(X, y[n])

        print(f'True:\n\tMin:{min(y_true)}\n\tMax:{max(y_true)}\n\tMean:{y_true.mean()}')

        # Farm prediction = sum of the component predictions.
        predictions = sum(component_model.predict(test) for component_model in model)
        print(f'Prediction:\n\tMin:{min(predictions)}\n\tMax:{max(predictions)}\n\tMean:{np.mean(predictions)}')

        # Clamp out-of-range values to the bounds 0 and 1.
        predictions = [0 if value < 0 else value for value in predictions]
        predictions = [1 if value > 1 else value for value in predictions]
        print(f'Prediction corrected:\n\tMin:{min(predictions)}\n\tMax:{max(predictions)}\n\tMean:{np.mean(predictions)}')

        lst_prediction.append(predictions)
        lst_models_trained.append(model)

    # Use the `dates` argument (not a module-level global) and emit one
    # 'wp<k>' column per farm actually processed.
    columns = {'date': dates}
    for farm_number, predictions in enumerate(lst_prediction, start=1):
        columns[f'wp{farm_number}'] = predictions
    df_predictions = pd.DataFrame(columns)
    return df_predictions, lst_models_trained

## Submission

In [38]:
# Build, for each of the six farms, its own list of four untrained pipelines
# (one per decomposition mode). Every pipeline is a distinct object: sharing
# the same four instances across the farm lists would mean each farm's fit
# overwrites the previous one, leaving all "trained" model lists identical
# and fitted on the last farm only.
model_1, model_2, model_3, model_4, model_5, model_6 = (
    [
        Pipeline([('scaler', MinMaxScaler()), ('ridge', RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]))])
        for _ in range(4)
    ]
    for _ in range(6)
)

In [39]:
# For every farm, collect its four mode columns ('IMFwp1'..'IMFwp4') in order.
y_vmf1 = [vmf_1['IMFwp'+str(mode)] for mode in range(1, 5)]
y_vmf2 = [vmf_2['IMFwp'+str(mode)] for mode in range(1, 5)]
y_vmf3 = [vmf_3['IMFwp'+str(mode)] for mode in range(1, 5)]
y_vmf4 = [vmf_4['IMFwp'+str(mode)] for mode in range(1, 5)]
y_vmf5 = [vmf_5['IMFwp'+str(mode)] for mode in range(1, 5)]
y_vmf6 = [vmf_6['IMFwp'+str(mode)] for mode in range(1, 5)]

In [40]:
# Prediction-ready feature table of each farm, in farm order.
lst_tests = [
    make_prediction_dataset(raw_test)
    for raw_test in (test_wp1, test_wp2, test_wp3, test_wp4, test_wp5, test_wp6)
]

In [41]:
# Per-farm inputs for the submission, all in the same farm order (1..6).
lst_models = [model_1, model_2, model_3, model_4, model_5, model_6]  # untrained model lists
lst_X_trains = [X1, X2, X3, X4, X5, X6]  # training features
lst_y_trains = [y1, y2, y3, y4, y5, y6]  # original targets
lst_y_vmf=[y_vmf1, y_vmf2, y_vmf3, y_vmf4, y_vmf5, y_vmf6]  # mode targets

In [43]:
# Fit the per-farm models and build the prediction table.
df_predictions, lst_models_trained = make_submission_file(lst_X_trains, lst_y_trains,lst_y_vmf, lst_tests, lst_models, test_dates)

--------------Model 1--------------
True:
	Min:0.0
	Max:0.96
	Mean:0.2845981952075702
Prediction:
	Min:-0.4776566179978518
	Max:1.4000487522397345
	Mean:0.3034715395479862
Prediction corrected:
	Min:0
	Max:1
	Mean:0.31067383517354935
--------------Model 2--------------
True:
	Min:0.0
	Max:0.966
	Mean:0.25890153769841273
Prediction:
	Min:-0.3097962802643937
	Max:1.3228111840792058
	Mean:0.26061721712727076
Prediction corrected:
	Min:0
	Max:1
	Mean:0.2673437814799184
--------------Model 3--------------
True:
	Min:0.0
	Max:0.989
	Mean:0.2625247252747253
Prediction:
	Min:-0.36520407802891036
	Max:1.562474852464969
	Mean:0.3008197933316526
Prediction corrected:
	Min:0
	Max:1
	Mean:0.30435408529076197
--------------Model 4--------------
True:
	Min:0.0
	Max:0.992
	Mean:0.2763637820512821
Prediction:
	Min:-0.4256949093907255
	Max:1.1674096120226487
	Mean:0.2889001861895937
Prediction corrected:
	Min:0
	Max:1
	Mean:0.29902055160819435
--------------Model 5--------------
True:
	Min:0.0
	Max:0.97

In [45]:
# Write the predictions as a ';'-separated CSV without the index column.
df_predictions.to_csv('Predictions/submission_nb_40_full_vmd_ridgecv-featselect.csv', index=False, sep=';')