In [1]:
import pandas as pd
import seaborn as sns
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import time

In [2]:
from sklearn.ensemble import StackingRegressor, RandomForestRegressor
from sklearn.linear_model import RidgeCV, Ridge 
from sklearn.svm import LinearSVR
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras.layers import Dense, Dropout,LSTM
from keras import regularizers
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from vmdpy import VMD

In [3]:
from sklearn.model_selection import KFold, train_test_split, cross_val_score

from sklearn.metrics import mean_squared_error, mean_absolute_error
#import optuna

In [4]:
from Functions.helper_functions import * 

In [5]:
import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)
pd.options.mode.chained_assignment = None  # default='warn'

In [6]:
np.random.seed(42)

# Data

In [7]:
start_time = time.time()
train_wp1 = pd.read_csv('Data/Preprocessing/WP1_train_preprocessed.csv', sep=',')
train_wp2 = pd.read_csv('Data/Preprocessing/WP2_train_preprocessed.csv', sep=',')
train_wp3 = pd.read_csv('Data/Preprocessing/WP3_train_preprocessed.csv', sep=',')
train_wp4 = pd.read_csv('Data/Preprocessing/WP4_train_preprocessed.csv', sep=',')
train_wp5 = pd.read_csv('Data/Preprocessing/WP5_train_preprocessed.csv', sep=',')
train_wp6 = pd.read_csv('Data/Preprocessing/WP6_train_preprocessed.csv', sep=',')
print('this cell took',(time.time() - start_time),'s to run, be green, vectorize!')

this cell took 41.00195002555847 s to run, be green, vectorize!


In [8]:
train_wp1_imfs = pd.read_csv('Data/Preprocessing/WP1_train_imfs.csv', sep=',')
train_wp2_imfs = pd.read_csv('Data/Preprocessing/WP2_train_imfs.csv', sep=',')
train_wp3_imfs = pd.read_csv('Data/Preprocessing/WP3_train_imfs.csv', sep=',')
train_wp4_imfs = pd.read_csv('Data/Preprocessing/WP4_train_imfs.csv', sep=',')
train_wp5_imfs = pd.read_csv('Data/Preprocessing/WP5_train_imfs.csv', sep=',')
train_wp6_imfs = pd.read_csv('Data/Preprocessing/WP6_train_imfs.csv', sep=',')

In [9]:
test_wp1 = pd.read_csv('Data/Preprocessing/WP1_test_preprocessed.csv', sep=',')
test_wp2 = pd.read_csv('Data/Preprocessing/WP2_test_preprocessed.csv', sep=',')
test_wp3 = pd.read_csv('Data/Preprocessing/WP3_test_preprocessed.csv', sep=',')
test_wp4 = pd.read_csv('Data/Preprocessing/WP4_test_preprocessed.csv', sep=',')
test_wp5 = pd.read_csv('Data/Preprocessing/WP5_test_preprocessed.csv', sep=',')
test_wp6 = pd.read_csv('Data/Preprocessing/WP6_test_preprocessed.csv', sep=',')
test_dates = pd.read_csv('Data/Initial/test.csv', sep=',').date.values

In [10]:
to_drop = ['date','wd','forecast_time', 'forecast', "forecast_dist"]

# Functions

In [11]:
#Architecture du modèle:
def build_model(data,n_neurons,output_shape,activation):
    model = Sequential()
    model.add(Dense(n_neurons,activation='relu',input_shape=(data.shape[1],)))
    #model.add(Dropout(0.1))
    model.add(Dense(n_neurons/2,activation='relu',))
    #model.add(Dropout(0.1))
    model.add(Dense(n_neurons/4,activation='relu'))
    
    model.add(Dense(output_shape,))

  

    model.compile(loss='mse',
                optimizer='rmsprop',
                metrics=[tf.keras.metrics.RootMeanSquaredError()])
    return model

In [13]:
def dnn_cross_validation(X, y, epochs, n_neurons,output_shape,activation):
    model = build_model(X,n_neurons,output_shape,activation)
    print(model.summary())
    model.save_weights('model.h5')
    scaler_X=MinMaxScaler(feature_range=(y.min(),y.max()))
    X = scaler_X.fit_transform(X)

    print('-----------DNN CROSS VALIDATION BEGINNING-----------')
    split = 10
    kf = KFold(n_splits=split, shuffle=True)       
    dnn_rmse_scores = []
    dnn_mae_scores = []
    i = 1
    for (train_index, test_index) in kf.split(pd.DataFrame(X), pd.DataFrame(y)):
        start_time = time.time()
        X_train, X_test = pd.DataFrame(X).iloc[train_index], pd.DataFrame(X).iloc[test_index]
        Y_train, Y_test = pd.DataFrame(y).iloc[train_index],pd.DataFrame(y).iloc[test_index]

        model.fit(X_train, Y_train,epochs = epochs, verbose=0, shuffle=True)
        if output_shape > 1:
            prediction = model.predict(X_test).reshape(len(X_test),output_shape)
        else:
            prediction = model.predict(X_test).reshape(len(X_test),)
        #print('--first raw prediction--:', prediction[0])
        #print('--first raw y_true:',y.values[0])
        dnn_rmse_scores.append(mean_squared_error(Y_test, prediction,squared=False))
        dnn_mae_scores.append(mean_absolute_error(Y_test, prediction))
        model.load_weights('model.h5')
        print(show_evaluation(prediction, Y_test))
        print('this fold was executed in ',(time.time() - start_time),'seconds')
        print(f'-------------------FOLD {i}-----------------')
        i+=1

    print('---------------CROSS VALIDATION COMPLETE-------------')
    print('--------------------------RMSE-----------------------')
    display_scores(dnn_rmse_scores)
    print('--------------------------MAE------------------------')
    display_scores(dnn_mae_scores)

In [14]:
def dnn_cross_validation_fourmodel(X, y_true, y1,y2,y3,y4, epochs, n_neurons,output_shape,activation):
    
    #print(model.summary())
    #Scaling
    scaler_X1=MinMaxScaler(feature_range=(y1.min(),y1.max()))
    scaler_X2=MinMaxScaler(feature_range=(y2.min(),y2.max()))
    scaler_X3=MinMaxScaler(feature_range=(y3.min(),y3.max()))
    scaler_X4=MinMaxScaler(feature_range=(y4.min(),y4.max()))
    X1 = scaler_X1.fit_transform(X)
    X2 = scaler_X2.fit_transform(X)
    X3 = scaler_X3.fit_transform(X)
    X4 = scaler_X4.fit_transform(X)
    #Complex model for first VMFs, basic one for the remaining
    
    model_1 = build_model(X1,n_neurons,output_shape,activation)
    model_2 = build_model(X2,200,1,activation)
    model_3 = build_model(X3,200,1,activation)
    model_4 = build_model(X4,200,1,activation)
    #Save weight for reset in the loop
    model_1.save_weights('model_1.h5')
    model_2.save_weights('model_2.h5')
    model_3.save_weights('model_3.h5')
    model_4.save_weights('model_4.h5')

    print('-----------DNN CROSS VALIDATION BEGINNING-----------')
    split = 10
    kf = KFold(n_splits=split, shuffle=True)       
    dnn_rmse_scores = []
    dnn_mae_scores = []
    i = 1
    for (train_index, test_index) in kf.split(pd.DataFrame(X), pd.DataFrame(y1)):
        start_time = time.time()
        X1_train, X1_test = pd.DataFrame(X1).iloc[train_index], pd.DataFrame(X1).iloc[test_index]
        X2_train, X2_test = pd.DataFrame(X2).iloc[train_index], pd.DataFrame(X2).iloc[test_index]
        X3_train, X3_test = pd.DataFrame(X3).iloc[train_index], pd.DataFrame(X3).iloc[test_index]
        X4_train, X4_test = pd.DataFrame(X4).iloc[train_index], pd.DataFrame(X4).iloc[test_index]
        Y1_train, Y1_test = pd.DataFrame(y1).iloc[train_index],pd.DataFrame(y1).iloc[test_index]
        Y2_train, Y2_test = pd.DataFrame(y2).iloc[train_index],pd.DataFrame(y2).iloc[test_index]
        Y3_train, Y3_test = pd.DataFrame(y3).iloc[train_index],pd.DataFrame(y3).iloc[test_index]
        Y4_train, Y4_test = pd.DataFrame(y4).iloc[train_index],pd.DataFrame(y4).iloc[test_index]
        Y_train, Y_test = pd.DataFrame(y_true).iloc[train_index],pd.DataFrame(y_true).iloc[test_index]

        model_1.fit(X1_train, Y1_train,epochs = epochs, verbose=0, shuffle=True)
        model_2.fit(X2_train, Y2_train,epochs = 10, verbose=0, shuffle=True)
        model_3.fit(X3_train, Y3_train,epochs = 10, verbose=0, shuffle=True)
        model_4.fit(X4_train, Y4_train,epochs = 10, verbose=0, shuffle=True)
        prediction = (model_1.predict(X1_test).reshape(len(X1_test),)
                      +model_2.predict(X2_test).reshape(len(X2_test),)
                      +model_3.predict(X3_test).reshape(len(X3_test),)
                      +model_4.predict(X4_test).reshape(len(X4_test),))
        #print('--first raw prediction--:', prediction[0])
        #print('--first raw y_true:',y.values[0])
        dnn_rmse_scores.append(mean_squared_error(Y_test, prediction,squared=False))
        dnn_mae_scores.append(mean_absolute_error(Y_test, prediction))
        model_1.load_weights('model_1.h5')
        model_2.load_weights('model_2.h5')
        model_3.load_weights('model_3.h5')
        model_4.load_weights('model_4.h5')
        print(show_evaluation(prediction, Y_test))
        print('this fold was executed in ',(time.time() - start_time),'seconds')
        print(f'-------------------FOLD {i}-----------------')
        i+=1

    print('---------------CROSS VALIDATION COMPLETE-------------')
    print('--------------------------RMSE-----------------------')
    display_scores(dnn_rmse_scores)
    print('--------------------------MAE------------------------')
    display_scores(dnn_mae_scores)

In [15]:
def dnn_cross_validation_double(X, y_imfs,y_true, epochs, n_neurons):
    model_emd = build_model(X,n_neurons,15,'relu')
    model_recompose = build_model(y_imfs,n_neurons,1,'sigmoid')
    print(model_emd.summary())
    print(model_recompose.summary())
    model_emd.save_weights('model.h5')
    model_recompose.save_weights('model_recomp.h5')
    scaler_imfs = StandardScaler()
    y_imfs = scaler_imfs.fit_transform(y_imfs)
    scaler_X = MinMaxScaler(feature_range=(0,1))
    X = scaler_X.fit_transform(X)

    print('-----------DNN CROSS VALIDATION BEGINNING-----------')
    split = 3
    kf = KFold(n_splits=split, shuffle=True)       
    dnn_rmse_scores = []
    dnn_mae_scores = []
    i = 1
    for (train_index, test_index) in kf.split(pd.DataFrame(X), pd.DataFrame(y_true)):
        start_time = time.time()
        print('----Creating dataset----')
        X_train, X_test = pd.DataFrame(X).iloc[train_index], pd.DataFrame(X).iloc[test_index]
        Y_train, Y_test  = pd.DataFrame(y_true).iloc[train_index],pd.DataFrame(y_true).iloc[test_index]
        X_train_imfs, X_test_imfs = pd.DataFrame(y_imfs).iloc[train_index], pd.DataFrame(y_imfs).iloc[test_index]
        Y_train_imfs,Y_test_imfs = pd.DataFrame(y_imfs).iloc[train_index],pd.DataFrame(y_imfs).iloc[test_index]
        print('------First model-----')
        model_emd.fit(X_train, Y_train_imfs,epochs = epochs, verbose=0, shuffle=True)
        prediction_imfs = model_emd.predict(X_test).reshape(len(X_test),15)
        print('------Second model-----')
        model_recompose.fit(X_train_imfs,Y_train, epochs=epochs,verbose=0,shuffle=True)
        prediction_final = model_recompose.predict(prediction_imfs).reshape(len(X_test),)
        
        #print('--first raw prediction--:', prediction[0])
        #print('--first raw y_true:',y.values[0])
        
        dnn_rmse_scores.append(mean_squared_error(Y_test, prediction_final,squared=False))
        dnn_mae_scores.append(mean_absolute_error(Y_test, prediction_final))
        model_emd.load_weights('model.h5')
        model_recompose.load_weights('model_recomp.h5')
        print(show_evaluation(prediction_final, Y_test))
        print('this fold was executed in ',(time.time() - start_time),'seconds')
        print(f'-------------------FOLD {i}-----------------')
        i+=1

    print('---------------CROSS VALIDATION COMPLETE-------------')
    print('--------------------------RMSE-----------------------')
    display_scores(dnn_rmse_scores)
    print('--------------------------MAE------------------------')
    display_scores(dnn_mae_scores)

In [16]:
def dnn_cross_validation_sum(X, y_imfs,y_true, epochs, n_neurons):
    model_emd = build_model(X,n_neurons,15,'relu')
    #model_recompose = build_model(y_imfs,n_neurons,1,'sigmoid')
    print(model_emd.summary())
    #print(model_recompose.summary())
    model_emd.save_weights('model.h5')
    #model_recompose.save_weights('model_recomp.h5')
    #scaler_imfs = StandardScaler()
    #y_imfs = scaler_imfs.fit_transform(y_imfs)
    scaler_X = MinMaxScaler(feature_range=(-1,1))
    scaler_imfs = MinMaxScaler(feature_range=(-1,1))
    X = scaler_X.fit_transform(X)
    y_imfs = scaler_imfs.fit_transform(y_imfs)

    print('-----------DNN CROSS VALIDATION BEGINNING-----------')
    split = 3
    kf = KFold(n_splits=split, shuffle=True)       
    dnn_rmse_scores = []
    dnn_mae_scores = []
    i = 1
    for (train_index, test_index) in kf.split(pd.DataFrame(X), pd.DataFrame(y_true)):
        start_time = time.time()
        print('----Creating dataset----')
        X_train, X_test = pd.DataFrame(X).iloc[train_index], pd.DataFrame(X).iloc[test_index]
        Y_train, Y_test  = pd.DataFrame(y_true).iloc[train_index],pd.DataFrame(y_true).iloc[test_index]
        X_train_imfs, X_test_imfs = pd.DataFrame(y_imfs).iloc[train_index], pd.DataFrame(y_imfs).iloc[test_index]
        Y_train_imfs,Y_test_imfs = pd.DataFrame(y_imfs).iloc[train_index],pd.DataFrame(y_imfs).iloc[test_index]
        print('------First model-----')
        model_emd.fit(X_train, Y_train_imfs,epochs = epochs, verbose=0, shuffle=True)
        prediction_imfs = model_emd.predict(X_test).reshape(len(X_test),15)
        print('------combining results-----')
        #model_recompose.fit(X_train_imfs,Y_train, epochs=epochs,verbose=0,shuffle=True)
        prediction_final = scaler_imfs.inverse_transform(prediction_imfs).sum(axis=1).reshape(len(X_test),)
        
        #print('--first raw prediction--:', prediction[0])
        #print('--first raw y_true:',y.values[0])
        
        dnn_rmse_scores.append(mean_squared_error(Y_test, prediction_final,squared=False))
        dnn_mae_scores.append(mean_absolute_error(Y_test, prediction_final))
        model_emd.load_weights('model.h5')
        #model_recompose.load_weights('model_recomp.h5')
        print(show_evaluation(prediction_final, Y_test))
        print('this fold was executed in ',(time.time() - start_time),'seconds')
        print(f'-------------------FOLD {i}-----------------')
        i+=1

    print('---------------CROSS VALIDATION COMPLETE-------------')
    print('--------------------------RMSE-----------------------')
    display_scores(dnn_rmse_scores)
    print('--------------------------MAE------------------------')
    display_scores(dnn_mae_scores)

In [12]:
def vmd(y,k):
    
    #Intrinsic mode generation
     #Empirical Mode Decomposition
    #. some sample parameters for VMD  
    alpha = 1       # moderate bandwidth constraint  
    tau = 0.           # noise-tolerance (no strict fidelity enforcement)  
    K = k              # k modes  
    DC = 0             # no DC part imposed  
    init = 1           # initialize omegas uniformly  
    tol = 1e-7
    u, u_hat, omega = VMD(y,alpha, tau, K, DC, init, tol)
    df_vmfs = pd.DataFrame()
    #Integration in the dataframe
    for num, imf in enumerate(u):
        #print('----Creating VMFwp{0} EMD columns----'.format(num+1))
        df_vmfs['IMFwp{0}'.format(num+1)] = imf
    return df_vmfs

# WP1

## Data Prep + EMD

In [13]:
wp1_X = train_wp1[[c for c in train_wp1 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X1 = wp1_X.drop('wp', axis=1)
vmfs= vmd(wp1_X.wp,4)
#y1_1 = vmfs['IMFwp1']
#y1_2 = vmfs['IMFwp2']
#y1_3 = vmfs['IMFwp3']
#y1_4 = vmfs['IMFwp4']
y1_true = wp1_X['wp']

## Cross-Val Sums

In [17]:
#dnn_cross_validation_sum(X1,y1,y1_true,100,10)

## Cross_Val embedded models

In [18]:
#dnn_cross_validation_double(X1,y1,y1_true,100,1000)

## Cross-Val

In [49]:
#dnn_cross_validation(X1, y1, 50, 400,1,'relu')
#dnn_cross_validation(X1, y2, 10, 200,1,'relu')
#dnn_cross_validation(X1, y3, 10, 200,1,'relu')
#dnn_cross_validation(X1, y4, 10, 200,1,'relu')
#dnn_cross_validation(y1, y1_true, 2, 20,1,'sigmoid')
dnn_cross_validation_fourmodel(X1, y1_true,y1_1,y1_2,y1_3,y1_4, 50, 400,1,'relu')

-----------DNN CROSS VALIDATION BEGINNING-----------
RMSE score: 0.09732701013489164
MAE score: 0.07116780466721945
None
this fold was executed in  324.46967697143555 seconds
-------------------FOLD 1-----------------
RMSE score: 0.10018372870509974
MAE score: 0.07348860533351562
None
this fold was executed in  287.0926094055176 seconds
-------------------FOLD 2-----------------
RMSE score: 0.098730823994966
MAE score: 0.07186643187861211
None
this fold was executed in  294.97344493865967 seconds
-------------------FOLD 3-----------------
RMSE score: 0.09290383923958416
MAE score: 0.06615979616149946
None
this fold was executed in  290.0992343425751 seconds
-------------------FOLD 4-----------------
RMSE score: 0.12180670780430629
MAE score: 0.08879154508637434
None
this fold was executed in  304.54967522621155 seconds
-------------------FOLD 5-----------------
RMSE score: 0.10341936011223321
MAE score: 0.07768967296367633
None
this fold was executed in  293.27346563339233 seconds
----

# WP2

## Data Prep + EMD

In [14]:
wp2_X = train_wp2[[c for c in train_wp1 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X2 = wp2_X.drop('wp', axis=1)
vmfs_2= vmd(wp2_X.wp,4)
#y2_1 = vmfs_2['IMFwp1']
#y2_2 = vmfs_2['IMFwp2']
#y2_3 = vmfs_2['IMFwp3']
#y2_4 = vmfs_2['IMFwp4']
y2_true = wp2_X['wp']

## Cross-Val

In [54]:
#dnn_cross_validation(X2, y2, 10, 20,len(y2.columns),'relu')
#dnn_cross_validation(y2, y2_true, 10, 20,1,'sigmoid')
dnn_cross_validation_fourmodel(X2, y2_true,y2_1,y2_2,y2_3,y2_4, 50, 400,1,'relu')

-----------DNN CROSS VALIDATION BEGINNING-----------
RMSE score: 0.10114605284002343
MAE score: 0.07438661257283267
None
this fold was executed in  384.53218936920166 seconds
-------------------FOLD 1-----------------
RMSE score: 0.10819166171305307
MAE score: 0.08107454344962511
None
this fold was executed in  332.79622983932495 seconds
-------------------FOLD 2-----------------
RMSE score: 0.1112802208889649
MAE score: 0.08109208780179432
None
this fold was executed in  298.4663395881653 seconds
-------------------FOLD 3-----------------
RMSE score: 0.11345052087325462
MAE score: 0.08342394832102387
None
this fold was executed in  344.33134365081787 seconds
-------------------FOLD 4-----------------
RMSE score: 0.10168832071758209
MAE score: 0.07358053864845107
None
this fold was executed in  324.5699338912964 seconds
-------------------FOLD 5-----------------
RMSE score: 0.10315720100598738
MAE score: 0.07298497608898954
None
this fold was executed in  326.1068608760834 seconds
----

# WP3

## EMD

In [15]:
wp3_X = train_wp3[[c for c in train_wp3 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X3 = wp3_X.drop('wp', axis=1)
vmfs_3= vmd(wp3_X.wp,4)
#y3_1 = vmfs_3['IMFwp1']
#y3_2 = vmfs_3['IMFwp2']
#y3_3 = vmfs_3['IMFwp3']
#y3_4 = vmfs_3['IMFwp4']
y3_true = wp3_X['wp']

## Cross-Val

In [56]:
#dnn_cross_validation(X3, y3, 10, 20,len(y3.columns),'relu')
#dnn_cross_validation(y3, y3_true, 10, 20,1,'sigmoid')
dnn_cross_validation_fourmodel(X3, y3_true,y3_1,y3_2,y3_3,y3_4, 50, 400,1,'relu')

-----------DNN CROSS VALIDATION BEGINNING-----------
RMSE score: 0.08917313611192038
MAE score: 0.06614608783977911
None
this fold was executed in  355.3935639858246 seconds
-------------------FOLD 1-----------------
RMSE score: 0.09057487910469252
MAE score: 0.06635065834775432
None
this fold was executed in  315.4036018848419 seconds
-------------------FOLD 2-----------------
RMSE score: 0.0893588561414929
MAE score: 0.065267571627837
None
this fold was executed in  313.3716928958893 seconds
-------------------FOLD 3-----------------
RMSE score: 0.09053358803864875
MAE score: 0.06530221921298528
None
this fold was executed in  331.3442373275757 seconds
-------------------FOLD 4-----------------
RMSE score: 0.08850560978550209
MAE score: 0.06463404227907561
None
this fold was executed in  339.4361400604248 seconds
-------------------FOLD 5-----------------
RMSE score: 0.08756620818195278
MAE score: 0.06407043426580947
None
this fold was executed in  308.48728346824646 seconds
--------

# WP4

## EMD

In [16]:
wp4_X = train_wp4[[c for c in train_wp4 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X4 = wp4_X.drop('wp', axis=1)
vmfs_4= vmd(wp4_X.wp,4)
#y4_1 = vmfs_4['IMFwp1']
#y4_2 = vmfs_4['IMFwp2']
#y4_3 = vmfs_4['IMFwp3']
#y4_4 = vmfs_4['IMFwp4']
y4_true = wp4_X['wp']

## Cross-Val

In [58]:
#dnn_cross_validation(X4, y4, 10, 20,len(y4.columns),'relu')
#dnn_cross_validation(y4, y4_true, 10, 20,1,'sigmoid')
dnn_cross_validation_fourmodel(X4, y4_true,y4_1,y4_2,y4_3,y4_4, 50, 400,1,'relu')

-----------DNN CROSS VALIDATION BEGINNING-----------
RMSE score: 0.09394177222171435
MAE score: 0.06858893195669231
None
this fold was executed in  316.61136984825134 seconds
-------------------FOLD 1-----------------
RMSE score: 0.09549117068362831
MAE score: 0.0684493118514245
None
this fold was executed in  318.715779542923 seconds
-------------------FOLD 2-----------------
RMSE score: 0.0988361729935827
MAE score: 0.07497429145010169
None
this fold was executed in  308.10666942596436 seconds
-------------------FOLD 3-----------------
RMSE score: 0.09723660754495198
MAE score: 0.07238289809828405
None
this fold was executed in  305.4794225692749 seconds
-------------------FOLD 4-----------------
RMSE score: 0.09661359495802657
MAE score: 0.07098989148372596
None
this fold was executed in  305.7193353176117 seconds
-------------------FOLD 5-----------------
RMSE score: 0.09496139823725876
MAE score: 0.0699240349047061
None
this fold was executed in  308.2600562572479 seconds
--------

# WP5

## EMD

In [17]:
wp5_X = train_wp5[[c for c in train_wp5 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X5 = wp5_X.drop('wp', axis=1)
vmfs_5= vmd(wp5_X.wp,10)
#y5_1 = vmfs_5['IMFwp1']
#y5_2 = vmfs_5['IMFwp2']
#y5_3 = vmfs_5['IMFwp3']
#y5_4 = vmfs_5['IMFwp4']
y5_1imf = train_wp5_imfs['IMFwp1']
y5_true = wp5_X['wp']

## Cross-Val

In [87]:
#Architecture du modèle:
def build_model(data,n_neurons,output_shape,activation):
    model = Sequential()
    model.add(Dense(n_neurons,activation='relu',input_shape=(data.shape[1],)))
    model.add(Dropout(0.1))
    model.add(Dense(n_neurons/2,activation='relu',))
    model.add(Dropout(0.1))
    model.add(Dense(n_neurons/4,activation='relu'))
    #model.add(Dense(n_neurons/8,activation='relu'))
    
    model.add(Dense(output_shape,))

  

    model.compile(loss='mse',
                optimizer='rmsprop',
                metrics=[tf.keras.metrics.RootMeanSquaredError()])
    return model

In [49]:
alpha = 10000       # moderate bandwidth constraint  
tau = 0.           # noise-tolerance (no strict fidelity enforcement)  
K = 4              # k modes  
DC = 0             # no DC part imposed  
init = 1           # initialize omegas uniformly  
tol = 1e-7
vmfs_5= vmd(wp5_X.wp,K,alpha, tau,DC, init, tol)
y5_1 = vmfs_5['IMFwp1']

In [None]:
dnn_cross_validation(X5, y5_1, 5, 10,1,'relu')

In [47]:
#dnn_cross_validation(X5, y5_1, 50, 574,1,'relu')
#dnn_cross_validation(X5, train_wp5_imfs['IMFwp1'], 5, 10,1,'relu')
#dnn_cross_validation(X5, y5_2, 5, 20,1,'relu')
#dnn_cross_validation(X5, y5_3, 5, 20,1,'relu')
#dnn_cross_validation(X5, y5_4, 5, 20,1,'relu')
#dnn_cross_validation(y1, y1_true, 2, 20,1,'sigmoid')

In [25]:
#dnn_cross_validation(X5, y5, 10, 20,len(y5.columns),'relu')
#dnn_cross_validation(y5, y5_true, 10, 20,1,'sigmoid')
#dnn_cross_validation_fourmodel(X5, y5_true,y5_1,y5_2,y5_3,y5_4, 100, 1000,1,'relu')

Le nombre de mode ne semble pas avoir d'influence sur le RMSE (on peut se limiter à 3 ou 4) du premier mode (pas tester sur les autres, peut-être que lorsque K augmente les autres modes sont plus simples à prédire et on diminue l'erreur globale ?)

La structure du réseau de neurones n'a que peu d'influence sur le RMSE avec un nombre d'epochs petit, à 50 epoches l'ajout de deux couches à fait gagner 0,01 sur le RMSE.
Le nombre d'epochs lui oui.

Execution speed: VMD > EEMD.

Les RMSE sur les IMFs sont très bons (bien qu'indépendant du nombre de neurones et du nombre d'epochs -> Weird.) Il faut refaire un IMFs complet et entrainer 1 NN par IMFS (essayer avec des petits modèles 5 epochs et 10 neurones, une seule couche).
On peut tester aussi avec CEEMDAN et FFT.
Il faut tester aussi avec d'autres modèles comme XGBoost

# WP6

In [21]:
wp6_X = train_wp6[[c for c in train_wp6 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X6 = wp6_X.drop('wp', axis=1)
vmfs_6= vmd(wp6_X.wp,4)
y6_true = wp6_X['wp']

## EMD

In [18]:
wp6_X = train_wp6[[c for c in train_wp6 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X6 = wp6_X.drop('wp', axis=1)
vmfs_6= vmd(wp6_X.wp,4)
y6_1 = vmfs_6['IMFwp1']
y6_2 = vmfs_6['IMFwp2']
y6_3 = vmfs_6['IMFwp3']
y6_4 = vmfs_6['IMFwp4']
y6_true = wp6_X['wp']

## Cross-Val

In [19]:
dnn_cross_validation(X6, y6_1, 50, 400,1,'relu')
dnn_cross_validation(X6, y6_2, 10, 200,1,'relu')
dnn_cross_validation(X6, y6_3, 10, 200,1,'relu')
dnn_cross_validation(X6, y6_4, 10, 200,1,'relu')

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 400)               115200    
_________________________________________________________________
dense_17 (Dense)             (None, 200)               80200     
_________________________________________________________________
dense_18 (Dense)             (None, 100)               20100     
_________________________________________________________________
dense_19 (Dense)             (None, 1)                 101       
Total params: 215,601
Trainable params: 215,601
Non-trainable params: 0
_________________________________________________________________
None
-----------DNN CROSS VALIDATION BEGINNING-----------


KeyboardInterrupt: 

In [62]:
#dnn_cross_validation(X6, y6, 10, 20,len(y5.columns),'relu')
#dnn_cross_validation(y6, y6_true, 10, 20,1,'sigmoid')
dnn_cross_validation_fourmodel(X6, y6_true,y6_1,y6_2,y6_3,y6_4, 50, 400,1,'relu')

-----------DNN CROSS VALIDATION BEGINNING-----------
RMSE score: 0.08276164299818455
MAE score: 0.06172820880335714
None
this fold was executed in  392.608286857605 seconds
-------------------FOLD 1-----------------
RMSE score: 0.08320333372133203
MAE score: 0.062421739877188465
None
this fold was executed in  313.23662209510803 seconds
-------------------FOLD 2-----------------
RMSE score: 0.08200542877959022
MAE score: 0.06205025659468721
None
this fold was executed in  314.99518036842346 seconds
-------------------FOLD 3-----------------
RMSE score: 0.08355036744945708
MAE score: 0.06191393785543906
None
this fold was executed in  313.10744738578796 seconds
-------------------FOLD 4-----------------
RMSE score: 0.08836352208138551
MAE score: 0.06739736485704748
None
this fold was executed in  320.3291265964508 seconds
-------------------FOLD 5-----------------
RMSE score: 0.08005235471298426
MAE score: 0.05914774457766656
None
this fold was executed in  313.63411355018616 seconds
--

# Predictions

## Functions

In [19]:
to_drop_test = ['date','wd','forecast_time', 'forecast', "forecast_dist", 'wp']
def make_prediction_dataset(test, to_drop=to_drop_test):
    test_to_predict = test.dropna(subset=['ws','u','v'], how = 'any') # keeps only lines with u,v,ws,wd
    test_to_predict = test_to_predict[test_to_predict['wp'].isna()] # keeps only lines with no wp
    test_to_predict = test_to_predict.sort_values(by=['date', 'forecast_time'], ascending = [True, False]).drop_duplicates(subset='date')
    test_to_predict = test_to_predict.drop(to_drop, axis = 1)
    return test_to_predict

In [22]:
def make_predictions_IMFs(lst_X_trains, lst_y_trains_imfs, lst_tests, lst_models_imfs,epochs):
    i = 1
    lst_prediction = []
    for X, y, test, model in zip(lst_X_trains, lst_y_trains_imfs, lst_tests, lst_models_imfs):
        print(f'--------------Model {i}--------------')
        model.fit(X, y,epochs,verbose=0)
        print('-------Starting predictions of Imfs---------')
        predictions = model.predict(test).reshape(len(test),y.shape[1])
        print('-------Predictions finished, appending to list---------')
        lst_prediction.append(predictions)
        i +=1
    return lst_prediction
    

In [23]:
def make_submission_file(lst_X_trains, lst_y_trains, lst_tests, lst_models, dates,epochs):
    i = 1
    lst_prediction = []
    lst_models_trained = []
    for X, y, test, model in zip(lst_X_trains, lst_y_trains, lst_tests, lst_models):
        print(f'--------------Model {i}--------------')
        model.fit(X, y,epochs,verbose=0)
        print(f'True:\n\tMin:{min(y)}\n\tMax:{max(y)}\n\tMean:{y.mean()}')
        predictions = model.predict(test).reshape(len(test),)
        print(f'Prediction:\n\tMin:{min(predictions)}\n\tMax:{max(predictions)}\n\tMean:{np.mean(predictions)}')
        predictions = [min(y) if i < 0 else i for i in predictions]
        predictions = [max(y) if i > max(y) else i for i in predictions]
        print(f'Prediction corrected:\n\tMin:{min(predictions)}\n\tMax:{max(predictions)}\n\tMean:{np.mean(predictions)}')
        lst_prediction.append(predictions)
        lst_models_trained.append(model)
        i+=1
    
    df_predictions = pd.DataFrame({
        'date': test_dates,
        'wp1': lst_prediction[0],
        'wp2': lst_prediction[1],
        'wp3': lst_prediction[2],
        'wp4': lst_prediction[3],
        'wp5': lst_prediction[4],
        'wp6': lst_prediction[5],        
    })
    return df_predictions, lst_models_trained

In [125]:
def make_submission_file_vmfs(lst_X_trains, lst_y_trains, lst_tests, lst_models, dates,epochs,y_true):
    i = 1
    lst_prediction = []
    lst_models_trained = []
    for X, y, test, model in zip(lst_X_trains, lst_y_trains, lst_tests, lst_models):
        y_train_true = y_true[i-1]
        print('---------------Scaling---------------')
        scaler_X1=MinMaxScaler(feature_range=(y[0].min(),y[0].max()))
        scaler_X2=MinMaxScaler(feature_range=(y[1].min(),y[1].max()))
        scaler_X3=MinMaxScaler(feature_range=(y[2].min(),y[2].max()))
        scaler_X4=MinMaxScaler(feature_range=(y[3].min(),y[3].max()))
        print('---------------Scaling train---------------')
        X1=scaler_X1.fit_transform(X)
        X2=scaler_X2.fit_transform(X)
        X3=scaler_X3.fit_transform(X)
        X4=scaler_X4.fit_transform(X)
        print('---------------Scaling test---------------')
        test_1=scaler_X1.transform(test)
        test_2=scaler_X2.transform(test)
        test_3=scaler_X3.transform(test)
        test_4=scaler_X4.transform(test)      
        
        print(f'--------------Model {i}--------------')
        model[0].fit(X1, y[0],epochs,verbose=0,shuffle=True)
        model[1].fit(X2, y[1],5,verbose=0,shuffle=True)
        model[2].fit(X3, y[2],5,verbose=0,shuffle=True)
        model[3].fit(X4, y[3],5,verbose=0,shuffle=True)
        
        print(f'True:\n\tMin:{min(y_train_true)}\n\tMax:{max(y_train_true)}\n\tMean:{y_train_true.mean()}')
        predictions = (model[0].predict(test_1).reshape(len(test_1),)+
                      model[1].predict(test_2).reshape(len(test_2),)+
                      model[2].predict(test_3).reshape(len(test_3),)+
                      model[3].predict(test_4).reshape(len(test_4),))
        print(f'Prediction:\n\tMin:{min(predictions)}\n\tMax:{max(predictions)}\n\tMean:{np.mean(predictions)}')
        predictions = [min(y_train_true) if i < 0 else i for i in predictions]
        predictions = [max(y_train_true) if i > max(y_train_true) else i for i in predictions]
        print(f'Prediction corrected:\n\tMin:{min(predictions)}\n\tMax:{max(predictions)}\n\tMean:{np.mean(predictions)}')
        lst_prediction.append(predictions)
        lst_models_trained.append(model)
        i+=1
    
    df_predictions = pd.DataFrame({
        'date': test_dates,
        'wp1': lst_prediction[0],
        'wp2': lst_prediction[1],
        'wp3': lst_prediction[2],
        'wp4': lst_prediction[3],
        'wp5': lst_prediction[4],
        'wp6': lst_prediction[5],        
    })
    return df_predictions, lst_models_trained

## Submissions

In [26]:
#scaler_train_imfs_wp1 = StandardScaler()
#scaler_train_imfs_wp2 = StandardScaler()
#scaler_train_imfs_wp3 = StandardScaler()
#scaler_train_imfs_wp4 = StandardScaler()
#scaler_train_imfs_wp5 = StandardScaler()
#scaler_train_imfs_wp6 = StandardScaler()

In [27]:
#y1 = scaler_train_imfs_wp1.fit_transform(y1.values)
#y2 = scaler_train_imfs_wp2.fit_transform(y2.values)
#y3 = scaler_train_imfs_wp3.fit_transform(y3.values)
#y4 = scaler_train_imfs_wp4.fit_transform(y4.values)
#y5 = scaler_train_imfs_wp5.fit_transform(y5.values)
#y6 = scaler_train_imfs_wp6.fit_transform(y6.values)

### Data + Model WP1

In [126]:
model_1_1= build_model(X1,128,1,'relu')
model_1_2= build_model(X1,20,1,'relu')
model_1_3= build_model(X1,20,1,'relu')
model_1_4= build_model(X1,20,1,'relu')
lst_model_wp1=[model_1_1,model_1_2,model_1_3,model_1_4]
y_wp1=[]
for i in range(1,5):
    y_wp1.append(vmfs['IMFwp'+str(i)])


### Data + Model WP2

In [127]:
model_2_1= build_model(X2,128,1,'relu')
model_2_2= build_model(X2,20,1,'relu')
model_2_3= build_model(X2,20,1,'relu')
model_2_4= build_model(X2,20,1,'relu')
lst_model_wp2=[model_2_1,model_2_2,model_2_3,model_2_4]
y_wp2=[]
for i in range(1,5):
    y_wp2.append(vmfs_2['IMFwp'+str(i)])


### Data + Model WP3

In [128]:
model_3_1= build_model(X3,128,1,'relu')
model_3_2= build_model(X3,20,1,'relu')
model_3_3= build_model(X3,20,1,'relu')
model_3_4= build_model(X3,20,1,'relu')
lst_model_wp3=[model_3_1,model_3_2,model_3_3,model_3_4]
y_wp3=[]
for i in range(1,5):
    y_wp3.append(vmfs_3['IMFwp'+str(i)])


### Data + Model WP4

In [131]:
model_4_1= build_model(X4,128,1,'relu')
model_4_2= build_model(X4,20,1,'relu')
model_4_3= build_model(X4,20,1,'relu')
model_4_4= build_model(X4,20,1,'relu')
lst_model_wp4=[model_4_1,model_4_2,model_4_3,model_4_4]
y_wp4=[]
for i in range(1,5):
    y_wp4.append(vmfs_4['IMFwp'+str(i)])


### Data + Model WP5

In [132]:
model_5_1= build_model(X5,128,1,'relu')
model_5_2= build_model(X5,20,1,'relu')
model_5_3= build_model(X5,20,1,'relu')
model_5_4= build_model(X5,20,1,'relu')
lst_model_wp5=[model_5_1,model_5_2,model_5_3,model_5_4]
y_wp5=[]
for i in range(1,5):
    y_wp5.append(vmfs_5['IMFwp'+str(i)])


### Data + Model WP6

In [133]:
model_6_1= build_model(X6,128,1,'relu')
model_6_2= build_model(X6,20,1,'relu')
model_6_3= build_model(X6,20,1,'relu')
model_6_4= build_model(X6,20,1,'relu')
lst_model_wp6=[model_6_1,model_6_2,model_6_3,model_6_4]
y_wp6=[]
for i in range(1,5):
    y_wp6.append(vmfs_6['IMFwp'+str(i)])


In [134]:
lst_models=[lst_model_wp1,lst_model_wp2,lst_model_wp3,lst_model_wp4,lst_model_wp5,lst_model_wp6]
lst_X_trains = [X1, X2, X3, X4, X5, X6]
lst_y_trains = [y_wp1,y_wp2,y_wp3,y_wp4,y_wp5,y_wp6]
lst_y_trains_true = [y1_true, y2_true, y3_true, y4_true, y5_true, y6_true]

In [135]:
#model_1_imfs = build_model(X1,600,y1.shape[1],'relu')
#model_2_imfs = build_model(X2,600,y2.shape[1],'relu')
#model_3_imfs = build_model(X3,600,y3.shape[1],'relu')
#model_4_imfs = build_model(X4,600,y4.shape[1],'relu')
#model_5_imfs = build_model(X5,600,y5.shape[1],'relu')
#model_6_imfs = build_model(X6,600,y6.shape[1],'relu')
#model_1_true = build_model(y1,20,1,'sigmoid')
#model_2_true = build_model(y2,20,1,'sigmoid')
#model_3_true = build_model(y3,20,1,'sigmoid')
#model_4_true = build_model(y4,20,1,'sigmoid')
#model_5_true = build_model(y5,20,1,'sigmoid')
#model_6_true = build_model(y6,20,1,'sigmoid')

#lst_models_imfs = [model_1_imfs, model_2_imfs, model_3_imfs, model_4_imfs, model_5_imfs, model_6_imfs]
#lst_models = [model_1_true, model_2_true, model_3_true, model_4_true, model_5_true, model_6_true]
#lst_X_trains = [X1, X2, X3, X4, X5, X6]
#lst_y_trains_imfs = [y1, y2, y3, y4, y5, y6]
#lst_y_trains_true = [y1_true, y2_true, y3_true, y4_true, y5_true, y6_true]

In [136]:
lst_tests = []
for test in [test_wp1, test_wp2, test_wp3, test_wp4, test_wp5, test_wp6]:
    test = make_prediction_dataset(test)
    lst_tests.append(test)

In [137]:
epochs = 5

In [138]:
#lst_tests_imfs = make_predictions_IMFs(lst_X_trains,lst_y_trains_imfs,lst_tests,lst_models_imfs,epochs)

In [139]:
#df_predictions, lst_models_trained = make_submission_file(lst_y_trains_imfs, lst_y_trains_true, lst_tests_imfs, lst_models, test_dates,epochs)

In [140]:
df_predictions, lst_models_trained = make_submission_file_vmfs(lst_X_trains, lst_y_trains, lst_tests, lst_models, test_dates,epochs,lst_y_trains_true)

---------------Scaling---------------
---------------Scaling train---------------
---------------Scaling test---------------
--------------Model 1--------------
True:
	Min:0.0
	Max:0.96
	Mean:0.2845981952075702
Prediction:
	Min:-0.08959788829088211
	Max:1.033633828163147
	Mean:0.27683302760124207
Prediction corrected:
	Min:0.0
	Max:0.96
	Mean:0.2777666549098757
---------------Scaling---------------
---------------Scaling train---------------
---------------Scaling test---------------
--------------Model 2--------------
True:
	Min:0.0
	Max:0.966
	Mean:0.25890153769841273
Prediction:
	Min:-0.006351999007165432
	Max:0.9864106774330139
	Mean:0.22436754405498505
Prediction corrected:
	Min:0.0
	Max:0.966
	Mean:0.224358071494453
---------------Scaling---------------
---------------Scaling train---------------
---------------Scaling test---------------
--------------Model 3--------------
True:
	Min:0.0
	Max:0.989
	Mean:0.2625247252747253
Prediction:
	Min:-0.06661219149827957
	Max:1.47747480869

In [141]:
df_predictions.to_csv('Predictions/submission_nb_XX_full_4EEMDdnn.csv', index=False, sep=';')

In [142]:
df_predictions.head()

Unnamed: 0,date,wp1,wp2,wp3,wp4,wp5,wp6
0,2011010101,0.534261,0.357932,0.027795,0.456634,0.60927,0.412266
1,2011010102,0.518538,0.39827,0.042506,0.449256,0.60976,0.411037
2,2011010103,0.510604,0.405404,0.081804,0.44853,0.619996,0.411495
3,2011010104,0.520218,0.392948,0.163433,0.458774,0.639812,0.409357
4,2011010105,0.545196,0.370599,0.32133,0.477879,0.664969,0.404788
