In [2]:
import pandas as pd
import seaborn as sns
import openpyxl
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras.layers import Dense, Dropout,LSTM
from keras import regularizers
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from vmdpy import VMD

In [3]:
from sklearn.model_selection import KFold, train_test_split, cross_val_score

from sklearn.metrics import mean_squared_error, mean_absolute_error
#import optuna

In [4]:
from Functions.helper_functions import * 

In [5]:
import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)
pd.options.mode.chained_assignment = None  # default='warn'

In [6]:
np.random.seed(42)

# Data

In [7]:
train_wp1 = pd.read_csv('Data/Preprocessing/WP1_train_preprocessed.csv', sep=',')
train_wp2 = pd.read_csv('Data/Preprocessing/WP2_train_preprocessed.csv', sep=',')
train_wp3 = pd.read_csv('Data/Preprocessing/WP3_train_preprocessed.csv', sep=',')
train_wp4 = pd.read_csv('Data/Preprocessing/WP4_train_preprocessed.csv', sep=',')
train_wp5 = pd.read_csv('Data/Preprocessing/WP5_train_preprocessed.csv', sep=',')
train_wp6 = pd.read_csv('Data/Preprocessing/WP6_train_preprocessed.csv', sep=',')

In [8]:
test_wp1 = pd.read_csv('Data/Preprocessing/WP1_test_preprocessed.csv', sep=',')
test_wp2 = pd.read_csv('Data/Preprocessing/WP2_test_preprocessed.csv', sep=',')
test_wp3 = pd.read_csv('Data/Preprocessing/WP3_test_preprocessed.csv', sep=',')
test_wp4 = pd.read_csv('Data/Preprocessing/WP4_test_preprocessed.csv', sep=',')
test_wp5 = pd.read_csv('Data/Preprocessing/WP5_test_preprocessed.csv', sep=',')
test_wp6 = pd.read_csv('Data/Preprocessing/WP6_test_preprocessed.csv', sep=',')
test_dates = pd.read_csv('Data/Initial/test.csv', sep=',').date.values

In [9]:
to_drop = ['date','wd','forecast_time', 'forecast', "forecast_dist"]

# DNN

In [10]:
#Architecture du modèle:
def build_model(data,n_neurons):
    model = Sequential()
    model.add(Dense(n_neurons,activation='relu',input_shape=(data.shape[1],)))
    model.add(Dropout(0.1))
    model.add(Dense(n_neurons/2,activation='relu',))
    model.add(Dropout(0.1))
    model.add(Dense(n_neurons/4,activation='relu'))
    
    model.add(Dense(1,activation='sigmoid'))

  

    model.compile(loss='mse',
                optimizer='rmsprop',
                metrics=[tf.keras.metrics.RootMeanSquaredError()])
    return model

In [11]:
def dnn_cross_validation(X, y, epochs, n_neurons):
    model = build_model(X,n_neurons)
    print(model.summary())
    model.save_weights('model.h5')
    scaler_X = MinMaxScaler(feature_range=(0,1))
    X = scaler_X.fit_transform(X)

    print('-----------DNN CROSS VALIDATION BEGINNING-----------')
    split = 10
    kf = KFold(n_splits=split, shuffle=True)       
    dnn_rmse_scores = []
    dnn_mae_scores = []
    i = 1
    for (train_index, test_index) in kf.split(pd.DataFrame(X), pd.DataFrame(y)):
        X_train, X_test = pd.DataFrame(X).iloc[train_index], pd.DataFrame(X).iloc[test_index]
        Y_train, Y_test = pd.DataFrame(y).iloc[train_index],pd.DataFrame(y).iloc[test_index]

        model.fit(X_train, Y_train,epochs = epochs, verbose=0)

        prediction = model.predict(X_test)
        dnn_rmse_scores.append(mean_squared_error(Y_test, prediction,squared=False))
        dnn_mae_scores.append(mean_absolute_error(Y_test, prediction))
        model.load_weights('model.h5')
        print(show_evaluation(prediction, Y_test))
        print(f'-------------------FOLD {i}-----------------')
        i+=1

    print('---------------CROSS VALIDATION COMPLETE-------------')
    print('--------------------------RMSE-----------------------')
    display_scores(dnn_rmse_scores)
    print('--------------------------MAE------------------------')
    display_scores(dnn_mae_scores)

### WP1

In [11]:
wp1_X = train_wp1[[c for c in train_wp1 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X1 = wp1_X.drop('wp', axis=1)
y1 = wp1_X['wp']

In [12]:
#model = build_model(X1,10)
#model.fit(X1,y1)
#print(type((model.predict(X1).reshape(len(X1),))[0]))

In [15]:
dnn_cross_validation(X1, y1, 50, 10)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 10)                2880      
_________________________________________________________________
dropout_2 (Dropout)          (None, 10)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 5)                 55        
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 6         
Total params: 2,941
Trainable params: 2,941
Non-trainable params: 0
_________________________________________________________________
None
-----------DNN CROSS VALIDATION BEGINNING-----------
RMSE score: 0.13975077515672946
MAE score: 0.09968840078769046
None
-------------------FOLD 1-----------------
RMSE score: 0.14892016367015873
MAE score: 0.1049557400327382
None
--------------

KeyboardInterrupt: 

Results without reset neurons weight between fold

| |  | Mean | Std | Sum up |
| --- | --- | --- | --- | --- |
| 3 layers (n_neurons:128) - No reg - Epochs:5 | RMSE | 0.11387696817321147 |0.019360597643740048 |  |
| 3 layers (n_neurons:574) - No reg - Epochs:5| RMSE | 0.10581869444135347 | 0.016112412605112078
 | |
| 3 layers (n_neurons:574) - No reg - Epochs: 10| RMSE | 0.09500339274786188 | 0.014699457371691507 | |
| 3 layers (n_neurons:574) - No reg - Epochs: 20 | RMSE | 0.08470525575616475 | 0.01555594791796782 | |
| 3 layers (n_neurons:574) - No reg+metrics RMSE - Epochs: 20 | RMSE | 0.08584321871552428 | 0.014062441309249172 | |
| --- | --- | --- | --- | --- |
| 3 layers (n_neurons: 128) - No reg - Epochs:5 | MAE | 0.07759830346201885 | 0.014005305039506337 |  |
| 3 layers (n_neurons: 574) - No reg - Epochs:5| MAE | 0.07221003819608135 | 0.012180947255665464 |  |
| 3 layers (n_neurons: 574) - No reg - Epochs: 10| MAE | 0.06377131083192987 | 0.010754593723353358 |  |
| 3 layers (n_neurons: 574) - No reg - Epochs: 20 | MAE | 0.05672700625500034 | 0.011635978124601475 | |
| 3 layers (n_neurons: 574) - No reg+metrics RMSE - Epochs: 20 | MAE | 0.05729012621535744 | 0.009964560515202656 | |

With reset weights between folds

| |  | Mean | Std | Sum up |
| --- | --- | --- | --- | --- |
| 3 layers (n_neurons:574) - No reg - Epochs:50| RMSE | 0.09719135743992048 | 0.006571211892458601
 | |
| 3 layers (n_neurons:574) - No reg - Epochs:100| RMSE | 0.091|xx
 |Ca a commencé a sur entrainer, j'ai deux fold à 0.4! |
| --- | --- | --- | --- | --- |
| 3 layers (n_neurons:574) - No reg - Epochs:50| RMSE | 0.06359706250620521 | 0.004258178831029554
 | 

### WP2

In [20]:
wp2_X = train_wp2[[c for c in train_wp2 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X2 = wp2_X.drop('wp', axis=1)
y2 = wp2_X['wp']

In [17]:
dnn_cross_validation(X2, y2, 20, 574)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 574)               165312    
_________________________________________________________________
dense_5 (Dense)              (None, 287)               165025    
_________________________________________________________________
dense_6 (Dense)              (None, 143)               41184     
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 144       
Total params: 371,665
Trainable params: 371,665
Non-trainable params: 0
_________________________________________________________________
None
-----------DNN CROSS VALIDATION BEGINNING-----------
RMSE score: 0.14613982743404302
MAE score: 0.10543152598450538
None
-------------------FOLD 1-----------------
RMSE score: 0.11729049097874228
MAE score: 0.07952987583222648
None
---------

### WP3

In [21]:
wp3_X = train_wp3[[c for c in train_wp3 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X3 = wp3_X.drop('wp', axis = 1)
y3 = wp3_X['wp']

In [None]:
dnn_cross_validation(X3, y3, 20, 574)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 574)               165312    
_________________________________________________________________
dense_9 (Dense)              (None, 287)               165025    
_________________________________________________________________
dense_10 (Dense)             (None, 143)               41184     
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 144       
Total params: 371,665
Trainable params: 371,665
Non-trainable params: 0
_________________________________________________________________
None
-----------DNN CROSS VALIDATION BEGINNING-----------
RMSE score: 0.09520211679778731
MAE score: 0.06718830374526838
None
-------------------FOLD 1-----------------
RMSE score: 0.1210098617629066
MAE score: 0.08153099828326776
None
----------

### WP4

In [22]:
wp4_X = train_wp4[[c for c in train_wp4 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X4 = wp4_X.drop('wp', axis = 1)
y4 = wp4_X['wp']

In [None]:
dnn_cross_validation(X4, y4, 20, 574)

### WP5

In [23]:
wp5_X = train_wp5[[c for c in train_wp5 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X5 = wp5_X.drop('wp', axis = 1)
y5 = wp5_X['wp']

In [None]:
dnn_cross_validation(X5, y5, 20, 574)

### WP6

In [24]:
wp6_X = train_wp6[[c for c in train_wp6 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X6 = wp6_X.drop('wp', axis = 1)
y6 = wp6_X['wp']

In [25]:
dnn_cross_validation(X6, y6, 20, 574)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 574)               165312    
_________________________________________________________________
dropout_3 (Dropout)          (None, 574)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 287)               165025    
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 288       
Total params: 330,625
Trainable params: 330,625
Non-trainable params: 0
_________________________________________________________________
None
-----------DNN CROSS VALIDATION BEGINNING-----------


KeyboardInterrupt: 

# Predictions

## Functions

In [31]:
to_drop_test = ['date','wd','forecast_time', 'forecast', "forecast_dist", 'wp']
def make_prediction_dataset(test, to_drop=to_drop_test):
    test_to_predict = test.dropna(subset=['ws','u','v'], how = 'any') # keeps only lines with u,v,ws,wd
    test_to_predict = test_to_predict[test_to_predict['wp'].isna()] # keeps only lines with no wp
    test_to_predict = test_to_predict.sort_values(by=['date', 'forecast_time'], ascending = [True, False]).drop_duplicates(subset='date')
    test_to_predict = test_to_predict.drop(to_drop, axis = 1)
    scaler_X = MinMaxScaler(feature_range=(0,1))
    test_to_predict = scaler_X.fit_transform(test_to_predict)
    return test_to_predict

In [32]:
def make_submission_file(lst_X_trains, lst_y_trains, lst_tests, lst_models, dates,epochs):
    i = 1
    lst_prediction = []
    lst_models_trained = []
    for X, y, test, model in zip(lst_X_trains, lst_y_trains, lst_tests, lst_models):
        print(f'--------------Model {i}--------------')
        model.fit(X, y,epochs,verbose=0)
        print(f'True:\n\tMin:{min(y)}\n\tMax:{max(y)}\n\tMean:{y.mean()}')
        predictions = model.predict(test).reshape(len(test),)
        print(f'Prediction:\n\tMin:{min(predictions)}\n\tMax:{max(predictions)}\n\tMean:{np.mean(predictions)}')
        predictions = [min(y) if i < 0 else i for i in predictions]
        predictions = [max(y) if i > max(y) else i for i in predictions]
        print(f'Prediction corrected:\n\tMin:{min(predictions)}\n\tMax:{max(predictions)}\n\tMean:{np.mean(predictions)}')
        lst_prediction.append(predictions)
        lst_models_trained.append(model)
        i+=1
    
    df_predictions = pd.DataFrame({
        'date': test_dates,
        'wp1': lst_prediction[0],
        'wp2': lst_prediction[1],
        'wp3': lst_prediction[2],
        'wp4': lst_prediction[3],
        'wp5': lst_prediction[4],
        'wp6': lst_prediction[5],        
    })
    return df_predictions, lst_models_trained

## Submission

In [28]:
scaler_x1=MinMaxScaler(feature_range=(0,1))
scaler_x2=MinMaxScaler(feature_range=(0,1))
scaler_x3=MinMaxScaler(feature_range=(0,1))
scaler_x4=MinMaxScaler(feature_range=(0,1))
scaler_x5=MinMaxScaler(feature_range=(0,1))
scaler_x6=MinMaxScaler(feature_range=(0,1))

In [29]:
X1_scaled = scaler_x1.fit_transform(X1)
X2_scaled = scaler_x2.fit_transform(X2)
X3_scaled = scaler_x3.fit_transform(X3)
X4_scaled = scaler_x4.fit_transform(X4)
X5_scaled = scaler_x5.fit_transform(X5)
X6_scaled = scaler_x6.fit_transform(X6)

In [39]:
model_1 = build_model(X1,574)
model_2 = build_model(X2,574)
model_3 = build_model(X3,574)
model_4 = build_model(X4,574)
model_5 = build_model(X5,574)
model_6 = build_model(X6,574)

lst_models = [model_1, model_2, model_3, model_4, model_5, model_6]
lst_X_trains = [X1_scaled, X2_scaled, X3_scaled, X4_scaled, X5_scaled, X6_scaled]
lst_y_trains = [y1, y2, y3, y4, y5, y6]

In [33]:
lst_tests = []
for test in [test_wp1, test_wp2, test_wp3, test_wp4, test_wp5, test_wp6]:
    test = make_prediction_dataset(test)
    lst_tests.append(test)

In [44]:
epochs = 300

In [45]:
df_predictions, lst_models_trained = make_submission_file(lst_X_trains, lst_y_trains, lst_tests, lst_models, test_dates,epochs)

--------------Model 1--------------
True:
	Min:0.0
	Max:0.96
	Mean:0.2845981952075702
Prediction:
	Min:0.001212984323501587
	Max:0.9948259592056274
	Mean:0.3020660877227783
Prediction corrected:
	Min:0.001212984323501587
	Max:0.96
	Mean:0.3019307010784264
--------------Model 2--------------
True:
	Min:0.0
	Max:0.966
	Mean:0.25890153769841273
Prediction:
	Min:0.0015547573566436768
	Max:0.9997174739837646
	Mean:0.3030332028865814
Prediction corrected:
	Min:0.0015547573566436768
	Max:0.966
	Mean:0.3025516349773574
--------------Model 3--------------
True:
	Min:0.0
	Max:0.989
	Mean:0.2625247252747253
Prediction:
	Min:0.00033482909202575684
	Max:0.9846994876861572
	Mean:0.23501531779766083
Prediction corrected:
	Min:0.00033482909202575684
	Max:0.9846994876861572
	Mean:0.23501531779766083
--------------Model 4--------------
True:
	Min:0.0
	Max:0.992
	Mean:0.2763637820512821
Prediction:
	Min:0.0023966431617736816
	Max:0.9926393032073975
	Mean:0.40357404947280884
Prediction corrected:
	Min:0.0

In [46]:
df_predictions.to_csv('Predictions/submission_nb_32_full_dnn.csv', index=False, sep=';')

In [43]:
df_predictions.head()

Unnamed: 0,date,wp1,wp2,wp3,wp4,wp5,wp6
0,2011010101,0.28352,0.340265,0.011366,0.74842,0.220975,0.288126
1,2011010102,0.279755,0.368738,0.01996,0.7401,0.212943,0.290689
2,2011010103,0.27829,0.376938,0.04131,0.73447,0.207613,0.295307
3,2011010104,0.283313,0.367728,0.103884,0.739035,0.209091,0.300454
4,2011010105,0.298197,0.346064,0.196782,0.751766,0.215435,0.299504


In [60]:
type(df_predictions["wp1"][0])

numpy.float64

In [30]:
## Saving models

In [31]:
pkl_model = "Models/DNN/DNN-wp1-3layers-574neurons.pkl"
with open(pkl_model, 'wb') as file:
    pickle.dump(lst_models_trained[0], file)
    
    
pkl_model = "Models/DNN/DNN-wp2-3layers-574neurons.pkl"
with open(pkl_model, 'wb') as file:
    pickle.dump(lst_models_trained[1], file)
    

pkl_model = "Models/DNN/DNN-wp3-3layers-574neurons.pkl"
with open(pkl_model, 'wb') as file:
    pickle.dump(lst_models_trained[2], file)


pkl_model = "Models/DNN/DNN-wp4-3layers-574neurons.pkl"
with open(pkl_model, 'wb') as file:
    pickle.dump(lst_models_trained[3], file)


pkl_model = "Models/DNN/DNN-wp5-3layers-574neurons.pkl"
with open(pkl_model, 'wb') as file:
    pickle.dump(lst_models_trained[4], file)


pkl_model = "Models/DNN/DNN-wp6-3layers-574neurons.pkl"
with open(pkl_model, 'wb') as file:
    pickle.dump(lst_models_trained[5], file)

TypeError: cannot pickle 'weakref' object