# LGBM Model development - CV one model per fold - optim NelderMead

In [3]:
import pandas as pd
import seaborn as sns
import openpyxl
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import time

In [4]:
from sklearn.model_selection import KFold, train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error
import optuna
from vmdpy import VMD

In [5]:
from scipy.optimize import minimize

In [6]:
from Functions.helper_functions import * 

In [7]:
import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)
pd.options.mode.chained_assignment = None  # default='warn'

In [8]:
np.random.seed(42)

In [9]:
from Functions.preprocessing import *

# Data

In [10]:
train_wp1 = pd.read_csv('Data/Preprocessing/WP1_train_preprocessed.csv', sep=',')
train_wp2 = pd.read_csv('Data/Preprocessing/WP2_train_preprocessed.csv', sep=',')
train_wp3 = pd.read_csv('Data/Preprocessing/WP3_train_preprocessed.csv', sep=',')
train_wp4 = pd.read_csv('Data/Preprocessing/WP4_train_preprocessed.csv', sep=',')
train_wp5 = pd.read_csv('Data/Preprocessing/WP5_train_preprocessed.csv', sep=',')
train_wp6 = pd.read_csv('Data/Preprocessing/WP6_train_preprocessed.csv', sep=',')

In [11]:
test_wp1 = pd.read_csv('Data/Preprocessing/WP1_test_preprocessed.csv', sep=',')
test_wp2 = pd.read_csv('Data/Preprocessing/WP2_test_preprocessed.csv', sep=',')
test_wp3 = pd.read_csv('Data/Preprocessing/WP3_test_preprocessed.csv', sep=',')
test_wp4 = pd.read_csv('Data/Preprocessing/WP4_test_preprocessed.csv', sep=',')
test_wp5 = pd.read_csv('Data/Preprocessing/WP5_test_preprocessed.csv', sep=',')
test_wp6 = pd.read_csv('Data/Preprocessing/WP6_test_preprocessed.csv', sep=',')
test_dates = pd.read_csv('Data/Initial/test.csv', sep=',').date.values

In [12]:
train_wp1_vmf = pd.read_csv('Data/Preprocessing/WP1_train_Signal.csv', sep=',')
train_wp2_vmf = pd.read_csv('Data/Preprocessing/WP2_train_Signal.csv', sep=',')
train_wp3_vmf = pd.read_csv('Data/Preprocessing/WP3_train_Signal.csv', sep=',')
train_wp4_vmf = pd.read_csv('Data/Preprocessing/WP4_train_Signal.csv', sep=',')
train_wp5_vmf = pd.read_csv('Data/Preprocessing/WP5_train_Signal.csv', sep=',')
train_wp6_vmf = pd.read_csv('Data/Preprocessing/WP6_train_Signal.csv', sep=',')

In [13]:
to_drop = ['date','wd','forecast_time', 'forecast', "forecast_dist"]

In [14]:
u_to_drop = [
    'u_T_1', 'u_T_2', 'u_T_3', 'u_T_4', 'u_T_5', 'u_T_6', 
    'u_T_2_mean', 'u_T_3_mean', 'u_T_4_mean', 'u_T_5_mean', 'u_T_6_mean', 'u_T_7_mean',
    'u_T_8_mean', 'u_T_9_mean', 'u_T_10_mean', 'u_T_11_mean', 'u_T_12_mean','u_T_24_mean',
    'u_T_2_std', 'u_T_4_std', 'u_T_5_std', 'u_T_6_std',
    'u_T_2_median', 'u_T_3_median', 'u_T_4_median', 'u_T_5_median', 'u_T_6_median', 'u_T_12_median','u_T_24_median', 'u_T_36_median',
    'u_T_2_max', 'u_T_3_max', 'u_T_4_max', 'u_T_5_max', 'u_T_6_max', 'u_T_12_max',
    'u_T_2_min', 'u_T_3_min', 'u_T_4_min', 'u_T_5_min', 'u_T_6_min', 'u_T_12_min',
    'u2_T_1', 'u2_T_2', 'u2_T_3', 'u2_T_4', 'u2_T_5', 'u2_T_6', 
    'u2_T_2_mean', 'u2_T_3_mean', 'u2_T_4_mean', 'u2_T_5_mean', 'u2_T_6_mean', 'u2_T_7_mean',
    'u2_T_8_mean', 'u2_T_9_mean', 'u2_T_10_mean', 'u2_T_11_mean', 'u2_T_12_mean','u2_T_24_mean',
    'u2_T_2_std', 'u2_T_4_std', 'u2_T_5_std', 'u2_T_6_std', 'u2_T_24_std',
    'u2_T_2_median', 'u2_T_3_median', 'u2_T_4_median', 'u2_T_5_median', 'u2_T_6_median', 'u2_T_12_median',
    'u2_T_2_max','u2_T_3_max', 'u2_T_4_max','u2_T_5_max', 'u2_T_6_max', 'u2_T_12_max',
    'u2_T_2_min', 'u2_T_3_min', 'u2_T_4_min', 'u2_T_5_min', 'u2_T_6_min',
    'u2_T_12', 'u2_T_36_mean', 'u2_T_36_std', 'u2_T_24_median', 'u2_T_24_max',
    'u_T_36_mean','u_T_12','u_T_24_max','u2_T_36_median','u_T_24_min'
]
ws_to_drop = [
    'ws_T_1', 'ws_T_2', 'ws_T_3', 'ws_T_4', 'ws_T_5', 'ws_T_6', 'ws_T_7', 'ws_T_8', 'ws_T_10','ws_T_11', 'ws_T_12',
    'ws_T_2_mean', 'ws_T_3_mean', 'ws_T_4_mean', 'ws_T_5_mean', 'ws_T_6_mean', 'ws_T_7_mean', 'ws_T_8_mean', 'ws_T_9_mean', 
    'ws_T_10_mean', 'ws_T_11_mean', 'ws_T_12_mean', 'ws_T_24_mean', 
    'ws_T_2_std', 'ws_T_3_std', 'ws_T_4_std', 'ws_T_5_std', 
    'ws_T_2_median', 'ws_T_3_median', 'ws_T_4_median', 'ws_T_5_median', 'ws_T_6_median',
    'ws_T_12_median', 'ws_T_24_median', 'ws_T_36_median',
    'ws_T_2_max', 'ws_T_3_max', 'ws_T_4_max', 'ws_T_5_max','ws_T_6_max', 'ws_T_12_max',
     'ws_T_2_min', 'ws_T_3_min', 'ws_T_4_min', 'ws_T_5_min', 'ws_T_6_min', 'ws_T_12_min','ws_T_24_max','ws_T_24_min'
]

v_to_drop = [
    'v_T_1', 'v_T_2', 'v_T_3', 'v_T_4', 'v_T_5', 'v_T_6', 
    'v_T_2_mean', 'v_T_3_mean', 'v_T_4_mean', 'v_T_5_mean', 'v_T_6_mean', 'v_T_7_mean',
    'v_T_8_mean', 'v_T_9_mean', 'v_T_10_mean', 'v_T_11_mean', 'v_T_12_mean', 'v_T_24_mean','v_T_36_mean',
    'v_T_3_std', 'v_T_4_std', 'v_T_5_std','v_T_6_std','v_T_24_std', 'v_T_36_median',
    'v_T_2_median', 'v_T_3_median', 'v_T_4_median', 'v_T_5_median', 'v_T_6_median', 
    'v_T_2_max', 'v_T_3_max', 'v_T_4_max', 'v_T_5_max', 'v_T_6_max', 'v_T_12_max', 
    'v_T_2_min', 'v_T_3_min', 'v_T_4_min', 'v_T_5_min', 'v_T_6_min', 'v_T_12_min', 
    'v_T_36_min', 'v_T_36', 'v_T_24_max',  'v_T_12_median', 'v_T_24_median',
]

wd_to_drop = [
    'coswd_1', 'coswd_2', 'coswd_3', 'coswd_4', 'coswd_5', 'coswd_6',
    'coswd_2_mean', 'coswd_3_mean', 'coswd_4_mean', 'coswd_5_mean', 'coswd_6_mean', 'coswd_7_mean', 
    'coswd_8_mean', 'coswd_9_mean', 'coswd_10_mean', 'coswd_11_mean', 'coswd_12_mean', 'coswd_24_mean', 
    'coswd_3_std', 'coswd_4_std','coswd_5_std','coswd_2_median', 'coswd_3_median','coswd_4_median', 
    'coswd_5_median', 'coswd_6_median', 'coswd_36_median', 'coswd_24_median', 'coswd_12_median',
    'coswd_2_max', 'coswd_3_max', 'coswd_4_max', 'coswd_5_max', 'coswd_6_max', 'coswd_12_max', 'coswd_24_max',
    'coswd_2_min', 'coswd_3_min', 'coswd_4_min', 'coswd_5_min', 'coswd_6_min', 'coswd_12_min', 'coswd_24_min',
    'ws_T_36_max', 'ws_T_36_min', 'coswd_12', 'coswd_24'
]

other_to_drop = [
    'cos_day', 'u', 'v'
]

feature_corr = u_to_drop+ws_to_drop+v_to_drop+wd_to_drop+other_to_drop
#to_drop = to_drop+feature_corr

# LGBM functions

In [15]:
from lightgbm import LGBMRegressor

In [16]:
def create_dataset(data,n,split):
    n_batch=int(len(data)/84)
    new_data=np.array_split(data,n_batch)
    train = pd.DataFrame()
    val=pd.DataFrame()
    for i in range(n_batch):
        if (i+n)%split ==0:
            val = pd.concat([val,new_data[i]])
        else:
            train=pd.concat([train,new_data[i]])
    return train,val

In [17]:
def create_lst_dataset(x,y,cv):
    lst_X=[]
    lst_Y=[]
    split=cv
    for n in range(cv):
        print('-----Creating {0} Xs-----'.format(n+1))
        X_train,X_test=create_dataset(data=x,n=n,split=split)
        lst_X.append(X_train)
        print('-----Creating {0} Ys-----'.format(n+1))
        Y_train,Y_test=create_dataset(data=y,n=n,split=split)
        lst_Y.append(Y_train)
        try:
            len(X_train)==len(Y_train)
        except:
            print('/!\ lengh non identicale')
    return lst_X, lst_Y

In [18]:
def create_lst_lst_dataset(x,y,cv):
    lst_dataset_Y=[]
    lst_dataset_X=[]
    i=0
    for x,y in zip(x,y):
        start_time = time.time()
        

        print('----Start Creating {0} dataset list----'.format(i+1))
        X_train,Y_train=create_lst_dataset(x=x,y=y,cv=cv)
        print('--------Appending-----')
        lst_dataset_X.append(X_train)
        lst_dataset_Y.append(Y_train)
        print('-----completed round {0}'.format(i+1))
        i+=1
        print("--- %s seconds ---" % (time.time() - start_time))
    return lst_dataset_X,lst_dataset_Y

In [19]:
def create_lst_model(cv,n_estimators):
    model_1=[]
    model_2=[]
    model_3=[]
    model_4=[]
    model_5=[]
    model_6=[]
    for i in range(cv):
        model=Pipeline([('scaler', MaxAbsScaler()),('ridge', LGBMRegressor(num_iterations=n_estimators,n_jobs=-1))])
        model_1+=[model]
        model_2+=[model]
        model_3+=[model]
        model_4+=[model]
        model_5+=[model]
        model_6+=[model]
    return [model_1,model_2,model_3,model_4,model_5,model_6]

# DATA

In [20]:
wp1_X = train_wp1[[c for c in train_wp1 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X1 = wp1_X.drop('wp', axis=1)
y1 = wp1_X['wp']

In [21]:
wp2_X = train_wp2[[c for c in train_wp2 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X2 = wp2_X.drop('wp', axis=1)
y2 = wp2_X['wp']

In [22]:
wp3_X = train_wp3[[c for c in train_wp3 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X3 = wp3_X.drop('wp', axis = 1)
y3 = wp3_X['wp']


In [23]:
wp4_X = train_wp4[[c for c in train_wp4 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X4 = wp4_X.drop('wp', axis = 1)
y4 = wp4_X['wp']

In [24]:
wp5_X = train_wp5[[c for c in train_wp5 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X5 = wp5_X.drop('wp', axis = 1)
y5 = wp5_X['wp']

In [25]:
wp6_X = train_wp6[[c for c in train_wp6 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X6 = wp6_X.drop('wp', axis = 1)
y6 = wp6_X['wp']

# Creating the 8 dataset per WP

In [26]:
lst_X1_train, lst_y1_train,lst_X1_test,lst_y1_test =  splitting_train_test_forecast(df_wp=train_wp1)
lst_y1_vmf_train,lst_y1_vmf_test = splitting_train_test_vmd(df_wp=train_wp1_vmf)

In [27]:
for i in range(len(lst_X1_train)):
    lst_X1_train[i] = lst_X1_train[i].drop(to_drop,axis=1)
    lst_X1_test[i] = lst_X1_test[i].drop(to_drop,axis=1)

In [28]:
lst_X2_train, lst_y2_train,lst_X2_test,lst_y2_test =  splitting_train_test_forecast(df_wp=train_wp2)
lst_y2_vmf_train,lst_y2_vmf_test = splitting_train_test_vmd(df_wp=train_wp2_vmf)

In [29]:
for i in range(len(lst_X1_train)):
    lst_X2_train[i] = lst_X2_train[i].drop(to_drop,axis=1)
    lst_X2_test[i] = lst_X2_test[i].drop(to_drop,axis=1)

In [30]:
lst_X3_train, lst_y3_train,lst_X3_test,lst_y3_test =  splitting_train_test_forecast(df_wp=train_wp3)
lst_y3_vmf_train,lst_y3_vmf_test = splitting_train_test_vmd(df_wp=train_wp3_vmf)

In [31]:
for i in range(len(lst_X3_train)):
    lst_X3_train[i] = lst_X3_train[i].drop(to_drop,axis=1)
    lst_X3_test[i] = lst_X3_test[i].drop(to_drop,axis=1)

In [32]:
lst_X4_train, lst_y4_train,lst_X4_test,lst_y4_test =  splitting_train_test_forecast(df_wp=train_wp4)
lst_y4_vmf_train,lst_y4_vmf_test = splitting_train_test_vmd(df_wp=train_wp4_vmf)

In [33]:
for i in range(len(lst_X4_train)):
    lst_X4_train[i] = lst_X4_train[i].drop(to_drop,axis=1)
    lst_X4_test[i] = lst_X4_test[i].drop(to_drop,axis=1)

In [34]:
lst_X5_train, lst_y5_train,lst_X5_test,lst_y5_test =  splitting_train_test_forecast(df_wp=train_wp5)
lst_y5_vmf_train,lst_y5_vmf_test = splitting_train_test_vmd(df_wp=train_wp5_vmf)

In [35]:
for i in range(len(lst_X5_train)):
    lst_X5_train[i] = lst_X5_train[i].drop(to_drop,axis=1)
    lst_X5_test[i] = lst_X5_test[i].drop(to_drop,axis=1)

In [36]:
lst_X6_train, lst_y6_train,lst_X6_test,lst_y6_test =  splitting_train_test_forecast(df_wp=train_wp6)
lst_y6_vmf_train,lst_y6_vmf_test = splitting_train_test_vmd(df_wp=train_wp6_vmf)

In [37]:
for i in range(len(lst_X6_train)):
    lst_X6_train[i] = lst_X6_train[i].drop(to_drop,axis=1)
    lst_X6_test[i] = lst_X6_test[i].drop(to_drop,axis=1)

In [51]:
def create_lst_model(cv,n_estimators):
    model_1=[]
    model_2=[]
    model_3=[]
    model_4=[]
    model_5=[]
    model_6=[]
    model_1_vmf=[]
    model_2_vmf=[]
    model_3_vmf=[]
    model_4_vmf=[]
    model_5_vmf=[]
    model_6_vmf=[]
    
    for i in range(cv):
        model=Pipeline([('scaler', RobustScaler()),('ridge', LGBMRegressor(num_iterations=n_estimators,n_jobs=-1))])
        model_1+=[model]
        model_2+=[model]
        model_3+=[model]
        model_4+=[model]
        model_5+=[model]
        model_6+=[model]
    for i in range(4):
        model_1_vmf.append(model_1)
        model_2_vmf.append(model_2)
        model_3_vmf.append(model_3)
        model_4_vmf.append(model_4)
        model_5_vmf.append(model_5)
        model_6_vmf.append(model_6)
    return [model_1_vmf,model_2_vmf,model_3_vmf,model_4_vmf,model_5_vmf,model_6_vmf]

In [39]:
lst_X = [X1, X2, X3, X4, X5, X6]
lst_Y = [y1, y2, y3, y4, y5, y6]

In [52]:
lst_model = create_lst_model(cv=8,n_estimators=50)

In [59]:
lst_X_trains_split = [lst_X1_train,lst_X2_train,lst_X3_train,lst_X4_train,lst_X5_train,lst_X6_train]
lst_Y_trains_split = [lst_y1_train,lst_y2_train,lst_y3_train,lst_y4_train,lst_y5_train,lst_y6_train]
lst_Y_trains_vmf = [lst_y1_vmf_train,lst_y2_vmf_train,lst_y3_vmf_train,lst_y4_vmf_train,lst_y5_vmf_train,lst_y6_vmf_train]

# WIP

In [63]:
lst_predictions_all=[]
t=1
for model,x_all,x,y in zip(lst_model,lst_X,lst_X_trains_split,lst_Y_trains_vmf):
    lst_predictions_wp=[]
    for n  in range(4):
        lst_predictions=[]
        #print(t)
        for i in range(8):
            print('wpn°',t,'vmf n°',n,'set n°',i)
            clf=model[n][i]
            clf.fit(x[i],y[n][i])
            lst_predictions.append(clf.predict(x_all))
        lst_predictions_wp.append(lst_predictions)
    lst_predictions_all.append(lst_predictions_wp)
    t+=1

wpn° 1 vmf n° 0 set n° 0
wpn° 1 vmf n° 0 set n° 1
wpn° 1 vmf n° 0 set n° 2
wpn° 1 vmf n° 0 set n° 3
wpn° 1 vmf n° 0 set n° 4
wpn° 1 vmf n° 0 set n° 5
wpn° 1 vmf n° 0 set n° 6
wpn° 1 vmf n° 0 set n° 7
wpn° 1 vmf n° 1 set n° 0
wpn° 1 vmf n° 1 set n° 1
wpn° 1 vmf n° 1 set n° 2
wpn° 1 vmf n° 1 set n° 3
wpn° 1 vmf n° 1 set n° 4
wpn° 1 vmf n° 1 set n° 5
wpn° 1 vmf n° 1 set n° 6
wpn° 1 vmf n° 1 set n° 7
wpn° 1 vmf n° 2 set n° 0
wpn° 1 vmf n° 2 set n° 1
wpn° 1 vmf n° 2 set n° 2
wpn° 1 vmf n° 2 set n° 3
wpn° 1 vmf n° 2 set n° 4
wpn° 1 vmf n° 2 set n° 5
wpn° 1 vmf n° 2 set n° 6
wpn° 1 vmf n° 2 set n° 7
wpn° 1 vmf n° 3 set n° 0
wpn° 1 vmf n° 3 set n° 1
wpn° 1 vmf n° 3 set n° 2
wpn° 1 vmf n° 3 set n° 3
wpn° 1 vmf n° 3 set n° 4
wpn° 1 vmf n° 3 set n° 5
wpn° 1 vmf n° 3 set n° 6
wpn° 1 vmf n° 3 set n° 7
wpn° 2 vmf n° 0 set n° 0
wpn° 2 vmf n° 0 set n° 1
wpn° 2 vmf n° 0 set n° 2
wpn° 2 vmf n° 0 set n° 3
wpn° 2 vmf n° 0 set n° 4
wpn° 2 vmf n° 0 set n° 5
wpn° 2 vmf n° 0 set n° 6
wpn° 2 vmf n° 0 set n° 7


In [None]:
print(np.shape(lst_predictions_all))

(6, 4, 8, 52416)


In [79]:
lst_Y_vmf=[train_wp1_vmf,train_wp2_vmf,train_wp3_vmf,train_wp4_vmf,train_wp5_vmf,train_wp6_vmf]

In [86]:
weights_all=[]


for pred,y in zip(lst_predictions_all,lst_Y_vmf):
    weights_wp=[]
    for n in y:
        t=0
        weights=[]
        for i in range(8):
            weights.append(0)
        def mae_func(weights):
            #final_prediction=0
            for i in range(len(weights)):
                if i==0:
                    vmf_prediction = weights[i]*pred[t][i]
                else:
                    vmf_prediction += weights[i]*pred[t][i]
            return mean_absolute_error(y[n], vmf_prediction)
        res = minimize(mae_func, weights, method='Nelder-Mead')
        weights_wp.append(res['x'])
        t+=1
    weights_all.append(weights_wp)
        
    

In [None]:
np.shape(weights_all)

In [121]:
weights_all[6]

[array([ 0.16152797,  0.15239149,  0.27934466, -0.04361564,  0.14972733,
         0.28971335, -0.0020108 ,  0.03745717]),
 array([-0.00965762,  0.00750477,  0.00623397,  0.00347515,  0.00301775,
         0.01089148, -0.01031694, -0.00373615]),
 array([-0.05969936,  0.03095898,  0.00498088,  0.00423984,  0.02918677,
         0.01444835, -0.02558098,  0.01725592]),
 array([-0.00302778,  0.00416696,  0.00018796, -0.00036801,  0.00105584,
        -0.00420941,  0.00040992,  0.00125723])]

In [88]:
to_drop_test = ['date','wd','forecast_time', 'forecast', "forecast_dist", 'wp']#+feature_corr
def make_prediction_dataset(test, to_drop=to_drop_test):
    test_to_predict = test.dropna(subset=['ws','u','v'], how = 'any') # keeps only lines with u,v,ws,wd
    test_to_predict = test_to_predict[test_to_predict['wp'].isna()] # keeps only lines with no wp
    test_to_predict = test_to_predict.sort_values(by=['date', 'forecast_time'], ascending = [True, False]).drop_duplicates(subset='date')
    test_to_predict = test_to_predict.drop(to_drop, axis = 1)
    return test_to_predict

In [89]:
lst_tests = []
for test in [test_wp1, test_wp2, test_wp3, test_wp4, test_wp5, test_wp6]:
    test = make_prediction_dataset(test)
    lst_tests.append(test)

In [98]:
test_lst = [[1,2],[1,2],[1,2],[1,2]]
np.sum(lst_Y_vmf[0],axis=1), np.sum(test_lst,axis=1)

(0       -0.000561
 1        0.000560
 2       -0.000524
 3        0.000538
 4        0.024409
            ...   
 52411    0.884414
 52412    0.883417
 52413    0.859542
 52414    0.832535
 52415    0.833492
 Length: 52416, dtype: float64,
 array([3, 3, 3, 3]))

In [103]:
lst_final_prediction=[]
for weights,model,test,x_train,y_train in zip(weights_all,lst_model,lst_tests,lst_X_trains_split,lst_Y_trains_vmf):
    lst_wp_prediction=[]
    for t in range(4):
        #lst_vmf_prediction=[]
        for i in range(8):
            clf=model[t][i]
            clf.fit(x_train[i],y_train[t][i])
            if i==0:
                y_pred=clf.predict(test)*weights[t][i]
            else:
                y_pred+=clf.predict(test)*weights[t][i]
        lst_wp_prediction.append(y_pred)
    lst_final_prediction.append(np.sum(lst_wp_prediction,axis=0))
    
        

In [104]:
np.shape(lst_final_prediction)

(6, 7440)

In [106]:
len(lst_tests[0])

7440

In [105]:
lst_final_prediction[0]

array([0.64193229, 0.62571231, 0.62381889, ..., 0.11791451, 0.09170484,
       0.12799395])

In [107]:
df_predictions = pd.DataFrame({
        'date': test_dates,
        'wp1': lst_final_prediction[0],
        'wp2': lst_final_prediction[1],
        'wp3': lst_final_prediction[2],
        'wp4': lst_final_prediction[3],
        'wp5': lst_final_prediction[4],
        'wp6': lst_final_prediction[5],        
    })

In [108]:
nb_sub = 46
model = "vmdlgm_7models_traintestsplit"
prepro = 'RobustScaler'
postpro = "Prediction limited by 0-1"

In [109]:
# df_predictions.to_csv('Predictions/submission_nb_10_full_maxabs-lgbm-featselect.csv', index=False, sep=';')
df_predictions.to_csv(f'Predictions/submission_nb_{nb_sub}_{model}.csv', index=False, sep=';')

In [111]:
df_predictions.tail()

Unnamed: 0,date,wp1,wp2,wp3,wp4,wp5,wp6
7435,2012062420,0.136342,0.083236,0.206316,0.144016,0.208217,0.172234
7436,2012062421,0.144033,0.077648,0.255161,0.11388,0.205506,0.161259
7437,2012062422,0.117915,0.078133,0.311428,0.101872,0.205371,0.157525
7438,2012062423,0.091705,0.076934,0.355547,0.099505,0.187013,0.171684
7439,2012062500,0.127994,0.065098,0.363785,0.105747,0.195329,0.19938
