In [2]:
import pandas as pd
import seaborn as sns
import openpyxl
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle

In [3]:
from sklearn.model_selection import KFold, train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error
import optuna

In [4]:
from sklearn.ensemble import StackingRegressor, RandomForestRegressor
from sklearn.linear_model import RidgeCV, Ridge 
from sklearn.svm import LinearSVR
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor

In [5]:
from Functions.helper_functions import * 

In [6]:
import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)
pd.options.mode.chained_assignment = None  # default='warn'

# Data import

In [7]:
def _load_wp_frames(split):
    """Read the six preprocessed WP1..WP6 CSV files for one split.

    Parameters
    ----------
    split : str
        'train' or 'test'; inserted into the file name
        ``Data/Preprocessing/WP<k>_<split>_preprocessed.csv``.

    Returns
    -------
    list of pandas.DataFrame
        The frames for WP1 through WP6, in that order.
    """
    return [
        pd.read_csv(f'Data/Preprocessing/WP{k}_{split}_preprocessed.csv', sep=',')
        for k in range(1, 7)
    ]


# One frame per WP index and split. The individual variable names are kept
# because other cells may refer to them directly.
train_wp1, train_wp2, train_wp3, train_wp4, train_wp5, train_wp6 = _load_wp_frames('train')
test_wp1, test_wp2, test_wp3, test_wp4, test_wp5, test_wp6 = _load_wp_frames('test')

# 'date' column of the initial (non-preprocessed) test file, as a NumPy array.
test_dates = pd.read_csv('Data/Initial/test.csv', sep=',')['date'].values

In [8]:
# Column names collected for removal. Presumably these are dropped from the WP
# frames before modelling, but no visible cell uses this list — TODO confirm.
to_drop = [
    'date',
    'wd',
    'forecast_time',
    'forecast',
    'forecast_dist',
]

# Voting from previous submissions

In [57]:
def _read_submission(csv_path):
    """Load one previously written submission file (semicolon-separated CSV)."""
    return pd.read_csv(csv_path, sep=';')


# Earlier submissions that are combined in the voting step further down.
pred_stack = _read_submission('Predictions/submission_nb_16_stacking_0-2xgboost+2lgbm_1-LinearSVR.csv')
pred_dnn = _read_submission('Predictions/submission_nb_6_full_dnn.csv')
pred_lgbm_maxabs = _read_submission('Predictions/submission_nb_9_full_maxabs-lgbm.csv')
pred_xgboost = _read_submission('Predictions/submission_nb_8_full_absmax-xgboost.csv')
pred_ridge = _read_submission('Predictions/submission_nb_6_full_ridgecv.csv')
pred_lgbm_std = _read_submission('Predictions/submission_nb_7_full_std-lgbm.csv')

# Voting method 

## Weighted average

In [66]:
# Relative weight of each earlier submission in the blend (larger = more influence).
w_stack = 3
w_lgbm_maxabs = 2
w_dnn = 1
w_xgboost = 1.5
w_ridge = 1
w_lgbm_std = 1

# Keep each weight next to its frame so the numerator and the normaliser
# are always built from the same list and cannot drift apart.
weighted_preds = [
    (w_stack, pred_stack),
    (w_lgbm_maxabs, pred_lgbm_maxabs),
    (w_dnn, pred_dnn),
    (w_xgboost, pred_xgboost),
    (w_ridge, pred_ridge),
    (w_lgbm_std, pred_lgbm_std),
]
sum_weights = sum(w for w, _ in weighted_preds)

# 'date' is the row key, not a prediction. Averaging it through float arithmetic
# and truncating with astype(int) is only right when the arithmetic is exact, and
# it silently blends mismatched rows. Instead, require identical dates everywhere
# and copy the column over unchanged.
dates = pred_stack["date"].to_numpy()
if not all(np.array_equal(pred["date"].to_numpy(), dates) for _, pred in weighted_preds):
    raise ValueError("The 'date' columns of the submissions to blend do not match row by row.")

# Weighted mean over the prediction columns only (same summation order as the weights above).
value_cols = [col for col in pred_stack.columns if col != "date"]
pred_av = sum(w * pred[value_cols] for w, pred in weighted_preds) / sum_weights

# Put the key back and restore the column order of the input submissions.
pred_av["date"] = dates.astype(int)
pred_av = pred_av[list(pred_stack.columns)]

In [67]:
# Display the stacking submission loaded earlier (rich DataFrame output of this cell).
pred_stack

Unnamed: 0,date,wp1,wp2,wp3,wp4,wp5,wp6
0,2011010101,0.723491,0.290215,0.073634,0.543231,0.769381,0.641463
1,2011010102,0.680920,0.330876,0.036355,0.485683,0.751763,0.660941
2,2011010103,0.627073,0.355641,0.100430,0.492717,0.772331,0.648006
3,2011010104,0.623136,0.377850,0.172904,0.532702,0.708793,0.661650
4,2011010105,0.738028,0.311827,0.277828,0.598646,0.727939,0.654823
...,...,...,...,...,...,...,...
7435,2012062420,0.135034,0.031992,0.282083,0.113497,0.208935,0.167370
7436,2012062421,0.129431,0.019523,0.334578,0.097063,0.293264,0.213511
7437,2012062422,0.086057,0.037189,0.435155,0.065048,0.233037,0.191023
7438,2012062423,0.069453,0.032257,0.453232,0.025873,0.151968,0.172628


In [68]:
# Metadata used to build the output file names of this submission.
nb_sub = 23
model = "votingreg"
models = "dnn-stdlgbm-maxabslgbm-xgboost-ridge-stack"
weight = 'Weighted'

# Write the blended predictions as a semicolon-separated CSV without the index.
pred_av.to_csv(f'Predictions/submission_nb_{nb_sub}_{model}_{models}.csv', index=False, sep=';')

# Companion text file recording which submissions and weights went into the blend.
# Mode "x" refuses to overwrite: re-running with the same nb_sub raises
# FileExistsError here, after the CSV above has already been rewritten.
# The context manager closes the handle even if one of the writes fails.
with open(f"Predictions/submission-{nb_sub}_{model}_{models}.txt", "x") as f:
    f.write("Model1: Predictions/submission_nb_6_full_dnn.csv\n")
    f.write(f"Model1 weight: {w_dnn}\n\n")
    f.write("Model2: Predictions/submission_nb_9_full_maxabs-lgbm.csv\n")
    f.write(f"Model2 weight: {w_lgbm_maxabs}\n\n")
    f.write("Model3: Predictions/submission_nb_8_full_absmax-xgboost.csv\n")
    f.write(f"Model3 weight: {w_xgboost}\n\n")
    f.write("Model4: Predictions/submission_nb_6_full_ridgecv.csv \n")
    f.write(f"Model4 weight: {w_ridge}\n\n")
    f.write("Model5: Predictions/submission_nb_7_full_std-lgbm.csv\n")
    f.write(f"Model5 weight: {w_lgbm_std}\n\n")
    # The stacking model is the sixth entry; it was previously logged as a second "Model5".
    f.write("Model6: Predictions/submission_nb_16_stacking_0-2xgboost+2lgbm_1-LinearSVR.csv\n")
    f.write(f"Model6 weight: {w_stack}\n\n")
    f.write(f"Type of weighted : {weight}")