# Averaging base models

In [70]:
import pandas as pd
import seaborn as sns
import openpyxl
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle

In [71]:
from sklearn.model_selection import KFold, train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error
import optuna

In [72]:
from sklearn.ensemble import StackingRegressor, RandomForestRegressor
from sklearn.linear_model import RidgeCV, Ridge 
from sklearn.svm import LinearSVR
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor

In [73]:
from Functions.helper_functions import * 

In [74]:
import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)
pd.options.mode.chained_assignment = None  # default='warn'

# Data import

In [78]:
# train_wp1 = pd.read_csv('Data/Preprocessing/WP1_train_preprocessed.csv', sep=',')
# train_wp2 = pd.read_csv('Data/Preprocessing/WP2_train_preprocessed.csv', sep=',')
# train_wp3 = pd.read_csv('Data/Preprocessing/WP3_train_preprocessed.csv', sep=',')
# train_wp4 = pd.read_csv('Data/Preprocessing/WP4_train_preprocessed.csv', sep=',')
# train_wp5 = pd.read_csv('Data/Preprocessing/WP5_train_preprocessed.csv', sep=',')
# train_wp6 = pd.read_csv('Data/Preprocessing/WP6_train_preprocessed.csv', sep=',')

# test_wp1 = pd.read_csv('Data/Preprocessing/WP1_test_preprocessed.csv', sep=',')
# test_wp2 = pd.read_csv('Data/Preprocessing/WP2_test_preprocessed.csv', sep=',')
# test_wp3 = pd.read_csv('Data/Preprocessing/WP3_test_preprocessed.csv', sep=',')
# test_wp4 = pd.read_csv('Data/Preprocessing/WP4_test_preprocessed.csv', sep=',')
# test_wp5 = pd.read_csv('Data/Preprocessing/WP5_test_preprocessed.csv', sep=',')
# test_wp6 = pd.read_csv('Data/Preprocessing/WP6_test_preprocessed.csv', sep=',')
# test_dates = pd.read_csv('Data/Initial/test.csv', sep=',').date.values

In [76]:
# Column names grouped for removal (presumably non-feature columns — TODO confirm;
# the list is not referenced in the cells shown here).
to_drop = [
    "date",
    "wd",
    "forecast_time",
    "forecast",
    "forecast_dist",
]

# Voting from previous submissions

In [77]:
def _read_submission(csv_path):
    """Read one previously saved submission file (semicolon-separated CSV)."""
    return pd.read_csv(csv_path, sep=';')


# One frame per earlier submission; these are the inputs to the blend below.
pred_stack = _read_submission('Predictions/submission_nb_16_stacking_0-2xgboost+2lgbm_1-LinearSVR.csv')
pred_dnn = _read_submission('Predictions/submission_nb_6_full_dnn.csv')
pred_lgbm_maxabs = _read_submission('Predictions/submission_nb_9_full_maxabs-lgbm.csv')
pred_xgboost = _read_submission('Predictions/submission_nb_8_full_absmax-xgboost.csv')
pred_ridge = _read_submission('Predictions/submission_nb_6_full_ridgecv.csv')
pred_lgbm_std = _read_submission('Predictions/submission_nb_7_full_std-lgbm.csv')
pred_stack_2 = _read_submission('Predictions/submission_nb_25_cv_stacking_0-ENet-GBoost-KRR-SVR-ridge-xtratree_1-Lasso.csv')

# Voting method 

## Weighted average

In [127]:
# Blend weights, one per submission. pred_stack is loaded earlier but is not part
# of this blend, which is why its weight stays commented out.
# w_stack = 3
w_stack_2 = 2
w_lgbm_maxabs = 1.7
w_dnn = 1
w_xgboost = 1.3
w_ridge = 1
w_lgbm_std = 1.2
# Normaliser: total of the weights of the six submissions actually blended.
sum_weights = w_stack_2 + w_lgbm_maxabs + w_xgboost + w_lgbm_std + w_ridge + w_dnn

# Weighted mean of the six frames. The arithmetic is applied to every column of
# the frames, so the "date" column is averaged along with the predictions.
pred_av = (pred_lgbm_std*w_lgbm_std + w_ridge*pred_ridge + w_dnn*pred_dnn + w_stack_2*pred_stack_2 + w_lgbm_maxabs*pred_lgbm_maxabs + w_xgboost*pred_xgboost)/sum_weights

# "date" has gone through float multiply/add/divide, so it may come back a hair
# below the true integer; astype(int) alone truncates and would then yield the
# previous value. Round to the nearest integer before casting.
pred_av["date"] = pred_av["date"].round().astype(int)

In [128]:
# Display the total weight used as the divisor of the blended predictions.
sum_weights

8.2

In [129]:
# Display the blended predictions for a visual sanity check.
pred_av

Unnamed: 0,date,wp1,wp2,wp3,wp4,wp5,wp6
0,2011010101,0.649911,0.363403,0.059179,0.543417,0.735196,0.607317
1,2011010102,0.606110,0.375853,0.049719,0.504106,0.709168,0.633917
2,2011010103,0.573756,0.379575,0.101526,0.502508,0.698208,0.644225
3,2011010104,0.584983,0.365566,0.177145,0.539205,0.703019,0.645604
4,2011010105,0.649048,0.308137,0.282475,0.597372,0.749244,0.651318
...,...,...,...,...,...,...,...
7435,2012062420,0.117406,0.066067,0.254181,0.142087,0.241056,0.216190
7436,2012062421,0.101403,0.052761,0.335033,0.114088,0.260828,0.238274
7437,2012062422,0.070279,0.041621,0.416943,0.089511,0.207959,0.230926
7438,2012062423,0.071717,0.035070,0.453314,0.075263,0.148364,0.197434


In [130]:
# Identifiers that make up the output file names for this submission.
nb_sub = 27
model = "votingreg"
# NOTE(review): this tag mentions "stack1" although pred_stack / w_stack are not
# part of the blend, and it has no entry for pred_lgbm_std. It is left unchanged
# because it is baked into the file names below — confirm before renaming.
models = "stacknb2-maxabslgbm-xgboost-ridge-dnn-stack1"
weight = 'Weighted'

# Write the blended predictions as a semicolon-separated submission file.
pred_av.to_csv(f'Predictions/submission_nb_{nb_sub}_{model}_{models}.csv', index=False, sep=';')

# Record which prediction files went into the blend and with which weight.
# Mode "x" refuses to overwrite an existing notes file (FileExistsError on re-run);
# the `with` block guarantees the handle is closed even if a write raises.
with open(f"Predictions/submission-{nb_sub}_{model}_{models}.txt", "x") as f:
    f.write("Model1: Predictions/submission_nb_6_full_dnn.csv\n")
    f.write(f"Model1 weight: {w_dnn}\n\n")
    f.write("Model2: Predictions/submission_nb_9_full_maxabs-lgbm.csv\n")
    f.write(f"Model2 weight: {w_lgbm_maxabs}\n\n")
    f.write("Model3: Predictions/submission_nb_8_full_absmax-xgboost.csv\n")
    f.write(f"Model3 weight: {w_xgboost}\n\n")
    f.write("Model6: Predictions/submission_nb_25_cv_stacking_0-ENet-GBoost-KRR-SVR-ridge-xtratree_1-Lasso.csv\n")
    f.write(f"Model6 weight: {w_stack_2}\n\n")
    f.write("Model4: Predictions/submission_nb_6_full_ridgecv.csv \n")
    f.write(f"Model4 weight: {w_ridge}\n\n")
    f.write("Model5: Predictions/submission_nb_7_full_std-lgbm.csv\n")
    f.write(f"Model5 weight: {w_lgbm_std}\n\n")
    # The nb_16 stacking submission is not listed: it is not part of the blend and
    # its weight (w_stack) is never defined, so referencing it raised NameError.
    f.write(f"Type of weighted : {weight}")