In [339]:
import pandas as pd
import numpy as np
from functools import reduce

import datetime
import seaborn as sns
from matplotlib import rcParams
import matplotlib.pyplot as plt
# figure size in inches
rcParams['figure.figsize'] = 30,15
pd.set_option('display.max_columns', None)


In [340]:
df_sales = pd.read_csv ('data/raw_data/sales_clean.csv')
df_climate = pd.read_csv('data/raw_data/climate_clean.csv')
df_holidays = pd.read_csv('data/raw_data/holidays_clean.csv')

In [341]:
df_climate

Unnamed: 0,date,average_temp,total_precip_mm,did_rain,did_snow
0,2015-09-21,20.9,0.0,0,0
1,2015-09-22,20.4,0.0,0,0
2,2015-09-23,19.0,0.0,0,0
3,2015-09-24,19.9,0.0,0,0
4,2015-09-25,21.4,0.0,0,0
...,...,...,...,...,...
2055,2021-05-07,28.0,0.0,0,0
2056,2021-05-08,29.0,0.0,0,0
2057,2021-05-09,22.0,1.5,1,0
2058,2021-05-10,18.0,0.0,0,0


In [342]:
df_holidays

Unnamed: 0,date,day_type,holiday_type,holiday_name
0,2015-09-21,laborable,,
1,2015-09-22,laborable,,
2,2015-09-23,laborable,,
3,2015-09-24,laborable,,
4,2015-09-25,laborable,,
...,...,...,...,...
2055,2021-05-07,laborable,,
2056,2021-05-08,sábado,,
2057,2021-05-09,domingo,,
2058,2021-05-10,laborable,,


In [343]:
df1= pd.merge(df_sales, df_climate, how="right", on="date")

In [344]:
df1

Unnamed: 0,date,total_sales,day_of_week,month_name,day,year,average_temp,total_precip_mm,did_rain,did_snow
0,2015-09-21,233.00,Monday,September,21,2015,20.9,0.0,0,0
1,2015-09-22,95.80,Tuesday,September,22,2015,20.4,0.0,0,0
2,2015-09-23,156.50,Wednesday,September,23,2015,19.0,0.0,0,0
3,2015-09-24,141.80,Thursday,September,24,2015,19.9,0.0,0,0
4,2015-09-25,1095.15,Friday,September,25,2015,21.4,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...
2055,2021-05-07,2154.00,Friday,May,7,2021,28.0,0.0,0,0
2056,2021-05-08,6241.70,Saturday,May,8,2021,29.0,0.0,0,0
2057,2021-05-09,6611.10,Sunday,May,9,2021,22.0,1.5,1,0
2058,2021-05-10,946.40,Monday,May,10,2021,18.0,0.0,0,0


In [345]:
df2 = pd.merge(df1, df_holidays, how="right", on="date")

In [346]:
df2

Unnamed: 0,date,total_sales,day_of_week,month_name,day,year,average_temp,total_precip_mm,did_rain,did_snow,day_type,holiday_type,holiday_name
0,2015-09-21,233.00,Monday,September,21,2015,20.9,0.0,0,0,laborable,,
1,2015-09-22,95.80,Tuesday,September,22,2015,20.4,0.0,0,0,laborable,,
2,2015-09-23,156.50,Wednesday,September,23,2015,19.0,0.0,0,0,laborable,,
3,2015-09-24,141.80,Thursday,September,24,2015,19.9,0.0,0,0,laborable,,
4,2015-09-25,1095.15,Friday,September,25,2015,21.4,0.0,0,0,laborable,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2055,2021-05-07,2154.00,Friday,May,7,2021,28.0,0.0,0,0,laborable,,
2056,2021-05-08,6241.70,Saturday,May,8,2021,29.0,0.0,0,0,sábado,,
2057,2021-05-09,6611.10,Sunday,May,9,2021,22.0,1.5,1,0,domingo,,
2058,2021-05-10,946.40,Monday,May,10,2021,18.0,0.0,0,0,laborable,,


In [347]:
df2['holiday_type'] = df2['holiday_type'].str.upper()
df2['holiday_name'] = df2['holiday_name'].str.upper()

In [348]:
df2.sample(40)

Unnamed: 0,date,total_sales,day_of_week,month_name,day,year,average_temp,total_precip_mm,did_rain,did_snow,day_type,holiday_type,holiday_name
1859,2020-10-23,2402.35,Friday,October,23,2020,11.7,7.37,1,0,laborable,,
1950,2021-01-22,1207.5,Friday,January,22,2021,8.7,4.06,1,0,laborable,,
1406,2019-07-28,2969.05,Sunday,July,28,2019,23.3,0.0,0,0,domingo,,
1289,2019-04-02,908.2,Tuesday,April,2,2019,12.8,0.0,0,0,laborable,,
239,2016-05-17,234.7,Tuesday,May,17,2016,17.6,0.0,0,0,laborable,,
980,2018-05-28,708.4,Monday,May,28,2018,16.4,0.25,1,0,laborable,,
859,2018-01-27,8582.05,Saturday,January,27,2018,6.4,0.0,0,0,sábado,,
319,2016-08-05,838.65,Friday,August,5,2016,27.4,0.0,0,0,laborable,,
1366,2019-06-18,1440.9,Tuesday,June,18,2019,25.7,0.0,0,0,laborable,,
840,2018-01-08,885.6,Monday,January,8,2018,2.3,0.0,0,0,laborable,,


In [349]:
df2['is_closed'] = df2['total_sales'].apply(lambda x: 1 if x == 0 else 0)


In [350]:
df2['is_lockdown'] = df2['date'].apply(lambda x: 1 if x >= '2020-03-13' and x <= '2020-06-26' else 0)


In [351]:
df2['is_curfew'] = df2['date'].apply(lambda x: 1 if x >= '2020-03-13' and x <= '2021-05-09' else 0)


In [352]:
df2.head()

Unnamed: 0,date,total_sales,day_of_week,month_name,day,year,average_temp,total_precip_mm,did_rain,did_snow,day_type,holiday_type,holiday_name,is_closed,is_lockdown,is_curfew
0,2015-09-21,233.0,Monday,September,21,2015,20.9,0.0,0,0,laborable,,,0,0,0
1,2015-09-22,95.8,Tuesday,September,22,2015,20.4,0.0,0,0,laborable,,,0,0,0
2,2015-09-23,156.5,Wednesday,September,23,2015,19.0,0.0,0,0,laborable,,,0,0,0
3,2015-09-24,141.8,Thursday,September,24,2015,19.9,0.0,0,0,laborable,,,0,0,0
4,2015-09-25,1095.15,Friday,September,25,2015,21.4,0.0,0,0,laborable,,,0,0,0


In [353]:
df2.corr()

Unnamed: 0,total_sales,day,year,average_temp,total_precip_mm,did_rain,did_snow,is_closed,is_lockdown,is_curfew
total_sales,1.0,-0.029586,-0.013776,-0.264064,0.070638,0.107799,0.002931,-0.258799,-0.245639,-0.157188
day,-0.029586,1.0,-0.023629,0.012277,-0.021252,-0.007165,0.021833,0.034367,0.0158,0.002079
year,-0.013776,-0.023629,1.0,-0.046058,0.007636,0.044646,0.006112,0.226014,0.272899,0.68838
average_temp,-0.264064,0.012277,-0.046058,1.0,-0.106723,-0.205768,-0.095801,0.040597,0.0492,-0.000332
total_precip_mm,0.070638,-0.021252,0.007636,-0.106723,1.0,0.457043,0.093648,0.054079,0.049235,0.034876
did_rain,0.107799,-0.007165,0.044646,-0.205768,0.457043,1.0,0.141058,0.094187,0.095855,0.062359
did_snow,0.002931,0.021833,0.006112,-0.095801,0.093648,0.141058,1.0,0.058615,0.041986,0.020193
is_closed,-0.258799,0.034367,0.226014,0.040597,0.054079,0.094187,0.058615,1.0,0.949148,0.430816
is_lockdown,-0.245639,0.0158,0.272899,0.0492,0.049235,0.095855,0.041986,0.949148,1.0,0.458189
is_curfew,-0.157188,0.002079,0.68838,-0.000332,0.034876,0.062359,0.020193,0.430816,0.458189,1.0


In [354]:
df2.to_csv("data/db_load_files/clean_data.csv", index=False)

In [355]:
df3 = df2.set_index("date")


In [356]:
df3

Unnamed: 0_level_0,total_sales,day_of_week,month_name,day,year,average_temp,total_precip_mm,did_rain,did_snow,day_type,holiday_type,holiday_name,is_closed,is_lockdown,is_curfew
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2015-09-21,233.00,Monday,September,21,2015,20.9,0.0,0,0,laborable,,,0,0,0
2015-09-22,95.80,Tuesday,September,22,2015,20.4,0.0,0,0,laborable,,,0,0,0
2015-09-23,156.50,Wednesday,September,23,2015,19.0,0.0,0,0,laborable,,,0,0,0
2015-09-24,141.80,Thursday,September,24,2015,19.9,0.0,0,0,laborable,,,0,0,0
2015-09-25,1095.15,Friday,September,25,2015,21.4,0.0,0,0,laborable,,,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-07,2154.00,Friday,May,7,2021,28.0,0.0,0,0,laborable,,,0,0,1
2021-05-08,6241.70,Saturday,May,8,2021,29.0,0.0,0,0,sábado,,,0,0,1
2021-05-09,6611.10,Sunday,May,9,2021,22.0,1.5,1,0,domingo,,,0,0,1
2021-05-10,946.40,Monday,May,10,2021,18.0,0.0,0,0,laborable,,,0,0,0


In [357]:
df3['year'] = df3.year.astype('category')

In [358]:
del df3['day']
del df3['holiday_type']
del df3['holiday_name' ]

In [359]:
del df3['did_snow']


In [360]:
df4 = pd.get_dummies(df3 ,dummy_na=True)

In [361]:
df4

Unnamed: 0_level_0,total_sales,average_temp,total_precip_mm,did_rain,is_closed,is_lockdown,is_curfew,day_of_week_Friday,day_of_week_Monday,day_of_week_Saturday,day_of_week_Sunday,day_of_week_Thursday,day_of_week_Tuesday,day_of_week_Wednesday,day_of_week_nan,month_name_April,month_name_August,month_name_December,month_name_February,month_name_January,month_name_July,month_name_June,month_name_March,month_name_May,month_name_November,month_name_October,month_name_September,month_name_nan,year_2015.0,year_2016.0,year_2017.0,year_2018.0,year_2019.0,year_2020.0,year_2021.0,year_nan,day_type_domingo,day_type_festivo,day_type_laborable,day_type_sábado,day_type_nan
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
2015-09-21,233.00,20.9,0.0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0
2015-09-22,95.80,20.4,0.0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0
2015-09-23,156.50,19.0,0.0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0
2015-09-24,141.80,19.9,0.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0
2015-09-25,1095.15,21.4,0.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-07,2154.00,28.0,0.0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
2021-05-08,6241.70,29.0,0.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0
2021-05-09,6611.10,22.0,1.5,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0
2021-05-10,946.40,18.0,0.0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0


In [362]:
del df4['day_type_nan']
del df4['month_name_nan']
del df4['day_of_week_nan']
del df4['year_nan']

In [363]:
df4

Unnamed: 0_level_0,total_sales,average_temp,total_precip_mm,did_rain,is_closed,is_lockdown,is_curfew,day_of_week_Friday,day_of_week_Monday,day_of_week_Saturday,day_of_week_Sunday,day_of_week_Thursday,day_of_week_Tuesday,day_of_week_Wednesday,month_name_April,month_name_August,month_name_December,month_name_February,month_name_January,month_name_July,month_name_June,month_name_March,month_name_May,month_name_November,month_name_October,month_name_September,year_2015.0,year_2016.0,year_2017.0,year_2018.0,year_2019.0,year_2020.0,year_2021.0,day_type_domingo,day_type_festivo,day_type_laborable,day_type_sábado
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
2015-09-21,233.00,20.9,0.0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0
2015-09-22,95.80,20.4,0.0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0
2015-09-23,156.50,19.0,0.0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0
2015-09-24,141.80,19.9,0.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0
2015-09-25,1095.15,21.4,0.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-07,2154.00,28.0,0.0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0
2021-05-08,6241.70,29.0,0.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1
2021-05-09,6611.10,22.0,1.5,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0
2021-05-10,946.40,18.0,0.0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0


In [364]:
#add total sales lag 
number_lags=1
for lag in range(1, number_lags + 1):
    df4['prev_sales'] = df4.total_sales.shift(lag)

df4.dropna(subset = ["prev_sales"], inplace=True)

In [365]:
number_lags=1
for lag in range(1, number_lags + 1):
    df4['is_post_holiday'] = df4.day_type_festivo.shift(lag)

df4.dropna(subset = ["is_post_holiday"], inplace=True)

In [366]:
df4['is_pre_holiday'] = df4.day_type_festivo.shift(-1)
df4.dropna(subset = ["is_pre_holiday"], inplace=True)

In [367]:
df4

Unnamed: 0_level_0,total_sales,average_temp,total_precip_mm,did_rain,is_closed,is_lockdown,is_curfew,day_of_week_Friday,day_of_week_Monday,day_of_week_Saturday,day_of_week_Sunday,day_of_week_Thursday,day_of_week_Tuesday,day_of_week_Wednesday,month_name_April,month_name_August,month_name_December,month_name_February,month_name_January,month_name_July,month_name_June,month_name_March,month_name_May,month_name_November,month_name_October,month_name_September,year_2015.0,year_2016.0,year_2017.0,year_2018.0,year_2019.0,year_2020.0,year_2021.0,day_type_domingo,day_type_festivo,day_type_laborable,day_type_sábado,prev_sales,is_post_holiday,is_pre_holiday
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1
2015-09-23,156.50,19.0,0.0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,95.80,0.0,0.0
2015-09-24,141.80,19.9,0.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,156.50,0.0,0.0
2015-09-25,1095.15,21.4,0.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,141.80,0.0,0.0
2015-09-26,2588.05,20.8,0.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1095.15,0.0,0.0
2015-09-27,1316.90,21.2,0.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,2588.05,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-06,649.40,27.0,0.0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,751.70,0.0,0.0
2021-05-07,2154.00,28.0,0.0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,649.40,0.0,0.0
2021-05-08,6241.70,29.0,0.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,2154.00,0.0,0.0
2021-05-09,6611.10,22.0,1.5,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,6241.70,0.0,0.0


In [368]:
df4.to_excel("data/test.xlsx", index=True)

In [369]:


from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier , GradientBoostingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis , QuadraticDiscriminantAnalysis

from sklearn.linear_model import LinearRegression,Ridge,Lasso,RidgeCV, ElasticNet,SGDRegressor
from sklearn.ensemble import RandomForestRegressor,BaggingRegressor,GradientBoostingRegressor,AdaBoostRegressor 
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

from sklearn.preprocessing import  Normalizer , scale
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFECV
from sklearn.model_selection import GridSearchCV, KFold , cross_val_score,RandomizedSearchCV


from sklearn.preprocessing import MinMaxScaler , StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_log_error,mean_squared_error, r2_score,mean_absolute_error 

from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

#### 

In [387]:
X = df4.drop(['total_sales'], axis=1)
y = df4['total_sales']

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.19)

In [388]:
models = { "ridge": Ridge(),
          "lasso": Lasso(),
          "sgd": SGDRegressor(),
          "knn": KNeighborsRegressor(),
          "gradient": GradientBoostingRegressor()
}

In [389]:
for name, model in models.items():
    print(f"Entrenando modelo ---> {name}")
    model.fit(X_train,y_train)
    print(f"He acabado :)")

Entrenando modelo ---> ridge
He acabado :)
Entrenando modelo ---> lasso
He acabado :)
Entrenando modelo ---> sgd
He acabado :)
Entrenando modelo ---> knn
He acabado :)
Entrenando modelo ---> gradient
He acabado :)


In [390]:
for name, model in models.items():
    y_pred = model.predict(X_test)
    print(f"--------{name}--------")
    print("MAE: ", mean_absolute_error(y_test, y_pred))
    print("MSE: ", mean_squared_error(y_test,y_pred))
    print("RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)))
    print("R2: ", r2_score(y_test,y_pred))
    print("\n")

--------ridge--------
MAE:  849.4439373732768
MSE:  1277522.28117303
RMSE:  1130.275312113394
R2:  0.7347588867922166


--------lasso--------
MAE:  848.006187095361
MSE:  1275798.2177184832
RMSE:  1129.5123805069527
R2:  0.7351168394609597


--------sgd--------
MAE:  1784633278661610.5
MSE:  5.761782504110976e+30
RMSE:  2400371326297449.5
R2:  -1.1962700204713966e+24


--------knn--------
MAE:  1348.2342404092071
MSE:  3743603.438413913
RMSE:  1934.839383104942
R2:  0.22274737744561945


--------gradient--------
MAE:  628.0157258694991
MSE:  939727.937798814
RMSE:  969.3956559624218
R2:  0.80489226058324




In [391]:
model = RandomForestRegressor()

params = {'n_estimators': [10,30,40,50,100],
          'max_features': ["sqrt", 0.5],
          'max_depth': [15,20,25],
          'min_samples_leaf': [1,2,4,6,8,10]}

grid_search = GridSearchCV(model, param_grid=params, verbose=1, n_jobs=-1,cv=5)

grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 180 candidates, totalling 900 fits


GridSearchCV(cv=5, estimator=RandomForestRegressor(), n_jobs=-1,
             param_grid={'max_depth': [15, 20, 25],
                         'max_features': ['sqrt', 0.5],
                         'min_samples_leaf': [1, 2, 4, 6, 8, 10],
                         'n_estimators': [10, 30, 40, 50, 100]},
             verbose=1)

In [392]:
bestscore = grid_search.best_score_
print("Best GridSearch Score: ", bestscore)
best_rf = grid_search.best_estimator_
print("Best Estimator: ", best_rf)
print("Best RF SCORE: ", best_rf.score(X, y))


Best GridSearch Score:  0.8176397505692184
Best Estimator:  RandomForestRegressor(max_depth=25, max_features='sqrt')
Best RF SCORE:  0.9471911306605445


In [332]:
X["predicted_sales"] = best_rf.predict(X)


In [333]:
X["actual_total_sales"] = y

In [334]:
X

Unnamed: 0_level_0,average_temp,total_precip_mm,did_rain,is_closed,is_lockdown,is_curfew,day_of_week_Friday,day_of_week_Monday,day_of_week_Saturday,day_of_week_Sunday,day_of_week_Thursday,day_of_week_Tuesday,day_of_week_Wednesday,month_name_April,month_name_August,month_name_December,month_name_February,month_name_January,month_name_July,month_name_June,month_name_March,month_name_May,month_name_November,month_name_October,month_name_September,year_2015.0,year_2016.0,year_2017.0,year_2018.0,year_2019.0,year_2020.0,year_2021.0,day_type_domingo,day_type_festivo,day_type_laborable,day_type_sábado,prev_sales,is_post_holiday,is_pre_holiday,predicted_sales,actual_total_sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
2015-09-23,19.0,0.0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,95.80,0.0,0.0,247.174044,156.50
2015-09-24,19.9,0.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,156.50,0.0,0.0,339.448949,141.80
2015-09-25,21.4,0.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,141.80,0.0,0.0,1343.055727,1095.15
2015-09-26,20.8,0.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1095.15,0.0,0.0,3152.689667,2588.05
2015-09-27,21.2,0.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,2588.05,0.0,0.0,1548.678333,1316.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-06,27.0,0.0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,751.70,0.0,0.0,807.682611,649.40
2021-05-07,28.0,0.0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,649.40,0.0,0.0,1894.789937,2154.00
2021-05-08,29.0,0.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,2154.00,0.0,0.0,5603.551667,6241.70
2021-05-09,22.0,1.5,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,6241.70,0.0,0.0,4537.332667,6611.10


In [335]:
import pickle

In [393]:
# save the model to disk
pickle.dump(best_rf, open("models/best_rf", 'wb'))

'''
# load the model from disk
loaded_model = pickle.load(open("mi_mejor_modelo", 'rb'))
loaded_model.predict(X_test)
'''

'\n# load the model from disk\nloaded_model = pickle.load(open("mi_mejor_modelo", \'rb\'))\nloaded_model.predict(X_test)\n'

In [116]:
import joblib

#autoMachineLearning
import h2o
from h2o.automl import H2OAutoML

In [394]:
h2o.init() #To start h2o


Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O_cluster_uptime:,2 hours 54 mins
H2O_cluster_timezone:,Europe/Paris
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.32.1.2
H2O_cluster_version_age:,15 days
H2O_cluster_name:,H2O_from_python_fran_qkh2t3
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,2.970 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


In [118]:
h2train = h2o.H2OFrame(df4)

Parse progress: |█████████████████████████████████████████████████████████| 100%


In [119]:
x = list(df4.columns)
x.remove('total_sales')

y = "total_sales"

print("X:", x)
print("y:", y)

X: ['average_temp', 'total_precip_mm', 'did_rain', 'is_closed', 'is_lockdown', 'is_curfew', 'day_of_week_Friday', 'day_of_week_Monday', 'day_of_week_Saturday', 'day_of_week_Sunday', 'day_of_week_Thursday', 'day_of_week_Tuesday', 'day_of_week_Wednesday', 'month_name_April', 'month_name_August', 'month_name_December', 'month_name_February', 'month_name_January', 'month_name_July', 'month_name_June', 'month_name_March', 'month_name_May', 'month_name_November', 'month_name_October', 'month_name_September', 'year_2015.0', 'year_2016.0', 'year_2017.0', 'year_2018.0', 'year_2019.0', 'year_2020.0', 'year_2021.0', 'day_type_domingo', 'day_type_festivo', 'day_type_laborable', 'day_type_sábado', 'prev_sales', 'is_post_holiday', 'is_pre_holiday']
y: total_sales


In [120]:
#TRAINING all the h20 models

automl = H2OAutoML(max_models=40, max_runtime_secs=3600, sort_metric='RMSE')
automl.train(x=x, y=y, training_frame=h2train)

AutoML progress: |████████████████████████████████████████████████████████| 100%


In [121]:
#Showing the best performers

leader_board = automl.leaderboard
leader_board.head()

model_id,rmse,mean_residual_deviance,mse,mae,rmsle
StackedEnsemble_AllModels_AutoML_20210515_140725,892.668,796856,796856,543.198,
StackedEnsemble_BestOfFamily_AutoML_20210515_140725,895.127,801251,801251,539.72,
XGBoost_grid__1_AutoML_20210515_140725_model_11,925.74,856995,856995,568.747,
XGBoost_grid__1_AutoML_20210515_140725_model_6,932.126,868859,868859,569.004,
GBM_2_AutoML_20210515_140725,934.344,872998,872998,572.962,
XGBoost_grid__1_AutoML_20210515_140725_model_12,935.098,874409,874409,577.775,
XGBoost_grid__1_AutoML_20210515_140725_model_10,935.854,875823,875823,585.564,
DRF_1_AutoML_20210515_140725,937.706,879293,879293,571.511,0.777098
DeepLearning_grid__3_AutoML_20210515_140725_model_1,939.158,882017,882017,613.967,1.54895
GBM_3_AutoML_20210515_140725,942.082,887518,887518,578.792,




In [127]:
# save the model to disk
model_path = h2o.save_model(model=automl.leader, path="models/autostacked", force=True)
print (model_path)

/mnt/c/Users/lesto/Desktop/Ironhack/CityPlayForecast/models/autostacked/StackedEnsemble_AllModels_AutoML_20210515_140725


In [151]:
#Loading the TEST dataset

stacked_test = X
h2test_stacked = h2o.H2OFrame(stacked_test) #Conversion into a H20 frame to train
h2test_stacked.head() #preview

Parse progress: |█████████████████████████████████████████████████████████| 100%


average_temp,total_precip_mm,did_rain,is_closed,is_lockdown,is_curfew,day_of_week_Friday,day_of_week_Monday,day_of_week_Saturday,day_of_week_Sunday,day_of_week_Thursday,day_of_week_Tuesday,day_of_week_Wednesday,month_name_April,month_name_August,month_name_December,month_name_February,month_name_January,month_name_July,month_name_June,month_name_March,month_name_May,month_name_November,month_name_October,month_name_September,year_2015.0,year_2016.0,year_2017.0,year_2018.0,year_2019.0,year_2020.0,year_2021.0,day_type_domingo,day_type_festivo,day_type_laborable,day_type_sábado,prev_sales,is_post_holiday,is_pre_holiday,predicted_sales,actual_total_sales
19.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,95.8,0,0,551.509,156.5
19.9,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,156.5,0,0,458.303,141.8
21.4,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,141.8,0,0,1119.26,1095.15
20.8,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1095.15,0,0,2833.82,2588.05
21.2,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,2588.05,0,0,1761.45,1316.9
18.4,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1316.9,0,0,1545.56,1929.0
18.1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1929.0,0,0,1038.16,578.0
18.4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,578.0,0,0,651.457,552.2
18.4,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,552.2,0,0,590.023,429.3
19.6,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,429.3,0,0,1722.11,1955.4




In [152]:
predicted_price_h2_stacked = automl.leader.predict(h2test_stacked).as_data_frame() #PREDICTING the Sales on the TEST dataset
predicted_price_h2_stacked #Result

stackedensemble prediction progress: |████████████████████████████████████| 100%


Unnamed: 0,predict
0,285.445675
1,260.876360
2,1071.732738
3,2688.026402
4,2006.573758
...,...
2052,786.339809
2053,2003.971774
2054,5674.968076
2055,5787.562846


In [409]:
pred = X
pred


Unnamed: 0_level_0,average_temp,total_precip_mm,did_rain,is_closed,is_lockdown,is_curfew,day_of_week_Friday,day_of_week_Monday,day_of_week_Saturday,day_of_week_Sunday,day_of_week_Thursday,day_of_week_Tuesday,day_of_week_Wednesday,month_name_April,month_name_August,month_name_December,month_name_February,month_name_January,month_name_July,month_name_June,month_name_March,month_name_May,month_name_November,month_name_October,month_name_September,year_2015.0,year_2016.0,year_2017.0,year_2018.0,year_2019.0,year_2020.0,year_2021.0,day_type_domingo,day_type_festivo,day_type_laborable,day_type_sábado,prev_sales,is_post_holiday,is_pre_holiday
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
2015-09-23,19.0,0.0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,95.80,0.0,0.0
2015-09-24,19.9,0.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,156.50,0.0,0.0
2015-09-25,21.4,0.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,141.80,0.0,0.0
2015-09-26,20.8,0.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1095.15,0.0,0.0
2015-09-27,21.2,0.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,2588.05,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-06,27.0,0.0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,751.70,0.0,0.0
2021-05-07,28.0,0.0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,649.40,0.0,0.0
2021-05-08,29.0,0.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,2154.00,0.0,0.0
2021-05-09,22.0,1.5,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,6241.70,0.0,0.0


In [410]:
pred["total_sales"] = y

In [411]:
pred

Unnamed: 0_level_0,average_temp,total_precip_mm,did_rain,is_closed,is_lockdown,is_curfew,day_of_week_Friday,day_of_week_Monday,day_of_week_Saturday,day_of_week_Sunday,day_of_week_Thursday,day_of_week_Tuesday,day_of_week_Wednesday,month_name_April,month_name_August,month_name_December,month_name_February,month_name_January,month_name_July,month_name_June,month_name_March,month_name_May,month_name_November,month_name_October,month_name_September,year_2015.0,year_2016.0,year_2017.0,year_2018.0,year_2019.0,year_2020.0,year_2021.0,day_type_domingo,day_type_festivo,day_type_laborable,day_type_sábado,prev_sales,is_post_holiday,is_pre_holiday,total_sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1
2015-09-23,19.0,0.0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,95.80,0.0,0.0,156.50
2015-09-24,19.9,0.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,156.50,0.0,0.0,141.80
2015-09-25,21.4,0.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,141.80,0.0,0.0,1095.15
2015-09-26,20.8,0.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1095.15,0.0,0.0,2588.05
2015-09-27,21.2,0.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,2588.05,0.0,0.0,1316.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-06,27.0,0.0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,751.70,0.0,0.0,649.40
2021-05-07,28.0,0.0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,649.40,0.0,0.0,2154.00
2021-05-08,29.0,0.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,2154.00,0.0,0.0,6241.70
2021-05-09,22.0,1.5,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,6241.70,0.0,0.0,6611.10


In [412]:
pred = pred.set_index(predicted_price_h2_stacked.index)

In [413]:
pred['predict'] = predicted_price_h2_stacked['predict']


In [415]:
pred.to_csv("data/h20_stacked_pred.csv", index=True)

In [413]:
df2.to_csv("data/final.csv", index=True)

In [94]:
X.to_csv("data/model_outputs/rndForrest.csv", index=True)