In [1]:
import pandas as pd

In [2]:
from sklearn import model_selection, metrics
from sklearn import (svm, 
                     neighbors, 
                     ensemble, 
                     multioutput,
                    linear_model, 
                    naive_bayes, 
                    tree, 
                    discriminant_analysis)

In [3]:
# Read the preprocessed load table from the local data directory.
train_load_data = pd.read_csv(
    filepath_or_buffer='./data/preprocessed_train_load_data.csv',
)

In [4]:
# Preview the first five rows of the loaded table.
train_load_data.head(n=5)

Unnamed: 0,zone_id,year,month,day,h1,h2,h3,h4,h5,h6,...,week_of_year,day_of_year,is_month_start,is_month_end,area1,area2,area3,area4,area5,area6
0,1,2004,1,1,16853.0,16450.0,16517.0,16873.0,17064.0,17727.0,...,1,1,1,0,0,1,0,0,0,0
1,1,2004,1,2,14155.0,14038.0,14019.0,14489.0,14920.0,16072.0,...,1,2,0,0,0,1,0,0,0,0
2,1,2004,1,3,14439.0,14272.0,14109.0,14081.0,14775.0,15491.0,...,1,3,0,0,0,1,0,0,0,0
3,1,2004,1,4,11273.0,10415.0,9943.0,9859.0,9881.0,10248.0,...,1,4,0,0,0,1,0,0,0,0
4,1,2004,1,5,10750.0,10321.0,10107.0,10065.0,10419.0,12101.0,...,2,5,0,0,0,1,0,0,0,0


In [5]:
# Input columns used as model features.
features_list = [
    'zone_id',
    'year', 'month', 'day',
    'weekday', 'week_of_year', 'day_of_year',
    'is_month_start', 'is_month_end',
    'area1', 'area2', 'area3', 'area4', 'area5', 'area6',
]
features = train_load_data[features_list]

# Output columns: one target per hour column, named h1 through h24.
targets_list = ['h' + str(hour) for hour in range(1, 25)]
targets = train_load_data[targets_list]

In [6]:
# Split features and targets into train/test partitions. No split size is
# given, so the library default applies; the seed is fixed at 0.
(train_X, test_X,
 train_y, test_y) = model_selection.train_test_split(
    features,
    targets,
    random_state=0,
)

In [7]:
# Preview the first five rows of the training features.
train_X.head(n=5)

Unnamed: 0,zone_id,year,month,day,weekday,week_of_year,day_of_year,is_month_start,is_month_end,area1,area2,area3,area4,area5,area6
16933,11,2007,6,13,2,24,164,0,0,0,0,0,1,0,0
8321,6,2005,4,8,4,14,98,0,0,0,0,0,0,1,0
18505,12,2007,5,30,2,22,150,0,0,0,0,0,1,0,0
7384,5,2007,3,1,3,9,60,1,0,1,0,0,0,0,0
20946,14,2005,4,10,6,14,100,0,0,0,1,0,0,0,0


In [8]:
# Preview the first five rows of the training targets.
train_y.head(n=5)

Unnamed: 0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,...,h15,h16,h17,h18,h19,h20,h21,h22,h23,h24
16933,80943.0,75827.0,72996.0,71550.0,71907.0,77154.0,89113.0,99494.0,105521.0,112795.0,...,129246.0,129225.0,128029.0,123457.0,117468.0,112108.0,111162.0,107381.0,95750.0,82524.0
8321,123895.0,120510.0,119221.0,119413.0,121784.0,133294.0,155383.0,168591.0,165027.0,163679.0,...,157826.0,155461.0,153503.0,155466.0,153455.0,154744.0,164496.0,156523.0,144871.0,129495.0
18505,88599.0,80486.0,76941.0,75276.0,77164.0,86141.0,105005.0,109539.0,108004.0,113125.0,...,162296.0,171744.0,182760.0,191824.0,189659.0,177080.0,169449.0,161761.0,137826.0,111857.0
7384,8004.0,8233.0,8519.0,8795.0,9384.0,10598.0,12521.0,12263.0,10285.0,8900.0,...,7344.0,7514.0,8300.0,9529.0,10378.0,10564.0,10020.0,9230.0,8193.0,7149.0
20946,14703.0,14197.0,14251.0,14627.0,15282.0,13495.0,14992.0,17472.0,19040.0,18203.0,...,12211.0,11842.0,11876.0,12330.0,12863.0,14070.0,16582.0,15479.0,16019.0,13300.0


In [9]:
# List of MLA Algorithms
#
# Candidate regressors to benchmark. The train/test split is seeded with
# random_state=0, so every estimator is given the same fixed seed here;
# without it the randomized estimators produce different scores on every run.
RANDOM_STATE = 0

MLA = [
    # ensemble
    ensemble.AdaBoostRegressor(random_state=RANDOM_STATE),
    ensemble.RandomForestRegressor(random_state=RANDOM_STATE),
    ensemble.GradientBoostingRegressor(random_state=RANDOM_STATE),
    ensemble.ExtraTreesRegressor(random_state=RANDOM_STATE),

    # Nearest Neighbor: no estimator selected.

    # svm: only the linear variant is enabled. svm.SVR() and svm.NuSVR() were
    # disabled in the original list; the reason is not recorded.
    svm.LinearSVR(random_state=RANDOM_STATE),

    # tree
    tree.DecisionTreeRegressor(random_state=RANDOM_STATE),
]

In [10]:
# Fit each candidate from MLA as a multi-output regressor and print its
# R^2 score and mean squared error on the train and test partitions.
for alg in MLA:
    alg_name = alg.__class__.__name__
    print("starting:", alg_name)
    clf = multioutput.MultiOutputRegressor(alg)
    clf.fit(train_X, train_y)

    # Predict once per partition and reuse the result for both metrics.
    # clf.score() would run predict() internally, so calling it alongside
    # mean_squared_error() predicted every partition twice.
    train_pred = clf.predict(train_X)
    test_pred = clf.predict(test_X)

    # r2_score with its default averaging is the metric clf.score() reports.
    print("\ttraining score:", metrics.r2_score(train_y, train_pred))
    print("\ttesting score:", metrics.r2_score(test_y, test_pred))
    print("\ttraining mse:", metrics.mean_squared_error(y_true=train_y, y_pred=train_pred))
    print("\ttesting mse:", metrics.mean_squared_error(y_true=test_y, y_pred=test_pred))
    print('-'*50)


starting: AdaBoostRegressor
	trainig score: 0.8059605093409997
	testing score: 0.7943439164497387
	trainig mse: 1091574146.4363282
	testing mse: 1101078595.4463332
--------------------------------------------------
starting: RandomForestRegressor
	trainig score: 0.9948979340910912
	testing score: 0.9710183342486274
	trainig mse: 28266291.15681796
	testing mse: 152422588.81950864
--------------------------------------------------
starting: GradientBoostingRegressor
	trainig score: 0.9492043878982951
	testing score: 0.9477749499387992
	trainig mse: 284414252.998204
	testing mse: 277507394.62677556
--------------------------------------------------
starting: ExtraTreesRegressor
	trainig score: 0.9999999910429125
	testing score: 0.9752785539518887
	trainig mse: 57.99908833502166
	testing mse: 129809205.29525809
--------------------------------------------------
starting: LinearSVR
	trainig score: -0.045796215961112746
	testing score: -0.03234249570663507
	trainig mse: 6009601680.24458
	tes