In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
from sklearn.kernel_ridge import KernelRidge
import math
import time
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.metrics import max_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
def preprocess_features(x1, x2, reversed=False):
    x1_size, x2_size = len(x1), len(x2)
    features = np.array([np.concatenate((x2[j][:-1], x1[i][:-1])) if reversed else np.concatenate((x1[i][:-1], x2[j][:-1])) 
                         for i in range(x1_size) for j in range(x2_size)])   
    labels = np.array([(x2[j][-1] - x1[i][-1]) if reversed else (x1[i][-1] - x2[j][-1]) 
                       for i in range(x1_size) for j in range(x2_size)])
    return features, labels

In [4]:
def root_mean_squared_error(y_true, y_pred):
    return np.sqrt(((y_pred - y_true) ** 2).mean())

In [5]:
def find_yn(z, y_sum, N):
    return np.array([(y_sum + sum(z[i: i + N])) / N for i in range(0, len(z), N)])

In [6]:
def find_yn2(z1, z2, y_sum, N):
    return np.array([(y_sum + (sum(z1[i: i + N]) - sum(z2[i: i + N])) / 2) / N for i in range(0, len(z1), N)])

In [7]:
# train_data = pd.read_csv("./datasets/train_energy_eff_Y1.csv").drop(['Unnamed: 0'],axis=1).to_numpy()
# test_data = pd.read_csv("./datasets/test_energy_eff_Y1.csv").drop(['Unnamed: 0'],axis=1).to_numpy()

In [8]:
# train_data = np.loadtxt("./datasets/Nzone1_train.txt",delimiter=',')
# test_data = np.loadtxt("./datasets/Nzone1_test.txt",delimiter=',')

data = pd.read_csv("./datasets/ENB2012_data.csv").to_numpy()
X_train, X_test, y_train, y_test = train_test_split(data[:,:-1],data[:,-1:],test_size=0.2, random_state=42)
train_data = np.concatenate((X_train, y_train), axis=1)
test_data = np.concatenate((X_test, y_test), axis=1)

In [9]:
y_sum = sum([a[-1] for a in train_data])
N = len(train_data)
y_target_test = np.array([a[-1] for a in test_data])
y_target_train = np.array([a[-1] for a in train_data])

In [10]:
train_features, train_labels = preprocess_features(train_data, train_data)
train_features2, train_labels2 = preprocess_features(train_data, train_data, reversed=True)
test_features, test_labels = preprocess_features(test_data, train_data)
test_features2, test_labels2 = preprocess_features(test_data, train_data, reversed=True)

In [11]:
scaler = MaxAbsScaler()
scaler.fit(train_features)
train_features = scaler.transform(train_features)
train_features2 = scaler.transform(train_features2)
test_features = scaler.transform(test_features)
test_features2 = scaler.transform(test_features2)

### SVR подвоєння входів

In [12]:
from sklearn.svm import SVR
svr = SVR(kernel='rbf', gamma='scale', coef0=0.0, epsilon=0.001, max_iter=-1)

start_time = time.time()
svr.fit(train_features, train_labels)
print("--- %s seconds ---" % (time.time() - start_time))

train_pred_z1 = svr.predict(train_features)
train_pred_z2 = svr.predict(train_features2)

pred_z1 = svr.predict(test_features)
pred_z2 = svr.predict(test_features2)

yn_train = find_yn(train_pred_z1, y_sum, N)
yn_test = find_yn(pred_z1, y_sum, N)

print('Training erros func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_target_train, yn_train)))
print("RMSE: " + str(root_mean_squared_error(y_target_train, yn_train)))
print("MAE: " + str(mean_absolute_error(y_target_train, yn_train)))
print("Max error: " + str(max_error(y_target_train, yn_train)))
print("Median absolute error: " + str(median_absolute_error(y_target_train, yn_train)))
print("Mean Squared error: " + str(mean_squared_error(y_target_train, yn_train)))
print("R2: " + str(r2_score(y_target_train, yn_train)))
print()
print('Testing errors func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_target_test, yn_test)))
print("RMSE: " + str(root_mean_squared_error(y_target_test, yn_test)))
print("MAE: " + str(mean_absolute_error(y_target_test, yn_test)))
print("Max error: " + str(max_error(y_target_test, yn_test)))
print("Median absolute error: " + str(median_absolute_error(y_target_test, yn_test)))
print("Mean Squared error: " + str(mean_squared_error(y_target_test, yn_test)))
print("R2: " + str(r2_score(y_target_test, yn_test)))
print()
print("Train errors")
print(str(mean_absolute_percentage_error(y_target_train, yn_train)))
print(str(root_mean_squared_error(y_target_train, yn_train)))
print(str(mean_absolute_error(y_target_train, yn_train)))
print(str(max_error(y_target_train, yn_train)))
print(str(median_absolute_error(y_target_train, yn_train)))
print(str(mean_squared_error(y_target_train, yn_train)))
print(str(r2_score(y_target_train, yn_train)))
print()
print('Testing errors func:')
print(str(mean_absolute_percentage_error(y_target_test, yn_test)))
print(str(root_mean_squared_error(y_target_test, yn_test)))
print(str(mean_absolute_error(y_target_test, yn_test)))
print(str(max_error(y_target_test, yn_test)))
print(str(median_absolute_error(y_target_test, yn_test)))
print(str(mean_squared_error(y_target_test, yn_test)))
print(str(r2_score(y_target_test, yn_test)))

### GBR подвоєння входів

In [None]:
gbr = GradientBoostingRegressor(random_state=42, max_depth=5)


start_time = time.time()
gbr.fit(train_features, train_labels)
print("--- %s seconds ---" % (time.time() - start_time))

train_pred_z1 = gbr.predict(train_features)
train_pred_z2 = gbr.predict(train_features2)

pred_z1 = gbr.predict(test_features)
pred_z2 = gbr.predict(test_features2)

yn_train = find_yn(train_pred_z1, y_sum, N)
yn_test = find_yn(pred_z1, y_sum, N)

print('Training erros func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_target_train, yn_train)))
print("RMSE: " + str(root_mean_squared_error(y_target_train, yn_train)))
print("MAE: " + str(mean_absolute_error(y_target_train, yn_train)))
print("Max error: " + str(max_error(y_target_train, yn_train)))
print("Median absolute error: " + str(median_absolute_error(y_target_train, yn_train)))
print("Mean Squared error: " + str(mean_squared_error(y_target_train, yn_train)))
print("R2: " + str(r2_score(y_target_train, yn_train)))
print()
print('Testing errors func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_target_test, yn_test)))
print("RMSE: " + str(root_mean_squared_error(y_target_test, yn_test)))
print("MAE: " + str(mean_absolute_error(y_target_test, yn_test)))
print("Max error: " + str(max_error(y_target_test, yn_test)))
print("Median absolute error: " + str(median_absolute_error(y_target_test, yn_test)))
print("Mean Squared error: " + str(mean_squared_error(y_target_test, yn_test)))
print("R2: " + str(r2_score(y_target_test, yn_test)))
print()
print("Train errors")
print(str(mean_absolute_percentage_error(y_target_train, yn_train)))
print(str(root_mean_squared_error(y_target_train, yn_train)))
print(str(mean_absolute_error(y_target_train, yn_train)))
print(str(max_error(y_target_train, yn_train)))
print(str(median_absolute_error(y_target_train, yn_train)))
print(str(mean_squared_error(y_target_train, yn_train)))
print(str(r2_score(y_target_train, yn_train)))
print()
print('Testing errors func:')
print(str(mean_absolute_percentage_error(y_target_test, yn_test)))
print(str(root_mean_squared_error(y_target_test, yn_test)))
print(str(mean_absolute_error(y_target_test, yn_test)))
print(str(max_error(y_target_test, yn_test)))
print(str(median_absolute_error(y_target_test, yn_test)))
print(str(mean_squared_error(y_target_test, yn_test)))
print(str(r2_score(y_target_test, yn_test)))

--- 0.14599823951721191 seconds ---
Training erros func1:
MAPE: 0.008408691663873827
RMSE: 1.5022279838257993
MAE: 1.0609294687739408
Max error: 4.485709814399073
Median absolute error: 0.647344566094759
Mean Squared error: 2.2566889153893257
R2: 0.9987286546131978

Testing errors func1:
MAPE: 0.045524286240267116
RMSE: 8.32211910673297
MAE: 6.455302297364154
Max error: 20.025847429598713
Median absolute error: 5.56844893195192
Mean Squared error: 69.25766642664998
R2: 0.9523228973271768

Train errors
0.008408691663873827
1.5022279838257993
1.0609294687739408
4.485709814399073
0.647344566094759
2.2566889153893257
0.9987286546131978

Testing errors func:
0.045524286240267116
8.32211910673297
6.455302297364154
20.025847429598713
5.56844893195192
69.25766642664998
0.9523228973271768


### RF подвоєння входів

In [None]:
from sklearn.ensemble import RandomForestRegressor
rfg = RandomForestRegressor(random_state=42, max_depth=5)

start_time = time.time()
rfg.fit(train_features, train_labels)
print("--- %s seconds ---" % (time.time() - start_time))

train_pred_z1 = rfg.predict(train_features)
train_pred_z2 = rfg.predict(train_features2)

pred_z1 = rfg.predict(test_features)
pred_z2 = rfg.predict(test_features2)

yn_train = find_yn(train_pred_z1, y_sum, N)
yn_test = find_yn(pred_z1, y_sum, N)

print('Training erros func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_target_train, yn_train)))
print("RMSE: " + str(root_mean_squared_error(y_target_train, yn_train)))
print("MAE: " + str(mean_absolute_error(y_target_train, yn_train)))
print("Max error: " + str(max_error(y_target_train, yn_train)))
print("Median absolute error: " + str(median_absolute_error(y_target_train, yn_train)))
print("Mean Squared error: " + str(mean_squared_error(y_target_train, yn_train)))
print("R2: " + str(r2_score(y_target_train, yn_train)))
print()
print('Testing errors func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_target_test, yn_test)))
print("RMSE: " + str(root_mean_squared_error(y_target_test, yn_test)))
print("MAE: " + str(mean_absolute_error(y_target_test, yn_test)))
print("Max error: " + str(max_error(y_target_test, yn_test)))
print("Median absolute error: " + str(median_absolute_error(y_target_test, yn_test)))
print("Mean Squared error: " + str(mean_squared_error(y_target_test, yn_test)))
print("R2: " + str(r2_score(y_target_test, yn_test)))
print()
print("Train errors")
print(str(mean_absolute_percentage_error(y_target_train, yn_train)))
print(str(root_mean_squared_error(y_target_train, yn_train)))
print(str(mean_absolute_error(y_target_train, yn_train)))
print(str(max_error(y_target_train, yn_train)))
print(str(median_absolute_error(y_target_train, yn_train)))
print(str(mean_squared_error(y_target_train, yn_train)))
print(str(r2_score(y_target_train, yn_train)))
print()
print('Testing errors func:')
print(str(mean_absolute_percentage_error(y_target_test, yn_test)))
print(str(root_mean_squared_error(y_target_test, yn_test)))
print(str(mean_absolute_error(y_target_test, yn_test)))
print(str(max_error(y_target_test, yn_test)))
print(str(median_absolute_error(y_target_test, yn_test)))
print(str(mean_squared_error(y_target_test, yn_test)))
print(str(r2_score(y_target_test, yn_test)))

--- 0.14199542999267578 seconds ---
Training erros func1:
MAPE: 0.07910319516224487
RMSE: 12.8389407500529
MAE: 10.11913161108146
Max error: 33.65048370519992
Median absolute error: 9.55693339482395
Mean Squared error: 164.83839958336893
R2: 0.9071353887330038

Testing errors func1:
MAPE: 0.13841275435032427
RMSE: 22.023376951736687
MAE: 17.586264865108145
Max error: 40.06070345174072
Median absolute error: 14.454082177892282
Mean Squared error: 485.02913235828674
R2: 0.6661050691442569

Train errors
0.07910319516224487
12.8389407500529
10.11913161108146
33.65048370519992
9.55693339482395
164.83839958336893
0.9071353887330038

Testing errors func:
0.13841275435032427
22.023376951736687
17.586264865108145
40.06070345174072
14.454082177892282
485.02913235828674
0.6661050691442569


In [None]:
X_train, y_train = train_data[:,:-1],train_data[:,-1:]
X_test, y_test = test_data[:,:-1],test_data[:,-1:]

### SVR класичне

In [None]:
from sklearn.svm import SVR
svr = SVR(kernel='rbf', gamma='scale', coef0=0.0, epsilon=0.001, max_iter=-1)

start_time = time.time()
svr.fit(X_train, y_train)
print("--- %s seconds ---" % (time.time() - start_time))

pred_train = svr.predict(X_train)

pred_test = svr.predict(X_test)

print('Training erros func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_train, pred_train)))
print("RMSE: " + str(root_mean_squared_error(y_train, pred_train)))
print("MAE: " + str(mean_absolute_error(y_train, pred_train)))
print("Max error: " + str(max_error(y_train, pred_train)))
print("Median absolute error: " + str(median_absolute_error(y_train, pred_train)))
print("Mean Squared error: " + str(mean_squared_error(y_train, pred_train)))
print("R2: " + str(r2_score(y_train, pred_train)))
print()
print('Testing errors func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_test, pred_test)))
print("RMSE: " + str(root_mean_squared_error(y_test, pred_test)))
print("MAE: " + str(mean_absolute_error(y_test, pred_test)))
print("Max error: " + str(max_error(y_test, pred_test)))
print("Median absolute error: " + str(median_absolute_error(y_test, pred_test)))
print("Mean Squared error: " + str(mean_squared_error(y_test, pred_test)))
print("R2: " + str(r2_score(y_test, pred_test)))
print()
print("Train errors")
print(str(mean_absolute_percentage_error(y_train, pred_train)))
print(str(root_mean_squared_error(y_train, pred_train)))
print(str(mean_absolute_error(y_train, pred_train)))
print(str(max_error(y_train, pred_train)))
print(str(median_absolute_error(y_train, pred_train)))
print(str(mean_squared_error(y_train, pred_train)))
print(str(r2_score(y_train, pred_train)))
print()
print('Testing errors func:')
print(str(mean_absolute_percentage_error(y_test, pred_test)))
print(str(root_mean_squared_error(y_test, pred_test)))
print(str(mean_absolute_error(y_test, pred_test)))
print(str(max_error(y_test, pred_test)))
print(str(median_absolute_error(y_test, pred_test)))
print(str(mean_squared_error(y_test, pred_test)))
print(str(r2_score(y_test, pred_test)))

--- 0.002000093460083008 seconds ---
Training erros func1:
MAPE: 0.23520271675269883
RMSE: 42.95488357766631
MAE: 30.636904469051444
Max error: 103.23693656449555
Median absolute error: 32.151552763520044
Mean Squared error: 1351.323055495604
R2: 0.23870839220772255

Testing errors func1:
MAPE: 0.22082769123161244
RMSE: 38.636350934540495
MAE: 27.963122705568367
Max error: 51.445178913581344
Median absolute error: 27.439125415930754
Mean Squared error: 1081.7079834234773
R2: 0.25535027025038737

Train errors
0.23520271675269883
42.95488357766631
30.636904469051444
103.23693656449555
32.151552763520044
1351.323055495604
0.23870839220772255

Testing errors func:
0.22082769123161244
38.636350934540495
27.963122705568367
51.445178913581344
27.439125415930754
1081.7079834234773
0.25535027025038737


### GBR класичне

In [None]:
gbr = GradientBoostingRegressor(random_state=42, max_depth=5)

start_time = time.time()
gbr.fit(X_train, y_train)
print("--- %s seconds ---" % (time.time() - start_time))

pred_train = gbr.predict(X_train)

pred_test = gbr.predict(X_test)

print('Training erros func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_train, pred_train)))
print("RMSE: " + str(root_mean_squared_error(y_train, pred_train)))
print("MAE: " + str(mean_absolute_error(y_train, pred_train)))
print("Max error: " + str(max_error(y_train, pred_train)))
print("Median absolute error: " + str(median_absolute_error(y_train, pred_train)))
print("Mean Squared error: " + str(mean_squared_error(y_train, pred_train)))
print("R2: " + str(r2_score(y_train, pred_train)))
print()
print('Testing errors func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_test, pred_test)))
print("RMSE: " + str(root_mean_squared_error(y_test, pred_test)))
print("MAE: " + str(mean_absolute_error(y_test, pred_test)))
print("Max error: " + str(max_error(y_test, pred_test)))
print("Median absolute error: " + str(median_absolute_error(y_test, pred_test)))
print("Mean Squared error: " + str(mean_squared_error(y_test, pred_test)))
print("R2: " + str(r2_score(y_test, pred_test)))
print()
print("Train errors")
print(str(mean_absolute_percentage_error(y_train, pred_train)))
print(str(root_mean_squared_error(y_train, pred_train)))
print(str(mean_absolute_error(y_train, pred_train)))
print(str(max_error(y_train, pred_train)))
print(str(median_absolute_error(y_train, pred_train)))
print(str(mean_squared_error(y_train, pred_train)))
print(str(r2_score(y_train, pred_train)))
print()
print('Testing errors func:')
print(str(mean_absolute_percentage_error(y_test, pred_test)))
print(str(root_mean_squared_error(y_test, pred_test)))
print(str(mean_absolute_error(y_test, pred_test)))
print(str(max_error(y_test, pred_test)))
print(str(median_absolute_error(y_test, pred_test)))
print(str(mean_squared_error(y_test, pred_test)))
print(str(r2_score(y_test, pred_test)))

--- 0.05600118637084961 seconds ---
Training erros func1:
MAPE: 0.00013093770722333802
RMSE: 59.58031579296425
MAE: 0.01543701565173059
Max error: 0.05467684771696213
Median absolute error: 0.00973646418098184
Mean Squared error: 0.0004254467293656252
R2: 0.999999760317103

Testing errors func1:
MAPE: 0.05386436820033173
RMSE: 61.05922951014874
MAE: 8.497851740513948
Max error: 51.800437843917706
Median absolute error: 3.280222676013125
Mean Squared error: 292.78396104641945
R2: 0.7984469923405528

Train errors
0.00013093770722333802
59.58031579296425
0.01543701565173059
0.05467684771696213
0.00973646418098184
0.0004254467293656252
0.999999760317103

Testing errors func:
0.05386436820033173
61.05922951014874
8.497851740513948
51.800437843917706
3.280222676013125
292.78396104641945
0.7984469923405528


### RF класичне

In [None]:
from sklearn.ensemble import RandomForestRegressor
rfg = RandomForestRegressor(random_state=42, max_depth=5)

start_time = time.time()
rfg.fit(X_train, y_train)
print("--- %s seconds ---" % (time.time() - start_time))

pred_train = rfg.predict(X_train)

pred_test = rfg.predict(X_test)

print('Training erros func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_train, pred_train)))
print("RMSE: " + str(root_mean_squared_error(y_train, pred_train)))
print("MAE: " + str(mean_absolute_error(y_train, pred_train)))
print("Max error: " + str(max_error(y_train, pred_train)))
print("Median absolute error: " + str(median_absolute_error(y_train, pred_train)))
print("Mean Squared error: " + str(mean_squared_error(y_train, pred_train)))
print("R2: " + str(r2_score(y_train, pred_train)))
print()
print('Testing errors func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_test, pred_test)))
print("RMSE: " + str(root_mean_squared_error(y_test, pred_test)))
print("MAE: " + str(mean_absolute_error(y_test, pred_test)))
print("Max error: " + str(max_error(y_test, pred_test)))
print("Median absolute error: " + str(median_absolute_error(y_test, pred_test)))
print("Mean Squared error: " + str(mean_squared_error(y_test, pred_test)))
print("R2: " + str(r2_score(y_test, pred_test)))
print()
print("Train errors")
print(str(mean_absolute_percentage_error(y_train, pred_train)))
print(str(root_mean_squared_error(y_train, pred_train)))
print(str(mean_absolute_error(y_train, pred_train)))
print(str(max_error(y_train, pred_train)))
print(str(median_absolute_error(y_train, pred_train)))
print(str(mean_squared_error(y_train, pred_train)))
print(str(r2_score(y_train, pred_train)))
print()
print('Testing errors func:')
print(str(mean_absolute_percentage_error(y_test, pred_test)))
print(str(root_mean_squared_error(y_test, pred_test)))
print(str(mean_absolute_error(y_test, pred_test)))
print(str(max_error(y_test, pred_test)))
print(str(median_absolute_error(y_test, pred_test)))
print(str(mean_squared_error(y_test, pred_test)))
print(str(r2_score(y_test, pred_test)))

--- 0.09299898147583008 seconds ---
Training erros func1:
MAPE: 0.030773385670504184
RMSE: 58.09450562538824
MAE: 4.0553678571428575
Max error: 12.385000000000005
Median absolute error: 3.2295634920635052
Mean Squared error: 29.383882588425127
R2: 0.9834460729964254

Testing errors func1:
MAPE: 0.06538037611972256
RMSE: 53.6853190307977
MAE: 8.569791269841268
Max error: 17.68983333333331
Median absolute error: 7.468476190476196
Mean Squared error: 100.38270324805363
R2: 0.9308963657561036

Train errors
0.030773385670504184
58.09450562538824
4.0553678571428575
12.385000000000005
3.2295634920635052
29.383882588425127
0.9834460729964254

Testing errors func:
0.06538037611972256
53.6853190307977
8.569791269841268
17.68983333333331
7.468476190476196
100.38270324805363
0.9308963657561036


: 