In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
from sklearn.kernel_ridge import KernelRidge
import math
import time

from sklearn.metrics import max_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import r2_score

In [2]:
def preprocess_features(x1, x2, reversed=False):
    x1_size, x2_size = len(x1), len(x2)
    features = np.array([np.concatenate((x2[j][:-1], x1[i][:-1])) if reversed else np.concatenate((x1[i][:-1], x2[j][:-1])) 
                         for i in range(x1_size) for j in range(x2_size)])   
    labels = np.array([(x2[j][-1] - x1[i][-1]) if reversed else (x1[i][-1] - x2[j][-1]) 
                       for i in range(x1_size) for j in range(x2_size)])
    return features, labels

In [3]:
# def mean_absolute_percentage_error_v2(y_true, y_pred): 
#     y_true, y_pred = np.array(y_true), np.array(y_pred)
#     return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  
# def symmetric_mean_absolute_percentage_error(y_true, y_pred):
#     y_true, y_pred = np.array(y_true), np.array(y_pred)
#     return np.sum(np.abs(y_true - y_pred)) / np.sum(y_pred + y_true)

def root_mean_squared_error(y_true, y_pred):
    return np.sqrt(((y_pred - y_true) ** 2).mean())

# def mean_absolute_error(y_true, y_pred):
#     y_true, y_pred = np.array(y_true), np.array(y_pred)
#     return np.mean(np.abs(y_true - y_pred))


In [4]:
def find_yn(z, y_sum, N):
    return np.array([(y_sum + sum(z[i: i + N])) / N for i in range(0, len(z), N)])

In [5]:
def find_yn2(z1, z2, y_sum, N):
    return np.array([(y_sum + (sum(z1[i: i + N]) - sum(z2[i: i + N])) / 2) / N for i in range(0, len(z1), N)])

In [6]:
# train_data = pd.read_csv("./datasets/train_energy_eff_Y1.csv").drop(['Unnamed: 0'],axis=1).to_numpy()
# test_data = pd.read_csv("./datasets/test_energy_eff_Y1.csv").drop(['Unnamed: 0'],axis=1).to_numpy()

In [7]:
train_data = np.loadtxt("./datasets/BodyFat_train.txt",delimiter=',')
test_data = np.loadtxt("./datasets/BodyFat_test.txt",delimiter=',')

In [8]:
y_sum = sum([a[-1] for a in train_data])
N = len(train_data)
y_target_test = np.array([a[-1] for a in test_data])
y_target_train = np.array([a[-1] for a in train_data])

In [9]:
train_features, train_labels = preprocess_features(train_data, train_data)
train_features2, train_labels2 = preprocess_features(train_data, train_data, reversed=True)
test_features, test_labels = preprocess_features(test_data, train_data)
test_features2, test_labels2 = preprocess_features(test_data, train_data, reversed=True)

In [10]:
scaler = MaxAbsScaler()
scaler.fit(train_features)
train_features = scaler.transform(train_features)
train_features2 = scaler.transform(train_features2)
test_features = scaler.transform(test_features)
test_features2 = scaler.transform(test_features2)

In [11]:
from sklearn.svm import SVR
svr = SVR(kernel='rbf', gamma='scale', coef0=0.0, epsilon=0.001, max_iter=-1)

start_time = time.time()
svr.fit(train_features, train_labels)
print("--- %s seconds ---" % (time.time() - start_time))

train_pred_z1 = svr.predict(train_features)
train_pred_z2 = svr.predict(train_features2)

pred_z1 = svr.predict(test_features)
pred_z2 = svr.predict(test_features2)

yn_train = find_yn(train_pred_z1, y_sum, N)
yn_test = find_yn(pred_z1, y_sum, N)

print('Training erros func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_target_train, yn_train)))
print("RMSE: " + str(root_mean_squared_error(y_target_train, yn_train)))
print("MAE: " + str(mean_absolute_error(y_target_train, yn_train)))
print("Max error: " + str(max_error(y_target_train, yn_train)))
print("Median absolute error: " + str(median_absolute_error(y_target_train, yn_train)))
print("Mean Squared error: " + str(mean_squared_error(y_target_train, yn_train)))
print("R2: " + str(r2_score(y_target_train, yn_train)))
print()
print('Testing errors func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_target_test, yn_test)))
print("RMSE: " + str(root_mean_squared_error(y_target_test, yn_test)))
print("MAE: " + str(mean_absolute_error(y_target_test, yn_test)))
print("Max error: " + str(max_error(y_target_test, yn_test)))
print("Median absolute error: " + str(median_absolute_error(y_target_test, yn_test)))
print("Mean Squared error: " + str(mean_squared_error(y_target_test, yn_test)))
print("R2: " + str(r2_score(y_target_test, yn_test)))
print()
print("Train errors")
print(str(mean_absolute_percentage_error(y_target_train, yn_train)))
print(str(root_mean_squared_error(y_target_train, yn_train)))
print(str(mean_absolute_error(y_target_train, yn_train)))
print(str(max_error(y_target_train, yn_train)))
print(str(median_absolute_error(y_target_train, yn_train)))
print(str(mean_squared_error(y_target_train, yn_train)))
print(str(r2_score(y_target_train, yn_train)))
print()
print('Testing errors func:')
print(str(mean_absolute_percentage_error(y_target_test, yn_test)))
print(str(root_mean_squared_error(y_target_test, yn_test)))
print(str(mean_absolute_error(y_target_test, yn_test)))
print(str(max_error(y_target_test, yn_test)))
print(str(median_absolute_error(y_target_test, yn_test)))
print(str(mean_squared_error(y_target_test, yn_test)))
print(str(r2_score(y_target_test, yn_test)))

--- 101.97428154945374 seconds ---
Training erros func1:
MAPE: 73208537370276.45
RMSE: 3.584112658175075
MAE: 2.787269950074675
Max error: 9.447897994281202
Median absolute error: 2.2929582542261278
Mean Squared error: 12.845863546490802
R2: 0.8151762355355048

Testing errors func1:
MAPE: 0.4657605445129911
RMSE: 4.753931571471416
MAE: 4.09769466088785
Max error: 10.374430319435408
Median absolute error: 4.069808299756339
Mean Squared error: 22.599865386232683
R2: 0.6779009972544756

Train errors
73208537370276.45
3.584112658175075
2.787269950074675
9.447897994281202
2.2929582542261278
12.845863546490802
0.8151762355355048

Testing errors func:
0.4657605445129911
4.753931571471416
4.09769466088785
10.374430319435408
4.069808299756339
22.599865386232683
0.6779009972544756


In [12]:
X_train, y_train = train_data[:,:-1],train_data[:,-1:]
X_test, y_test = test_data[:,:-1],test_data[:,-1:]

In [13]:
from sklearn.svm import SVR
svr = SVR(kernel='rbf', gamma='scale', coef0=0.0, epsilon=0.001, max_iter=-1)

start_time = time.time()
svr.fit(X_train, y_train)
print("--- %s seconds ---" % (time.time() - start_time))

pred_train = svr.predict(X_train)

pred_test = svr.predict(X_test)

print('Training erros func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_train, pred_train)))
print("RMSE: " + str(root_mean_squared_error(y_train, pred_train)))
print("MAE: " + str(mean_absolute_error(y_train, pred_train)))
print("Max error: " + str(max_error(y_train, pred_train)))
print("Median absolute error: " + str(median_absolute_error(y_train, pred_train)))
print("Mean Squared error: " + str(mean_squared_error(y_train, pred_train)))
print("R2: " + str(r2_score(y_train, pred_train)))
print()
print('Testing errors func1:')
print("MAPE: " + str(mean_absolute_percentage_error(y_test, pred_test)))
print("RMSE: " + str(root_mean_squared_error(y_test, pred_test)))
print("MAE: " + str(mean_absolute_error(y_test, pred_test)))
print("Max error: " + str(max_error(y_test, pred_test)))
print("Median absolute error: " + str(median_absolute_error(y_test, pred_test)))
print("Mean Squared error: " + str(mean_squared_error(y_test, pred_test)))
print("R2: " + str(r2_score(y_test, pred_test)))
print()
print("Train errors")
print(str(mean_absolute_percentage_error(y_train, pred_train)))
print(str(root_mean_squared_error(y_train, pred_train)))
print(str(mean_absolute_error(y_train, pred_train)))
print(str(max_error(y_train, pred_train)))
print(str(median_absolute_error(y_train, pred_train)))
print(str(mean_squared_error(y_train, pred_train)))
print(str(r2_score(y_train, pred_train)))
print()
print('Testing errors func:')
print(str(mean_absolute_percentage_error(y_test, pred_test)))
print(str(root_mean_squared_error(y_test, pred_test)))
print(str(mean_absolute_error(y_test, pred_test)))
print(str(max_error(y_test, pred_test)))
print(str(median_absolute_error(y_test, pred_test)))
print(str(mean_squared_error(y_test, pred_test)))
print(str(r2_score(y_test, pred_test)))

--- 0.02000260353088379 seconds ---
Training erros func1:
MAPE: 271962226009243.03
RMSE: 9.029176760928937
MAE: 5.202278713502715
Max error: 21.520646430069927
Median absolute error: 4.747072020659871
Mean Squared error: 40.29306402902694
R2: 0.4202712998856287

Testing errors func1:
MAPE: 0.7658565735990337
RMSE: 9.022797305874603
MAE: 5.28410149029656
Max error: 14.709468660238038
Median absolute error: 5.093426785972628
Mean Squared error: 41.998015506751365
R2: 0.40143365100500406

Train errors
271962226009243.03
9.029176760928937
5.202278713502715
21.520646430069927
4.747072020659871
40.29306402902694
0.4202712998856287

Testing errors func:
0.7658565735990337
9.022797305874603
5.28410149029656
14.709468660238038
5.093426785972628
41.998015506751365
0.40143365100500406


  y = column_or_1d(y, warn=True)
