In [1]:
import numpy as np
import pandas as pd
import sklearn
from matplotlib import pyplot as plt

In [2]:
from sklearn.svm import LinearSVC, SVR, SVC
# For Storing Models
import pickle
# For timing
import time
from datetime import timedelta
from sklearn.metrics import mean_squared_error

In [3]:
# SMAPE
def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Computes ``100 / n * sum(2 * |y_pred - y_true| / (|y_true| + |y_pred|))``
    where ``n = len(y_true)``.

    Pairs where both the true and the predicted value are exactly zero have a
    zero denominator; they are a perfect prediction and contribute 0 to the
    sum instead of turning the whole score into NaN.

    Parameters
    ----------
    y_true : array-like
        Ground-truth values.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    float
        SMAPE as a percentage (0 for a perfect prediction).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    abs_error = np.abs(y_pred - y_true)
    scale = np.abs(y_true) + np.abs(y_pred)

    # Divide only where the denominator is non-zero; the pre-filled zeros are
    # kept for the 0/0 pairs.
    ratio = np.zeros_like(abs_error)
    np.divide(2 * abs_error, scale, out=ratio, where=scale != 0)

    return 100 / len(y_true) * np.sum(ratio)

In [4]:
# Read the train and test splits from the parent directory, in that order.
df, df_test = map(pd.read_csv, ("../train1.csv", "../test1.csv"))

In [5]:
# Feature columns are 'pca_component 0' .. 'pca_component 10'. Build the list
# once so the train and test selections cannot drift apart.
pca_columns = [f"pca_component {k}" for k in range(11)]

# Features are the PCA components; the regression target is the 'price' column.
x_train, y_train = df[pca_columns], df['price']
x_test, y_test = df_test[pca_columns], df_test['price']

In [6]:
#Min Max Scaling
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training features only, then apply the same
# transformation to the test features so no test statistics leak into training.
scaler = MinMaxScaler()
x_train_svr = scaler.fit_transform(x_train)
x_test_svr = scaler.transform(x_test)

# Targets as plain 1-D NumPy arrays. Series.ravel() is deprecated in recent
# pandas releases; to_numpy() is the supported equivalent.
y_train_svr = y_train.to_numpy()
y_test_svr = y_test.to_numpy()

In [7]:
#Train the SVRs with different kernels
clf = [SVR(kernel='linear'),
       SVR(kernel='rbf'),
       SVR(kernel='poly')]

# Fit each regressor in turn and report its wall-clock training time.
# Iterating over the list keeps the loop correct if kernels are added/removed.
for svr in clf:
    print("Training Started for 1 model")
    start_time = time.monotonic()
    svr.fit(x_train_svr, y_train_svr)
    end_time = time.monotonic()
    print("Training Done for 1 model")
    print("Time Taken: ",timedelta(seconds=end_time - start_time) )

# save the model
model = clf
filename = '../SVM/svr_models.sav'
# Use a context manager so the file handle is flushed and closed even if
# pickling raises.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

Training Started for 1 model
Training Done for 1 model
Time Taken:  0:00:21.259041
Training Started for 1 model
Training Done for 1 model
Time Taken:  0:00:32.608808
Training Started for 1 model
Training Done for 1 model
Time Taken:  0:00:24.382730


In [9]:
# Report fit quality of every trained SVR on the training split.
for i in range(len(clf)):
    # Use the model to predict values
    y_pred_svr = clf[i].predict(x_train_svr)

    # Compute the MSE once and derive the RMSE from it. The `squared=False`
    # argument of mean_squared_error is deprecated/removed in newer
    # scikit-learn releases, so take the square root explicitly.
    mse = mean_squared_error(y_train_svr, y_pred_svr)

    print("Training Data")
    print("R^2 value using score fn: %.3f" % clf[i].score(x_train_svr,y_train_svr))
    # This is the plain mean squared error (no log is applied to the values).
    print("Mean Squared Error : %0.3f" % mse)
    print("Root Mean Squared Error : %0.3f" % np.sqrt(mse))
    # NOTE(review): np.exp assumes the target is log-transformed. The recorded
    # RMSE (~40) and SMAPE (~193, close to the 200 ceiling) suggest 'price' is
    # on its raw scale -- confirm, and drop np.exp if so.
    print("SMAPE : %0.3f " % symmetric_mean_absolute_percentage_error(np.exp(y_train_svr),np.exp(y_pred_svr)))

Training Data
R^2 value using score fn: 0.427
Mean Squared Log Error : 1662.667
Root Mean Squared Error : 40.776
SMAPE : 193.587 
Training Data
R^2 value using score fn: 0.453
Mean Squared Log Error : 1586.478
Root Mean Squared Error : 39.831
SMAPE : 193.017 
Training Data
R^2 value using score fn: 0.450
Mean Squared Log Error : 1594.346
Root Mean Squared Error : 39.929
SMAPE : 192.699 


In [10]:
# Report generalisation quality of every trained SVR on the held-out test split.
for i in range(len(clf)):
    # Use the model to predict values
    y_pred_svr = clf[i].predict(x_test_svr)

    # Compute the MSE once and derive the RMSE from it. The `squared=False`
    # argument of mean_squared_error is deprecated/removed in newer
    # scikit-learn releases, so take the square root explicitly.
    mse = mean_squared_error(y_test_svr, y_pred_svr)

    print("Test Data")
    print("R^2 value using score fn: %.3f" % clf[i].score(x_test_svr,y_test_svr))
    # This is the plain mean squared error (no log is applied to the values).
    print("Mean Squared Error : %0.3f" % mse)
    print("Root Mean Squared Error : %0.3f" % np.sqrt(mse))
    # NOTE(review): np.exp assumes the target is log-transformed. The recorded
    # RMSE (~40) and SMAPE (~193, close to the 200 ceiling) suggest 'price' is
    # on its raw scale -- confirm, and drop np.exp if so.
    print("SMAPE : %0.3f " % symmetric_mean_absolute_percentage_error(np.exp(y_test_svr),np.exp(y_pred_svr)))


Test Data
R^2 value using score fn: 0.440
Mean Squared Log Error : 1609.601
Root Mean Squared Error : 40.120
SMAPE : 193.670 
Test Data
R^2 value using score fn: 0.466
Mean Squared Log Error : 1535.585
Root Mean Squared Error : 39.187
SMAPE : 192.794 
Test Data
R^2 value using score fn: 0.459
Mean Squared Log Error : 1553.307
Root Mean Squared Error : 39.412
SMAPE : 192.860 
