In [1]:
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# Load the imputed dataset: 'Price' is the regression target, every other column is a feature.
data = pd.read_csv('../Data/imputed_data_7.csv')
X = data.drop(columns=['Price']).values
y = data['Price'].values

# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)

# Candidate hyperparameters: 2 kernels x 4 C values x 3 gamma values = 24 combinations.
# NOTE(review): per the scikit-learn SVR documentation, gamma is only used by the 'rbf',
# 'poly' and 'sigmoid' kernels, so each 'linear' candidate is evaluated once per gamma value
# with the same result. A list of per-kernel grids would avoid those duplicate fits, but it
# would also drop 'gamma' from the reported best_params_, so it is left unchanged here.
# NOTE(review): the features are passed to SVR unscaled — confirm whether the CSV is already
# standardised, since SVR is sensitive to feature scale.
param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10, 100],
    'gamma': [0.1, 1, 10],
}

# 5-fold cross-validated search on the training split only; candidates are ranked by
# negated MSE (higher is better), so the test split plays no part in model selection.
svr = SVR()
grid_search = GridSearchCV(estimator=svr, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', verbose=1)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_

# GridSearchCV refits the winning candidate on all of (X_train, y_train) once the search
# finishes (refit=True is the default), so reuse that fitted estimator instead of
# constructing and fitting a second, identical SVR.
svr_best = grid_search.best_estimator_
y_pred = svr_best.predict(X_test)

# Held-out test metrics for the selected model. They are kept as notebook variables;
# this cell only prints the selected hyperparameters.
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
r_squared = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Best hyperparameters:", best_params)

Fitting 5 folds for each of 24 candidates, totalling 120 fits
Best hyperparameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'}


In [2]:
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Reload the imputed dataset: 'Price' is the target, all remaining columns are features.
data = pd.read_csv('../Data/imputed_data_7.csv')
y = data['Price'].values
X = data.drop(columns=['Price']).values

# Same 80/20 split and seed as the grid-search cell.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=13,
)

# Train the SVR model with the best hyperparameters reported by the grid search above.
best_params = {
    'C': 0.1,
    'gamma': 0.1,
    'kernel': 'linear',
}
svr_best = SVR(**best_params)

# 5-fold cross-validation on the training split. The scorer yields negated MSE,
# which is why the scores are sign-flipped when they are printed below.
cv_scores = cross_val_score(
    svr_best,
    X_train,
    y_train,
    cv=5,
    scoring='neg_mean_squared_error',
)

# Fit on the full training split, then predict the held-out test split.
y_pred = svr_best.fit(X_train, y_train).predict(X_test)

# Test-set error metrics; RMSE is derived from MSE.
mae = mean_absolute_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5

# Report the configuration, the cross-validation scores and the test-set metrics.
print("Best hyperparameters:", best_params)
print(f"Cross-validation scores (MSE): {-cv_scores}")
print(f"Mean cross-validation score (MSE): {(-cv_scores).mean()}")
print(f"Test set RMSE: {rmse}")
print(f"Test set R-squared: {r_squared}")
print(f"Test set MAE: {mae}")

Best hyperparameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'}
Cross-validation scores (MSE): [4481.57925507 4489.17537518 3172.64229951 3499.50540288 4286.31921088]
Mean cross-validation score (MSE): 3985.844308704858
Test set RMSE: 66.00592947371612
Test set R-squared: 0.5872200797266235
Test set MAE: 50.6782792149458
