In [1]:
from sklearn.svm import SVR
import pandas as pd
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV

In [2]:
# Hyperparameter search space for SVR, shared by every GridSearchCV run in
# this notebook.
#
# The space is a list of two sub-grids rather than one flat dict because
# `gamma` is a coefficient of the 'rbf' kernel and is ignored by the 'linear'
# kernel. Crossing the linear kernel with six gamma values only refits the
# same model six times, so the split cuts the search from 360 to 210
# candidates without removing any distinct model.
C_VALUES = [0.001, 0.01, 0.1, 1, 10, 100]
GAMMA_VALUES = [0.001, 0.01, 0.1, 1, 10, 100]
EPSILON_VALUES = [0.01, 0.1, 0.5, 1.0, 2.0]

param_grid = [
    # Linear kernel: only C and epsilon influence the fitted model.
    {
        'kernel': ['linear'],
        'C': C_VALUES,
        'epsilon': EPSILON_VALUES,
    },
    # RBF kernel: gamma is an additional kernel coefficient to tune.
    {
        'kernel': ['rbf'],
        'C': C_VALUES,
        'gamma': GAMMA_VALUES,
        'epsilon': EPSILON_VALUES,
    },
]

# Predict the World Bank Dataset without PCA

In [None]:
# Load the World Bank train/test features (without PCA) and targets as
# NumPy arrays.
X_train = pd.read_csv('Datasets/World Bank Transformed/X_train without PCA.csv').values
y_train = pd.read_csv('Datasets/World Bank Transformed/y_train.csv').values
X_test = pd.read_csv('Datasets/World Bank Transformed/X_test without PCA.csv').values
y_test = pd.read_csv('Datasets/World Bank Transformed/y_test.csv').values

# Exhaustive 5-fold cross-validated search over `param_grid`, ranked by R^2.
# The estimator is an SVR, so it is named accordingly (it was previously
# called `ridge_model`). n_jobs=-1 spreads the independent fits over all
# available cores; it does not change which candidate wins.
svr_model = SVR()
grid_search = GridSearchCV(svr_model, param_grid, cv=5, scoring='r2', n_jobs=-1)
# The target is flattened to 1-D before fitting.
grid_search.fit(X_train, y_train.ravel())

In [None]:
# Report the hyperparameter combination selected by the most recently
# fitted `grid_search`.
print("Best parameters:", grid_search.best_params_)

In [None]:
# Predict on the test features with the best estimator found by the search.
y_pred = grid_search.best_estimator_.predict(X_test)

# Compute all three regression metrics up front; the summary table at the
# end of the notebook reads these WB_* names.
WB_MAE = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
WB_MSE = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
WB_R2 = metrics.r2_score(y_true=y_test, y_pred=y_pred)

# Report the metrics, separated by blank lines.
print('Mean Absolute Error:', WB_MAE)
print('\nMean Squared Error:', WB_MSE)
print('\nR^2 Score:', WB_R2, '\n')

# Predict the World Bank Dataset with PCA

In [None]:
# Load the World Bank train/test features (with PCA) and targets as
# NumPy arrays.
X_train = pd.read_csv('Datasets/World Bank Transformed/X_train with PCA.csv').values
y_train = pd.read_csv('Datasets/World Bank Transformed/y_train.csv').values
X_test = pd.read_csv('Datasets/World Bank Transformed/X_test with PCA.csv').values
y_test = pd.read_csv('Datasets/World Bank Transformed/y_test.csv').values

# Exhaustive 5-fold cross-validated search over `param_grid`, ranked by R^2.
# The estimator is an SVR, so it is named accordingly (it was previously
# called `ridge_model`). n_jobs=-1 spreads the independent fits over all
# available cores; it does not change which candidate wins.
svr_model = SVR()
grid_search = GridSearchCV(svr_model, param_grid, cv=5, scoring='r2', n_jobs=-1)
# The target is flattened to 1-D before fitting.
grid_search.fit(X_train, y_train.ravel())

In [None]:
# Report the hyperparameter combination selected by the most recently
# fitted `grid_search`.
print("Best parameters:", grid_search.best_params_)

In [None]:
# Predict on the test features with the best estimator found by the search.
y_pred = grid_search.best_estimator_.predict(X_test)

# Compute all three regression metrics up front; the summary table at the
# end of the notebook reads these WB_PCA_* names.
WB_PCA_MAE = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
WB_PCA_MSE = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
WB_PCA_R2 = metrics.r2_score(y_true=y_test, y_pred=y_pred)

# Report the metrics, separated by blank lines.
print('Mean Absolute Error:', WB_PCA_MAE)
print('\nMean Squared Error:', WB_PCA_MSE)
print('\nR^2 Score:', WB_PCA_R2, '\n')

# Predict the Kaggle Dataset without PCA

In [None]:
# Load the Kaggle train/test features (without PCA) and targets as
# NumPy arrays.
X_train = pd.read_csv('Datasets/Kaggle Transformed/X_train without PCA.csv').values
y_train = pd.read_csv('Datasets/Kaggle Transformed/y_train.csv').values
X_test = pd.read_csv('Datasets/Kaggle Transformed/X_test without PCA.csv').values
y_test = pd.read_csv('Datasets/Kaggle Transformed/y_test.csv').values

# Exhaustive 5-fold cross-validated search over `param_grid`, ranked by R^2.
# The estimator is an SVR, so it is named accordingly (it was previously
# called `ridge_model`). n_jobs=-1 spreads the independent fits over all
# available cores; it does not change which candidate wins.
svr_model = SVR()
grid_search = GridSearchCV(svr_model, param_grid, cv=5, scoring='r2', n_jobs=-1)
# The target is flattened to 1-D before fitting.
grid_search.fit(X_train, y_train.ravel())

In [None]:
# Report the hyperparameter combination selected by the most recently
# fitted `grid_search`.
print("Best parameters:", grid_search.best_params_)

In [None]:
# Predict on the test features with the best estimator found by the search.
y_pred = grid_search.best_estimator_.predict(X_test)

# Compute all three regression metrics up front; the summary table at the
# end of the notebook reads these KG_* names.
KG_MAE = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
KG_MSE = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
KG_R2 = metrics.r2_score(y_true=y_test, y_pred=y_pred)

# Report the metrics, separated by blank lines.
print('Mean Absolute Error:', KG_MAE)
print('\nMean Squared Error:', KG_MSE)
print('\nR^2 Score:', KG_R2, '\n')

# Predict the Kaggle Dataset with PCA

In [None]:
# Load the Kaggle train/test features (with PCA) and targets as
# NumPy arrays.
X_train = pd.read_csv('Datasets/Kaggle Transformed/X_train with PCA.csv').values
y_train = pd.read_csv('Datasets/Kaggle Transformed/y_train.csv').values
X_test = pd.read_csv('Datasets/Kaggle Transformed/X_test with PCA.csv').values
y_test = pd.read_csv('Datasets/Kaggle Transformed/y_test.csv').values

# Exhaustive 5-fold cross-validated search over `param_grid`, ranked by R^2.
# The estimator is an SVR, so it is named accordingly (it was previously
# called `ridge_model`). n_jobs=-1 spreads the independent fits over all
# available cores; it does not change which candidate wins.
svr_model = SVR()
grid_search = GridSearchCV(svr_model, param_grid, cv=5, scoring='r2', n_jobs=-1)
# The target is flattened to 1-D before fitting.
grid_search.fit(X_train, y_train.ravel())

In [None]:
# Report the hyperparameter combination selected by the most recently
# fitted `grid_search`.
print("Best parameters:", grid_search.best_params_)

In [None]:
# Predict on the test features with the best estimator found by the search.
y_pred = grid_search.best_estimator_.predict(X_test)

# Compute all three regression metrics up front; the summary table at the
# end of the notebook reads these KG_PCA_* names.
KG_PCA_MAE = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
KG_PCA_MSE = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
KG_PCA_R2 = metrics.r2_score(y_true=y_test, y_pred=y_pred)

# Report the metrics, separated by blank lines.
print('Mean Absolute Error:', KG_PCA_MAE)
print('\nMean Squared Error:', KG_PCA_MSE)
print('\nR^2 Score:', KG_PCA_R2, '\n')

In [None]:
# Row labels, in the same order as the values inside each column list below.
index_labels = ['Mean Squared Error', 'Mean Absolute Error', 'R^2 Score']

# One column per dataset / feature-set combination; each list holds
# [MSE, MAE, R^2] computed in the evaluation cells above.
data = dict([
    ('World Bank without PCA', [WB_MSE, WB_MAE, WB_R2]),
    ('World Bank with PCA', [WB_PCA_MSE, WB_PCA_MAE, WB_PCA_R2]),
    ('Kaggle without PCA', [KG_MSE, KG_MAE, KG_R2]),
    ('Kaggle with PCA', [KG_PCA_MSE, KG_PCA_MAE, KG_PCA_R2]),
])

# Side-by-side comparison table, shown as the cell's output.
df = pd.DataFrame(data=data, index=index_labels)
df