# Support Vector Regressor

## 1. Importación de librerías, establecimiento de directorio de trabajo y carga de datos

In [13]:
# librerías básicas 
import os
import pandas as pd  
import numpy as np  
import warnings  
warnings.filterwarnings('ignore')
  
# para la construcción del modelo 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# algoritmo
from sklearn.svm import SVR
from sklearn.svm import NuSVR
from sklearn.svm import LinearSVR

# para obtener métricas
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold

# para guardar el modelo
import pickle
  
# para visualización
import seaborn as sns  
import matplotlib.pyplot as plt  
from mpl_toolkits.mplot3d import Axes3D  
%matplotlib inline  

os.chdir("C:/Users/aleja/OneDrive - Universidad Politécnica de Madrid/Universidad/Asignaturas/Cuarto/Trabajo de Fin de Grado")

In [2]:
accesos = pd.read_csv("Resultados/Accesos/Accesos_Calidad_Vida.csv", index_col="Unnamed: 0")
accesos.head()

Unnamed: 0,latitud,longitud,edad,calidad_vida
0,41.621468,2.068474,11,52.885748
1,41.60027,2.085002,12,74.793875
2,41.616524,2.089927,12,79.858657
3,41.61882,2.08948,11,79.858657
4,41.618908,2.089475,12,79.858657


## 2. Construcción de la matriz de características (X) y el vector variable dependiente (y)

In [3]:
X = accesos.iloc[:, :-1].values
y = accesos.iloc[:, -1].values

## 3. División del dataset en set de entrenamiento y de test

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

## 4. Feature scaling of X (rango: -3 a 3)

In [5]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## 5. Construcción y entrenamiento del modelo de regresión sobre el set de entrenamiento

In [6]:
# se crean 4 modelos por cada kernel del algoritmo y 2 más por cada tipo de SVR
svr_rbf = SVR(kernel="rbf", C=100, gamma=0.1, epsilon=0.1)
svr_lin = SVR(kernel="linear", C=100, gamma="auto")
svr_poly = SVR(kernel="poly", C=100, gamma="auto", degree=3, epsilon=0.1, coef0=1)
svr_sig = SVR(kernel="sigmoid", C=100, gamma="auto")
svr_linear = LinearSVR(C=100)
svr_nu = NuSVR(kernel="rbf", C=100)

# se entrenan los modelos con el dataset de entrenamiento
svr_rbf.fit(X_train, y_train) 
svr_lin.fit(X_train, y_train) 
svr_poly.fit(X_train, y_train) 
svr_sig.fit(X_train, y_train) 
svr_linear.fit(X_train, y_train) 
svr_nu.fit(X_train, y_train) 

## 6. Predicción de resultados del set de test

In [7]:
y_pred_rbf = svr_rbf.predict(X_test)
y_pred_lin = svr_lin.predict(X_test)
y_pred_poly = svr_poly.predict(X_test)
y_pred_sig = svr_poly.predict(X_test)
y_pred_linear = svr_linear.predict(X_test)
y_pred_nu = svr_nu.predict(X_test)

## 7.  Métricas de rendimiento

In [10]:
#Methods
print('\t\tRBF\t\tLinear\t\tPolynomial\t\tSigmoid\t\tLinearSVR\t\tNuSVR')

# R2score
print('R2score:', 
      r2_score(y_test, y_pred_rbf), 
      r2_score(y_test, y_pred_lin), 
      r2_score(y_test, y_pred_poly), 
      r2_score(y_test, y_pred_sig), 
      r2_score(y_test, y_pred_linear), 
      r2_score(y_test, y_pred_nu))

# Mean Absolute Error
print('MAE: \t', 
      mean_absolute_error(y_test, y_pred_rbf), 
      mean_absolute_error(y_test, y_pred_lin), 
      mean_absolute_error(y_test, y_pred_poly), 
      mean_absolute_error(y_test, y_pred_sig), 
      mean_absolute_error(y_test, y_pred_linear), 
      mean_absolute_error(y_test, y_pred_nu))

# Mean Squared Error
print('MSE: \t', 
      mean_squared_error(y_test, y_pred_rbf), 
      mean_squared_error(y_test, y_pred_lin), 
      mean_squared_error(y_test, y_pred_poly), 
      mean_squared_error(y_test, y_pred_sig), 
      mean_squared_error(y_test, y_pred_linear), 
      mean_squared_error(y_test, y_pred_nu))

# Root Mean Squared Error
print("RMSE: \t", 
      mean_squared_error(y_test, y_pred_rbf, squared=False), 
      mean_squared_error(y_test, y_pred_lin, squared=False), 
      mean_squared_error(y_test, y_pred_poly, squared=False), 
      mean_squared_error(y_test, y_pred_sig, squared=False), 
      mean_squared_error(y_test, y_pred_linear, squared=False), 
      mean_squared_error(y_test, y_pred_nu, squared=False))

		RBF		Linear		Polynomial		Sigmoid		LinearSVR		NuSVR
R2score: 0.962863703800964 0.6552149149196811 0.8954363426809763 0.8954363426809763 0.6572807693883878 0.9833135243485418
MAE: 	 1.9841965806349153 8.069982837760053 3.541296007357603 3.541296007357603 8.073438226662311 1.2410708126012129
MSE: 	 14.94645859615811 138.76763506802277 42.08433620858022 42.08433620858022 137.93617874516482 6.715901771776427
RMSE: 	 3.8660650015433147 11.779967532553847 6.487244115075385 6.487244115075385 11.74462339733228 2.5915056958796034


### 7.1. Para ganar más intuición

In [11]:
np.set_printoptions(precision=2) # redondea a dos decimales

print(np.concatenate((y_pred_nu.reshape(len(y_pred_nu), 1), y_test.reshape(len(y_test), 1)), 1))

[[77.4  79.86]
 [80.14 79.86]
 [79.69 79.86]
 ...
 [80.06 79.86]
 [79.67 79.86]
 [80.04 79.86]]


### 7.2 Más métricas de rendimiento

In [None]:
accuracies = cross_val_score(estimator = svr_nu, X = X_train, y = y_train, cv = 10)

print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

### 7.3 Grid Search

In [None]:
parameters = {
    'C': [1, 10, 100],
    'epsilon': [0.5, 1, 1.5]
}

cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

grid_search = GridSearchCV(estimator = svr_nu,
                           param_grid = parameters,
                           scoring = 'r2',
                           cv = cv,
                           n_jobs = -1)

grid_search.fit(X_train, y_train)

best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_

print("Best r2: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

## 8. Guardar el mejor modelo

In [None]:
with open('Modelos/SVR/SVM_rbf.pkl','wb') as f:
    pickle.dump(svr_rbf,f)
    
with open('Modelos/SVR/SVM_lin.pkl','wb') as f:
    pickle.dump(svr_lin,f)
    
with open('Modelos/SVR/SVM_poly.pkl','wb') as f:
    pickle.dump(svr_poly,f)
    
with open('Modelos/SVR/SVM_sig.pkl','wb') as f:
    pickle.dump(svr_sig,f)
    
with open('Modelos/SVR/SVM_linear.pkl','wb') as f:
    pickle.dump(svr_linear,f)
    
with open('Modelos/SVR/NUSVM.pkl','wb') as f:
    pickle.dump(svr_nu,f)

## 9. Ejemplo de predicción

In [None]:
print(regressor.predict(sc.transform([['425060.955125', '4.606138e+06', '22']]))) 