# Gradient Boosting Regressor

## 1. Importación de librerías, establecimiento de directorio de trabajo y carga de datos

In [1]:
# librerias principales para uso y visualización de datos
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings  
warnings.filterwarnings('ignore')

# dividir datos para modelo
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# algoritmo
from sklearn.ensemble import GradientBoostingRegressor

# metricas de rendimiento
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold

# guardar el modelo
import pickle

os.chdir("C:/Users/aleja/OneDrive - Universidad Politécnica de Madrid/Universidad/Asignaturas/Cuarto/Trabajo de Fin de Grado")

In [5]:
accesos = pd.read_csv("Resultados/Accesos_Calidad_Vida.csv", index_col="Unnamed: 0")
accesos.head()

Unnamed: 0,latitud,longitud,edad,calidad_vida
0,41.621468,2.068474,11,52.885748
1,41.60027,2.085002,12,74.793875
2,41.616524,2.089927,12,79.858657
3,41.61882,2.08948,11,79.858657
4,41.618908,2.089475,12,79.858657


## 2. Construcción de la matriz de características (X) y el vector variable dependiente (y)

In [6]:
X = accesos.iloc[:, :-1].values
y = accesos.iloc[:, -1].values

## 3. División del dataset en set de entrenamiento y de test

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=500)

## 4. Feature scaling of X (rango: -3 a 3)

In [8]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## 5. Construcción y entrenamiento del modelo de regresión sobre el set de entrenamiento

In [9]:
regressor = GradientBoostingRegressor(n_estimators=700, max_depth=3)
regressor.fit(X_train, y_train)

GradientBoostingRegressor(n_estimators=700)

## 6. Predicción de resultados del set de test

In [10]:
y_pred = regressor.predict(X_test)

## 7.  Métricas de rendimiento
Comparamos el set de test con el predicho

In [11]:
# R2score
print('R2score:', r2_score(y_test, y_pred))

# Mean Absolute Error
print('MAE: \t', mean_absolute_error(y_test, y_pred))

# Mean Squared Error
print('MSE: \t', mean_squared_error(y_test, y_pred))

# Root Mean Squared Error
print("RMSE: ", mean_squared_error(y_test, y_pred, squared=False))

R2score: 0.991048465774164
MAE: 	 0.8749122891200584
MSE: 	 3.6110513439049194
RMSE:  1.9002766493079157


### 7.1. Para ganar más intuición

In [9]:
np.set_printoptions(precision=2) # redondea a dos decimales

print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

[[29.77 29.09]
 [71.4  74.79]
 [74.75 79.86]
 ...
 [80.   79.86]
 [80.   79.86]
 [80.   79.86]]


## 8. Probar otro modelo

In [21]:
regressor = GradientBoostingRegressor(n_estimators=50, max_depth=3, learning_rate=1, criterion='mse')
regressor.fit(X_train, y_train)

GradientBoostingRegressor(criterion='mse', learning_rate=1, n_estimators=50)

In [18]:
y_pred = regressor.predict(X_test)

In [19]:
# R2score
print('R2score:', r2_score(y_test, y_pred))

# Mean Absolute Error
print('MAE: \t', mean_absolute_error(y_test, y_pred))

# Mean Squared Error
print('MSE: \t', mean_squared_error(y_test, y_pred))

# Root Mean Squared Error
print("RMSE: ", mean_squared_error(y_test, y_pred, squared=False))

R2score: 0.9877629250182701
MAE: 	 0.9927493983473472
MSE: 	 4.936439379375084
RMSE:  2.221809933224506


## 9. Guardar el mejor modelo

In [None]:
with open('Modelos/Gradient_Boosting_Regressor.pkl','wb') as f:
    pickle.dump(regressor, f)

## 10. Ejemplo de predicción

In [22]:
print(regressor.predict(sc.transform([['425060.955125', '4.606138e+06', '22']]))) 

[9.20459904]
