# Lectura de los datos limpios

In [13]:
# Importar librerías
import pandas
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures

In [14]:
data = pandas.read_csv("./IMDb_Limpio.csv")

In [15]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6616 entries, 0 to 6615
Columns: 928 entries, Unnamed: 0 to dia_semana_6
dtypes: bool(486), float64(8), int64(425), object(9)
memory usage: 25.4+ MB


In [16]:
data.head()

Unnamed: 0.1,Unnamed: 0,imdb_title_id,title,original_title,year,date_published,genre,duration,country,language,...,mes_10,mes_11,mes_12,dia_semana_0,dia_semana_1,dia_semana_2,dia_semana_3,dia_semana_4,dia_semana_5,dia_semana_6
0,506,tt0017136,Metropolis,Metropolis,0,1928-10-01,"Drama, Sci-Fi",153,Germany,German,...,1,0,0,1,0,0,0,0,0,0
1,1048,tt0021749,Luci della città,City Lights,1,1931-04-02,"Comedy, Drama, Romance",87,USA,English,...,0,0,0,0,0,0,1,0,0,0
2,2454,tt0027977,Tempi moderni,Modern Times,2,1937-03-12,"Comedy, Drama, Family",87,USA,English,...,0,0,0,0,0,0,0,1,0,0
3,2795,tt0029453,Il bandito della Casbah,Pépé le Moko,3,1937-10-22,"Crime, Drama, Romance",94,France,"French, Arabic",...,1,0,0,0,0,0,0,1,0,0
4,2827,tt0029583,Biancaneve e i sette nani,Snow White and the Seven Dwarfs,3,1938-11-30,"Animation, Family, Fantasy",83,USA,English,...,0,1,0,0,0,1,0,0,0,0


# Variables que se usan para el split de los datos

In [17]:
# Limpieza de Género. Hace dummies para cada género distinto. 
generos = []
for j in data['genre'].dropna().str.rsplit(', '):
    for i in j:
        if i not in generos:
            generos.append(i)

generos_names = ["{}{}".format(i,"_Genre") for i in generos]

lenguaje = []
for j in data['language'].dropna().str.rsplit(', '):
    for i in j:
        if i not in lenguaje:
            lenguaje.append(i)

lenguaje_names = ["{}{}".format(i,"_Language") for i in lenguaje]

pais = []
for j in data['country'].dropna().str.rsplit(', '):
    for i in j:
        if i not in pais:
            pais.append(i)

pais_names = ["{}{}".format(i,"_Country") for i in pais]

In [18]:
director_list = [s for s in list(data.columns) if "director" in s]
writer_list = [s for s in list(data.columns) if "writer" in s]
prodcomp_list = [s for s in list(data.columns) if "production" in s]
mes_list = [s for s in list(data.columns) if "mes" in s]
dia_semana_list = [s for s in list(data.columns) if "dia_semana" in s]

# Separacion en training, test y validación 

In [19]:
# Separa la base de datos, usa la variable "quality" como variable objetivo, y
# todas las demás variables como variables explicativas
X = data[["duration", "avg_vote", "votes", \
          "budget","metascore", "reviews_from_users",  \
          "reviews_from_critics"] + mes_list + dia_semana_list].to_numpy()
X2 = data[generos_names + lenguaje_names + pais_names \
         + director_list + writer_list + prodcomp_list].to_numpy()
Y = data["gross_income"].to_numpy()

print(X.shape)
print(Y.shape)

(6616, 29)
(6616,)


In [20]:
X = PolynomialFeatures(2, interaction_only=True, include_bias=False).fit_transform(X)
X = np.concatenate((X, X2), axis=1)
print(X.shape)
print(Y.shape)

(6616, 1128)
(6616,)


In [21]:
# Separamos en training, validación y testing
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1, random_state=42)

print(f"{len(x_train)} training samples")
print(f"{len(x_val)} validation samples")
print(f"{len(x_test)} test samples")

4762 training samples
530 validation samples
1324 test samples


# Setup para modelar

In [22]:
# Metricas
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
import time

# Modelos
from sklearn.neighbors import KNeighborsRegressor # kNN
from sklearn.tree import DecisionTreeRegressor # Arbol de Decisiones
from sklearn.svm import SVR # Support Vector Regression
from sklearn.linear_model import Lasso # Regresion Regularizada con Lasso
from sklearn.ensemble import RandomForestRegressor # Random Forest
from sklearn.neural_network import MLPRegressor # Multi Layer perceptron
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.svm import LinearSVR # Linear Support Vector Regression

# Para poder hacer búsqueda de línea 
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV

In [23]:
# Set print options for decimal values
np.set_printoptions(formatter={'float': lambda x: "{0:0.2f}".format(x)})

# K-vecinos

In [24]:
# Grid search over kNN regressor
parameters = {'n_neighbors': range(1, 20), 'weights': ['uniform', 'distance'],
              'p': range(1, 2), 'algorithm': ['auto', 'ball_tree']}

gs_model = GridSearchCV(KNeighborsRegressor(), parameters, n_jobs=-1, scoring="r2")
start = time.time()
gs_model.fit(x_train, y_train)
stop = time.time()

print(f"Training time: {stop - start}s")
print(f"Best score: {gs_model.best_score_:.3f}")
print(f"Best params: {gs_model.best_params_}")

Training time: 362.5565450191498s
Best score: 0.678
Best params: {'algorithm': 'auto', 'n_neighbors': 19, 'p': 1, 'weights': 'distance'}


In [26]:
# Predict on validation data, and print score
y_val_hat = gs_model.best_estimator_.predict(x_val)

print(f"MSE on the training set: {mean_squared_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MSE on the validation set: {mean_squared_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MSE on the test set: {mean_squared_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAE on the training set: {mean_absolute_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAE on the validation set: {mean_absolute_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAE on the test set: {mean_absolute_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAPE on the training set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAPE on the validation set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAPE on the test set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"R^2 on the training set: {r2_score(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"R^2 on the validation set: {r2_score(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"R^2 on the test set: {r2_score(gs_model.best_estimator_.predict(x_test), y_test):.3f}")

MSE on the training set: 0.000
MSE on the validation set: 9.578
MSE on the test set: 9.614

MAE on the training set: 0.000
MAE on the validation set: 2.254
MAE on the test set: 2.230

MAPE on the training set: 0.000
MAPE on the validation set: 0.075
MAPE on the test set: 0.075

R^2 on the training set: 1.000
R^2 on the validation set: 0.508
R^2 on the test set: 0.519


# Regresion regularizada con Lasso

In [27]:
# Grid search over Lasso Regularized Linear Regression
parameters = {'normalize': ['True', 'False'], 'alpha':np.logspace(-10, 0,base = np.e, num=100, endpoint = True).tolist()}

gs_model = GridSearchCV(Lasso(tol = 1e3), parameters, n_jobs=-1, scoring='r2')
start = time.time()
gs_model.fit(x_train, y_train)
stop = time.time()

print(f"Training time: {stop - start}s")
print(f"Best score: {gs_model.best_score_:.3f}")
print(f"Best params: {gs_model.best_params_}")

Training time: 215.85090708732605s
Best score: 0.523
Best params: {'alpha': 0.0009399376922881782, 'normalize': 'True'}


In [28]:
# Predict on validation data, and print score
y_val_hat = gs_model.best_estimator_.predict(x_val)

print(f"MSE on the training set: {mean_squared_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MSE on the validation set: {mean_squared_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MSE on the test set: {mean_squared_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAE on the training set: {mean_absolute_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAE on the validation set: {mean_absolute_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAE on the test set: {mean_absolute_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAPE on the training set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAPE on the validation set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAPE on the test set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"R^2 on the training set: {r2_score(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"R^2 on the validation set: {r2_score(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"R^2 on the test set: {r2_score(gs_model.best_estimator_.predict(x_test), y_test):.3f}")

MSE on the training set: 11.420
MSE on the validation set: 13.251
MSE on the test set: 13.999

MAE on the training set: 2.566
MAE on the validation set: 2.798
MAE on the test set: 2.836

MAPE on the training set: 0.083
MAPE on the validation set: 0.090
MAPE on the test set: 0.092

R^2 on the training set: 0.389
R^2 on the validation set: 0.325
R^2 on the test set: 0.266


# Arbol de decisiones

In [29]:
# Grid search over DecisionTree
parameters = {'splitter': ['best', 'random']}

gs_model = GridSearchCV(DecisionTreeRegressor(), parameters, n_jobs=-1, scoring="r2")
start = time.time()
gs_model.fit(x_train, y_train)
stop = time.time()

print(f"Training time: {stop - start}s")
print(f"Best score: {gs_model.best_score_:.3f}")
print(f"Best params: {gs_model.best_params_}")

Training time: 2.8423922061920166s
Best score: 0.502
Best params: {'splitter': 'best'}


In [30]:
# Predict on validation data, and print score
y_val_hat = gs_model.best_estimator_.predict(x_val)

print(f"MSE on the training set: {mean_squared_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MSE on the validation set: {mean_squared_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MSE on the test set: {mean_squared_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAE on the training set: {mean_absolute_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAE on the validation set: {mean_absolute_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAE on the test set: {mean_absolute_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAPE on the training set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAPE on the validation set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAPE on the test set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"R^2 on the training set: {r2_score(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"R^2 on the validation set: {r2_score(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"R^2 on the test set: {r2_score(gs_model.best_estimator_.predict(x_test), y_test):.3f}")

MSE on the training set: 0.000
MSE on the validation set: 13.975
MSE on the test set: 14.857

MAE on the training set: 0.000
MAE on the validation set: 2.622
MAE on the test set: 2.662

MAPE on the training set: 0.000
MAPE on the validation set: 0.090
MAPE on the test set: 0.093

R^2 on the training set: 1.000
R^2 on the validation set: 0.463
R^2 on the test set: 0.476


# Random Forest

In [31]:
# Grid search over Random Forest
parameters = {'n_estimators': [10,20,30,100,300]}

gs_model = GridSearchCV(RandomForestRegressor(), parameters, n_jobs=-1, scoring="r2")
start = time.time()
gs_model.fit(x_train, y_train)
stop = time.time()

print(f"Training time: {stop - start}s")
print(f"Best score: {gs_model.best_score_:.3f}")
print(f"Best params: {gs_model.best_params_}")

Training time: 138.40102791786194s
Best score: 0.747
Best params: {'n_estimators': 300}


In [32]:
# Predict on validation data, and print score
y_val_hat = gs_model.best_estimator_.predict(x_val)

print(f"MSE on the training set: {mean_squared_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MSE on the validation set: {mean_squared_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MSE on the test set: {mean_squared_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAE on the training set: {mean_absolute_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAE on the validation set: {mean_absolute_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAE on the test set: {mean_absolute_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAPE on the training set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAPE on the validation set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAPE on the test set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"R^2 on the training set: {r2_score(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"R^2 on the validation set: {r2_score(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"R^2 on the test set: {r2_score(gs_model.best_estimator_.predict(x_test), y_test):.3f}")

MSE on the training set: 0.930
MSE on the validation set: 7.267
MSE on the test set: 7.510

MAE on the training set: 0.675
MAE on the validation set: 1.895
MAE on the test set: 1.916

MAPE on the training set: 0.023
MAPE on the validation set: 0.063
MAPE on the test set: 0.065

R^2 on the training set: 0.959
R^2 on the validation set: 0.637
R^2 on the test set: 0.654


# Procesos Gausianos

In [33]:
# Grid search over Random Forest
parameters = {"n_restarts_optimizer": [0,1,2]}

gs_model = GridSearchCV(GaussianProcessRegressor(normalize_y = True), parameters, n_jobs=-1, scoring="neg_mean_squared_error")
start = time.time()
gs_model.fit(x_train, y_train)
stop = time.time()

print(f"Training time: {stop - start}s")
print(f"Best score: {gs_model.best_score_:.3f}")
print(f"Best params: {gs_model.best_params_}")

Training time: 123.96700859069824s
Best score: -26.832
Best params: {'n_restarts_optimizer': 0}


In [34]:
## Predict on validation data, and print score
y_val_hat = gs_model.best_estimator_.predict(x_val)

print(f"MSE on the training set: {mean_squared_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MSE on the validation set: {mean_squared_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MSE on the test set: {mean_squared_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAE on the training set: {mean_absolute_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAE on the validation set: {mean_absolute_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAE on the test set: {mean_absolute_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAPE on the training set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAPE on the validation set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAPE on the test set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"R^2 on the training set: {r2_score(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"R^2 on the validation set: {r2_score(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"R^2 on the test set: {r2_score(gs_model.best_estimator_.predict(x_test), y_test):.3f}")

MSE on the training set: 0.000
MSE on the validation set: 27.598
MSE on the test set: 28.666

MAE on the training set: 0.000
MAE on the validation set: 4.161
MAE on the test set: 4.193

MAPE on the training set: 0.000
MAPE on the validation set: 0.129
MAPE on the test set: 0.130

R^2 on the training set: 1.000
R^2 on the validation set: -546636968986984066861179600896.000
R^2 on the test set: 0.000


# Multi Layer Perceptron

In [35]:
# Grid search over MLP
parameters = {'alpha': [0.000001, 0.1, 0.2], "hidden_layer_sizes": [(100),(25,25)], \
              "learning_rate_init": [1e-4, 1e-6]}

gs_model = GridSearchCV(MLPRegressor(max_iter = 400), parameters, n_jobs=-1, scoring="r2")
start = time.time()
gs_model.fit(x_train, y_train)
stop = time.time()

print(f"Training time: {stop - start}s")
print(f"Best score: {gs_model.best_score_:.3f}")
print(f"Best params: {gs_model.best_params_}")

Training time: 764.9392211437225s
Best score: -538018102513548.375
Best params: {'alpha': 0.1, 'hidden_layer_sizes': (25, 25), 'learning_rate_init': 0.0001}


In [36]:
# Predict on validation data, and print score
y_val_hat = gs_model.best_estimator_.predict(x_val)

print(f"MSE on the training set: {mean_squared_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MSE on the validation set: {mean_squared_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MSE on the test set: {mean_squared_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAE on the training set: {mean_absolute_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAE on the validation set: {mean_absolute_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAE on the test set: {mean_absolute_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAPE on the training set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAPE on the validation set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAPE on the test set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"R^2 on the training set: {r2_score(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"R^2 on the validation set: {r2_score(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"R^2 on the test set: {r2_score(gs_model.best_estimator_.predict(x_test), y_test):.3f}")

MSE on the training set: 30275016226827600.000
MSE on the validation set: 41409786944206888.000
MSE on the test set: 31302426719297900.000

MAE on the training set: 73609462.669
MAE on the validation set: 73549200.876
MAE on the test set: 72631587.681

MAPE on the training set: 1.000
MAPE on the validation set: 1.000
MAPE on the test set: 1.000

R^2 on the training set: -0.002
R^2 on the validation set: -0.001
R^2 on the test set: -0.001


# Support Vector Regression

In [None]:
# Grid search over SVR
parameters = {'kernel': ['linear', 'poly', 'rbf'], 'degree': range(1, 4),
              'coef0': [0.0, 0.001, 0.01, 0.1, 1.0], 'C': [0.001, 0.01, 0.1, 1.0, 10, 100]}

gs_model = GridSearchCV(SVR(), parameters, cv=2, n_jobs=-1, verbose=2)
start = time.time()
gs_model.fit(x_train, y_train)
stop = time.time()

print(f"Training time: {stop - start}s")
print(f"Best score: {gs_model.best_score_:.3f}")
print(f"Best params: {gs_model.best_params_}")

Fitting 2 folds for each of 270 candidates, totalling 540 fits


In [None]:
# Predict on validation data, and print score
y_val_hat = gs_model.best_estimator_.predict(x_val)

print(f"MSE on the training set: {mean_squared_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MSE on the validation set: {mean_squared_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MSE on the test set: {mean_squared_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAE on the training set: {mean_absolute_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAE on the validation set: {mean_absolute_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAE on the test set: {mean_absolute_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAPE on the training set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAPE on the validation set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAPE on the test set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"R^2 on the training set: {r2_score(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"R^2 on the validation set: {r2_score(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"R^2 on the test set: {r2_score(gs_model.best_estimator_.predict(x_test), y_test):.3f}")

# Linear Support Vector Regression

In [None]:
# Grid search over SVR
parameters = {}

gs_model = GridSearchCV(LinearSVR(max_iter = 1e6), parameters, cv=2, n_jobs=-1, verbose=2)
start = time.time()
gs_model.fit(x_train, y_train)
stop = time.time()

print(f"Training time: {stop - start}s")
print(f"Best score: {gs_model.best_score_:.3f}")
print(f"Best params: {gs_model.best_params_}")

In [None]:
# Predict on validation data, and print score
y_val_hat = gs_model.best_estimator_.predict(x_val)

print(f"MSE on the training set: {mean_squared_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MSE on the validation set: {mean_squared_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MSE on the test set: {mean_squared_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAE on the training set: {mean_absolute_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAE on the validation set: {mean_absolute_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAE on the test set: {mean_absolute_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"MAPE on the training set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"MAPE on the validation set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"MAPE on the test set: {mean_absolute_percentage_error(gs_model.best_estimator_.predict(x_test), y_test):.3f}")
print()
print(f"R^2 on the training set: {r2_score(gs_model.best_estimator_.predict(x_train), y_train):.3f}")
print(f"R^2 on the validation set: {r2_score(gs_model.best_estimator_.predict(x_val), y_val):.3f}")
print(f"R^2 on the test set: {r2_score(gs_model.best_estimator_.predict(x_test), y_test):.3f}")