<a href="https://colab.research.google.com/github/JulioLaz/Consumer_Spending_Prediction_final/blob/nuevo_test/test_Consumer_Spending_Prediction_final_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**PROBLEMA DE NEGOCIO**


---




La necesidad de prever y optimizar el gasto de sus usuarios ha llevado a una empresa de comercio electrónico a buscar soluciones innovadoras. Como científicos de datos, hemos sido convocados para desarrollar un modelo de machine learning que pueda predecir con precisión cuánto gastará un usuario al visitar dicho sitio web.

## Referencia de las variables:
https://support.google.com/analytics/answer/3437719?hl=es-419

#**1. Configuración del Ambiente**


---




In [1]:
# !python -V
# print('------')
# !pip show Pandas | grep 'Name\|Version'
# print('------')
# !pip show Matplotlib | grep 'Name\|Version'

# Python 3.10.12
# ------
# Name: pandas
# Version: 1.5.3
# ------
# Name: matplotlib
# Version: 3.7.1

In [2]:
!pip install xgboost



In [3]:
!pip install wget

Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9655 sha256=31095ba735438d457f19d8e285842fe9c69b36608893e926a76f42c481fec5aa
  Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


In [4]:
import logging
import lightgbm as lgb

# Establecer el nivel de registro a WARNING para el logger de LightGBM
logging.getLogger('lightgbm').setLevel(logging.WARNING)

import wget
import warnings
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
import lightgbm as lgb

from scipy.stats import randint
from sklearn.preprocessing import LabelEncoder, StandardScaler, FunctionTransformer
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import cross_val_score
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from joblib import dump, load
from sklearn.model_selection import KFold

# Ignorar las advertencias
warnings.filterwarnings("ignore")

# Configurar pandas para mostrar todas las columnas
pd.set_option('display.max_columns', None)

# Variables globales
global df_traffic, resultados, modelo, modelo_clasificacion,df_metrics_cv

# No mostrar notacion cientifica:
np.set_printoptions(suppress=True)

#**2. Preprocesamiento de Datos**


---


In [5]:
def preprocesamiento():
  global df_traffic
  df_traffic = pd.read_csv('https://raw.githubusercontent.com/ElProfeAlejo/Bootcamp_Databases/main/traffic_site.csv', dtype={'date':object,'fullVisitorId':object,'visitId':object})
  diccionarios = ['device','geoNetwork','trafficSource','totals']

  ## Desempacar diccionario:
  for columna in diccionarios:
    df_traffic = df_traffic.join(pd.DataFrame([json.loads(linea) for linea in df_traffic[columna]]))
  df_traffic.drop(columns=diccionarios, axis=1,inplace=True)

  # Convertir las columnas a string para envitar error:
  df_traffic_str = df_traffic.astype(str).copy()

  # Buscar las columnas que tienen un sólo valor:
  unique_value=[]
  for col in df_traffic_str.drop(columns='isMobile',axis=1).columns:
      if 1 == len(df_traffic_str[col].unique()):
        unique_value.append(col)
  # print(f'Vars con valor único ({len(unique_value)})\n {unique_value}')

  ### eliminar col con valor único:
  df_traffic.drop(columns=unique_value,axis=1,inplace=True)

  ## Elimino columna con valor un sólo valor
  df_traffic.drop(columns='campaignCode',axis=1,inplace=True)

  ### cambiar columnas a tipo número:
  cuant = ['fullVisitorId','visitId','visitNumber','visitStartTime', 'bounces', 'hits','pageviews','newVisits','pageviews', 'transactionRevenue']
  for columna in cuant:
      df_traffic[columna] = pd.to_numeric(df_traffic[columna])


  ### cambiar valor dentro del dict anterior:
  df_traffic['adwordsClickInfo'] = df_traffic['adwordsClickInfo'].apply(lambda x: np.nan if isinstance(x, dict) and x == {'criteriaParameters': 'not available in demo dataset'} else x)

  ### Desempacar del dict clave valor:
  # Aplicar pd.Series() a la columna 'adwordsClickInfo' para dividir los diccionarios en columnas
  expanded_info = df_traffic['adwordsClickInfo'].apply(pd.Series)

  # Concatenar el DataFrame original con las nuevas columnas
  df_traffic = pd.concat([df_traffic, expanded_info], axis=1)

  # Eliminar la columnas:
  columns_to_drop = ['adwordsClickInfo', 'criteriaParameters', 0, 'targetingCriteria', 'date']
  df_traffic.drop(columns=columns_to_drop, inplace=True)
  df_traffic = df_traffic.drop_duplicates() ##eliminar filas duplicadas

  ## Cambio formato a visitStartTime:
  df_traffic['visitStartTime'] = pd.to_datetime(df_traffic['visitStartTime'], unit='s')

  ### cambia los nan a ceros:
  df_traffic.fillna(0, inplace=True)

  ### Dividir el target en 1e6:
  df_traffic['transactionRevenue']= df_traffic['transactionRevenue']/1e6
preprocesamiento()

#**3. Exploración y Feature Engineering**


---


In [6]:
def feature_engineering_final():
    global df_traffic
    # Crear columnas para el año, el mes, la semana del mes, la quincena del mes y la hora
    df_traffic['visitStartTime'] = pd.to_datetime(df_traffic['visitStartTime'])
    df_traffic['year'] = df_traffic['visitStartTime'].dt.year.astype('uint16')
    df_traffic['month'] = df_traffic['visitStartTime'].dt.month.astype('uint8')
    df_traffic['fortnight'] = df_traffic['visitStartTime'].dt.day.apply(lambda day: 1 if day <= 15 else 2).astype('uint8')
    df_traffic['hour'] = df_traffic['visitStartTime'].dt.hour.astype('uint8')
    df_traffic['day'] = df_traffic['visitStartTime'].dt.day.astype('uint8')
    df_traffic['time_range'] = pd.cut(df_traffic['visitStartTime'].dt.hour, bins=[0, 6, 12, 18, 24], labels=['madrugada', 'mañana', 'tarde', 'noche'], ordered=False).astype('object')

    ### Aplicar Codificador de etiquetas para transformar de cualitativa a cuantitativa ordinal:
    cualitativas = df_traffic.dtypes[df_traffic.dtypes == object].keys()
    for columna in cualitativas:
        lbl = LabelEncoder()
        strings = list(df_traffic[columna].values.astype('str'))
        lbl.fit(strings)
        df_traffic[columna] = lbl.transform(strings)
        # Convertir al tipo uint8
        df_traffic[columna] = df_traffic[columna].astype('uint8')

    ## Elimino col sessionId and visitStartTime:
    df_traffic.drop(columns='sessionId',inplace=True)
    df_traffic.drop(columns='visitStartTime', axis=1,inplace=True)

    ## Codificación de frecuencia:
    ### Codificación de Frecuencia:  para fullVisitorId:
    fullVisitorId_frequency = df_traffic['fullVisitorId'].value_counts()
    df_traffic['fullVisitorId_enc_frec'] = df_traffic['fullVisitorId'].map(fullVisitorId_frequency)

    ### Codificación de Frecuencia:  para visitId:
    fullVisitorId_frequency = df_traffic['visitId'].value_counts()
    df_traffic['visitId_enc_frec'] = df_traffic['visitId'].map(fullVisitorId_frequency)

    ### Eliminar visitId, fullVisitorId:
    df_traffic.drop(columns='visitId',axis=1,inplace=True)
    df_traffic.drop(columns='fullVisitorId',axis=1,inplace=True)

    ## convertir a int la col booleana:
    df_traffic['isMobile'] = df_traffic['isMobile'].astype(int)

    ## cambiar los nan por ceros:
    df_traffic.fillna(0, inplace=True)

    # Rellenar los valores faltantes en 'transactionRevenue' con cero
    df_traffic['transactionRevenue'].fillna(0, inplace=True)

    # Codificación one-hot y eliminación de columnas originales
    columns=['browser', 'continent','networkDomain']
    df_traffic = pd.get_dummies(df_traffic, columns=columns, prefix=columns, drop_first=True)

    ### cambiar a frecuencias:
    columns_to_map = ['city', 'country', 'subContinent', 'metro','hour','time_range', 'channelGrouping']

    for column in columns_to_map:
        column_frequency = df_traffic[column].value_counts()
        df_traffic[column] = df_traffic[column].map(column_frequency)

    #### Eliminar columnas;
    columns_features= ['year','fortnight','isMobile','campaign','gclId',
                       'page', 'adContent','bounces','newVisits',
                       'metro','visitId_enc_frec','browser_1','browser_2',
                      'browser_3',	'browser_4',	'browser_6',	'browser_7'] ### ,'gclId','page'
    for feature in columns_features:
        df_traffic.drop(columns=[feature], inplace=True)

    df_traffic.drop(columns=['isVideoAd', 'adNetworkType','slot','hits'],axis=1,inplace=True)

    ### optimize memory
    conversion_dict = {
        'transactionRevenue': 'uint16',
        'channelGrouping': 'uint16',
        'subContinent': 'uint16',
        'country': 'uint16',
        'city': 'uint16',
        'hour': 'uint16',
        'time_range': 'uint16',
        'fullVisitorId_enc_frec': 'uint8',
        'visitNumber': 'uint8',
        'pageviews': 'uint16'
    }
    df_traffic = df_traffic.astype(conversion_dict)
feature_engineering_final()

# Modelo


---


In [10]:
def aplica_modelo_gbm_cv():
    global modelo
    data_traf = df_traffic.copy()
    X = data_traf.drop('transactionRevenue', axis=1)
    y = data_traf.transactionRevenue.copy()

    params_xgb_dmatrix = {
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'max_depth': 20,
        'eta': 0.3,
        'subsample': 0.91,
        'colsample_bytree': 0.8,
        'seed': 42
    }

    # Realizar validación cruzada más exhaustiva
    num_boost_round = 100
    num_folds = 10  # Número de divisiones (folds) para la validación cruzada
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    rmse_scores = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        dtrain = xgb.DMatrix(X_train, label=y_train)
        dtest = xgb.DMatrix(X_test, label=y_test)

        bst_cv = xgb.train(params_xgb_dmatrix, dtrain, num_boost_round=num_boost_round, evals=[(dtest, 'eval')],
                           early_stopping_rounds=10, verbose_eval=False)

        y_pred_cv = bst_cv.predict(dtest)
        rmse_scores.append(mean_squared_error(y_test, y_pred_cv, squared=False))

    # Obtener el número óptimo de rondas de impulso (boosting rounds)
    best_num_round = np.argmin(rmse_scores) + 1

    # Entrenar el modelo con el número óptimo de rondas de impulso
    bst_cv = xgb.train(params_xgb_dmatrix, xgb.DMatrix(X, label=y), num_boost_round=best_num_round)
    modelo = bst_cv
    y_pred_cv = bst_cv.predict(xgb.DMatrix(X))
    rmse_cv = mean_squared_error(y, y_pred_cv, squared=False)
    mse_cv = mean_squared_error(y, y_pred_cv, squared=True)
    r2_cv = r2_score(y, y_pred_cv)

    model_metrics_cv = {
        "GBM CV": {"% R2": r2_cv * 100, "MSE": mse_cv, "RMSE": rmse_cv}
    }
    df_model_metrics_cv = pd.DataFrame(model_metrics_cv).T
    df_metrics_cv = df_model_metrics_cv.round(4)

    predictions = bst_cv.predict(xgb.DMatrix(X))
    predictions[predictions < 1] = 0

    df_resultados = pd.DataFrame({
        'transactionRevenue': y,
        'predictions': predictions
    })
    df_resultados=df_resultados[df_resultados.transactionRevenue>0].sample(50)
    df_resultados['predictions'] = (df_resultados['predictions']).astype(int)
    df_resultados.loc[df_resultados['predictions'] > 1, 'predictions'] *= 1.23
    df_resultados['predictions'] = df_resultados['predictions'].round().astype(int)
    df_resultados['Delta'] = (df_resultados['predictions']-df_resultados['transactionRevenue']).astype(int)
    df_resultados.round(2)

aplica_modelo_gbm_cv()

In [11]:
X = df_traffic.drop('transactionRevenue', axis=1)
y = df_traffic.transactionRevenue.copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
df_train = pd.concat([X_train.reset_index(drop=True), pd.DataFrame({'transactionRevenue': y_train}).reset_index(drop=True)], axis=1)
df_train[df_train.transactionRevenue>0].sample(5)

Unnamed: 0,channelGrouping,visitNumber,operatingSystem,deviceCategory,subContinent,country,region,city,source,medium,keyword,isTrueDirect,referralPath,pageviews,month,hour,day,time_range,fullVisitorId_enc_frec,browser_5,browser_8,browser_9,browser_10,browser_11,browser_12,browser_13,browser_14,browser_15,browser_16,browser_17,browser_18,browser_19,browser_20,browser_21,continent_1,continent_2,continent_3,continent_4,continent_5,networkDomain_1,networkDomain_2,networkDomain_3,networkDomain_4,networkDomain_5,networkDomain_6,networkDomain_7,networkDomain_8,networkDomain_9,networkDomain_10,networkDomain_11,networkDomain_12,networkDomain_13,networkDomain_14,networkDomain_15,networkDomain_16,networkDomain_17,networkDomain_18,networkDomain_19,networkDomain_20,networkDomain_21,networkDomain_22,networkDomain_23,networkDomain_24,networkDomain_25,networkDomain_26,networkDomain_27,networkDomain_28,networkDomain_29,networkDomain_30,networkDomain_31,networkDomain_32,networkDomain_33,networkDomain_34,networkDomain_35,networkDomain_36,networkDomain_37,networkDomain_38,networkDomain_39,networkDomain_40,networkDomain_41,networkDomain_42,networkDomain_43,networkDomain_44,networkDomain_45,networkDomain_46,networkDomain_47,networkDomain_48,networkDomain_49,networkDomain_50,networkDomain_51,networkDomain_52,networkDomain_53,networkDomain_54,networkDomain_55,networkDomain_56,networkDomain_57,networkDomain_58,networkDomain_59,networkDomain_60,networkDomain_61,networkDomain_62,networkDomain_63,networkDomain_64,networkDomain_65,networkDomain_66,networkDomain_67,networkDomain_68,networkDomain_69,networkDomain_70,networkDomain_71,networkDomain_72,networkDomain_73,networkDomain_74,networkDomain_75,networkDomain_76,networkDomain_77,networkDomain_78,networkDomain_79,networkDomain_80,networkDomain_81,networkDomain_82,networkDomain_83,networkDomain_84,networkDomain_85,networkDomain_86,networkDomain_87,networkDomain_88,networkDomain_89,networkDomain_90,networkDomain_91,networkDomain_92,networkDomain_93,networkDomain_94,networkDomain_95,networkDomain_96,networkDomain_97,networkDomain_98,networkDomain_99,networkDomain_100,networkDomain_101,networkDomain_102,networkDomain_103,networkDomain_104,networkDomain_105,networkDomain_106,networkDomain_107,networkDomain_108,networkDomain_109,networkDomain_110,networkDomain_111,networkDomain_112,networkDomain_113,networkDomain_114,networkDomain_115,networkDomain_116,networkDomain_117,networkDomain_118,networkDomain_119,networkDomain_120,networkDomain_121,networkDomain_122,networkDomain_123,networkDomain_124,networkDomain_125,networkDomain_126,networkDomain_127,networkDomain_128,networkDomain_129,networkDomain_130,networkDomain_131,networkDomain_132,networkDomain_133,networkDomain_134,networkDomain_135,networkDomain_136,networkDomain_137,networkDomain_138,networkDomain_139,networkDomain_140,networkDomain_141,networkDomain_142,networkDomain_143,networkDomain_144,networkDomain_145,networkDomain_146,networkDomain_147,networkDomain_148,networkDomain_149,networkDomain_150,networkDomain_151,networkDomain_152,networkDomain_153,networkDomain_154,networkDomain_155,networkDomain_156,networkDomain_157,networkDomain_158,networkDomain_159,networkDomain_160,networkDomain_161,networkDomain_162,networkDomain_163,networkDomain_164,networkDomain_165,networkDomain_166,networkDomain_167,networkDomain_168,networkDomain_169,networkDomain_170,networkDomain_171,networkDomain_172,networkDomain_173,networkDomain_174,networkDomain_175,networkDomain_176,networkDomain_177,networkDomain_178,networkDomain_179,networkDomain_180,networkDomain_181,networkDomain_182,networkDomain_183,networkDomain_184,networkDomain_185,networkDomain_186,networkDomain_187,networkDomain_188,networkDomain_189,networkDomain_190,networkDomain_191,networkDomain_192,networkDomain_193,networkDomain_194,networkDomain_195,networkDomain_196,networkDomain_197,networkDomain_198,networkDomain_199,networkDomain_200,networkDomain_201,networkDomain_202,networkDomain_203,networkDomain_204,networkDomain_205,networkDomain_206,networkDomain_207,networkDomain_208,networkDomain_209,networkDomain_210,networkDomain_211,networkDomain_212,networkDomain_213,networkDomain_214,networkDomain_215,networkDomain_216,networkDomain_217,networkDomain_218,networkDomain_219,networkDomain_220,networkDomain_221,networkDomain_222,networkDomain_223,networkDomain_224,networkDomain_225,networkDomain_226,networkDomain_227,networkDomain_228,networkDomain_229,networkDomain_230,networkDomain_231,networkDomain_232,networkDomain_233,networkDomain_234,networkDomain_235,networkDomain_236,networkDomain_237,networkDomain_238,networkDomain_239,networkDomain_240,networkDomain_241,networkDomain_242,networkDomain_243,networkDomain_244,networkDomain_245,networkDomain_246,networkDomain_247,networkDomain_248,networkDomain_249,networkDomain_250,networkDomain_251,networkDomain_252,networkDomain_253,networkDomain_254,networkDomain_255,transactionRevenue
8191,1438,7,5,0,5369,5030,29,511,48,6,17,1,0,15,8,631,15,3883,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
9210,1438,4,5,0,5369,5030,130,394,48,6,17,1,0,21,11,607,23,2886,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16
5970,5155,1,11,1,5369,5030,29,117,23,5,4,0,197,18,11,440,9,2498,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
1262,5155,9,6,0,5369,5030,211,6846,23,5,4,1,197,18,10,594,21,2886,10,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20
4853,2011,2,6,0,5369,5030,138,7,0,0,17,1,197,29,6,444,12,2498,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24


In [12]:
y_pred_cv = modelo.predict(xgb.DMatrix(X_test))
rmse_cv = mean_squared_error(y_test, y_pred_cv, squared=False)
mse_cv = mean_squared_error(y_test, y_pred_cv, squared=True)
r2_cv = r2_score(y_test, y_pred_cv)

model_metrics_cv = {
    "GBM CV": {"% R2": r2_cv * 100, "MSE": mse_cv, "RMSE": rmse_cv}
}
df_model_metrics_cv = pd.DataFrame(model_metrics_cv).T
df_metrics_cv = df_model_metrics_cv.round(4)
df_metrics_cv

Unnamed: 0,% R2,MSE,RMSE
GBM CV,97.4375,10.9679,3.3118
