In [1]:
import pandas as pd
import numpy as np
import re

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict

from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import StackingRegressor

from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin
from sklearn.base import RegressorMixin
from sklearn.pipeline import Pipeline

from sklearn.metrics import mean_absolute_percentage_error

import warnings

warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv("../csv/data_hipotesis.csv")
data = df.copy()
data.head()

Unnamed: 0.1,Unnamed: 0,fecha_publicacion,latitud,dormitorios,longitud,banos,descripcion,area,precio,distrito,...,precision,type,antiguedad_categoria,precioxm2,banos/dormitorio,lima_top,precioxm2_log,precio_log,area_log,distrito_clase
0,0,0,-12.139912,3.0,-77.018606,2.0,Se Alquila Lindo Departamento en Barranco\n¡De...,77.0,2960.0,barranco,...,ROOFTOP,street_address,Grupo 2,38.441558,0.666667,1,3.649139,7.992945,4.343805,clase1
1,1,0,-12.139272,2.0,-77.021154,2.0,Se Alquila Departamento Duplex en Corazón de B...,96.0,2250.0,barranco,...,ROOFTOP,street_address,Grupo 1,23.4375,1.0,1,3.154337,7.718685,4.564348,clase1
2,2,0,-12.139555,1.0,-77.023879,1.0,Se Alquila Lindo Departamento en Barranco\nSE ...,40.0,2000.0,barranco,...,ROOFTOP,establishment,Grupo 3,50.0,1.0,1,3.912023,7.600902,3.688879,clase1
3,3,0,-12.138541,3.0,-77.017627,2.0,Departamento de 3 Habitaciones Sin Amoblar en ...,80.0,2974.0,barranco,...,ROOFTOP,establishment,Grupo 3,37.175,0.666667,1,3.615636,7.997663,4.382027,clase1
4,4,0,-12.138338,2.0,-77.025239,1.0,Se Alquila Lindo Departamento en La Av El Sol ...,70.0,2800.0,barranco,...,ROOFTOP,subpremise,Grupo 2,40.0,0.5,1,3.688879,7.937375,4.248495,clase1


## 1. Preprocesamiento

**labels**

In [3]:
precision_labels = {
    'ROOFTOP':4,
    "APPROXIMATE":3,
    "GEOMETRIC_CENTER":2,
    "RANGE_INTERPOLATED":1
}
# Mapeo de categorías a puntajes
type_labels = {
    'street_address': 10,
    'subpremise': 9,
    'premise': 8,
    'establishment':7,
    'electrician': 7,
    'cafe': 7,
    'bar': 7,
    'clothing_store': 7,
    'car_repair': 7,
    'church': 7,
    'electronics_store': 7,
    'atm': 7,
    'accounting': 7,
    'embassy': 7,
    'doctor': 7,
    'bakery': 7,
    'convenience_store': 7,
    'bicycle_store': 7,
    'car_dealer': 7,
    'dentist': 7,
    'car_wash': 7,
    'book_store': 7,
    'beauty_salon': 7,
    'bus_station': 6,
    'locality': 5,
    'neighborhood': 4,
    'route': 3,
    'intersection': 3,
    'political': 2,
    'administrative_area_level_3': 1
}
distritos_labels = {
    "clase1":1,
    "clase2":2,
    "clase3":3
}


**Definicion de modelos**

In [4]:
modelos = [
    {
        'nombre':'lasso_regression',
        'parametros': {
            'alpha': [0.001, 0.01, 0.1, 1]
        },
        'modelo': Lasso(random_state=42),
    },
    {
        'nombre':'random_forest',
        'parametros': {
            'n_estimators': [150, 200, 250],
            'max_depth': [10, 15, 20],
            'min_samples_leaf': [3, 5, 7]
        },
        'modelo': RandomForestRegressor(random_state=42),
    },
    {
        'nombre':'support_vector_regressor',
        'parametros': {
            'C': [0.1, 1, 10],
            'epsilon': [0.01, 0.1, 1]
        },
        'modelo': SVR(),
    },
    {
        'nombre':'xgboost',
        'parametros': {
            'n_estimators': [150, 200, 250],
            'max_depth': [5, 7, 9],
            'learning_rate': [0.01, 0.025, 0.05]
        },
        'modelo': XGBRegressor(random_state=42, verbosity=0, reg_alpha=0.1)
    },
    {
        'nombre':'knn',
        'parametros': {
            'n_neighbors': [3, 5, 7]
        },
        'modelo': KNeighborsRegressor(),
    }
]


'''    {
        'nombre':'random_forest',
        'parametros': {
            'n_estimators': [ 150, 200, 250],
            'max_depth': [10, 15, 20],
            'min_samples_leaf':[3, 5, 7]
        },
        'modelo': RandomForestRegressor(random_state=42),
    },
    
    {
        'nombre':'linear_regression',
        'parametros':{
            'fit_intercept': [True, False]
        },
        'modelo': LinearRegression(),
    },
    {
        'nombre':'ridge_regression',
        'parametros':{
            'alpha': [0.1, 1, 10, 100]
        },
        'modelo': Ridge(random_state=42),
    }    
''',


("    {\n        'nombre':'random_forest',\n        'parametros': {\n            'n_estimators': [ 150, 200, 250],\n            'max_depth': [10, 15, 20],\n            'min_samples_leaf':[3, 5, 7]\n        },\n        'modelo': RandomForestRegressor(random_state=42),\n    },\n    \n    {\n        'nombre':'linear_regression',\n        'parametros':{\n            'fit_intercept': [True, False]\n        },\n        'modelo': LinearRegression(),\n    },\n    {\n        'nombre':'ridge_regression',\n        'parametros':{\n            'alpha': [0.1, 1, 10, 100]\n        },\n        'modelo': Ridge(random_state=42),\n    }    \n",)

### 1.1. Ingenieria de caracateristicas

Crear nuevas caracteristicas apartir de las existentes

In [5]:
class IngenieriaDeCaracteristicas(BaseEstimator, TransformerMixin):
    def __init__(self, precision_labels, type_labels, distritos_labels):
        self.precision_labels = precision_labels
        self.type_labels = type_labels
        self.distrito_labels = distritos_labels
   
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        X_copia = X.copy()
        X_copia['precision_num'] = X_copia['precision'].map(self.precision_labels)
        X_copia['type_num'] = X_copia['type'].map(self.type_labels)
        X_copia['distrito_clase'] = X_copia['distrito_clase'].map(self.distrito_labels)
        X_copia['banos/dormitorios'] = X_copia['banos'] / X_copia['dormitorios']
        X_copia['area/dormitorios'] = X_copia['area'] / X_copia['dormitorios']
        X_copia['exactitud_coordenadas'] = 1 / (X_copia['precision_num'] * X_copia['type_num'])
        X_copia.drop(columns=['precision_num','precision','type','type_num'],inplace=True)
        return X_copia

In [6]:
X = data.drop('precio',axis=1)
y = data['precio']

### 1.2. Imputar por la media y moda

Imputar por la media y moda siempre y cuando el % de nulos sea menor que el 2%

In [7]:
class PersonalSimpleImputer(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass

    def fit(self, X, y=None):
        if np.max(X.isnull().mean().values) > 0.02:
            print("Error, existe un % de nulos alto")
            return self
        X_num = X.select_dtypes(include=('int', 'float'))
        X_obj = X.select_dtypes(include=('O'))

        self.imputer_mean = SimpleImputer(strategy='mean')
        self.imputer_mode = SimpleImputer(strategy='most_frequent')

        self.imputer_mean.fit(X_num)
        self.imputer_mode.fit(X_obj)
        return self

    def transform(self, X):
        X_num = X.select_dtypes(include=('int', 'float'))
        X_obj = X.select_dtypes(include=('O'))

        X_num_imp = self.imputer_mean.transform(X_num)
        X_obj_imp = self.imputer_mode.transform(X_obj)

        # Usamos np.concatenate y conservamos el índice original
        X_imp = np.concatenate([X_num_imp, X_obj_imp], axis=1)
        return pd.DataFrame(X_imp,
                            columns=X_num.columns.tolist() + X_obj.columns.tolist(),
                            index=X.index)

### 1.3. Agregar la variable precioxm2

Agregar la variable precioxm2 predicha apartir de los k-vecinos mas cercanos

In [8]:
class AgregandoPrecioxm2(BaseEstimator, TransformerMixin):
    def __init__(self, n_neighbors=3, n_folds=5):
        self.n_neighbors = n_neighbors
        self.n_folds = n_folds
        self.knn = KNeighborsRegressor(n_neighbors=n_neighbors)

    def fit(self, X, y):
        X_copia = X.copy()
        # Conservamos el índice para evitar desalineación
        coordenadas = X_copia[['latitud', 'longitud']]
        precioxm2 = y / X_copia['area'].values

        kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=42)

        # cross_val_predict devuelve las predicciones en el orden original
        preds = cross_val_predict(self.knn, coordenadas, precioxm2, cv=kfold)
        # Creamos una Serie que conserva el índice original
        self.precioxm2_pred_train = pd.Series(preds, index=coordenadas.index)

        # Entrena el modelo con todos los datos de entrenamiento
        self.knn.fit(coordenadas, precioxm2)
        
        return self  # Retornamos self para seguir el patrón Transformer

    def transform(self, X):
        X_copia = X.copy()
        coordenadas = X_copia[['latitud', 'longitud']]

        # Si transformamos el conjunto de entrenamiento, usamos las predicciones ya calculadas;
        # de lo contrario, predecimos con el modelo entrenado.
        if hasattr(self, "precioxm2_pred_train") and len(X) == len(self.precioxm2_pred_train):
            # Reindexamos para asegurar el orden
            X_copia['precioxm2_pred'] = self.precioxm2_pred_train.reindex(X.index)
        else:
            X_copia['precioxm2_pred'] = self.knn.predict(coordenadas)
        return X_copia

### 1.4. Standarizar y convertir a variables numericas

Usar standarscaler y OneHotEncoder para convertir las columnas a la misma escala y a valores numericos

In [9]:
class CustomColumnTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass
    
    def fit(self, X, y=None):
        
        for col in X.columns:
            try:
                X[col]  = X[col].astype('float')
            except Exception:
                pass

        self.numericas = X.select_dtypes(include=('int','float')).columns
        self.categoricas = X.select_dtypes(include=('object')).columns
        self.columnas = self.numericas.tolist()+self.categoricas.tolist()
        
        #X_ = X[self.columnas]
        self.column_transformer_ = ColumnTransformer([
            ('num', StandardScaler(), self.numericas),
            ('cat', OneHotEncoder(), self.categoricas)
        ], remainder='passthrough')
        self.column_transformer_.fit(X)
        return self

    def transform(self, X):
        for col in X.columns:
            try:
                X[col]  = X[col].astype('float')
            except Exception:
                pass
        X_transfomer = self.column_transformer_.transform(X)
        exp = re.compile(r'(num|cat)__(.*)')
        columnas = self.column_transformer_.get_feature_names_out()
        columnas_ = [exp.findall(x)[0][1] for x in columnas]
        return pd.DataFrame(X_transfomer, columns=columnas_, index=X.index)

### 1.5. Seleccion de caracteristicas

Usar un randomforest con los parametros adecuados de tal forma que mediante el atributo feature_importances_ pueda seleccionar las caracteristicas mas relevantes y generales del dataset.

In [10]:
class SeleccionDeCaracteristicas(BaseEstimator,TransformerMixin):
    def __init__(self, n_splits=15,n_features=10):
        self.n_splits = n_splits
        self.n_features = n_features
    
    def fit(self, X, y=None):
        rfr = RandomForestRegressor(n_estimators=150,max_depth=15,max_features='sqrt',min_samples_leaf=3,random_state=42)
        kfold = KFold(n_splits=self.n_splits,shuffle=True,random_state=42)
        importancias = np.zeros(X.shape[1])
        
        for train_index,test_index in kfold.split(X,y):
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
            rfr.fit(X_train, y_train)
            importancias += rfr.feature_importances_
            
        importancias /= kfold.n_splits
        self.feature_names = X.columns[np.argsort(importancias)[::-1][:self.n_features]].tolist()  # Selección de las 10 características más importantes
        return self
    
    def transform(self, X):
        return X[self.feature_names]
    

## 2. Modelado

Hiperaparametros

In [11]:
class BestBaseModelTrainer:
    """Clase auxiliar que entrena modelos base con GridSearchCV y genera predicciones"""

    def __init__(self, base_models, cv=5, n_jobs=1, scoring='neg_mean_absolute_percentage_error'):
        self.base_models = base_models
        self.cv = cv
        self.n_jobs = n_jobs
        self.scoring = scoring

    def train_and_predict(self, X, y):
        modelos_entrenados = []
        kfold = KFold(n_splits=self.cv, shuffle=True, random_state=42)
        predicciones_train = pd.DataFrame()

        for modelo in self.base_models:
            gs = GridSearchCV(
                modelo['modelo'],
                modelo['parametros'],
                cv=self.cv,
                n_jobs=self.n_jobs,
                scoring=self.scoring,
                refit=True,
            )

            gs.fit(X, y)

            predicciones_train[modelo['nombre']] = cross_val_predict(
                gs.best_estimator_, X, y, cv=kfold, n_jobs=self.n_jobs
            )
            
            modelos_entrenados.append((modelo['nombre'], gs.best_estimator_))

            print("\n")

        return predicciones_train, modelos_entrenados

In [12]:
class CustomStackingRegressor(BaseEstimator, RegressorMixin):
    """
    Regressor que implementa stacking con modelos base preentrenados o nuevos
    Compatible con scikit-learn Pipeline
    """
    def __init__(self, meta_model, cv=5, n_jobs=1,
                 scoring='neg_mean_absolute_percentage_error',
                 base_models=None,
                 predicciones_out_fold=None,
                 modelos_entrenados=None):

        self.base_models = base_models  # Configuración para entrenar nuevos modelos
        self.meta_model = meta_model
        
        self.predicciones_out_fold = predicciones_out_fold  # Modelos ya entrenados
        self.modelos_entrenados = modelos_entrenados
        
        self.is_fitted = False
        self.cv = cv
        self.n_jobs = n_jobs
        self.scoring = scoring
        
        if self.base_models is None and (self.predicciones_out_fold is None or self.modelos_entrenados is None):
            raise ValueError("Debes proporcionar 'base_models' para entrenar nuevos modelos o 'predicciones_out_fold' para usarlos en el entrenamiento del stacking.")

    def fit(self, X, y):
        """Entrena los modelos base (si es necesario) y el meta-modelo"""
        # Verifica si hay modelos preentrenados
        if self.predicciones_out_fold is not None and self.modelos_entrenados is not None:
            print("Utilizando las predicciones out fold")
            predicciones_train = pd.DataFrame(self.predicciones_out_fold)
        else:
            print("Entrenando nuevos modelos base")
            # Creamos el entrenador de modelos base
            trainer = BestBaseModelTrainer(
                base_models=self.base_models,
                cv=self.cv,
                n_jobs=self.n_jobs,
                scoring=self.scoring
            )

            # Entrenamos modelos base y obtenemos predicciones out-of-fold
            predicciones_train, self.modelos_entrenados = trainer.train_and_predict(X, y)

        # Aplicamos PCA a las predicciones
        self.pca = PCA(n_components=2)
        predicciones_train_pca = self.pca.fit_transform(predicciones_train)

        # Entrenamos el meta-modelo
        self.meta_model.fit(predicciones_train_pca, y)
        self.is_fitted = True

        return self

    def predict(self, X):
        """Predice usando los modelos base y el meta-modelo"""
        if not self.is_fitted:
            raise ValueError("El modelo no ha sido entrenado aún. Llama a 'fit' antes de 'predict'.")

        # Generamos predicciones usando los modelos base
        predicciones_test = np.column_stack([
            model.predict(X) for _, model in self.modelos_entrenados
        ])

        # Aplicamos PCA a las predicciones
        predicciones_test_pca = self.pca.transform(predicciones_test)

        # Generamos predicciones con el meta-modelo
        return self.meta_model.predict(predicciones_test_pca)

## 3. Probando

#### Preprocesamiento

In [13]:
data

Unnamed: 0.1,Unnamed: 0,fecha_publicacion,latitud,dormitorios,longitud,banos,descripcion,area,precio,distrito,...,precision,type,antiguedad_categoria,precioxm2,banos/dormitorio,lima_top,precioxm2_log,precio_log,area_log,distrito_clase
0,0,0,-12.139912,3.0,-77.018606,2.0,Se Alquila Lindo Departamento en Barranco\n¡De...,77.000000,2960.0,barranco,...,ROOFTOP,street_address,Grupo 2,38.441558,0.666667,1,3.649139,7.992945,4.343805,clase1
1,1,0,-12.139272,2.0,-77.021154,2.0,Se Alquila Departamento Duplex en Corazón de B...,96.000000,2250.0,barranco,...,ROOFTOP,street_address,Grupo 1,23.437500,1.000000,1,3.154337,7.718685,4.564348,clase1
2,2,0,-12.139555,1.0,-77.023879,1.0,Se Alquila Lindo Departamento en Barranco\nSE ...,40.000000,2000.0,barranco,...,ROOFTOP,establishment,Grupo 3,50.000000,1.000000,1,3.912023,7.600902,3.688879,clase1
3,3,0,-12.138541,3.0,-77.017627,2.0,Departamento de 3 Habitaciones Sin Amoblar en ...,80.000000,2974.0,barranco,...,ROOFTOP,establishment,Grupo 3,37.175000,0.666667,1,3.615636,7.997663,4.382027,clase1
4,4,0,-12.138338,2.0,-77.025239,1.0,Se Alquila Lindo Departamento en La Av El Sol ...,70.000000,2800.0,barranco,...,ROOFTOP,subpremise,Grupo 2,40.000000,0.500000,1,3.688879,7.937375,4.248495,clase1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3310,3357,0,-12.105428,1.0,-77.022412,1.0,Alquiler Departamento Oferta\nDepartamento en ...,43.000000,2000.0,surquillo,...,ROOFTOP,establishment,Grupo 2,46.511628,1.000000,0,3.839702,7.600902,3.761200,clase2
3311,3358,0,-12.116230,2.0,-77.014257,1.0,Se Alquila Departamento en Barrio Medico\nCRIS...,44.000000,2400.0,surquillo,...,ROOFTOP,street_address,Grupo 2,54.545455,0.500000,0,3.999034,7.783224,3.784190,clase2
3312,3359,0,-12.119018,3.0,-76.999006,2.0,"Departamento de 3 Dorm.\nLindo departamento, b...",100.686335,2500.0,surquillo,...,ROOFTOP,street_address,Grupo 2,24.829586,0.666667,0,3.212036,7.824046,4.612010,clase2
3313,3360,0,-12.112500,2.0,-77.001545,1.0,¡Hermoso Flat Cerca Al Real Plaza de Angamos c...,70.000000,2300.0,surquillo,...,ROOFTOP,street_address,Grupo 2,32.857143,0.500000,0,3.492169,7.740664,4.248495,clase2


In [14]:
X = data[['latitud','longitud','dormitorios','banos','area','precision','type','antiguedad_categoria','distrito_clase']]
y = data['precio']

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42,stratify=data['distrito'])

pipeline_transfomer = Pipeline([
    ('feature_engineering', IngenieriaDeCaracteristicas(precision_labels, type_labels, distritos_labels)),
    ('imputation', PersonalSimpleImputer()),
    ('agregando_precioxm2', AgregandoPrecioxm2(n_neighbors=3, n_folds=10)),
    ('transformador', CustomColumnTransformer()),
    ('seleccionar_caracteristicas',SeleccionDeCaracteristicas(n_features=10)),
])

X_train_transformed = pipeline_transfomer.fit_transform(X_train,y_train)
X_test_transformed = pipeline_transfomer.transform(X_test)

### 1. Segmentacion : Ninguna

**Predicciones de los modelos base**

In [15]:
base_models = BestBaseModelTrainer(modelos,cv=10,n_jobs=-1)
predicciones_out_fold,modelos_entrenados = base_models.train_and_predict(X_train_transformed,y_train)













In [16]:
for nombre, modelo in modelos_entrenados:
    y_test_pred = modelo.predict(X_test_transformed)
    y_train_pred = modelo.predict(X_train_transformed)
    
    error_test = mean_absolute_percentage_error(y_test,y_test_pred)
    error_train = mean_absolute_percentage_error(y_train,y_train_pred)
    
    print(f"Test {nombre}: {error_test:.4f}%")
    print(f"Train {nombre}: {error_train:.4f}%\n")

Test lasso_regression: 0.2083%
Train lasso_regression: 0.2030%

Test random_forest: 0.1693%
Train random_forest: 0.0976%

Test support_vector_regressor: 0.2321%
Train support_vector_regressor: 0.2329%

Test xgboost: 0.1648%
Train xgboost: 0.0764%

Test knn: 0.1885%
Train knn: 0.1607%



**Prediccion del stacking Lasso sobre las predicciones de los modelos base usando PCA**

In [21]:
stacking_regressor = CustomStackingRegressor(
    modelos_entrenados= modelos_entrenados,
    predicciones_out_fold = predicciones_out_fold,
    meta_model=Ridge(alpha=0.01),#RandomForestRegressor(n_estimators=30,max_depth=10,min_samples_leaf=4),
    cv=10,
    n_jobs=-1
)
stacking_regressor.fit(X_train_transformed,y_train)

y_pred_test = stacking_regressor.predict(X_test_transformed)
y_pred_train = stacking_regressor.predict(X_train_transformed)

error_test = mean_absolute_percentage_error(y_test,y_pred_test)
error_train = mean_absolute_percentage_error(y_train,y_pred_train)

print(f"Test Stacking: {error_test:.4f}%")
print(f"Train Stacking: {error_train:.4f}%")

Utilizando las predicciones out fold
Test Stacking: 0.1665%
Train Stacking: 0.1103%


In [31]:
base_models = BestBaseModelTrainer(modelos,cv=10,n_jobs=-1)
clases_distritos_train = []
for distrito_clase in X_train_transformed['distrito_clase'].unique():
    X_train_distrito = X_train_transformed[X_train_transformed['distrito_clase']==distrito_clase]
    y_train_distrito = y_train.loc[X_train_distrito.index]
    predicciones_out_fold, modelos_entrenados = base_models.train_and_predict(X_train_distrito,y_train_distrito)
    clases_distritos_train.append({
        'distrito_clase': distrito_clase,
        'predicciones_out_fold': predicciones_out_fold,
        'modelos_entrenados': modelos_entrenados,
    })

In [30]:
for diccionario in clases_distritos_train:

    print(f"Distrito clase : {diccionario['distrito_clase']}")

    X_train_distrito = X_train_transformed[X_train_transformed['distrito_clase']==diccionario['distrito_clase']]
    y_train_distrito = y_train.loc[X_train_distrito.index]
    
    stacking_regressor = CustomStackingRegressor(
        modelos_entrenados= diccionario['modelos_entrenados'],
        predicciones_out_fold = diccionario['predicciones_out_fold'],
        meta_model=Ridge(alpha=0.01),#RandomForestRegressor(n_estimators=30,max_depth=10,min_samples_leaf=4),
        cv=10,
        n_jobs=-1
    )
    
    stacking_regressor.fit(X_train_distrito,y_train_distrito)
    
    y_pred_test = stacking_regressor.predict(X_test_transformed)
    y_pred_train = stacking_regressor.predict(X_train_transformed)
    
    error_test = mean_absolute_percentage_error(y_test,y_pred_test)
    error_train = mean_absolute_percentage_error(y_train,y_pred_train)
    
    print(f"Test Stacking: {error_test:.4f}%")
    print(f"Train Stacking: {error_train:.4f}%")
    

1532   -1.155716
1023    1.364809
2957    0.104547
2395    0.104547
3041    0.104547
          ...   
2416   -1.155716
2373   -1.155716
3113    0.104547
419     1.364809
116    -1.155716
Name: distrito_clase, Length: 2652, dtype: float64

NameError: name 'diccionario' is not defined