In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the test dataset from the raw-data folder (path relative to this notebook).
# If the CSV sits next to the notebook instead, point the path at 'test.csv'.
test_csv_path = '../data/raw/test.csv'
data_test = pd.read_csv(test_csv_path)

In [3]:
# Preview the first five rows of the freshly loaded test set.
data_test.head(n=5)

Unnamed: 0,CustomerID,Age,Gender,Income,VisitFrequency,AverageSpend,PreferredCuisine,TimeOfVisit,GroupSize,DiningOccasion,MealType,OnlineReservation,DeliveryOrder,LoyaltyProgramMember,WaitTime,ServiceRating,FoodRating,AmbianceRating,HighSatisfaction
0,1950,67,Female,87167,Weekly,152.136795,Chinese,Lunch,4,Celebration,Dine-in,0,1,1,14.722728,5,5,4,1
1,1145,65,Female,45416,Weekly,119.837286,Italian,Breakfast,1,Celebration,Takeaway,1,0,0,8.71786,2,3,5,0
2,960,54,Female,48233,Weekly,31.424327,American,Lunch,5,Casual,Dine-in,0,0,0,19.326013,5,2,2,0
3,1057,61,Female,69799,Weekly,67.879151,American,Dinner,5,Celebration,Dine-in,0,1,1,24.708839,3,4,2,1
4,2030,52,Female,108514,Monthly,155.891539,Mexican,Lunch,6,Celebration,Dine-in,0,1,1,35.944814,4,3,5,0


In [4]:
# Load the saved feature-engineering configuration.
# This reads the copy in the current working directory; the alternative
# location is '../artifacts/feature_eng_configs.pkl'.
with open('feature_eng_configs.pkl', mode='rb') as config_file:
    feature_eng_configs = pickle.load(config_file)

In [8]:
# Encode the categorical columns using the label classes stored in the pickle.
# feature_eng_configs['label_encoders'] maps each column name to its saved
# classes; the position of a class in that collection is its integer code,
# which is the code LabelEncoder.transform yields when `classes_` is set to it.
columnas_label_encoding = list(feature_eng_configs['label_encoders'])

for col in columnas_label_encoding:
    clases = feature_eng_configs['label_encoders'][col]
    codigo_por_clase = {clase: codigo for codigo, clase in enumerate(clases)}
    # One dictionary lookup per column instead of one encoder call per row.
    # Values that are not among the saved classes (missing values included)
    # come back from `map` as NaN and receive the sentinel code -1.
    # NOTE: this cell is not idempotent. Re-running it on a column that is
    # already encoded looks the integer codes up among the saved classes, so
    # (unless the classes are those same integers) every value becomes -1.
    data_test[col] = data_test[col].map(codigo_por_clase).fillna(-1).astype('int64')


Otra opción (alternativa a la codificación de la celda anterior)

In [4]:
# Drop the CustomerID column before building the model input.
# Rebinding the name (rather than inplace=True) together with errors='ignore'
# keeps this cell re-runnable: a second execution no longer raises KeyError
# because the column is already gone.
data_test = data_test.drop(columns=['CustomerID'], errors='ignore')
data_test.head()

Unnamed: 0,Age,Gender,Income,VisitFrequency,AverageSpend,PreferredCuisine,TimeOfVisit,GroupSize,DiningOccasion,MealType,OnlineReservation,DeliveryOrder,LoyaltyProgramMember,WaitTime,ServiceRating,FoodRating,AmbianceRating,HighSatisfaction
0,67,Female,87167,Weekly,152.136795,Chinese,Lunch,4,Celebration,Dine-in,0,1,1,14.722728,5,5,4,1
1,65,Female,45416,Weekly,119.837286,Italian,Breakfast,1,Celebration,Takeaway,1,0,0,8.71786,2,3,5,0
2,54,Female,48233,Weekly,31.424327,American,Lunch,5,Casual,Dine-in,0,0,0,19.326013,5,2,2,0
3,61,Female,69799,Weekly,67.879151,American,Dinner,5,Celebration,Dine-in,0,1,1,24.708839,3,4,2,1
4,52,Female,108514,Monthly,155.891539,Mexican,Lunch,6,Celebration,Dine-in,0,1,1,35.944814,4,3,5,0


In [5]:
columnas_label_encoding = ['Gender', 'VisitFrequency', 'PreferredCuisine', 'TimeOfVisit', 'DiningOccasion', 'MealType']

# Encode with the classes saved in feature_eng_configs instead of fitting a new
# LabelEncoder on the test data. A freshly fitted encoder numbers only the
# categories that occur in the test set, so a category missing here would
# shift the remaining codes away from the ones stored in the pickle.
clases_guardadas = feature_eng_configs['label_encoders']

for col in columnas_label_encoding:
    # Skip columns that are already numeric (e.g. encoded by an earlier cell);
    # encoding them a second time would corrupt the codes.
    if pd.api.types.is_numeric_dtype(data_test[col]):
        continue
    # A class's position in the saved collection is its integer code.
    codigo_por_clase = {clase: codigo for codigo, clase in enumerate(clases_guardadas[col])}
    # Categories absent from the saved classes (and missing values) get -1.
    data_test[col] = data_test[col].map(codigo_por_clase).fillna(-1).astype('int64')

data_test.head()

Unnamed: 0,Age,Gender,Income,VisitFrequency,AverageSpend,PreferredCuisine,TimeOfVisit,GroupSize,DiningOccasion,MealType,OnlineReservation,DeliveryOrder,LoyaltyProgramMember,WaitTime,ServiceRating,FoodRating,AmbianceRating,HighSatisfaction
0,67,0,87167,3,152.136795,1,2,4,2,0,0,1,1,14.722728,5,5,4,1
1,65,0,45416,3,119.837286,3,0,1,2,1,1,0,0,8.71786,2,3,5,0
2,54,0,48233,3,31.424327,0,2,5,1,0,0,0,0,19.326013,5,2,2,0
3,61,0,69799,3,67.879151,0,1,5,2,0,0,1,1,24.708839,3,4,2,1
4,52,0,108514,1,155.891539,4,2,6,2,0,0,1,1,35.944814,4,3,5,0


In [6]:
# Remove the HighSatisfaction column (presumably the target label - confirm)
# so that it is not passed on to the scaler.
# errors='ignore' lets the cell be re-run, and also lets it accept input that
# does not contain this column, without raising KeyError.
data_test = data_test.drop(columns=['HighSatisfaction'], errors='ignore')
data_test.head()

Unnamed: 0,Age,Gender,Income,VisitFrequency,AverageSpend,PreferredCuisine,TimeOfVisit,GroupSize,DiningOccasion,MealType,OnlineReservation,DeliveryOrder,LoyaltyProgramMember,WaitTime,ServiceRating,FoodRating,AmbianceRating
0,67,0,87167,3,152.136795,1,2,4,2,0,0,1,1,14.722728,5,5,4
1,65,0,45416,3,119.837286,3,0,1,2,1,1,0,0,8.71786,2,3,5
2,54,0,48233,3,31.424327,0,2,5,1,0,0,0,0,19.326013,5,2,2
3,61,0,69799,3,67.879151,0,1,5,2,0,0,1,1,24.708839,3,4,2
4,52,0,108514,1,155.891539,4,2,6,2,0,0,1,1,35.944814,4,3,5


In [7]:
# Load the previously saved standard scaler from the artifacts folder.
# If the pickle sits next to the notebook instead, open 'std_scaler.pkl'.
with open('../artifacts/std_scaler.pkl', mode='rb') as scaler_file:
    std_scaler = pickle.load(scaler_file)

In [8]:
# Standardize the test-set variables with the loaded scaler.
# The whole `data_test` frame is passed, so every column still present at this
# point is handed to the scaler.
X_data_test_std = std_scaler.transform(data_test)


In [10]:
# Load the saved model from the models folder.
# If the pickle sits next to the notebook instead, open 'random_forest_v1.pkl'.
with open('../models/random_forest_v1.pkl', mode='rb') as model_file:
    modelo = pickle.load(model_file)

In [11]:
# Run the loaded model on the standardized test features.
model_predicts = modelo.predict(X_data_test_std)

# Print a header line followed by the predictions on the next line.
print("Predicciones del modelo:", model_predicts, sep="\n")

Predicciones del modelo:
[1 0 0 1 0 0 0 1 0 0 1 1 1 1 0 0 1 1 1 0 0 1 0 0 1 1 0 1 0 1 1 1 0 1 0 0 1
 1 0 1 0 0 1 1 0 1 1 1 0 0 1 0 0 1 1 1 1 1 0 1 0 1 1 0 1 0 0 1 0 1 1 0 0 0
 1 1 1 1 0 1 1 0 0 1 0 1 0 1 1 1 1 1 1 0 1 0 0 0 0 1 0 0 1 1 1 1 1 0 0 1 0
 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 1 1 1 0 1 1 0 0 0 0 1 1 1 0 0 1 0 0 0 1 1
 0 0 1 1 0 0 0 1 0 1 0 1 0 0 1 1 0 0 0 1 1 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1
 1 0 1 0 1 1 0 1 1 0 1 0 0 0 0 1 1 1 1 1 0 1 0 0 0 0 0 1 1 1 0 1 0 0 1 0 1
 1 1 1 0 1 1 0 1 1 1 0 0 1 1 0 1 0 0 0 1 0 1 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1
 0 0 0 0 1 1 0 0 0 1 0 0 1 1 1 1 1 0 1 0 1 1 0 0 0 0 0 0 0 1 1 1 1 1 0 1 0
 0 0 1 1 0 0 0 1 1 0 1 1 1 0 0 0 1 1 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0
 0 1 1 1 1 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 1 0 1 1 0 1 1 0 0 0 1 0 1 1 0 1 0
 1 0 1 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 1 1 0 0 1 0 1 1 1 1 0 0 1 0 1 0 1
 0 1 0 1 1 0 0 0 1 0 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 0 1 1 1 0 0 1 1 1 1 0 0
 1 1 0 0 0 0 0 1 0 0 1 1 1 0 0 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 0 0 1 1 0 0
