In [1]:
from pathlib import Path

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

In [2]:
# Load the training set and replace every missing value with 0 (a no-op when
# the file has no gaps).
# NOTE(review): fillna(0) applies to all columns, so a NaN in a text column
# such as "Switches" would become the integer 0 — confirm that is intended.
df = pd.read_csv('../data/train_dataset2.csv').fillna(0)
df.head()

Unnamed: 0,Name,SimulatedPrice,RealPrice,Type,Connection,Switches,Rating,Stores,EtiquetaPrecio
0,A4Tech B3370R,22.45,34.49,Keyboard Only,USB,LK Optical Red / Blue,4.0,0,ExcelenteOferta
1,A4Tech B3370R,21.28,34.49,Keyboard Only,USB,LK Optical Red / Blue,4.0,0,ExcelenteOferta
2,A4Tech B3370R,23.1,34.49,Keyboard Only,USB,LK Optical Red / Blue,4.0,0,ExcelenteOferta
3,A4Tech B3370R,22.53,34.49,Keyboard Only,USB,LK Optical Red / Blue,4.0,0,ExcelenteOferta
4,A4Tech B3370R,21.12,34.49,Keyboard Only,USB,LK Optical Red / Blue,4.0,0,ExcelenteOferta


In [3]:
# Categorical predictors (text columns).
cat_features = df.loc[:, ["Name", "Type", "Connection", "Switches"]]

# Numeric predictors.
num_features = df.loc[:, ["SimulatedPrice", "Rating", "Stores"]]

# Target: encode each "EtiquetaPrecio" label as an integer class id. The
# fitted encoder keeps the original label names in `classes_`.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df["EtiquetaPrecio"])

In [4]:
# One-hot encode the categorical columns into a dense array. Categories that
# were not seen while fitting are encoded as all zeros instead of raising.
# NOTE(review): the encoder is fitted on every row, i.e. before the
# train/test split that happens in a later cell.
cat_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
cat_encoder.fit(cat_features)
cat_encoded = cat_encoder.transform(cat_features)

In [5]:
# Standardize each numeric column with the mean and standard deviation
# learned from the data passed to fit().
# NOTE(review): the scaler is fitted on every row, i.e. before the
# train/test split that happens in a later cell.
scaler = StandardScaler()
scaler.fit(num_features)
num_scaled = scaler.transform(num_features)

In [6]:
# Build the feature matrix column-wise: scaled numeric columns first,
# followed by the one-hot encoded categorical columns.
X = np.concatenate((num_scaled, cat_encoded), axis=1)

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
# NOTE(review): the split is not stratified on y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [8]:
# Fit a 100-tree random forest on the training split; the fixed seed makes
# training reproducible.
model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
model.fit(X_train, y_train)

In [9]:
# Predict the class of every held-out row.
y_pred = model.predict(X_test)

# Confusion matrix: rows are true classes, columns are predicted classes.
print("ðŸ“Š Matriz de ConfusiÃ³n:")
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)

# Per-class precision / recall / F1, labelled with the original class names
# stored in the label encoder.
print("\nðŸ“„ Reporte de ClasificaciÃ³n:")
report = classification_report(
    y_test,
    y_pred,
    target_names=label_encoder.classes_,
)
print(report)

ðŸ“Š Matriz de ConfusiÃ³n:
[[ 283    0   25    0   28    0    0]
 [   0  216    0    0    0    0   37]
 [  15    0  685    0    0   20    0]
 [   0    0    0  318   32    0   14]
 [  25    0    0   14  404    0    0]
 [   0    0   32    0    0 1277    0]
 [   0   20    0   24    0    0  321]]

ðŸ“„ Reporte de ClasificaciÃ³n:
                 precision    recall  f1-score   support

     BuenPrecio       0.88      0.84      0.86       336
         Estafa       0.92      0.85      0.88       253
ExcelenteOferta       0.92      0.95      0.94       720
     MalaCompra       0.89      0.87      0.88       364
         Normal       0.87      0.91      0.89       443
        Oferton       0.98      0.98      0.98      1309
   PesimaCompra       0.86      0.88      0.87       365

       accuracy                           0.92      3790
      macro avg       0.90      0.90      0.90      3790
   weighted avg       0.92      0.92      0.92      3790



In [10]:
# Persist the trained model and the fitted scaler, one-hot encoder and label
# encoder to disk.
# joblib.dump does not create missing directories, so make sure the target
# folder exists first; otherwise a fresh checkout fails with FileNotFoundError
# only after the whole training run has finished.
Path('../model').mkdir(parents=True, exist_ok=True)

# NOTE(review): the model file is tagged v3 while the other artifacts are
# tagged v2 — confirm that consumers load this combination.
joblib.dump(model, '../model/modelo_multiclase_v3.pkl')
joblib.dump(scaler, '../model/scaler_full_v2.pkl')
joblib.dump(cat_encoder, '../model/encoder_categoricos_v2.pkl')
joblib.dump(label_encoder, '../model/encoder_etiquetas_v2.pkl')

print("âœ… Modelo v3 entrenado y guardado con Ã©xito.")

âœ… Modelo v3 entrenado y guardado con Ã©xito.
