In [1]:
import pandas as pd

# Load the raw dataset from CSV. The path is relative, so it resolves against
# the kernel's current working directory.
ruta_csv = '../Data/mobilsNET.csv'
df = pd.read_csv(ruta_csv)

In [2]:
import numpy as np

# Fixed seed so the synthetic labels (and every metric computed from them) are
# identical after Restart Kernel -> Run All. A dedicated Generator is used
# instead of the unseeded global np.random state.
SEED = 42
rng = np.random.default_rng(SEED)

# Probability that any given row is flagged as discounted.
probabilidad_descuento = 0.20

# Synthetic label: each row is independently flagged with probability
# `probabilidad_descuento`.
df['target'] = rng.random(len(df)) < probabilidad_descuento

# Flagged rows get 'Precio' reduced by a random 5%-20%; unflagged rows keep
# 'Precio' unchanged (Series.where keeps the original where the condition holds).
factor_descuento = 1 - rng.uniform(0.05, 0.20, size=len(df))
df['Precio_descuento'] = df['Precio'].where(~df['target'], df['Precio'] * factor_descuento)

In [3]:
# Convert "Precio Inicial" from text to float: strip every "." first, then turn
# "," into "." (presumably European number formatting, e.g. "1.234,56" - verify
# against the raw data).
precio_sin_puntos = df["Precio Inicial"].str.replace(".", "", regex=False)
df["Precio Inicial"] = precio_sin_puntos.str.replace(",", ".").astype(float)

# One-hot encode the brand column into "Marca_<value>" indicator columns.
df = pd.get_dummies(df, columns=["Marca"], prefix="Marca")

# Split the "<a> x <b> x <c>" dimensions text into three float columns.
dimensiones_txt = df["Dimensiones"].str.replace(",", ".")
medidas = dimensiones_txt.str.extract(r"([\d.]+) x ([\d.]+) x ([\d.]+)").astype(float)
df[["Ancho", "Alto", "Profundidad"]] = medidas

# The raw dimensions text and the leftover CSV index column are no longer needed.
df = df.drop(columns=["Dimensiones", "Unnamed: 0"])

In [4]:
from sklearn.model_selection import train_test_split

# Label vector, and a feature matrix that excludes both the label and the
# discounted price derived from it.
y = df["target"]
X = df.drop(columns=["target", "Precio_descuento"])

# Stratified 80/20 hold-out split with a fixed seed.
particiones = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
X_train, X_test, y_train, y_test = particiones

print(f"Tamaño del dataset de entrenamiento: {X_train.shape[0]}")
print(f"Tamaño del dataset de prueba: {X_test.shape[0]}")

Tamaño del dataset de entrenamiento: 1248
Tamaño del dataset de prueba: 312


In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Standardize the features. The scaler is fitted on the training split only and
# then applied to the test split, so no test-set statistics reach training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Candidate classifiers, keyed by the display name used in the report below.
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Support Vector Machine": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier()
}

# Fit each candidate and record its accuracy on the hold-out split.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy

# NOTE(review): 'target' is drawn with roughly 20% positives, so a constant
# "no discount" prediction would already score about 80% accuracy; read the
# numbers below against that baseline.
print("--- Resultados finales ---")
# The loop variable is deliberately NOT called `model`: iterating over
# results.items() yields the dictionary keys (strings), and reusing the name
# would leave the global `model` bound to a str instead of a fitted estimator.
for model_name, acc in results.items():
    print(f"{model_name}: {acc*100:.4f}")

--- Resultados finales ---
Random Forest: 78.8462
Logistic Regression: 79.4872
Support Vector Machine: 80.1282
K-Nearest Neighbors: 75.6410


In [6]:
import joblib

# Choose the estimator to persist explicitly from `models`, using the hold-out
# accuracies in `results`, instead of relying on whatever a loop variable named
# `model` happened to be bound to last (which may not even be an estimator).
best_model_name = max(results, key=results.get)

# NOTE: the estimators were trained on StandardScaler-transformed features, so
# the fitted `scaler` must be applied to any new data before predicting with
# the saved model.
joblib.dump(models[best_model_name], 'mobile_discount_model.pkl')

['mobile_discount_model.pkl']