In [2]:
import pandas as pd

# Read the door price dataset from the working directory and preview it.
df = pd.read_csv(filepath_or_buffer="datajupiter.csv")
df.head(n=5)

Unnamed: 0,id,tipo,producto,material,alto,ancho,precio
0,1,Puerta para casa interior,Puerta interior barras lineales,Material simple,250.0,90.0,300.0
1,2,Puerta para casa interior,Puerta interior barras lineales,Material simple,249.0,90.0,300.0
2,3,Puerta para casa interior,Puerta interior barras lineales,Material simple,251.0,90.0,300.0
3,4,Puerta para casa interior,Puerta interior barras lineales,Material simple,250.0,91.0,300.0
4,5,Puerta para casa interior,Puerta interior barras lineales,Material simple,251.0,93.0,300.0


In [3]:
# Discard the two columns that are not used as model inputs.
df = df.drop(columns=['id', 'tipo'])

In [4]:
# Preview the frame after the column drops.
df.head(n=5)

Unnamed: 0,producto,material,alto,ancho,precio
0,Puerta interior barras lineales,Material simple,250.0,90.0,300.0
1,Puerta interior barras lineales,Material simple,249.0,90.0,300.0
2,Puerta interior barras lineales,Material simple,251.0,90.0,300.0
3,Puerta interior barras lineales,Material simple,250.0,91.0,300.0
4,Puerta interior barras lineales,Material simple,251.0,93.0,300.0


In [5]:
pip install pandas scikit-learn joblib jupyter

Collecting jupyter
  Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting jupyter-console (from jupyter)
  Downloading jupyter_console-6.6.3-py3-none-any.whl.metadata (5.8 kB)
Collecting ipywidgets (from jupyter)
  Downloading ipywidgets-8.1.7-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.14 (from ipywidgets->jupyter)
  Downloading widgetsnbextension-4.0.14-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab_widgets~=3.0.15 (from ipywidgets->jupyter)
  Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl.metadata (20 kB)
Downloading jupyter-1.1.1-py2.py3-none-any.whl (2.7 kB)
Downloading ipywidgets-8.1.7-py3-none-any.whl (139 kB)
Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl (216 kB)
Downloading widgetsnbextension-4.0.14-py3-none-any.whl (2.2 MB)
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   --------- ------------------------------ 0.5/2.2 MB 2.1 MB/s eta 0:00:01
   -------------- -----------------

In [6]:
from sklearn.preprocessing import LabelEncoder
# Encode `producto` as integer codes.
le_producto = LabelEncoder()
df['producto'] = le_producto.fit_transform(df['producto'])

# Encode `material` with an explicit mapping so the codes follow the intended
# order (simple < intermediate < resistant).
orden_material = {'Material simple': 0, 'Material intermedio': 1, 'Material resistente': 2}
material_codificado = df['material'].map(orden_material)

# Series.map yields NaN for any label missing from the mapping. Fail loudly
# instead of letting NaN reach the models; this also catches an accidental
# re-run of this cell on an already-encoded column.
sin_codigo = df.loc[df['material'].notna() & material_codificado.isna(), 'material'].unique()
if len(sin_codigo) > 0:
    raise ValueError(
        f"Valores de 'material' sin código en orden_material: {list(sin_codigo)}"
    )
df['material'] = material_codificado

In [7]:
# Preview the frame after encoding the categorical columns.
df.head(n=5)

Unnamed: 0,producto,material,alto,ancho,precio
0,8,0,250.0,90.0,300.0
1,8,0,249.0,90.0,300.0
2,8,0,251.0,90.0,300.0
3,8,0,250.0,91.0,300.0
4,8,0,251.0,93.0,300.0


In [8]:
from sklearn.utils import shuffle
# Randomise the row order with a fixed seed so the result is repeatable,
# then preview the shuffled frame.
df = shuffle(df, random_state=42)
df.head(n=5)

Unnamed: 0,producto,material,alto,ancho,precio
157,4,1,232.0,260.0,1200.0
341,1,1,260.0,400.0,3800.0
315,2,1,350.0,260.0,8000.0
234,6,2,260.0,260.0,1650.0
155,4,1,240.0,260.0,1400.0


In [9]:
# Strip thousands separators from `precio` and convert it to float.
# Casting to str first keeps the cell working when the column is already
# numeric (where the `.str` accessor would raise AttributeError) and makes it
# safe to re-run. Missing values are masked back to NaN before the final cast.
precio_sin_comas = df['precio'].astype(str).str.replace(',', '', regex=False)
df['precio'] = precio_sin_comas.where(df['precio'].notna()).astype(float)

In [10]:
# Pairwise correlation between the encoded features and the price.
columnas_correlacion = ['producto', 'material', 'alto', 'ancho', 'precio']
correlacion = df[columnas_correlacion].corr()
print(correlacion)

          producto  material      alto     ancho    precio
producto  1.000000  0.092304 -0.413348 -0.597774 -0.667720
material  0.092304  1.000000 -0.137486  0.059819 -0.122713
alto     -0.413348 -0.137486  1.000000  0.226187  0.790148
ancho    -0.597774  0.059819  0.226187  1.000000  0.464554
precio   -0.667720 -0.122713  0.790148  0.464554  1.000000


In [11]:
!pip install xgboost
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib



In [12]:
# Feature matrix and target vector.
columnas_entrada = ['producto', 'material', 'alto', 'ancho']
X = df[columnas_entrada]
y = df['precio']

# Hold out 20% of the rows for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Candidate regressors, all seeded the same way.
dt_model = DecisionTreeRegressor(random_state=42)
rf_model = RandomForestRegressor(random_state=42)
xgb_model = XGBRegressor(random_state=42, verbosity=0)

# Fit each regressor on the training split, in the same order as declared.
for estimador in (dt_model, rf_model, xgb_model):
    estimador.fit(X_train, y_train)

# Persist the fitted models; the last dump stays the final expression so the
# cell still displays its return value.
joblib.dump(dt_model, 'modelo_arbol.pkl')
joblib.dump(rf_model, 'modelo_random_forest.pkl')
joblib.dump(xgb_model, 'modelo_xgboost.pkl')

['modelo_xgboost.pkl']

In [13]:
from sklearn.model_selection import cross_val_score
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

def evaluar(nombre, y_true, y_pred):
    """Print the RMSE and R² of a set of predictions.

    Args:
        nombre: Label printed as the heading of the report.
        y_true: Observed target values.
        y_pred: Predicted target values, aligned with ``y_true``.

    Returns:
        None. The report is written to standard output, followed by a
        blank line.
    """
    error_cuadratico_medio = mean_squared_error(y_true, y_pred)
    raiz_ecm = np.sqrt(error_cuadratico_medio)
    coef_determinacion = r2_score(y_true, y_pred)

    print(f"{nombre}:")
    print(" - RMSE:", round(raiz_ecm, 2))
    print(" - R²:", round(coef_determinacion, 4))
    print()

# Helper that prints cross-validation results.
def cross_validation_resultado(nombre, modelo, X, y):
    """Run 5-fold cross-validation scored with R² and print a summary.

    Args:
        nombre: Label printed as the heading of the report.
        modelo: Estimator passed to ``cross_val_score``.
        X: Feature matrix.
        y: Target values.

    Returns:
        None. The per-fold scores, their mean and their standard deviation
        are written to standard output, followed by a blank line.
    """
    puntajes = cross_val_score(modelo, X, y, cv=5, scoring='r2')
    media = np.mean(puntajes)
    desviacion = np.std(puntajes)

    print(f"{nombre} - Cross-Validation (R²):")
    print(" - Scores:", np.round(puntajes, 4))
    print(" - Promedio:", round(media, 4))
    print(" - Desviación estándar:", round(desviacion, 4))
    print()

# Report label paired with each fitted model, in reporting order.
modelos_a_evaluar = (
    ("Árbol de Decisión", dt_model),
    ("Random Forest", rf_model),
    ("XGBoost", xgb_model),
)

# Hold-out evaluation on the test split.
for etiqueta, estimador in modelos_a_evaluar:
    evaluar(etiqueta, y_test, estimador.predict(X_test))

# Cross-validation over the full dataset.
for etiqueta, estimador in modelos_a_evaluar:
    cross_validation_resultado(etiqueta, estimador, X, y)


Árbol de Decisión:
 - RMSE: 146.91
 - R²: 0.9966

Random Forest:
 - RMSE: 295.09
 - R²: 0.9861

XGBoost:
 - RMSE: 191.65
 - R²: 0.9941

Árbol de Decisión - Cross-Validation (R²):
 - Scores: [0.9988 0.9729 0.9978 0.9961 0.9996]
 - Promedio: 0.9931
 - Desviación estándar: 0.0101

Random Forest - Cross-Validation (R²):
 - Scores: [0.9961 0.9822 0.9955 0.9947 0.9951]
 - Promedio: 0.9927
 - Desviación estándar: 0.0053

XGBoost - Cross-Validation (R²):
 - Scores: [0.9974 0.994  0.9986 0.9769 0.989 ]
 - Promedio: 0.9912
 - Desviación estándar: 0.0079



In [17]:
import pandas as pd
import joblib

# Hand-encoded sample rows. `producto` and `material` must use the same
# integer codes that were assigned to the training data.
datos_nuevos = pd.DataFrame([
    [508.0, 263.0, 2, 1],
    [255.0, 98.0, 11, 2],
    [242.0, 263.0, 6, 1],
    [260.0, 260.0, 5, 1],
    [251.0, 93.0, 9, 0],
    [250.0, 95.0, 10, 1]
], columns=['alto', 'ancho', 'producto', 'material'])

# Select the features in the column order the models were fitted with. This
# must come after `datos_nuevos` is created; building it first raises
# NameError on a fresh kernel.
X_prueba = datos_nuevos[['producto', 'material', 'alto', 'ancho']]

# Load the persisted decision tree model.
modelo = joblib.load('modelo_arbol.pkl')

# Predict prices for the sample rows.
predicciones = modelo.predict(X_prueba)

# Attach the predictions and show the result.
datos_nuevos['precio_predicho'] = predicciones
print(datos_nuevos)

    alto  ancho  producto  material  precio_predicho
0  508.0  263.0         2         1           9500.0
1  255.0   98.0        11         2            980.0
2  242.0  263.0         6         1           1400.0
3  260.0  260.0         5         1           1250.0
4  251.0   93.0         9         0            300.0
5  250.0   95.0        10         1            780.0


In [19]:
# Attach the known prices for the sample rows.
# NOTE(review): if these rows also appear in the training data, a zero error
# here does not measure generalisation — confirm where the samples come from.
precios_reales = [9500.00, 980.00, 1400.00, 1250.00, 300.00, 780.00]
datos_nuevos['precio_real'] = precios_reales

# Absolute error, and the same error as a percentage of the real price
# rounded to two decimals.
diferencia = datos_nuevos['precio_real'] - datos_nuevos['precio_predicho']
datos_nuevos['error_absoluto'] = diferencia.abs()
datos_nuevos['error_porcentual'] = (
    (datos_nuevos['error_absoluto'] / datos_nuevos['precio_real']) * 100
).round(2)

# Show the comparison table.
columnas_reporte = [
    'producto', 'material', 'alto', 'ancho',
    'precio_real', 'precio_predicho', 'error_absoluto', 'error_porcentual',
]
print(datos_nuevos[columnas_reporte])

   producto  material   alto  ancho  precio_real  precio_predicho  \
0         2         1  508.0  263.0       9500.0           9500.0   
1        11         2  255.0   98.0        980.0            980.0   
2         6         1  242.0  263.0       1400.0           1400.0   
3         5         1  260.0  260.0       1250.0           1250.0   
4         9         0  251.0   93.0        300.0            300.0   
5        10         1  250.0   95.0        780.0            780.0   

   error_absoluto  error_porcentual  
0             0.0               0.0  
1             0.0               0.0  
2             0.0               0.0  
3             0.0               0.0  
4             0.0               0.0  
5             0.0               0.0  
