In [11]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [12]:
# =========================================
# 1) Cargar datos y objetivo
# =========================================
import os, json, warnings, platform, datetime
import numpy as np
import pandas as pd
import joblib
warnings.filterwarnings("ignore")

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

DATA_FILE = "house_price.csv"     # <-- tu archivo limpio
TARGET    = "House_Price"                             # precio de venta
assert os.path.exists(DATA_FILE), f"No se encuentra {DATA_FILE}"

df = pd.read_csv(DATA_FILE)
df.info()

#df["clase_salario"] = df["clase_salario"].map({0: "MENOR", 1: "MAYOR"}).astype(str)
#df.info()
#df["clase_salario"].value_counts()

y  = df[TARGET]
X  = df.drop(columns=[TARGET])

print("Shape:", X.shape,
      "| y(mean):", round(y.mean(), 4),
      "| y(std):", round(y.std(), 4),
      "| y[min,max]:", (round(y.min(), 4), round(y.max(), 4)))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Square_Footage        1000 non-null   int64  
 1   Num_Bedrooms          1000 non-null   int64  
 2   Num_Bathrooms         1000 non-null   int64  
 3   Year_Built            1000 non-null   int64  
 4   Lot_Size              1000 non-null   float64
 5   Garage_Size           1000 non-null   int64  
 6   Neighborhood_Quality  1000 non-null   int64  
 7   House_Price           1000 non-null   float64
dtypes: float64(2), int64(6)
memory usage: 62.6 KB
Shape: (1000, 7) | y(mean): 618861.0186 | y(std): 253568.0584 | y[min,max]: (111626.8534, 1108236.8363)


In [13]:
X , y

(     Square_Footage  Num_Bedrooms  Num_Bathrooms  Year_Built  Lot_Size  \
 0              1360             2              1        1981  0.599637   
 1              4272             3              3        2016  4.753014   
 2              3592             1              2        2016  3.634823   
 3               966             1              2        1977  2.730667   
 4              4926             2              1        1993  4.699073   
 ..              ...           ...            ...         ...       ...   
 995            3261             4              1        1978  2.165110   
 996            3179             1              2        1999  2.977123   
 997            2606             4              2        1962  4.055067   
 998            4723             5              2        1950  1.930921   
 999            3268             4              2        1983  3.108790   
 
      Garage_Size  Neighborhood_Quality  
 0              0                     5  
 1            

In [14]:
# =========================================
# 2) Split temprano (80/20)
# =========================================
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20,  random_state=RANDOM_STATE
)
print(f"Train: {X_train.shape} | Test: {X_test.shape}")

Train: (800, 7) | Test: (200, 7)


In [15]:
# =========================================
# 3) Preprocesamiento (en pipeline)
# =========================================
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.feature_selection import VarianceThreshold
from imblearn.pipeline import Pipeline as ImbPipeline  # imblearn solo por consistencia de API

cat_features = X_train.select_dtypes(include=["object","category"]).columns.tolist()
num_features = X_train.select_dtypes(include=["number","bool"]).columns.tolist()

# OneHotEncoder compatible (con fallback)
try:
    ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
except TypeError:
    ohe = OneHotEncoder(handle_unknown="ignore", sparse=False)

preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), num_features),
        ("cat", ohe,              cat_features),
    ],
    remainder="drop",
)

def build_pipe(model):
    # Nota: en regresión NO se usa SMOTE
    return ImbPipeline([
        ("prep", preprocessor),
        ("var0", VarianceThreshold(0.0)),  # limpia columnas constantes tras OHE
        ("model", model),
    ])

print(f"Features numéricas: {num_features}")
print(f"Features categóricas: {cat_features}")

Features numéricas: ['Square_Footage', 'Num_Bedrooms', 'Num_Bathrooms', 'Year_Built', 'Lot_Size', 'Garage_Size', 'Neighborhood_Quality']
Features categóricas: []


In [16]:
# =========================================
# 4) Modelos candidatos (REGRESIÓN)
# =========================================
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

candidates = [
    ("LR",  LinearRegression()),
    ("RG",  Ridge(random_state=RANDOM_STATE)),
    ("LS",  Lasso(random_state=RANDOM_STATE, max_iter=5000)),
    ("EN",  ElasticNet(random_state=RANDOM_STATE, max_iter=5000)),
    ("KNR", KNeighborsRegressor()),
    ("DTR", DecisionTreeRegressor(random_state=RANDOM_STATE)),
    ("RFR", RandomForestRegressor(n_estimators=300, random_state=RANDOM_STATE, n_jobs=-1)),
    ("MLP", MLPRegressor(hidden_layer_sizes=(64,), max_iter=800, random_state=RANDOM_STATE)),
    ("XGB", XGBRegressor(tree_method="hist", random_state=RANDOM_STATE,
                         n_estimators=400, learning_rate=0.05, max_depth=6,
                         subsample=0.9, colsample_bytree=0.9, n_jobs=-1)),
    ("LGB", LGBMRegressor(n_estimators=500, learning_rate=0.05, max_depth=-1,
                          subsample=0.9, colsample_bytree=0.9,
                          random_state=RANDOM_STATE, n_jobs=-1, verbosity=-1)),
    ("CAT", CatBoostRegressor(iterations=600, learning_rate=0.05, depth=6,
                              random_state=RANDOM_STATE, l2_leaf_reg=3.0,
                              verbose=False, allow_writing_files=False, thread_count=-1)),
]

In [17]:
# =========================================
# 5) Baseline con CV (sin tuning)
# =========================================
from sklearn.model_selection import KFold, cross_validate
import pandas as pd

cv = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
scoring = {
    "rmse": "neg_root_mean_squared_error",
    "mae":  "neg_mean_absolute_error",
    "r2":   "r2",
}

rows = []
for name, model in candidates:
    pipe = build_pipe(model)
    scores = cross_validate(pipe, X_train, y_train, cv=cv, scoring=scoring, n_jobs=-1)
    row = {
        "model": name,
        "rmse": -scores["test_rmse"].mean(),
        "mae":  -scores["test_mae"].mean(),
        "r2":    scores["test_r2"].mean(),
    }
    rows.append(row)
    print(f"{name:>3} | RMSE {row['rmse']:.3f} | MAE {row['mae']:.3f} | R² {row['r2']:.3f}")

baseline_df = pd.DataFrame(rows).sort_values("rmse")
display(baseline_df)
baseline_best_name  = baseline_df.iloc[0]["model"]
baseline_best_model = dict(candidates)[baseline_best_name]
print(f">>> Baseline ganador: {baseline_best_name}")


 LR | RMSE 9757.503 | MAE 7713.166 | R² 0.998
 RG | RMSE 9765.618 | MAE 7716.171 | R² 0.998
 LS | RMSE 9757.421 | MAE 7713.188 | R² 0.998
 EN | RMSE 85009.069 | MAE 72951.040 | R² 0.886
KNR | RMSE 84231.890 | MAE 67425.240 | R² 0.888
DTR | RMSE 32801.003 | MAE 25993.194 | R² 0.983
RFR | RMSE 22130.153 | MAE 17358.377 | R² 0.992
MLP | RMSE 666074.755 | MAE 616800.606 | R² -6.002
XGB | RMSE 19750.941 | MAE 15365.104 | R² 0.994
LGB | RMSE 16710.538 | MAE 13094.157 | R² 0.996
CAT | RMSE 14638.624 | MAE 11125.546 | R² 0.997


Unnamed: 0,model,rmse,mae,r2
2,LS,9757.421225,7713.188403,0.998478
0,LR,9757.502718,7713.165548,0.998478
1,RG,9765.617571,7716.170733,0.998475
10,CAT,14638.623957,11125.546492,0.996601
9,LGB,16710.538414,13094.156611,0.995573
8,XGB,19750.941387,15365.104469,0.993834
6,RFR,22130.152828,17358.376705,0.992277
5,DTR,32801.002597,25993.194156,0.983009
4,KNR,84231.890444,67425.239651,0.887603
3,EN,85009.068657,72951.039955,0.8862


>>> Baseline ganador: LS


In [18]:
# =========================================
# 6) Tuning con CV y elección del ganador (rápido)
# =========================================
import tempfile, shutil
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint, uniform
try:
    from scipy.stats import loguniform
except Exception:
    from sklearn.utils.fixes import loguniform

cv_light = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
cv_heavy = KFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)

param_spaces = {
    "RG":  {"model__alpha": loguniform(1e-3, 1e3)},
    "LS":  {"model__alpha": loguniform(1e-3, 1e2)},
    "EN":  {"model__alpha": loguniform(1e-3, 1e2), "model__l1_ratio": uniform(0.0, 1.0)},
    "KNR": {"model__n_neighbors": randint(2, 50), "model__weights": ["uniform","distance"], "model__p":[1,2]},
    "DTR": {"model__max_depth": randint(3, 16), "model__min_samples_leaf": randint(1, 10)},
    "RFR": {"model__n_estimators": randint(200, 600), "model__max_depth": randint(4, 16),
            "model__min_samples_split": randint(2, 20), "model__min_samples_leaf": randint(1, 10),
            "model__max_features": ["sqrt","log2", None], "model__bootstrap": [True, False]},
    "MLP": {"model__alpha": loguniform(1e-4, 1e-1), "model__learning_rate_init": loguniform(1e-4, 1e-2)},
    "XGB": {"model__n_estimators": randint(250, 600), "model__learning_rate": loguniform(5e-3, 2e-1),
            "model__max_depth": randint(3, 9), "model__subsample": uniform(0.7, 0.3),
            "model__colsample_bytree": uniform(0.7, 0.3), "model__min_child_weight": randint(1, 6)},
    "LGB": {"model__n_estimators": randint(300, 800), "model__learning_rate": loguniform(5e-3, 2e-1),
            "model__num_leaves": randint(16, 128), "model__max_depth": randint(-1, 12),
            "model__min_child_samples": randint(10, 50), "model__subsample": uniform(0.7, 0.3),
            "model__colsample_bytree": uniform(0.7, 0.3), "model__reg_lambda": loguniform(1e-3, 10)},
    "CAT": {"model__iterations": randint(300, 700), "model__learning_rate": loguniform(5e-3, 2e-1),
            "model__depth": randint(4, 10), "model__l2_leaf_reg": loguniform(1e-2, 30),
            "model__border_count": randint(32, 255)},
}

to_tune = [
    ("RG",  Ridge(random_state=RANDOM_STATE)),
    ("EN",  ElasticNet(random_state=RANDOM_STATE, max_iter=5000)),
    ("RFR", RandomForestRegressor(random_state=RANDOM_STATE, n_jobs=1)),
    ("XGB", XGBRegressor(tree_method="hist", random_state=RANDOM_STATE, n_jobs=1)),
    ("LGB", LGBMRegressor(random_state=RANDOM_STATE, n_jobs=1, verbosity=-1)),
    ("CAT", CatBoostRegressor(random_state=RANDOM_STATE, verbose=False, allow_writing_files=False, thread_count=1)),
]

refit_metric = "rmse"  # minimizamos RMSE
scoring = {"rmse": "neg_root_mean_squared_error", "mae": "neg_mean_absolute_error", "r2": "r2"}

best_models = []
cache_dir = tempfile.mkdtemp(prefix="skcache_")
try:
    for name, base_model in to_tune:
        pipe = build_pipe(base_model)
        try: pipe.set_params(memory=cache_dir)
        except: pass
        heavy = name in ["RFR","XGB","LGB","CAT"]
        search = RandomizedSearchCV(
            pipe, param_spaces[name],
            n_iter=(15 if heavy else 12),
            cv=(cv_heavy if heavy else cv_light),
            scoring=scoring, refit="rmse",
            n_jobs=-1, random_state=RANDOM_STATE, verbose=1,
            error_score=np.nan, return_train_score=False
        )
        search.fit(X_train, y_train)
        best_models.append((name, search.best_estimator_, -search.best_score_, search.best_params_))  # RMSE positivo
    best_models.sort(key=lambda x: x[2])  # menor RMSE primero
    best_name, final_pipe_opt, best_cv_rmse, best_params = best_models[0]
    print(f">>> GANADOR OPTIMIZADO: {best_name} (RMSE CV={best_cv_rmse:.3f})")
finally:
    shutil.rmtree(cache_dir, ignore_errors=True)


Fitting 5 folds for each of 12 candidates, totalling 60 fits
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Fitting 3 folds for each of 15 candidates, totalling 45 fits
Fitting 3 folds for each of 15 candidates, totalling 45 fits
Fitting 3 folds for each of 15 candidates, totalling 45 fits
Fitting 3 folds for each of 15 candidates, totalling 45 fits
>>> GANADOR OPTIMIZADO: RG (RMSE CV=9757.503)


In [19]:
# =========================================
# 7) Comparación justa (solo CV) - baseline vs ganador
# =========================================
from sklearn.model_selection import KFold, cross_validate

same_cv = KFold(n_splits=5, shuffle=True, random_state=123)
pipe_baseline_best = build_pipe(baseline_best_model)
pipe_tuned_best    = final_pipe_opt

def cv_rmse(pipe, name):
    s = cross_validate(pipe, X_train, y_train, cv=same_cv,
                       scoring={"rmse":"neg_root_mean_squared_error"}, n_jobs=-1)
    rmse = -s["test_rmse"].mean()
    print(f"{name}: RMSE {rmse:.4f}")
    return rmse

rmse_base = cv_rmse(pipe_baseline_best, f"Baseline({baseline_best_name})")
rmse_tune = cv_rmse(pipe_tuned_best,   f"Tuned({best_name})")

# Regla: si la mejora < 1% del RMSE base, nos quedamos con el baseline (más simple)
if (rmse_base - rmse_tune) / rmse_base >= 0.01:
    winner_name, winner_pipe = best_name, pipe_tuned_best
else:
    winner_name, winner_pipe = baseline_best_name, pipe_baseline_best

print(f">>> Modelo seleccionado para TEST: {winner_name}")


Baseline(LS): RMSE 9842.7132
Tuned(RG): RMSE 9842.7222
>>> Modelo seleccionado para TEST: LS


In [20]:
# =========================================
# 8) Política de decisión (mínima)
# =========================================
POLICY = {
    "clip_to_train_range": True,   # recorta predicciones al rango visto en TRAIN
    "round_to_int": False,         # pon True si el objetivo es entero (conteos)
    "lower": float(y_train.min()),
    "upper": float(y_train.max()),
}
print("Política:", POLICY)

def postprocess_preds(yhat, policy=POLICY):
    ypp = yhat.copy()
    if policy.get("clip_to_train_range", False):
        ypp = np.clip(ypp, policy["lower"], policy["upper"])
    if policy.get("round_to_int", False):
        ypp = np.rint(ypp).astype(int)
    return ypp


Política: {'clip_to_train_range': True, 'round_to_int': False, 'lower': 111626.85342361582, 'upper': 1108236.8362913695}


In [22]:
# =========================================
# 9) Evaluación final en TEST
# =========================================
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

winner_pipe.fit(X_train, y_train)
y_pred = winner_pipe.predict(X_test)
y_pp   = postprocess_preds(y_pred, POLICY)

rmse = np.sqrt(mean_squared_error(y_test, y_pp))
mae  = mean_absolute_error(y_test, y_pp)
r2   = r2_score(y_test, y_pp)

print(f"TEST → RMSE: {rmse:.4f} | MAE: {mae:.4f} | R²: {r2:.4f}")

# vistazo rápido (primeros 10)
import pandas as pd
preview = pd.DataFrame({"y_true": y_test.reset_index(drop=True),
                        "y_pred": pd.Series(y_pp)}).head(10)
print(preview.to_string(index=False))

TEST → RMSE: 10071.5718 | MAE: 8174.7484 | R²: 0.9984
      y_true       y_pred
9.010005e+05 8.686863e+05
4.945375e+05 4.903408e+05
9.494042e+05 9.456671e+05
1.040389e+06 1.033404e+06
7.940100e+05 7.766993e+05
7.240336e+05 7.324451e+05
9.984392e+05 9.950497e+05
9.097134e+05 8.851737e+05
7.926815e+05 7.967311e+05
9.474908e+05 9.317382e+05


In [23]:
# =========================================
# 10) Interpretabilidad + breve error analysis (mínimo, FIX)
# =========================================
import numpy as np
import pandas as pd
from sklearn.inspection import permutation_importance
from sklearn.metrics import mean_absolute_error

# 10.1 ¿Cuánto recorta la política?
raw_pred = winner_pipe.predict(X_test)
clip_low  = (raw_pred < POLICY["lower"]).mean()
clip_high = (raw_pred > POLICY["upper"]).mean()
print(f"[Policy] clipped_low: {clip_low:.3%} | clipped_high: {clip_high:.3%}")

# 10.2 Importancias por Permutación (sobre columnas ORIGINALES)
r = permutation_importance(
    winner_pipe,            # pipeline completa
    X_test, y_test,
    n_repeats=10,
    random_state=RANDOM_STATE,
    scoring="neg_root_mean_squared_error"
)

feat_names = X_test.columns  # <-- CLAVE: mismos nombres que el X de entrada
imp = (pd.DataFrame({
        "feature": feat_names,
        "importance": r.importances_mean,
        "std": r.importances_std
     })
     .sort_values("importance", ascending=False)
     .head(15)
)
print("\nTop-15 importancias (perm, columnas originales):")
print(imp.to_string(index=False))

# 10.3 Errores: resumen + peores casos
y_hat = winner_pipe.predict(X_test)
y_pp  = postprocess_preds(y_hat, POLICY)
res   = pd.DataFrame({
    "y_true": y_test.reset_index(drop=True),
    "y_pred": pd.Series(y_pp)
})
res["abs_err"] = (res["y_true"] - res["y_pred"]).abs()
print("\nResumen de |error|:")
print(res["abs_err"].describe(percentiles=[.1,.25,.5,.75,.9]).to_string())

print("\nPeores 10 casos (|error| alto):")
top_bad_idx = res["abs_err"].nlargest(10).index
print(pd.concat([res.loc[top_bad_idx], X_test.reset_index(drop=True).loc[top_bad_idx]], axis=1)
      .to_string(index=False))

# 10.4 Métricas por subgrupos (ej. clase_salario)
if "clase_salario" in X_test.columns:
    by_cls = (pd.concat([X_test.reset_index(drop=True)[["clase_salario"]], res], axis=1)
              .groupby("clase_salario")["abs_err"]
              .agg(["count","mean","median"]))
    print("\nMAE por clase_salario:")
    print(by_cls.to_string())


[Policy] clipped_low: 0.000% | clipped_high: 0.000%

Top-15 importancias (perm, columnas originales):
             feature    importance          std
      Square_Footage 352695.277099 11688.904969
            Lot_Size  20607.122183  1133.220185
          Year_Built  20037.131367   765.710123
        Num_Bedrooms  12688.127963   633.297337
       Num_Bathrooms   3983.328415   313.814426
         Garage_Size   1618.491318   184.963529
Neighborhood_Quality    -15.831120    13.841836

Resumen de |error|:
count      200.000000
mean      8174.748374
std       5897.793404
min         23.808552
10%       1465.929161
25%       3643.174188
50%       7284.703856
75%      11332.201698
90%      16287.768525
max      32314.198054

Peores 10 casos (|error| alto):
       y_true        y_pred      abs_err  Square_Footage  Num_Bedrooms  Num_Bathrooms  Year_Built  Lot_Size  Garage_Size  Neighborhood_Quality
901000.488234 868686.290180 32314.198054            4012             3              1        2016

In [24]:
Xtr = winner_pipe.named_steps["prep"].transform(X_test)
model = winner_pipe.named_steps["model"]
r2 = permutation_importance(model, Xtr, y_test, n_repeats=10,
                            random_state=RANDOM_STATE,
                            scoring="neg_root_mean_squared_error")
feat_names_ohe = winner_pipe.named_steps["prep"].get_feature_names_out()
imp_ohe = pd.DataFrame({"feature": feat_names_ohe,
                        "importance": r2.importances_mean,
                        "std": r2.importances_std}).sort_values("importance", ascending=False).head(20)


In [25]:
imp_ohe

Unnamed: 0,feature,importance,std
0,num__Square_Footage,352695.277099,11688.904969
4,num__Lot_Size,20607.122183,1133.220185
3,num__Year_Built,20037.131367,765.710123
1,num__Num_Bedrooms,12688.127963,633.297337
2,num__Num_Bathrooms,3983.328415,313.814426
5,num__Garage_Size,1618.491318,184.963529
6,num__Neighborhood_Quality,-15.83112,13.841836


# Interpretación y Conclusiones del Modelo

## 1. Contexto y datos
- **Dataset:** 1 000 registros con 7 variables predictoras numéricas.  
- **Variable objetivo:** `House_Price` (precio de la vivienda).

## 2. Preparación y división
- División **80 % entrenamiento / 20 % prueba**.  
- Preprocesamiento: escalado estándar; no se requirió imputación ni codificación especial.

## 3. Selección de modelos
- Se evaluaron **11 regresores** (lineales, árboles, ensambles y redes).  
- El *baseline* obtuvo **R² ≈ 0.998** y **RMSE ≈ 9 700**.  
- El mejor desempeño lo logró **Lasso Regression (LS)**.

## 4. Ajuste de hiperparámetros
- Se aplicó **RandomizedSearch** en varios modelos (Ridge, ElasticNet, Random Forest, XGB, LightGBM, CatBoost).  
- Las mejoras fueron **menores al 1 %** respecto a Lasso, por lo que se mantuvo este modelo.

## 5. Resultados en el conjunto de prueba
- **RMSE:** ~**10 071**  
- **MAE:** ~**8 175**  
- **R²:** ~**0.9984**  
- El error medio absoluto equivale a **< 2 % del precio real**, mostrando una excelente capacidad de generalización.

## 6. Importancia de variables (permutación)
| Variable               | Importancia aproximada |
|------------------------|------------------------|
| **Square_Footage**     | **~352 k (impacto RMSE)** |
| Lot_Size               | ~20 k |
| Year_Built             | ~20 k |
| Num_Bedrooms           | ~12 k |
| Num_Bathrooms          | ~4 k |
| Garage_Size            | ~1.6 k |
| Neighborhood_Quality   | ~0 (sin aporte) |

- **Square_Footage** (metros cuadrados) es, por amplio margen, el factor más influyente.
- *Neighborhood_Quality* prácticamente no aporta valor predictivo.

## 7. Análisis de errores
- Error absoluto medio: **~8 175**.  
- Máximo error: **~32 300** en propiedades atípicas (muy grandes o muy pequeñas).

---

## **Conclusión general**
El modelo **lineal regularizado (Lasso)** explica casi **toda la variabilidad de los precios** (R² ≈ 0.998) y predice con **alta precisión**.  
El **tamaño de la vivienda** es el principal determinante del precio, seguido por **tamaño del terreno**, **año de construcción** y **número de dormitorios**.  
Variables como *Neighborhood_Quality* aportan muy poco, lo que sugiere que **para estimar precios basta con las características físicas principales**.  
No se requiere un modelo más complejo.