In [7]:
import matplotlib.pyplot as plt
import numpy as np
import random
import statsmodels.api as sm
import statsmodels.stats.diagnostic as smd
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
import seaborn as sns
import statsmodels.api as sm
import scipy.stats as stats
import statsmodels.stats.diagnostic as diag
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.impute import SimpleImputer

from sklearn.compose import make_column_selector as selector #Para seleccionar de forma automática las variables numéricas y categóricas
from sklearn.preprocessing import OneHotEncoder #Para codificar las variables categóricas usando dummies
from sklearn.preprocessing import StandardScaler #Para normalizar las variables numéricas
from sklearn.compose import ColumnTransformer #Modifica las columnas usando los preprocesadores
from sklearn.pipeline import make_pipeline #Planifica una secuencia de procesos
from sklearn import set_config #Para mostrar graficamente el pipeline
from sklearn.model_selection import GridSearchCV

set_config(display='diagram')
#Metrics
from sklearn.metrics import make_scorer, accuracy_score,precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score ,precision_score,recall_score,f1_score

In [2]:

df = pd.read_csv('train.csv')
df_clean = df.dropna(subset=['OverallQual', 'GrLivArea', 'TotalBsmtSF', 'SalePrice'])


print("Dimensiones del DataFrame:", df.shape)
print(df.head())

Dimensiones del DataFrame: (1460, 81)
   Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
0   1          60       RL         65.0     8450   Pave   NaN      Reg   
1   2          20       RL         80.0     9600   Pave   NaN      Reg   
2   3          60       RL         68.0    11250   Pave   NaN      IR1   
3   4          70       RL         60.0     9550   Pave   NaN      IR1   
4   5          60       RL         84.0    14260   Pave   NaN      IR1   

  LandContour Utilities  ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold  \
0         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   
1         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      5   
2         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      9   
3         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   
4         Lvl    AllPub  ...        0    NaN   NaN         NaN       0     12   

  YrSold  SaleType  SaleCondit

In [3]:
df_clean = df.dropna(subset=['OverallQual', 'GrLivArea', 'TotalBsmtSF', 'SalePrice'])

precio_33 = np.percentile(df_clean['SalePrice'], 33)
precio_66 = np.percentile(df_clean['SalePrice'], 66)

def categorizar_precio(precio):
    if precio <= precio_33:
        return 'Económica'
    elif precio <= precio_66:
        return 'Intermedia'
    else:
        return 'Cara'


df_clean['PriceCategory'] = df_clean['SalePrice'].apply(categorizar_precio)

print(df_clean['PriceCategory'].value_counts(normalize=True))

PriceCategory
Cara          0.340411
Económica     0.330822
Intermedia    0.328767
Name: proportion, dtype: float64


In [11]:

X = df_clean.drop(['Id','SalePrice','PriceCategory'], axis=1)
y = df_clean['PriceCategory']


X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.20,
    random_state=42,
    stratify=y
)


preprocesador = ColumnTransformer([
    ("num", make_pipeline(
        SimpleImputer(strategy="median"),
        StandardScaler()
    ), selector(dtype_include=["int64","float64"])),
    ("cat", make_pipeline(
        SimpleImputer(strategy="most_frequent"),
        OneHotEncoder(handle_unknown="ignore")
    ), selector(dtype_include=object))
])


# Usa 'preprocessor_fixed' en lugar de 'preprocesador'
modelo1 = make_pipeline(
    preprocesador,
    MLPClassifier(hidden_layer_sizes=(5,3), activation="tanh", solver="adam", max_iter=300, verbose=True, random_state=42)
)
modelo2 = make_pipeline(
    preprocesador,
    MLPClassifier(hidden_layer_sizes=(10,5), activation="relu", solver="adam", max_iter=300, verbose=True, random_state=42)
)

# Entrena y evalúa
modelo1.fit(X_train, y_train)
modelo2.fit(X_train, y_train)
# … evaluación como antes …


# Evaluar los modelos en el conjunto de prueba
for i, modelo in enumerate([modelo1, modelo2], start=1):
    y_pred = modelo.predict(X_test)
    print(f"\n--- Evaluación Modelo {i} ---")
    print("Accuracy :", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred, average='weighted'))
    print("Recall   :", recall_score(y_test, y_pred, average='weighted'))
    print("F1-score :", f1_score(y_test, y_pred, average='weighted'))
    print("\nClassification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Iteration 1, loss = 1.19514329
Iteration 2, loss = 1.09905710
Iteration 3, loss = 1.01297718
Iteration 4, loss = 0.94013716
Iteration 5, loss = 0.87743444
Iteration 6, loss = 0.83291879
Iteration 7, loss = 0.79894293
Iteration 8, loss = 0.77114572
Iteration 9, loss = 0.74849473
Iteration 10, loss = 0.72914893
Iteration 11, loss = 0.71272449
Iteration 12, loss = 0.69745531
Iteration 13, loss = 0.68380692
Iteration 14, loss = 0.67112269
Iteration 15, loss = 0.65955715
Iteration 16, loss = 0.64923108
Iteration 17, loss = 0.63872574
Iteration 18, loss = 0.62899549
Iteration 19, loss = 0.61976816
Iteration 20, loss = 0.61053097
Iteration 21, loss = 0.60181694
Iteration 22, loss = 0.59314861
Iteration 23, loss = 0.58499923
Iteration 24, loss = 0.57694785
Iteration 25, loss = 0.56887984
Iteration 26, loss = 0.56112805
Iteration 27, loss = 0.55348571
Iteration 28, loss = 0.54591316
Iteration 29, loss = 0.53858579
Iteration 30, loss = 0.53179884
Iteration 31, loss = 0.52466459
Iteration 32, los



Iteration 11, loss = 0.89996746
Iteration 12, loss = 0.89138969
Iteration 13, loss = 0.88305902
Iteration 14, loss = 0.87605542
Iteration 15, loss = 0.86862202
Iteration 16, loss = 0.86152548
Iteration 17, loss = 0.85467553
Iteration 18, loss = 0.84779751
Iteration 19, loss = 0.84091089
Iteration 20, loss = 0.83353346
Iteration 21, loss = 0.82603282
Iteration 22, loss = 0.81857613
Iteration 23, loss = 0.81132273
Iteration 24, loss = 0.80433331
Iteration 25, loss = 0.79625383
Iteration 26, loss = 0.78883047
Iteration 27, loss = 0.78214403
Iteration 28, loss = 0.77530864
Iteration 29, loss = 0.76891856
Iteration 30, loss = 0.76303794
Iteration 31, loss = 0.75655760
Iteration 32, loss = 0.75147573
Iteration 33, loss = 0.74372864
Iteration 34, loss = 0.73819014
Iteration 35, loss = 0.73101499
Iteration 36, loss = 0.72474366
Iteration 37, loss = 0.71880236
Iteration 38, loss = 0.71279568
Iteration 39, loss = 0.70678816
Iteration 40, loss = 0.70045024
Iteration 41, loss = 0.69430310
Iteratio

