<a href="https://colab.research.google.com/github/Mario2216/API/blob/main/PrecioDeCasas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# ============================================================================
# ANÁLISIS COMPLETO DE PRECIOS DE CASAS - VERSIÓN FINAL CORREGIDA
# ============================================================================
# Compatible con Google Colab
# Acepta cualquier nombre de archivo CSV
# ============================================================================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from matplotlib.backends.backend_pdf import PdfPages
import warnings
import io
warnings.filterwarnings('ignore')

# Plot styling shared by every figure produced by this script.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Title banner printed once at the start of the run.
# The accented characters are written as real Unicode (previously mis-encoded).
print("=" * 70)
print("  ANÁLISIS DE PREDICCIÓN DE PRECIOS DE CASAS")
print("=" * 70)

# ============================================================================
# 1. DATA LOADING
# ============================================================================
# Produces `df`, the raw training DataFrame used by every later step.
# In Google Colab the user is prompted to upload a CSV (any file name);
# elsewhere the script falls back to a local 'train.csv'.

print("\n[1/9] Cargando datos...")

# Only the import itself is guarded: an ImportError raised later (for example
# while parsing the CSV) must not be mistaken for "not running in Colab" and
# silently redirected to the local-file fallback.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    print("📁 Sube tu archivo CSV de entrenamiento:")
    uploaded = files.upload()

    # Fail with a clear message instead of an IndexError when the upload
    # dialog yields no file.
    if not uploaded:
        raise ValueError("No se subió ningún archivo CSV")

    # Use the first uploaded file, whatever it is called.
    filename = next(iter(uploaded))
    print(f"✓ Archivo detectado: {filename}")

    # The upload result maps file name -> raw bytes; wrap them for pandas.
    df = pd.read_csv(io.BytesIO(uploaded[filename]))
else:
    # Local execution (outside Google Colab).
    print("⚠️  No estás en Google Colab. Usando archivo local...")
    df = pd.read_csv('train.csv')

print(f"✓ Datos cargados: {df.shape[0]} filas × {df.shape[1]} columnas")
print("\nPrimeras 5 filas:")
print(df.head())

# ============================================================================
# 2. EXPLORATORY ANALYSIS
# ============================================================================
# Validates that the target column exists, prints its summary statistics and
# reports which columns contain missing values. Leaves `missing` (a Series of
# per-column null counts, sorted descending, zero-count columns removed).

print("\n[2/9] Análisis exploratorio de datos...")

# The target column is mandatory; abort early with the available column names
# so the user can see what was actually loaded.
if 'SalePrice' not in df.columns:
    print("❌ ERROR: No se encuentra la columna 'SalePrice'")
    print("Columnas disponibles:", df.columns.tolist())
    raise ValueError("Falta la columna 'SalePrice'")

# Summary statistics of the target.
print("\n📊 Estadísticas de SalePrice:")
print(df['SalePrice'].describe())

# Null count per column, keeping only columns that have at least one null.
missing = df.isnull().sum()
missing = missing[missing > 0].sort_values(ascending=False)
print(f"\n✓ Variables con valores faltantes: {len(missing)}")
if not missing.empty:
    print("Top 5 variables con más faltantes:")
    print(missing.head())

# ============================================================================
# 3. TRAIN/TEST SPLIT
# ============================================================================
# Builds the feature matrix `X` and target `y`, then holds out 20% of the rows
# as a test set using a fixed seed so the split is reproducible.

print("\n[3/9] Dividiendo datos en Train/Test...")

# The target is always removed from the features; 'Id' is removed too when
# the dataset has such a column.
columns_to_drop = ['SalePrice']
if 'Id' in df.columns:
    columns_to_drop.insert(0, 'Id')

X = df.drop(columns=columns_to_drop)
y = df['SalePrice']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Report the size of each split and its share of the full dataset.
print(f"✓ Train: {len(X_train)} muestras ({len(X_train)/len(df)*100:.1f}%)")
print(f"✓ Test:  {len(X_test)} muestras ({len(X_test)/len(df)*100:.1f}%)")

# ============================================================================
# 4. PREPROCESSING
# ============================================================================
# Builds `preprocessor` (median-impute + standardize numeric columns,
# mode-impute + one-hot encode the remaining columns) and produces the
# transformed arrays `X_train_proc` and `X_test_proc`.

print("\n[4/9] Preprocesando datos...")

# Split the feature columns into a numeric group and a non-numeric group.
# Selecting by the generic 'number' family, instead of the exact dtype names
# 'int64' / 'float64' / 'object', makes every column land in exactly one of
# the two lists. A column listed in neither would be discarded without any
# warning, because ColumnTransformer defaults to remainder='drop'.
num_vars = X.select_dtypes(include='number').columns.tolist()
cat_vars = X.select_dtypes(exclude='number').columns.tolist()

print(f"✓ Variables numéricas: {len(num_vars)}")
print(f"✓ Variables categóricas: {len(cat_vars)}")

# Numeric pipeline: fill gaps with the median, then standardize.
num_pipe = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Categorical pipeline: fill gaps with the most frequent value, then one-hot
# encode. handle_unknown='ignore' lets transform() accept categories that were
# not present when the encoder was fitted.
cat_pipe = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

# Route each column group through its own pipeline.
preprocessor = ColumnTransformer([
    ('num', num_pipe, num_vars),
    ('cat', cat_pipe, cat_vars)
])

# Fit on the training split only, then apply the fitted transformation to the
# test split.
print("  Transformando datos...")
X_train_proc = preprocessor.fit_transform(X_train)
X_test_proc = preprocessor.transform(X_test)

print(f"✓ Datos preprocesados: {X_train_proc.shape[1]} features después de encoding")

# ======================

  ANÁLISIS DE PREDICCIÓN DE PRECIOS DE CASAS

[1/9] Cargando datos...
📁 Sube tu archivo CSV de entrenamiento:


Saving train.csv to train (3).csv
✓ Archivo detectado: train (3).csv
✓ Datos cargados: 1460 filas × 81 columnas

Primeras 5 filas:
   Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
0   1          60       RL         65.0     8450   Pave   NaN      Reg   
1   2          20       RL         80.0     9600   Pave   NaN      Reg   
2   3          60       RL         68.0    11250   Pave   NaN      IR1   
3   4          70       RL         60.0     9550   Pave   NaN      IR1   
4   5          60       RL         84.0    14260   Pave   NaN      IR1   

  LandContour Utilities  ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold  \
0         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   
1         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      5   
2         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      9   
3         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   
4         Lvl 