In [None]:
# Housing Analysis in Mexico
# Notebook path: notebooks/housing_analysis.ipynb

# 1️⃣ Imports
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# 2️⃣ Set folder paths
# Relative paths: input CSVs are read from DATA_PATH, figures are written to
# OUTPUT_PATH.
DATA_PATH = "../data/raw"
OUTPUT_PATH = "../outputs"

# Create the output folder up front; exist_ok=True makes re-runs a no-op.
os.makedirs(OUTPUT_PATH, exist_ok=True)

# 3️⃣ Load data
# Replace 'housing_data.csv' with your actual CSV file name in data/raw/
data_file = os.path.join(DATA_PATH, "housing_data.csv")

# Keep the try body down to the single call that can raise FileNotFoundError.
try:
    df = pd.read_csv(data_file)
except FileNotFoundError:
    # Later sections are gated on `'df' in globals()`. Drop any `df` left in
    # the namespace by an earlier run so that a failed load cannot silently
    # fall through to analysing stale data.
    globals().pop("df", None)
    print(f"File not found: {data_file}")
else:
    print("Data loaded successfully ✅")

# 4️⃣ Quick exploration
# Runs only when the load step actually left a `df` in the namespace.
if 'df' in globals():
    print("First 5 rows:")
    # NOTE(review): display() is not imported in this file — presumably the
    # IPython/Jupyter built-in; confirm before running as a plain script.
    display(df.head())

    print("\nData info:")
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would emit a stray "None" line after the summary.
    df.info()

    print("\nMissing values:")
    # Number of null entries per column.
    print(df.isnull().sum())
    
# 5️⃣ Example visualizations
if 'df' in globals():
    # Distribution of prices
    plt.figure(figsize=(10,6))
    sns.histplot(df['price'], bins=30, kde=True)
    plt.title("Distribution of Housing Prices")
    plt.xlabel("Price")
    plt.ylabel("Count")
    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_PATH, "price_distribution.png"))
    plt.show()
    
    # Price vs Size
    if 'size' in df.columns:
        plt.figure(figsize=(10,6))
        sns.scatterplot(data=df, x='size', y='price')
        plt.title("Price
