In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Load credentials from .env
load_dotenv()

# Fail early, with a readable message, if a setting is absent. Without this check
# a missing variable would be interpolated into the connection URL as the text "None"
# and only surface later as an obscure connection error.
_REQUIRED_DB_VARS = ("DB_HOST", "DB_PORT", "DB_NAME", "DB_USER", "DB_PASS")
_missing_db_vars = [name for name in _REQUIRED_DB_VARS if os.getenv(name) is None]
if _missing_db_vars:
    raise RuntimeError(
        "Missing database settings in the environment/.env file: "
        + ", ".join(_missing_db_vars)
    )

DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
DB_USER = os.getenv("DB_USER")
DB_PASS = os.getenv("DB_PASS")

# Create engine
# The user name and password are percent-encoded before being placed in the URL:
# characters such as "@", "/", ":" or "#" in a raw password would otherwise be
# read as URL delimiters and break (or silently misdirect) the connection.
engine = create_engine(
    f"postgresql+psycopg2://{quote_plus(DB_USER)}:{quote_plus(DB_PASS)}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

# Remove pandas' cap on the number of rows shown when a DataFrame is displayed.
pd.set_option('display.max_rows', None)


### 🧠 Business Question:
What is the average median house price by city in the Zillow dataset?


In [2]:
# Descriptive query: average median_price per city, highest average first.
#
# NULLS LAST is required because PostgreSQL places NULLs first under DESC ordering;
# without it, a city whose median_price values are all NULL (AVG -> NULL) would be
# listed at the top as if it were the most expensive.
# The secondary sort on city keeps the order of tied rows deterministic across runs.
sql_query = '''
SELECT
    city,
    ROUND(AVG(median_price), 2) AS avg_median_price
FROM
    sql_project.zillow_raw
GROUP BY
    city
ORDER BY
    avg_median_price DESC NULLS LAST,
    city;
'''

# Run the query through the shared engine and preview the first rows.
df = pd.read_sql(sql_query, con=engine)
df.head()


Unnamed: 0,city,avg_median_price
0,Los Angeles,880000.0
1,Seattle,720000.0
2,Miami,600000.0
3,Austin,540000.0
4,Denver,510000.0


### 📊 Insight:
Los Angeles has the highest average median house price (880,000), followed by Seattle (720,000); Denver has the lowest (510,000) among the five cities shown.

### ✅ Recommendation:
If targeting affordability, Denver and Austin (the two lowest-priced cities in this sample) might be a focus for first-time buyers.

### 🔮 Prediction:
If trends continue, cities like Los Angeles and Seattle may become increasingly unaffordable relative to others.


### ❓ Business Question:
Which city has the highest inventory, and how does that relate to its median housing price?


In [3]:
# Row-level listing of inventory and median price, largest inventory first.
#
# price_per_home is median_price divided by inventory; NULLIF turns an inventory
# of 0 into NULL so the division yields NULL instead of a division-by-zero error.
# NOTE(review): despite its name, this column is a price-to-inventory ratio, not a
# per-home price (median_price already is one) -- consider renaming it.
#
# NULLS LAST is required because PostgreSQL places NULLs first under DESC ordering;
# without it, rows with an unknown inventory would be shown as the "highest".
# The secondary sort on city keeps the order of tied rows deterministic across runs.
# NOTE(review): this query does not aggregate per city; if the table holds several
# rows per city, the preview shows the top rows rather than the top cities -- confirm.
sql_query = '''
SELECT
    city,
    inventory,
    median_price,
    ROUND(median_price::numeric / NULLIF(inventory, 0), 2) AS price_per_home
FROM sql_project.zillow_raw
ORDER BY inventory DESC NULLS LAST, city;
'''

# Run the query through the shared engine and preview the first rows.
df = pd.read_sql(sql_query, con=engine)
df.head()


Unnamed: 0,city,inventory,median_price,price_per_home
0,Los Angeles,1245,880000,706.83
1,Seattle,1150,720000,626.09
2,Denver,1021,510000,499.51
3,Austin,987,540000,547.11
4,Miami,843,600000,711.74


### 📌 Insight:
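Los Angeles has the highest inventory and also the highest median price, indicating that even with greater supply, demand keeps prices high. (The `price_per_home` ratio, median price divided by inventory, is actually highest for Miami at 711.74 versus 706.83 for Los Angeles, because Miami has the smallest inventory.) This suggests inventory alone doesn’t drive pricing.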


### ✅ Recommendation:
Investigate demand factors like migration trends and income levels in high-inventory cities to better understand pricing resilience.


### 🔮 Prediction:
Cities like Denver and Austin may see price corrections if inventory keeps rising and demand doesn’t catch up.
