# **Personalized Real Estate Agent**

In [7]:
# %env OPENAI_API_KEY=your api key

In [8]:
# ===============================================================
#  UDACITY REAL ESTATE AGENT — FULL WORKING VERSION
#  Uses TF-IDF Vector Database (Udacity-Compatible)
# ===============================================================

import os
import json
import textwrap
from io import StringIO
from typing import Optional

import pandas as pd
from pydantic import BaseModel

# LLM (OpenAI <1.0)
import openai
# Load the API key from the environment and route requests to the
# Vocareum-hosted endpoint used by the legacy (<1.0) openai module.
openai.api_key = os.environ.get("OPENAI_API_KEY")
openai.api_base = "https://openai.vocareum.com/v1"

# Fail fast here rather than on the first LLM call further down.
if openai.api_key in (None, ""):
    raise ValueError("OPENAI_API_KEY not set.")
print("OpenAI client initialized.\n")

# ========================================================
# STEP 1 — Generate Listings CSV Using LLM
# ========================================================

# Instructions for the model: ten synthetic listings as fully quoted CSV,
# with nothing else in the reply so it can be parsed directly.
prompt = """
Generate 10 synthetic house listings in STRICT CSV format.
Headers: Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description

RULES:
- Output ONLY CSV (header + 10 rows)
- All fields must be enclosed in double quotes
- No commas inside numeric values
- Description must NOT contain commas
- No commentary outside CSV
"""

# Single-turn request; temperature 0.0 asks for the least random output.
resp = openai.ChatCompletion.create(
    temperature=0.0,
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": prompt}],
)

# The CSV is the text of the first (only) choice, trimmed of outer whitespace.
csv_text = resp["choices"][0]["message"]["content"]
csv_text = csv_text.strip()

# Persist the raw reply next to the notebook.
with open("homes.csv", "w", encoding="utf-8") as f:
    f.write(csv_text)

# Parse the same text in memory into the listings DataFrame.
df = pd.read_csv(
    StringIO(csv_text),
    quotechar='"',
    skipinitialspace=False,
)

print("=== STEP 1: DataFrame ===")
print(df.head(10))
print("Shape:", df.shape)
print("\n")


# ========================================================
# STEP 2 — TF-IDF Vector Database (Udacity-Compatible)
# ========================================================

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def document_text(row):
    """Flatten one listing into a single space-separated string for TF-IDF.

    Args:
        row: Mapping-like listing (e.g. a DataFrame row) providing the keys
            "Neighborhood", "Price", "Bedrooms", "Bathrooms", "House Size"
            and "Description".

    Returns:
        The six fields in that order joined by single spaces, with the
        bedroom and bathroom counts suffixed by " bedrooms" / " bathrooms".
    """
    neighborhood = str(row["Neighborhood"])
    price = str(row["Price"])
    bedrooms = f"{row['Bedrooms']} bedrooms"
    bathrooms = f"{row['Bathrooms']} bathrooms"
    house_size = str(row["House Size"])
    description = str(row["Description"])
    return f"{neighborhood} {price} {bedrooms} {bathrooms} {house_size} {description}"

# Render every listing, in DataFrame row order, to the text that gets vectorised.
docs_raw = list(map(document_text, (listing for _, listing in df.iterrows())))

# Fit the vocabulary on the listings; the fitted vectorizer is reused later
# to project the buyer's query into the same space.
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(docs_raw)

print("=== STEP 2: Vector DB created using TF-IDF ===")
print("Matrix shape:", tfidf_matrix.shape)
print("\n")


# ========================================================
# STEP 3 — Extract User Preferences
# ========================================================

class Prefs(BaseModel):
    """Buyer preferences extracted from free text.

    Every field is optional and defaults to None, meaning "not specified".
    The numeric fields are used as filter thresholds when matching listings;
    `preferences` is free text that feeds the similarity query.
    """
    # Maximum acceptable price; listings priced above it are filtered out.
    budget: Optional[int] = None
    # Minimum number of bedrooms a listing must have.
    bedrooms: Optional[int] = None
    # Minimum number of bathrooms a listing must have.
    bathrooms: Optional[int] = None
    # Minimum house size, compared against the numeric part of "House Size".
    min_size: Optional[int] = None
    # Free-text wishes; never prompted for when missing.
    preferences: Optional[str] = None

# Collect a free-text description of what the buyer is looking for.
print("Describe what you want (ex: 3 bedrooms under 350000, 2 bathrooms).")
user_text = input("\nYour text:\n> ")

# Ask the LLM to turn the free text into the JSON shape expected by Prefs.
extract_prompt = f"""
Extract this as JSON:

\"\"\"{user_text}\"\"\"

Format:
{{
 "budget": int|null,
 "bedrooms": int|null,
 "bathrooms": int|null,
 "min_size": int|null,
 "preferences": string|null
}}
"""

resp = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": extract_prompt}],
    temperature=0.0
)

# The model may wrap the JSON in prose or a Markdown code fence, so keep only
# the outermost {...} span before parsing.
raw_reply = resp["choices"][0]["message"]["content"].strip()
json_start, json_end = raw_reply.find("{"), raw_reply.rfind("}")
if json_start != -1 and json_end > json_start:
    raw_reply = raw_reply[json_start:json_end + 1]

try:
    prefs = Prefs(**json.loads(raw_reply))
except (ValueError, TypeError) as exc:
    # ValueError covers malformed JSON and pydantic validation failures;
    # TypeError covers a reply that parses to something other than an object.
    # Fall back to empty preferences so the manual prompts below can fill them.
    print(f"\nCould not parse preferences from the model reply ({type(exc).__name__}).")
    prefs = Prefs()

# Ask for missing fields manually (free-text `preferences` is never prompted for).
missing = [field for field, val in prefs.dict().items() if val is None and field != "preferences"]

if missing:
    print("\nMissing fields:", missing)
    for m in missing:
        val = input(f"Enter {m} (or blank): ")
        if not val.strip():
            # Blank answer: leave the field unset.
            continue
        try:
            # Accept "$350,000"-style input, matching how listing prices are cleaned.
            setattr(prefs, m, int(val.replace("$", "").replace(",", "").strip()))
        except ValueError:
            # Unparseable answer: the field stays None, but tell the user why.
            print(f"Ignoring {m}: {val!r} is not a whole number.")

print("\n=== STEP 3: Final Preferences ===")
print(json.dumps(prefs.dict(), indent=2))


# ========================================================
# STEP 4 — Semantic Search Using TF-IDF + Filtering
# ========================================================

# Build the query: the free-text wishes plus one phrase per numeric
# constraint that was actually provided (unset ones contribute "").
query_text = " ".join((
    prefs.preferences if prefs.preferences else "",
    "" if not prefs.bedrooms else f"{prefs.bedrooms} bedrooms",
    "" if not prefs.bathrooms else f"{prefs.bathrooms} bathrooms",
    "" if not prefs.budget else f"{prefs.budget} budget",
    "" if not prefs.min_size else f"{prefs.min_size} sqft",
))

print("\nSemantic Query:", query_text)

# Project the query with the fitted vectorizer and score it against every listing.
query_vec = vectorizer.transform([query_text])
scores = cosine_similarity(query_vec, tfidf_matrix)[0]

# Attach the scores to the listings and order them best-first.
df["similarity"] = scores
ranked = df.sort_values("similarity", ascending=False)

# Hard filters: walk the ranked listings and keep those meeting every
# constraint the buyer specified.
filtered = []
for _, row in ranked.iterrows():
    # Strip currency/unit decoration so the values compare as integers.
    price = int(str(row["Price"]).replace("$", "").replace(",", ""))
    size = int(str(row["House Size"]).replace("sqft", "").strip())

    # Each check is evaluated on its own; an unset preference never rejects.
    over_budget = prefs.budget and price > prefs.budget
    too_few_bedrooms = prefs.bedrooms and row["Bedrooms"] < prefs.bedrooms
    too_few_bathrooms = prefs.bathrooms and row["Bathrooms"] < prefs.bathrooms
    too_small = prefs.min_size and size < prefs.min_size

    ok = not (over_budget or too_few_bedrooms or too_few_bathrooms or too_small)
    if ok:
        filtered.append(row)

print("\n=== STEP 4: Filtered Matches ===")
if filtered:
    # Show at most the five best matches, hiding the internal score column.
    for i, row in enumerate(filtered[:5], start=1):
        print(f"\nMatch #{i}")
        for col in df.columns:
            if col == "similarity":
                continue
            print(f"{col}: {row[col]}")
else:
    print("No matches found.")


# ========================================================
# STEP 5 — Personalized Recommendation (LLM)
# ========================================================

if filtered:
    # `filtered` keeps the similarity-descending order, so the first row is
    # the best match. Drop the internal similarity score: it is a ranking
    # artifact, not a property fact, and the prompt tells the model to keep
    # all facts unchanged.
    best = {key: value for key, value in filtered[0].to_dict().items() if key != "similarity"}

    # Plain ASCII "1-2" avoids sending a mis-encoded dash to the model.
    prompt = f"""
Rewrite this property into a short personalized recommendation.
Keep all facts exactly the same. Add 1-2 friendly emojis.

Property:
{json.dumps(best, indent=2)}

Buyer preferences:
{json.dumps(prefs.dict(), indent=2)}

Return only the rewritten description.
"""

    # A higher temperature than the extraction steps allows more varied wording.
    resp = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7
    )

    print("\n=== STEP 5: Personalized Recommendation ===")
    # Re-wrap the reply to 100 columns for readable notebook output.
    print(textwrap.fill(resp["choices"][0]["message"]["content"].strip(), width=100))
else:
    print("No property to personalize.")


OpenAI client initialized.

=== STEP 1: DataFrame ===
      Neighborhood       Price  Bedrooms  Bathrooms House Size  \
0         Westwood    $500,000         3          2  2000 sqft   
1        Brentwood    $750,000         4          3  2500 sqft   
2      Silver Lake    $600,000         2          1  1500 sqft   
3  Hollywood Hills  $1,200,000         5          4  3500 sqft   
4     Venice Beach    $900,000         3          2  1800 sqft   
5        Echo Park    $650,000         2          1  1200 sqft   
6     Santa Monica  $1,000,000         4          3  2200 sqft   
7        Los Feliz    $800,000         3          2  1900 sqft   
8      Culver City    $700,000         2          2  1600 sqft   
9   Marina del Rey    $850,000         3          2  1700 sqft   

                                         Description  
0  Beautiful single-family home with modern upgrades  
1     Spacious two-story house with a large backyard  
2    Charming bungalow with original hardwood floors  