In [None]:
import json
import os
import re
import time

import openai

# OpenAI API setup.
# The credential is read from the environment so that no secret is stored in
# the notebook or its version-control history.
# NOTE(review): the key that was previously committed in this cell should be
# treated as compromised and revoked.
openai.api_base = os.environ.get("OPENAI_API_BASE", "https://openai.vocareum.com/v1")
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    # Fail fast with an actionable message instead of a later, opaque API error.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it before starting the kernel "
        "instead of hard-coding the key in this notebook."
    )
openai.api_key = api_key

# Listing generation settings
TOTAL_LISTINGS = 100  # Total number of listings to generate
BATCH_SIZE = 50  # Listings requested per API call
OUTPUT_FILE = "listings.json"  # File the generated listings are written to

In [None]:
# Prompt template for listing generation. The single "{}" placeholder is filled
# with the batch size via str.format, so any literal brace added to this text
# must be doubled ("{{" / "}}").
# The closing paragraph pins the response to a {"listings": [...]}-shaped object
# because the parsing code reads the "listings" key and the per-listing field
# names ("Neighborhood", "House Size", ...) verbatim.
listings_prompt = """
You are an experienced real estate agent with extensive knowledge of property listings across all 50 states in the U.S.,
spanning diverse neighborhoods from luxury estates to budget-friendly homes.

Generate real estate listings following this schema:

Neighborhood: A real neighborhood in a randomly selected city
City: The city where the property is located
State: The state where the property is located
Price: The property price, ranging from $100,000 to $5,000,000
Bedrooms: Number of bedrooms, ranging from 1 to 15
Bathrooms: Number of bathrooms, ranging from 1 to 5
House Size: Property size, ranging from 500 sqft to 50,000 sqft

Description: A 40-word description of the house.
Neighborhood Description: A brief description of the neighborhood.

The more expensive the property, the higher the number of bedrooms and bathrooms, the larger the size,
and the more detailed the descriptions of the property and neighborhood.

Return exactly {} listings as a single JSON object with one top-level key, "listings",
whose value is an array of listing objects. Use the field names above, spelled exactly
as shown, as the keys of every listing object. Output only the JSON, with no extra text.
"""

In [None]:
def clean_json_output(response_text):
    """Return the JSON payload of a model reply with any Markdown fence removed.

    If the reply contains a fenced code block (```json ... ``` or a bare
    ``` ... ```, tag matched case-insensitively), only the content of the first
    such block is returned, so explanatory text before or after the fence is
    dropped. Replies without a fence are returned with surrounding whitespace
    stripped.

    Args:
        response_text: Raw text returned by the model; may be None or empty.

    Returns:
        The extracted text, ready to be passed to ``json.loads``. An empty
        string is returned for None/empty input.
    """
    if not response_text:
        return ""

    # Lazy match so that only the first fenced block is captured; \s* on both
    # sides absorbs "\n" as well as "\r\n" around the payload.
    fenced = re.search(
        r"```(?:json)?\s*(.*?)\s*```",
        response_text,
        flags=re.DOTALL | re.IGNORECASE,
    )
    if fenced:
        return fenced.group(1).strip()
    return response_text.strip()

def generate_listings(batch_size):
    """Request one batch of real estate listings from the chat model.

    The prompt template ``listings_prompt`` is formatted with ``batch_size``,
    the reply is stripped of Markdown fencing via ``clean_json_output`` and
    parsed as JSON. Both ``{"listings": [...]}`` and a bare ``[...]`` array are
    accepted.

    Args:
        batch_size: Number of listings to ask the model for.

    Returns:
        The parsed list of listings, or an empty list if the request or the
        parsing failed (the error is printed; this function is best-effort and
        never raises).
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are an experienced real estate agent."},
                {"role": "user", "content": listings_prompt.format(batch_size)}
            ],
            temperature=0.7
        )

        raw_output = response["choices"][0]["message"]["content"]
        payload = json.loads(clean_json_output(raw_output))  # Fence-free JSON text

        # The model does not always wrap the array in a "listings" object.
        listings = payload.get("listings") if isinstance(payload, dict) else payload
        if not isinstance(listings, list):
            raise ValueError('response JSON does not contain a "listings" array')
        return listings
    except Exception as e:
        # Deliberately broad: a failed batch is reported and skipped by the caller.
        print(f"Error occurred: {e}")
        return []


In [None]:

# Generate listings in batches, rewriting the output file after every
# successful batch so partial progress survives a later failure.
num_batches = -(-TOTAL_LISTINGS // BATCH_SIZE)  # Ceiling division
all_listings = []
for batch_index in range(num_batches):
    remaining = TOTAL_LISTINGS - len(all_listings)
    if remaining <= 0:
        break

    batch = generate_listings(min(BATCH_SIZE, remaining))
    if batch:
        # The model may return more items than requested; never exceed the target.
        all_listings.extend(batch[:remaining])
        with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
            json.dump(all_listings, f, indent=4)

    if batch_index < num_batches - 1:
        time.sleep(1)  # Avoid rate limits between consecutive API calls

if all_listings:
    print(f"✅ Successfully generated {len(all_listings)} listings and saved to {OUTPUT_FILE}")
else:
    print(f"No listings were generated; {OUTPUT_FILE} was not written")

In [None]:
# Preview the first generated listing. The accumulated results are a plain list
# of parsed listings, not a raw API response, so they are indexed by position.
all_listings[:1]


# Vector DB


In [None]:
from langchain_openai import OpenAIEmbeddings
import json
from langchain.vectorstores import Chroma
from uuid import uuid4
from langchain_core.documents import Document

# Load the previously generated listings from disk.
listings_path = "listings.json"

with open(listings_path, "r", encoding="utf-8") as f:
    listings = json.load(f)

# Fail with a clear message rather than an IndexError on the preview below.
assert listings, f"{listings_path} contains no listings; re-run the generation step"

print(listings[0])

In [None]:
# Prepare documents for ChromaDB: the listing description is the text content,
# and every other listing field is carried along as metadata.
documents = [
    Document(
        page_content=listing["Description"],  # Store the main listing text
        metadata={
            "id": str(uuid4()),  # Fresh random ID on every run
            "neighborhood": listing["Neighborhood"],
            "city": listing["City"],
            "state": listing["State"],
            "price": listing["Price"],
            "bedrooms": listing["Bedrooms"],
            "bathrooms": listing["Bathrooms"],
            "house_size": listing["House Size"],
            "neighborhood_description": listing["Neighborhood Description"],
        }
    )
    for listing in listings
]

# Check the first document (index 0, so this also works with fewer than 11 listings)
print(documents[0] if documents else "No documents were created")

# TESTING GRADIO


In [None]:
import gradio as gr
import os

import time  # Debugging execution time

def search_houses(state, city, house_size, max_price, num_bedrooms, amenities, description):
    """Return placeholder search results shaped for a ``gr.Gallery`` output.

    No real search is performed yet: a single dummy result is built from the
    submitted preferences. Each result is an ``(image, caption)`` pair, which
    is the per-item format a Gallery accepts; the title and the details are
    combined into the caption.

    Args:
        state: State entered by the user.
        city: City entered by the user.
        house_size: Desired house size as entered (shown verbatim).
        max_price: Maximum price as entered (shown verbatim).
        num_bedrooms: Number of bedrooms selected.
        amenities: Selected amenity names; may be empty or None.
        description: Free-text description (currently unused).

    Returns:
        A list containing one ``(image_url, caption)`` tuple.
    """
    start_time = time.perf_counter()  # Monotonic clock for the debug timing

    amenities_text = ", ".join(amenities) if amenities else "No specific amenities selected"
    caption = (
        f"🏡 House in {city}, {state} | "
        f"Size: {house_size} sq ft, Bedrooms: {num_bedrooms}, Price: ${max_price}, "
        f"Amenities: {amenities_text}"
    )

    # Dummy result: placeholder image plus a caption describing the request
    results = [("https://via.placeholder.com/300", caption)]

    elapsed = time.perf_counter() - start_time
    print(f"Search function executed in {elapsed:.3f} seconds")  # Debugging output

    return results

# Amenity choices offered by the checkbox group in the form below
amenities_list = ["Pool", "Garage", "Garden", "Gym", "Fireplace", "Balcony"]

# Gradio interface: a preference form, a search button and a results gallery.
# Components are created inside the Blocks context in the order written here.
with gr.Blocks() as demo:
    # Welcome message
    gr.Markdown("## Welcome to HomeMatch 🏡\nFill in your preferences below and hit 'Search' to find your perfect home!")

    # Location fields, grouped in one row
    with gr.Row():
        state = gr.Textbox(label="State", placeholder="Enter the state (e.g., California)")
        city = gr.Textbox(label="City", placeholder="Enter the city (e.g., San Francisco)")

    # Size and budget are collected through free-text boxes
    with gr.Row():
        house_size = gr.Textbox(label="House Size (sq ft)", placeholder="e.g., 2000")
        max_price = gr.Textbox(label="Maximum Price", placeholder="e.g., 500000")

    # Bedroom count: slider from 1 to 10 in steps of 1, starting at 3
    with gr.Row():
        num_bedrooms = gr.Slider(1, 10, step=1, label="Number of Bedrooms", value=3)

    # Multi-select over amenities_list
    amenities = gr.CheckboxGroup(amenities_list, label="Select Amenities")

    # Free-text extra requirements
    description = gr.Textbox(label="Description", placeholder="Additional details about the house...")

    search_button = gr.Button("Search")

    # Output display for whatever search_houses returns
    results_output = gr.Gallery(label="Search Results")

    # Button click action: the inputs list is in the same order as the
    # parameters of search_houses (state, city, house_size, max_price,
    # num_bedrooms, amenities, description).
    search_button.click(
        search_houses,
        inputs=[state, city, house_size, max_price, num_bedrooms, amenities, description],
        outputs=results_output
    )

# Launch the Gradio app
# NOTE(review): with debug=True this call presumably keeps the cell running
# until it is interrupted, so later cells would not execute on "Run All" — confirm.
demo.launch(debug=True)

In [None]:
# Call the search callback directly, without the UI, using sample inputs
# and print whatever it returns.
print(
    search_houses(
        "California",
        "San Francisco",
        "2000",
        "500000",
        3,
        ["Pool", "Garage"],
        "Nice house",
    )
)
