## Generating 20 home listings 

In [22]:
from pathlib import Path
import json, os, time
from typing import List
import json, time
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.embeddings import HuggingFaceEmbeddings

In [10]:
import os
from dotenv import load_dotenv
load_dotenv() #load all the env variables

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [12]:
# --- 1. model --------------------------------------------------------------
llm = ChatGroq(
    model_name="Llama3-8b-8192",  # or another Groq model
    temperature=0.8,
    max_tokens=512   # plenty for one JSON listing
)

llm.invoke("Hello")

AIMessage(content="Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 11, 'total_tokens': 37, 'completion_time': 0.033664224, 'prompt_time': 0.003691001, 'queue_time': 0.124086486, 'total_time': 0.037355225}, 'model_name': 'Llama3-8b-8192', 'system_fingerprint': 'fp_8af39bf2ae', 'finish_reason': 'stop', 'logprobs': None}, id='run--68afad66-f4e1-427a-9ab6-b09a432a7cae-0', usage_metadata={'input_tokens': 11, 'output_tokens': 26, 'total_tokens': 37})

In [14]:
#defining a Pydantic outputparser to format the llm output
class Listing(BaseModel):
    neighborhood: str = Field(..., description="Name of the neighborhood")
    price: int = Field(..., description="Listing price in whole US dollars")
    bedrooms: int = Field(..., description="Number of bedrooms (1-6)")
    bathrooms: int = Field(..., description="Number of bathrooms (1-4)")
    house_size: str = Field(..., description='Living area, e.g. "2150 sqft"')
    description: str = Field(
        ..., description="5–6 engaging sentences describing the property"
    )
    neighborhood_description: str = Field(
        ..., description="3–4 sentences describing the neighborhood"
    )
#defining the parser
parser = PydanticOutputParser(pydantic_object=Listing)

In [15]:
# designing a Prompt template (the parser adds format instructions automatically)
prompt = PromptTemplate(
    template=(
        "You are an expert real-estate copywriter.\n\n"
        "Generate a **fictional but realistic** property listing that follows this brief:\n"
        "• Each call must describe a different neighborhood.\n"
        "• Keep data plausible and coherent.\n\n"
        "{format_instructions}\n"
    ),
    input_variables=[],                       # no runtime variables needed
    partial_variables={
        "format_instructions": parser.get_format_instructions()
    },
)

In [16]:
# Example LLM chain
chain = prompt | llm | parser    

# Generate one listing
listing = chain.invoke({})
print(listing)

neighborhood='River Oaks' price=949000 bedrooms=5 bathrooms=4 house_size='4200 sqft' description="Welcome to this stunning River Oaks estate, boasting 5 spacious bedrooms, 4 luxurious bathrooms, and a sprawling 4200 sqft of living area. Upon entry, you'll be greeted by a grand foyer with soaring ceilings and elegant lighting. The open-concept living area features a gourmet kitchen, complete with high-end appliances and ample counter space. The expansive backyard is perfect for entertaining, with a sparkling pool, outdoor kitchen, and lush landscaping. This property is a true gem, offering the perfect blend of luxury and livability." neighborhood_description='River Oaks is a charming and historic neighborhood nestled in the heart of Houston. Known for its beautiful tree-lined streets, tranquil atmosphere, and strong sense of community, River Oaks is a highly sought-after destination for families and professionals alike. With its proximity to downtown Houston, major highways, and world-c

In [18]:
# ---------------------------------------------------------------------
# Generate N listings with retry / pacing
# ---------------------------------------------------------------------
def generate_listings(
    n: int,
    chain,                     # the LangChain pipeline (prompt | llm | parser)
    pause: float = 0.3,        # polite delay between calls in seconds
    max_retries: int = 2,      # how many times to retry on an error
) -> List[Listing]:
    """Generate *n* real-estate listings via the Groq chain."""
    
    listings: List[Listing] = []
    
    for i in range(n):
        attempts = 0
        while attempts <= max_retries:
            try:
                listing = chain.invoke({})        # no inputs thanks to partial_variables
                listings.append(listing)
                break                            # success → exit retry loop
            except Exception as e:
                attempts += 1
                if attempts > max_retries:
                    print(f"[{i}] failed after {max_retries} retries → {e}")
                else:
                    print(f"[{i}] error, retrying ({attempts}) → {e}")
                    time.sleep(pause)
        time.sleep(pause)   # pacing to avoid rate limits
    return listings

In [20]:
# ---------------------------------------------------------------------
# Optional helper: save to JSON
# ---------------------------------------------------------------------
def save_listings(
    listings: List[Listing],
    path: str = "./listings/listings.json",
):
    """Serialize Listing objects to pretty-printed JSON."""
    
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w") as fp:
        json.dump([l.dict() for l in listings], fp, indent=2)
    print(f"✅ Saved {len(listings)} listings → {path}")

data = generate_listings(50, chain)
save_listings(data)

[2] error, retrying (1) → Failed to parse Listing from completion {"neighborhood": ["River Oaks"], "price": 1250000, "bedrooms": 4, "bathrooms": 3, "house_size": "3500 sqft", "description": "Impeccably maintained colonial-style home in River Oaks, one of Houston's most sought-after neighborhoods. This stunning property boasts a sleek and modern interior, complete with high ceilings, gleaming hardwood floors, and expansive windows. The spacious living room flows seamlessly into the gourmet kitchen, equipped with top-of-the-line appliances and ample storage. Four generously sized bedrooms, including a serene master retreat, offer ample space for relaxation and rejuvenation. Enjoy the tranquil Texas climate in the lush backyard, complete with a sparkling pool and lush greenery.", "neighborhood_description": "River Oaks is a picturesque neighborhood in west-central Houston, known for its stunning architecture, picturesque parks, and family-friendly atmosphere. Residents enjoy easy access t

/tmp/ipykernel_126020/4170285092.py:12: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  json.dump([l.dict() for l in listings], fp, indent=2)


In [27]:
#Loading an embedding model from huggingface
embedding_model = HuggingFaceEmbeddings(
    model_name="BAAI/bge-base-en-v1.5",
    model_kwargs={"device": "cpu"}  # or "cuda" if GPU is available
)
embedding = embedding_model.embed_query("I am looking for a wonderful house in a cozy neighboorhood")
len(embedding)

768

In [None]:
#Load the listings
# 2. -------------- Load listings.json -------------------------------------
LISTING_FILE = "./listings/listings.json"

with open(LISTING_FILE, "r") as fp:
    raw_listings = json.load(fp) 

In [None]:
from pydantic import BaseModel, Field
from typing import Optional, List

# Defining a pydantic output parser for our query cleaning LLM
class BuyerPreferences(BaseModel):
    bedrooms: Optional[int] = Field(None, description="Number of bedrooms")
    bathrooms: Optional[int] = Field(None, description="Number of bathrooms")
    house_size: Optional[str] = Field(None, description="Desired house size (e.g. '2000 sqft')")
    amenities: Optional[List[str]] = Field(None, description="Desired amenities (e.g. backyard, solar panels)")
    transportation: Optional[List[str]] = Field(None, description="Transportation preferences (e.g. bike paths, public transit)")
    neighborhood_traits: Optional[List[str]] = Field(None, description="Neighborhood qualities (e.g. quiet, walkable)")
    price_range: Optional[str] = Field(None, description="Approximate price range or budget")
    lifestyle: Optional[str] = Field(None, description="Lifestyle fit, e.g. remote work, family-friendly")
    summary: str = Field(..., description="One concise summary sentence combining the above preferences")
