In [1]:
from utils.qdrant_connection import upload_to_qdrant

## User Tests

In [2]:
from config.llm_config import llm
from agents.user_parser_agent import run_user_parser_agent

# Manual smoke test for the user parser agent: read one free-text message
# from the keyboard, pass it to run_user_parser_agent together with the
# configured llm, and print whatever the agent returns.
print("Running REMAS: run_user_parser_agent")
# input() waits for a typed message, so this cell cannot run unattended.
user_input = input("Type your message: ")

# The recorded run printed a dict with 'hard_attributes' and 'soft_attributes' keys.
response = run_user_parser_agent(user_input=user_input, llm=llm)
print("Agent Response:", response)

Running REMAS: run_user_parser_agent
Agent Response: {'hard_attributes': {'state': ['Queens'], 'picture_url': None, 'price': 2800, 'num_bedrooms': 3, 'available_from': 'September 15'}, 'soft_attributes': 'spacious, near the subway, perfect for families, pet-friendly, close to schools'}


In [2]:
from utils.qdrant_connection import scroll_qdrant
# Fetch up to 5 points from the "user_agent_listings" collection for a quick
# manual inspection of what has been stored.
result = scroll_qdrant("user_agent_listings", limit=5)

# Only result[0] is used and it is iterated as the points; presumably the
# remaining element(s) carry the scroll offset — TODO confirm against scroll_qdrant.
for point in result[0]:
    print("\n📌 Point ID:", point.id)
    print("Payload:", point.payload)


📌 Point ID: 488f9ccf-0fb9-fafc-ba70-b812e5891018
Payload: {'listing_id': '488f9ccf0fb9fafcba70b812e5891018', 'state': ['Lower East Side'], 'picture_url': None, 'price': 1200, 'bedrooms': 1, 'available_from': 'December', 'soft_attributes': 'vibrant neighborhood, easy access to public transportation, plenty of dining and cultural options', 'source': 'user_query'}

📌 Point ID: 709bf8fe-32ab-82fd-4830-9dd7ec8ec6f6
Payload: {'listing_id': '709bf8fe32ab82fd48309dd7ec8ec6f6', 'state': ['Manhattan'], 'picture_url': None, 'price': None, 'bedrooms': 1, 'available_from': None, 'soft_attributes': 'convenient, close to restaurants and attractions', 'source': 'user_query'}

📌 Point ID: fcd504a9-e040-33df-0277-245f3f1a0cf1
Payload: {'listing_id': 'fcd504a9e04033df0277245f3f1a0cf1', 'state': ['Queens'], 'picture_url': None, 'price': 2800, 'bedrooms': 3, 'available_from': 'September 15', 'soft_attributes': 'spacious, near the subway, perfect for families, pet-friendly, close to schools'}


## Owner Tests

In [1]:
# Sample free-text owner listing used as the fixture for the owner parser test;
# it mentions bedrooms, location, perks, a monthly price, an availability date and a picture URL.
owner_input= " Spacious 1.5-bedroom condo in New York, near the subway. Perfect for families, pet-friendly, and close to schools. $2800 per month, available starting September 15th.  Picture: https://example.com/image99.jpg"



In [2]:
from agents.owner_parser_agent import invoke_owner_parser_agent

# Run the owner parser agent on the sample listing; the return value is not captured here.
# The recorded run reported one point upserted into 'owner_agent_listings'.
# NOTE(review): the recorded output also says "Listing uploaded with ID: unknown" —
# confirm the agent actually propagates the listing id.
invoke_owner_parser_agent(owner_input=owner_input)

✅ Upserted 1 points into 'owner_agent_listings' without resetting the collection.
Listing uploaded with ID: unknown


In [3]:
from utils.qdrant_connection import scroll_qdrant
# Fetch up to 45 points from the "owner_agent_listings" collection and print
# each point's id and payload for manual inspection.
result = scroll_qdrant("owner_agent_listings", limit=45)

# Only result[0] is used and it is iterated as the points — TODO confirm the
# exact return shape of scroll_qdrant.
for point in result[0]:
    print("\n📌 Point ID:", point.id)
    print("Payload:", point.payload)



📌 Point ID: 000f06c2-872e-96e6-dd72-c50e8e92923e
Payload: {'listing_id': '000f06c2872e96e6dd72c50e8e92923e', 'state': ['Bed-Stuy', 'Brooklyn', 'New York City'], 'picture_url': None, 'price': None, 'bedrooms': 1, 'available_from': None, 'soft_attributes': 'newly renovated, updated appliances, hardwood floors, split-unit air conditioning, close to restaurants and bars, near subway, quick trip to Manhattan'}

📌 Point ID: 008c41e9-ce04-57bd-e7df-d85c5844b56c
Payload: {'listing_id': '008c41e9ce0457bde7dfd85c5844b56c', 'state': ['San Francisco'], 'picture_url': None, 'price': None, 'bedrooms': 1, 'available_from': None, 'soft_attributes': 'charming, conveniently located near parks and transportation, vibrant Mission district, fully equipped, peaceful living environment, quiet neighbors'}

📌 Point ID: 009d35b9-ca73-94e2-0dff-b40bea252941
Payload: {'listing_id': '009d35b9ca7394e20dffb40bea252941', 'state': ['New York City'], 'picture_url': None, 'price': 800, 'bedrooms': 1, 'available_from': 

## SYNTHETIC DATA


In [25]:

# -------- prompts --------
# System prompt used by owner_normalize: asks the model to rewrite a short,
# possibly informal listing description as a concise owner listing in plain text.
OWNER_NORMALIZE_PROMPT = """You will receive a short listing description pulled from Airbnb data (may be informal).
Rewrite it as a concise, normal real-estate OWNER listing (2–4 sentences).
Include: location, bedrooms, notable perks/constraints, monthly price (if present), and availability timing (if present).
Output plain text only."""

# System prompt used by customer_from_owner: asks the model to turn an owner
# listing into a matching renter request, keeping the same budget number.
CUSTOMER_SYNTH_PROMPT = """You will receive a normal real-estate OWNER listing.
Rewrite it as a RENTER request that naturally matches it (1–3 sentences).
Mention location, min bedrooms, lifestyle hints, max monthly budget (same number), and move-in time if present.
Output plain text only."""

In [26]:
# -------- helpers --------
def _chat(messages: List[dict]) -> str:
    """Send ``messages`` to the chat-completions endpoint and return the reply text.

    Uses the module-level ``client``, ``MODEL_ID`` and ``TEMP`` settings.

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The first choice's message content with surrounding whitespace removed,
        or an empty string when the response carries no text content.
    """
    r = client.chat.completions.create(
        model=MODEL_ID,
        messages=messages,
        temperature=TEMP
    )
    # message.content is nullable in the chat-completions response; calling
    # .strip() on None would raise AttributeError and abort a long batch run.
    content = r.choices[0].message.content
    return (content or "").strip()

def select_seed_listings(csv_path: str, n: int) -> pd.DataFrame:
    """Load listings from a CSV and keep at most ``n`` rows from the allowed cities.

    The ``state`` column is cast to ``str``, upper-cased and stripped, and only
    rows whose normalized value is in the module-level ``CITY_FILTER`` are kept.
    If more than ``n`` rows remain, ``n`` of them are sampled with a fixed seed
    (``random_state=42``). The returned frame has a fresh 0..k-1 index.
    """
    listings = pd.read_csv(csv_path)
    normalized_state = listings["state"].astype(str).str.upper().str.strip()
    listings = listings.assign(state=normalized_state)

    in_scope = listings.loc[normalized_state.isin(CITY_FILTER)]
    if len(in_scope) > n:
        # Fixed seed keeps the selection reproducible between runs.
        in_scope = in_scope.sample(n=n, random_state=42)
    return in_scope.reset_index(drop=True)

def _is_missing(value) -> bool:
    """Return True for None, pd.NA and float NaN (how pandas marks an empty cell)."""
    return value is None or value is pd.NA or (isinstance(value, float) and value != value)


def build_airbnb_source_text(row: pd.Series) -> str:
    """Compose a compact source text from structured fields (since Airbnb text may be odd).

    Reads ``bedrooms``, ``state``, ``soft_attributes``, ``price`` and
    ``available_from`` from ``row``. Any of them may be missing (None/NaN);
    missing fields are simply left out of the text.

    Args:
        row: One listing row.

    Returns:
        A string of the form
        ``"<N>-bedroom in <City>. <soft> Price: $<P>/month. Available from <When>."``
        truncated to 800 characters.
    """
    parts = []
    b = row.get("bedrooms")
    if pd.notna(b):
        parts.append(f"{int(b)}-bedroom")

    # NaN is truthy, so `value or ""` does not catch it: a missing state used to
    # render as "in Nan" and a missing soft_attributes crashed on .strip().
    state = row.get("state")
    city = "" if _is_missing(state) else str(state or "").title()
    if city:
        parts.append(f"in {city}")

    soft_raw = row.get("soft_attributes")
    soft = "" if _is_missing(soft_raw) else (soft_raw or "").strip()

    price = row.get("price")
    avail = row.get("available_from")

    s = f"{' '.join(parts)}. {soft}".strip()
    if pd.notna(price):
        s += f" Price: ${int(float(price))}/month."
    # Only non-empty strings count as availability; None/NaN are skipped.
    if isinstance(avail, str) and avail:
        s += f" Available from {avail.title()}."
    return s[:800]

def owner_normalize(text: str) -> str:
    """Rewrite raw listing text in owner style via the chat model.

    Sends ``OWNER_NORMALIZE_PROMPT`` as the system message and ``text`` as the
    user message, and returns whatever ``_chat`` returns for that conversation.
    """
    system_message = {"role": "system", "content": OWNER_NORMALIZE_PROMPT}
    user_message = {"role": "user", "content": text}
    return _chat([system_message, user_message])

def customer_from_owner(owner_text: str) -> str:
    """Turn an owner listing into a matching renter request via the chat model.

    Sends ``CUSTOMER_SYNTH_PROMPT`` as the system message and ``owner_text`` as
    the user message, and returns whatever ``_chat`` returns for that conversation.
    """
    conversation = [
        dict(role="system", content=CUSTOMER_SYNTH_PROMPT),
        dict(role="user", content=owner_text),
    ]
    return _chat(conversation)

def save_jsonl(path: str, rows: List[dict]):
    """Write ``rows`` to ``path`` as JSON Lines (one JSON object per line, UTF-8).

    Args:
        path: Destination file. Missing parent directories are created; a bare
            filename with no directory part is written relative to the current
            working directory.
        rows: Records to serialize, one per output line. Non-ASCII characters
            are written as-is rather than escaped.

    An existing file at ``path`` is overwritten.
    """
    parent = os.path.dirname(path)
    # os.makedirs("") raises FileNotFoundError, so only create a parent
    # directory when the path actually has one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        for r in rows:
            json.dump(r, f, ensure_ascii=False)
            f.write("\n")

In [27]:
# Draw the seed sample (at most N_SAMPLES rows from the cities in CITY_FILTER)
# and persist it so the same selection can be inspected or reused.
df = select_seed_listings(CSV_PATH, N_SAMPLES)
# NOTE(review): this guard only prints a warning; the lines below still run on
# an empty frame, so an empty CSV is written to SELCTED_PATH. Confirm whether
# the cell should stop here instead.
if df.empty:
    print("No rows after filtering.")
print(f"Selected {len(df)} rows from {sorted(CITY_FILTER)}")
# SELCTED_PATH is spelled as it is defined elsewhere in the notebook.
df.to_csv(SELCTED_PATH, index=False)


Selected 1000 rows from ['NEW YORK', 'NEW YORK CITY', 'NYC', 'SAN FRANCISCO']


In [None]:
# Step 1: build source (Airbnb-ish) text → normalize to owner style
# One chat call is made per selected row, sequentially; owner_inputs ends up
# with one normalized text per row of df, in row order.
owner_inputs: List[str] = []
for _, row in tqdm(df.iterrows(), total=len(df), desc="Normalize owner texts"):
    src = build_airbnb_source_text(row)
    owner_inputs.append(owner_normalize(src))
    time.sleep(0.02)  # gentle pacing

# OLD

In [1]:
# from config.llm_config import llm
# from agents.leave_me_agent import run_leave_me_agent

# def main():
#     print("Running REMAS: Leave Me Agent Example")
#     user_input = input("Type your message: ")

#     response = run_leave_me_agent(user_input=user_input, llm=llm)
#     print("Agent Response:", response)

# if __name__ == "__main__":
#     main()


In [None]:
# from remas_llm import run_llm_agent

# response = run_llm_agent(
#     user_input="Hello there!",
#     system_prompt="You must always respond with: Leave me alone!"
# )
# print("Agent:", response)


In [None]:
# # Load environment variables
# load_dotenv()
# AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
# DEPLOYMENT_NAME = "team8-gpt4o"  
# AZURE_OPENAI_ENDPOINT = "https://096290-oai.openai.azure.com"
# API_VERSION = "2023-05-15"

# # Initialize model
# llm = AzureChatOpenAI(
#     api_key=AZURE_OPENAI_API_KEY,
#     api_version=API_VERSION,
#     azure_endpoint=AZURE_OPENAI_ENDPOINT,
#     deployment_name=DEPLOYMENT_NAME,
# )

In [None]:
# # Token usage log file
# LOG_FILE = "token_usage.csv"

# # Ensure CSV file has headers
# if not os.path.exists(LOG_FILE):
#     with open(LOG_FILE, mode='w', newline='') as f:
#         writer = csv.writer(f)
#         writer.writerow(["timestamp", "input_tokens", "output_tokens", "total_tokens", "user_input"])


In [None]:
# def run_llm_agent(user_input):
#     messages = [
#         SystemMessage(content="You must always respond with: Leave me alone!"),
#         HumanMessage(content=user_input)
#     ]

#     # Token usage tracking
#     with get_openai_callback() as cb:
#         response = llm.invoke(messages)

#         # Log to CSV
#         with open(LOG_FILE, mode='a', newline='') as f:
#             writer = csv.writer(f)
#             writer.writerow([
#                 datetime.now().isoformat(),
#                 cb.prompt_tokens,
#                 cb.completion_tokens,
#                 cb.total_tokens,
#                 user_input
#             ])

#     return response.content

In [None]:
# while True:
#     user_input = input("User: ")
#     if user_input.lower() in {"exit", "quit"}:
#         break
#     response = run_llm_agent(user_input)
#     print("Agent:", response)

Agent: Leave me alone!
Agent: Leave me alone!


In [None]:
# import re

# def extract_outputs(results):
#     outputs = []
#     for r in results:
#         match = re.search(r"<output>\s*(\d)\s*</output>", r)
#         if match and match.group(1) in ["0", "1"]:
#             outputs.append(int(match.group(1)))
#         else:
#             outputs.append(-1)
#     return outputs

# def extract_explanations(results):
#     explanations = []
#     for r in results:
#         match = re.search(r"<explanation>\s*(.*?)\s*</explanation>", r, re.DOTALL)
#         if match:
#             explanations.append(match.group(1).strip())
#         else:
#             explanations.append("")
#     return explanations

In [None]:
# # -------------------- 
# # a function for inserting a system and user messages into a conversation
# # -------------------- 
# def format_prompt(system_msg: str, user_msg: str):
#     return [
#         SystemMessage(content=system_msg), 
#         HumanMessage(content=user_msg)
#     ]

In [7]:
# # -------------------- 
# # invoking a single call
# # -------------------- 

# system_msg = "Answer 'Leave me alone' no matter what you're asked."
# user_msg = "What is the capital of France?"
# prompt = format_prompt(system_msg, user_msg)
# result = gpt_4o.invoke(prompt).content   # output: "Leave me alone."

In [None]:
# # -------------------- 
# # a function for running multiple calls in parallel
# # -------------------- 
# def invoke_llm(model: AzureChatOpenAI, prompts: list[str] | list[list[SystemMessage | HumanMessage]], with_tqdm=True) -> \
#         list[str]:
#     def single_invoke(prompt: str) -> str:
#         try:
#             result = model.invoke(prompt)
#             return result.content
#         except Exception as e:
#             print(f"LLM error: {e}")
#             return "N/A"

#     with ThreadPoolExecutor() as executor:
#         if with_tqdm:
#             results = list(tqdm(
#                 executor.map(single_invoke, prompts),
#                 total=len(prompts),
#                 desc="Processing prompts"
#             ))
#         else:
#             results = list(executor.map(single_invoke, prompts))

#     return results