Cell 1 — Setup & Inputs (Miami)

In [1]:
# --- Setup imports & helpers ---
from agents.owner_parser_agent import invoke_owner_parser_agent
from agents.user_parser_agent import invoke_user_parser_agent
from agents.matching_agent import match_for_new_user, match_for_new_owner
from utils.qdrant_connection import client
from qdrant_client.models import PayloadSchemaType
from qdrant_client.http import exceptions as qexc

SIM_COLLECTION = "similarity_collection"

def ensure_similarity_indexes():
    """Ensure similarity_collection has the payload indexes needed for filtering."""
    for field, schema in [
        ("owner_id", PayloadSchemaType.KEYWORD),
        ("user_id",  PayloadSchemaType.KEYWORD),
    ]:
        try:
            client.create_payload_index(
                collection_name=SIM_COLLECTION,
                field_name=field,
                field_schema=schema,
            )
            print(f"✅ Index ready on '{field}'")
        except qexc.UnexpectedResponse as e:
            if "already exists" in str(e).lower():
                print(f"ℹ️ Index on '{field}' already exists")
            else:
                raise

# --- Miami test inputs ---
owner_input = """
Bright 3-bedroom apartment in Miami Beach, FL, 8 minutes’ walk to the ocean.
Modern building with pool & gym, secure entry, and on-site laundry.
$2,100 per month, available in September. Great for remote work; quiet street near cafes.
Picture: https://example.com/miami/apt1.jpg
""".strip()

user_input = """
Looking for a 3-bedroom in Miami or Miami Beach, ideally near the water and cafes.
Budget up to $2,400/month. Prefer building amenities (pool/gym), quiet for remote work.
Move-in around September.
""".strip()

print("Inputs ready.")


Inputs ready.


Parse & Upload (Owner + User)

In [2]:
print("➕ Uploading owner listing…")
owner_point_id = invoke_owner_parser_agent(owner_input)
print("Owner point id:", owner_point_id)

print("\n➕ Uploading user query…")
user_point_id = invoke_user_parser_agent(user_input)
print("User point id:", user_point_id)

# Make sure similarity_collection has indexes (in case it was rebuilt)
ensure_similarity_indexes()


➕ Uploading owner listing…
✅ Upserted 1 points into 'owner_agent_listings' without resetting the collection.
✅ Uploaded owner listing with ID: b8492286d1946c8ded7b961b32bd1bb5 to 'owner_agent_listings'
✅ Uploaded profile to owner_profiles: Matthew Jackson
Owner point id: b8492286d1946c8ded7b961b32bd1bb5

➕ Uploading user query…
✅ Upserted 1 points into 'user_agent_listings' without resetting the collection.
✅ Uploaded user query with ID: d09dd8a3b791e3ba85c0730f6ba746d9 to 'user_agent_listings'
✅ Uploaded profile to user_profiles: Alice Jackson
User point id: d09dd8a3b791e3ba85c0730f6ba746d9
✅ Index ready on 'owner_id'
✅ Index ready on 'user_id'


Match Both Directions & Show Details

In [3]:
print("\n🔎 Matching for NEW USER → owner listings …")
user_matches = match_for_new_user(user_point_id)

print("🔎 Matching for NEW OWNER → user queries …")
owner_matches = match_for_new_owner(owner_point_id)

from utils.qdrant_connection import print_owner_matches_with_details, print_user_matches_with_details

print_user_matches_with_details(user_matches, top_k=5)
print_owner_matches_with_details(owner_matches, top_k=5)



🔎 Matching for NEW USER → owner listings …
🔎 Matching for NEW OWNER → user queries …
Top 5 owners for this user:
  score=0.6890  owner_id=b8492286-d194-6c8d-ed7b-961b32bd1bb5
    [state: Miami Beach, Florida | price: 2100 | bedrooms: 3 | available_from: September] soft: bright apartment, modern building with pool and gym, secure entry, on-site laundry, great for remote work, quiet street …
  score=0.4685  owner_id=b16e74a7-b701-38bb-515c-a070003b8924
    [state: Miami | price: 2400 | bedrooms: 3 | available_from: July] soft: centrally located, excellent amenities, welcoming community
  score=0.4413  owner_id=7500cc5a-558d-c61a-cb54-8317909269ca
    [state: Miami | price: 2400 | bedrooms: 3 | available_from: March] soft: spacious loft, converted textile factory, high ceilings, concrete floors, minimal noise, access to furnished rooftop, cl…
  score=0.4213  owner_id=5b6a36c9-4fd7-89dd-5771-65488e5afad8
    [state: Miami | price: 1600 | bedrooms: 3 | available_from: June] soft: located i

[{'user_id': '67bc54d5-321e-322d-5a8f-6142427bab54',
  'owner_id': 'b8492286-d194-6c8d-ed7b-961b32bd1bb5',
  'score': 0.6889799,
  'filter_used': {'collection': 'user_agent_listings',
   'state': ['Miami Beach', 'Florida'],
   'price_owner': 2100,
   'bedrooms_owner': 3,
   'available_from': 'September'},
  'timestamp': 1755192238.7120147},
 {'user_id': 'd09dd8a3-b791-e3ba-85c0-730f6ba746d9',
  'owner_id': 'b8492286-d194-6c8d-ed7b-961b32bd1bb5',
  'score': 0.6889799,
  'filter_used': {'collection': 'user_agent_listings',
   'state': ['Miami Beach', 'Florida'],
   'price_owner': 2100,
   'bedrooms_owner': 3,
   'available_from': 'September'},
  'timestamp': 1755192238.7120147}]

Profiles (Owner & User)

In [4]:
from utils.qdrant_connection import get_user_profile, get_owner_profile

owner_profile = get_owner_profile(owner_point_id)  # dict or None
user_profile  = get_user_profile(user_point_id)    # dict or None

print("Owner profile:", owner_profile)
print("User profile:", user_profile)


Owner profile: {'profile_id': 'b8492286d1946c8ded7b961b32bd1bb5', 'type': 'owner', 'full_name': 'Matthew Jackson', 'email': 'mahmoudbj48@gmail.com', 'phone': '+1-555-3086', 'application_date': '2025-07-19', 'number_of_shows': '0'}
User profile: {'profile_id': 'd09dd8a3b791e3ba85c0730f6ba746d9', 'type': 'user', 'full_name': 'Alice Jackson', 'email': 'user6@example.com', 'phone': '+1-555-2538', 'application_date': '2025-07-10', 'number_of_shows': '0'}


Quick Estimated Summaries

In [5]:
from agents.matching_agent import summarize_estimated_for_user, summarize_estimated_for_owner

print("\n=== Estimated summary for USER ===")
summarize_estimated_for_user(user_point_id, user_matches, check_top_k=5)

print("\n=== Estimated summary for OWNER ===")
summarize_estimated_for_owner(owner_point_id, owner_matches, check_top_k=5)



=== Estimated summary for USER ===
=== Estimated opportunities for you ===
- You appear as the #1 candidate in ~1 listing(s).
- You appear in the top 5 for ~6 listing(s).
- You have a strong ‘hard-attribute’ fit with ~2 listing(s).
- Total listings evaluated in this preview: 8
- Your current best score: 0.6890 (owner_id=b8492286-d194-6c8d-ed7b-961b32bd1bb5)

Note: These are early estimates based on current matches.
Final invitations depend on scheduling, fairness (giving chances to those with fewer shows),
and listing popularity. You may not be invited to all matched properties.


=== Estimated summary for OWNER ===
=== Estimated demand for your listing ===
- Your listing appears as the #1 match for ~1 user(s).
- Your listing appears in the top 5 for ~2 user(s).
- There are ~2 user(s) whose requirements strongly fit your listing.
- Total users evaluated in this preview: 2
- Best current candidate score: 0.6890 (user_id=67bc54d5-321e-322d-5a8f-6142427bab54)

Note: These are early estim

Trigger LLM Showing Decisions (saves CSV)

In [2]:
import os, csv
from datetime import datetime
from agents.manage_showings_agent import daily_llm_showing_decisions

def run_daily_decisions():
    results = daily_llm_showing_decisions(top_k=10, show_progress=True)

    # Keep CSV tidy: only write selected columns
    fieldnames = [
        "owner_id", "show", "num", "considered", "mean_top5",
        "sample", "owner_application_date", "owner_number_of_shows",
        "error_type", "error"
    ]
    out_path = f"logs/showings_decisions_{datetime.now().strftime('%Y%m%d-%H%M%S')}.csv"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    with open(out_path, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()
        for r in results:
            if "error" in r:
                w.writerow({
                    "owner_id": r.get("owner_id"),
                    "error_type": r.get("error_type"),
                    "error": r.get("error"),
                })
                continue

            dec = r.get("decision", {}) or {}
            own = r.get("owner_profile", {}) or {}
            w.writerow({
                "owner_id": r.get("owner_id"),
                "show": dec.get("show"),
                "num": dec.get("num"),
                "considered": r.get("considered"),
                "mean_top5": r.get("mean_top5"),
                "sample": r.get("sample"),
                "owner_application_date": own.get("application_date"),
                "owner_number_of_shows": own.get("number_of_shows"),
            })

    print(f"✅ Done. {len(results)} owners processed. CSV → {out_path}")
    return results, out_path

results, decisions_csv = run_daily_decisions()

# Peek a few rows from results
print("\nSample results preview:")
for r in results[:5]:
    if "error" in r:
        print(f"- owner_id={r.get('owner_id')} ERROR: {r.get('error_type')} {r.get('error')}")
    else:
        dec = r.get("decision", {})
        print(f"- owner_id={r.get('owner_id')} considered={r.get('considered')} mean_top5={r.get('mean_top5')} decision={dec}")


ManageShowings:   0%|          | 0/3 [00:00<?, ?owner/s]

✅ Done. 3 owners processed. CSV → logs/showings_decisions_20250815-121119.csv

Sample results preview:
- owner_id=dc6cb70f-cb42-43cf-a1e0-12fbe5d27d50 considered=10 mean_top5=0.6431 decision={'show': '1', 'num': '5'}
- owner_id=41683df7-430b-f0c0-5cd1-2f54295f2525 considered=10 mean_top5=0.6733 decision={'show': '1', 'num': '5'}
- owner_id=ec4161ac-90d8-bb2a-867f-e5fe03a441b7 considered=6 mean_top5=0.4864 decision={'show': '1', 'num': '4'}


 Apply Decisions to Dataset (Dry Run first)

In [2]:
# If you already added organize_dataset_after_showings to utils, import it.
# Otherwise, define it earlier or copy in the implementation we discussed.
try:
    from utils.qdrant_connection import organize_dataset_after_showings
except ImportError:
    raise RuntimeError("organize_dataset_after_showings not found. Make sure it's available in utils.qdrant_connection")

# Dry run (no writes) to preview effects
summary_preview = organize_dataset_after_showings(results, top_k_for_recovery=10, dry_run=False)
print(summary_preview)

# If everything looks good, actually apply:
# summary_applied = organize_dataset_after_showings(results, top_k_for_recovery=10, dry_run=False)
# print(summary_applied)


Applying showing decisions:   0%|          | 0/3 [00:00<?, ?owner/s]


=== organize_dataset_after_showings summary ===
owners_processed:   3
owners_with_show:   0
owners_incremented: 0
users_incremented:  0
pairs_deleted:      0
{'owners_processed': 3, 'owners_with_show': 0, 'owners_incremented': 0, 'users_incremented': 0, 'pairs_deleted': 0, 'updated_owner_ids': [], 'updated_user_ids': [], 'deleted_pair_ids': [], 'dry_run': False}
