## Test parser and matching

In [22]:
from agents.owner_parser_agent import invoke_owner_parser_agent
from agents.user_parser_agent import invoke_user_parser_agent
from agents.matching_agent import match_for_new_user, match_for_new_owner

In [25]:
# --- 1) Inputs (aligned) ---
owner_input = """
Modern 1-bedroom loft in SoMa, San Francisco, with floor-to-ceiling windows and hardwood floors.
Comes fully furnished, includes high-speed internet, and access to a rooftop deck with skyline views.
$3,200 per month, available starting September.
Picture: https://example.com/apt-photos/soma-loft.jpg
""".strip()

user_input = """
Looking for a 1-bedroom apartment in San Francisco, preferably in SoMa or nearby areas.
Must have good natural light and be suitable for remote work; furnished is preferred.
My budget is up to $3,500 per month and I’d like to move in by September.
""".strip()

# --- 2) Parse + upload (each returns the new point id) ---
print("➕ Uploading owner listing…")
owner_point_id = invoke_owner_parser_agent(owner_input)
print("Owner point id:", owner_point_id)

print("\n➕ Uploading user query…")
user_point_id = invoke_user_parser_agent(user_input)
print("User point id:", user_point_id)

➕ Uploading owner listing…
✅ Upserted 1 points into 'owner_agent_listings' without resetting the collection.
✅ Uploaded owner listing with ID: 19499b240b8be1d744057c5dfeb0fef7 to 'owner_agent_listings'
✅ Uploaded profile to owner_profiles: Mia Taylor
Owner point id: 19499b240b8be1d744057c5dfeb0fef7

➕ Uploading user query…
✅ Upserted 1 points into 'user_agent_listings' without resetting the collection.
✅ Uploaded user query with ID: f13bf0a4bb93ab3bd350198e9d0e5ed3 to 'user_agent_listings'
✅ Uploaded profile to user_profiles: Joseph Miller
User point id: f13bf0a4bb93ab3bd350198e9d0e5ed3


In [26]:
# --- 3) Match both directions ---

print("\n🔎 Matching for NEW USER → owner listings …")
user_matches = match_for_new_user(user_point_id)

print("🔎 Matching for NEW OWNER → user queries …")
owner_matches = match_for_new_owner(owner_point_id)


🔎 Matching for NEW USER → owner listings …
🔎 Matching for NEW OWNER → user queries …


In [27]:
from utils.qdrant_connection import print_owner_matches_with_details , print_user_matches_with_details

print_user_matches_with_details(user_matches, top_k=5)
print_owner_matches_with_details(owner_matches, top_k=5)


Top 4 owners for this user:
  score=0.4771  owner_id=351d2aba-79e4-a5c7-eb49-922ad9526a07
    [state: San Francisco | price: 800 | bedrooms: 1 | available_from: September] soft: sunny room, quiet living environment, fast WiFi, access to nearby restaurants, cafes, and Whole Foods
  score=0.4729  owner_id=19499b24-0b8b-e1d7-4405-7c5dfeb0fef7
    [state: SoMa, San Francisco | price: 3200 | bedrooms: 1 | available_from: September] soft: modern design, floor-to-ceiling windows, hardwood floors, fully furnished, high-speed internet, access to rooftop deck w…
  score=0.3463  owner_id=d4fdbd1e-b07d-632e-8a95-ccba2f93167a
    [state: San Francisco | price: 1600 | bedrooms: 1 | available_from: September] soft: spacious, safe neighborhood, 30-minute bus ride to downtown, accommodates up to 2 adults, private bathroom, shared kitch…
  score=0.2609  owner_id=7f05e363-16db-12bf-fb69-6d47bc66dbea
    [state: San Francisco | price: 2400 | bedrooms: 1 | available_from: September] soft: iconic Russian Hi

[{'user_id': 'f13bf0a4-bb93-ab3b-d350-198e9d0e5ed3',
  'owner_id': '19499b24-0b8b-e1d7-4405-7c5dfeb0fef7',
  'score': 0.47292337,
  'filter_used': {'collection': 'user_agent_listings',
   'state': ['SoMa', 'San Francisco'],
   'price_owner': 3200,
   'bedrooms_owner': 1,
   'available_from': 'September'},
  'timestamp': 1755088781.3974662}]

In [29]:
## retrive owner profile from qdrant collection
from utils.qdrant_connection import client, OWNER_PROFILES_COLLECTION
from qdrant_client.http.models import Filter, FieldCondition, MatchValue

def get_owner_profile(owner_id):
    filter = Filter(
        must=[
            FieldCondition(
                key="owner_id",
                match=MatchValue(value=owner_id)
            )
        ]
    )
    
    response = client.scroll(
        collection_name=OWNER_PROFILES_COLLECTION,
        filter=filter,
        limit=1
    )
    
    if response:
        return response[0].payload
    else:
        return None
    



In [2]:
from utils.qdrant_connection import get_user_profile , get_owner_profile
owner_profile = get_owner_profile(owner_point_id)  # -> dict or None
user_profile  = get_user_profile(user_point_id)    # -> dict or None

print(owner_profile)
print(user_profile)


{'profile_id': '19499b240b8be1d744057c5dfeb0fef7', 'type': 'owner', 'full_name': 'Mia Taylor', 'email': 'user1@example.com', 'phone': '+1-555-4943', 'application_date': '2025-07-03', 'number_of_shows': '0'}
{'profile_id': 'f13bf0a4bb93ab3bd350198e9d0e5ed3', 'type': 'user', 'full_name': 'Joseph Miller', 'email': 'moodmath48@gmail.com', 'phone': '+1-555-6559', 'application_date': '2025-07-08', 'number_of_shows': '0'}


## Decicion

In [None]:
import os, csv
from datetime import datetime
from agents.manage_showings_agent import daily_llm_showing_decisions
from langchain_community.callbacks import get_openai_callback
from config.llm_config import llm  # not used here, but you may keep it

def run_daily_decisions():

    with get_openai_callback() as cb:
        results = daily_llm_showing_decisions(top_k=10, show_progress=True)

    # Keep CSV tidy: only write selected columns
    fieldnames = [
        "owner_id", "show", "num", "considered", "mean_top5",
        "sample", "owner_application_date", "owner_number_of_shows",
        "error_type", "error"
    ]
    out_path = f"logs/showings_decisions_{datetime.now().strftime('%Y%m%d-%H%M%S')}.csv"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    with open(out_path, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()
        for r in results:
            if "error" in r:
                w.writerow({
                    "owner_id": r.get("owner_id"),
                    "error_type": r.get("error_type"),
                    "error": r.get("error"),
                })
                continue

            dec = r.get("decision", {}) or {}
            own = r.get("owner_profile", {}) or {}
            w.writerow({
                "owner_id": r.get("owner_id"),
                "show": dec.get("show"),
                "num": dec.get("num"),
                "considered": r.get("considered"),
                "mean_top5": r.get("mean_top5"),
                "sample": r.get("sample"),
                "owner_application_date": own.get("application_date"),
                "owner_number_of_shows": own.get("number_of_shows"),
            })

    print(f"✅ Done. {len(results)} owners processed. CSV → {out_path}")




In [2]:
run_daily_decisions()

ManageShowings:   0%|          | 0/5 [00:00<?, ?owner/s]

✅ Done. 5 owners processed. CSV → logs/showings_decisions_20250813-175122.csv


## Display Qdrant Collections

In [None]:
# Schema/profile explorer for Qdrant collections
# Inspects payload keys, their types, null %s, sample values, and basic stats for numerics.
# Also reports ID format/length and (optionally) whether certain fields like `score` / `timestamp` exist.

from qdrant_client import QdrantClient
from qdrant_client.http import exceptions as qexc
from collections import Counter, defaultdict
from datetime import datetime
import pandas as pd
import math, json, random

# --- Qdrant config (reuse yours or import from utils.qdrant_connection) ---
QDRANT_URL  = "https://3cf2848d-0574-468d-a996-0efabdea92b9.us-west-1-0.aws.cloud.qdrant.io"
QDRANT_KEY  = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.79h_Yg9qXYtICf-fs1CMuMdK5Rw13OnE_DJR953fYQ4"

client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_KEY)

COLLECTIONS = [
    "owner_agent_listings",
    "user_agent_listings",
    "similarity_collection",
    "sampled_owner_agent_listings1",
    "owner_profiles",
    "user_profiles",
]

MAX_SCAN = 2000      # cap how many points to scan per collection (tweak as you wish)
SCROLL_BATCH = 1000  # Qdrant page size

def _scroll_some(collection_name, max_scan=MAX_SCAN, batch=SCROLL_BATCH):
    out, next_page = [], None
    while len(out) < max_scan:
        recs, next_page = client.scroll(
            collection_name=collection_name,
            with_payload=True,
            with_vectors=False,
            limit=min(batch, max_scan - len(out)),
            offset=next_page
        )
        out.extend(recs)
        if next_page is None:
            break
    return out

def _typename(v):
    if v is None: return "null"
    t = type(v)
    if t in (int, float, bool, str): return t.__name__
    if isinstance(v, list): return "list"
    if isinstance(v, dict): return "dict"
    return t.__name__

def _try_float(v):
    try:
        f = float(v)
        if math.isnan(f): return None
        return f
    except Exception:
        return None

def profile_collection(name: str, max_scan=MAX_SCAN):
    # Count first for context
    total = client.count(collection_name=name, exact=True).count
    print(f"\n=== {name} ===")
    print(f"Total points: {total}")

    # Scroll some
    records = _scroll_some(name, max_scan=max_scan)
    n = len(records)
    print(f"Scanned: {n} points (max_scan={max_scan})")

    # ID profile
    id_lengths = [len(str(r.id)) for r in records]
    id_prefixes = [str(r.id)[:8] for r in records]
    print(f"ID lengths: min={min(id_lengths) if id_lengths else '-'} "
          f"max={max(id_lengths) if id_lengths else '-'} "
          f"median={sorted(id_lengths)[len(id_lengths)//2] if id_lengths else '-'}")
    print(f"Sample IDs: {[str(r.id) for r in records[:3]]}")

    # Payload key coverage
    key_counter = Counter()
    # For each key, track types and a few sample values
    type_counter = defaultdict(Counter)
    samples = defaultdict(list)

    # Numeric stats
    numeric_stats = defaultdict(lambda: {"min": float("inf"), "max": float("-inf"), "count": 0})

    # Boolean / small-cardinality tracking
    value_counts_small = defaultdict(Counter)

    for r in records:
        p = r.payload or {}
        for k, v in p.items():
            key_counter[k] += 1
            tname = _typename(v)
            type_counter[k][tname] += 1

            if len(samples[k]) < 5:
                samples[k].append(v)

            # numeric stats
            fv = _try_float(v)
            if fv is not None:
                s = numeric_stats[k]
                s["min"] = min(s["min"], fv)
                s["max"] = max(s["max"], fv)
                s["count"] += 1

            # track small cardinality (strings/ints only)
            if isinstance(v, (str, int, float, bool)) and key_counter[k] <= 2000:
                value_counts_small[k][v] += 1

    rows = []
    for k in sorted(key_counter.keys()):
        present = key_counter[k]
        nulls = n - present
        coverage = present / n * 100 if n else 0.0
        types = ", ".join(f"{t}:{c}" for t, c in type_counter[k].most_common())

        stat = numeric_stats[k]
        num_summary = "-"
        if stat["count"] > 0:
            num_summary = f"[min={stat['min']:.4g}, max={stat['max']:.4g}, count={stat['count']}]"

        # choose a readable sample preview (stringify and truncate)
        smpls = []
        for v in samples[k]:
            s = v
            if isinstance(s, (dict, list)):
                s = json.dumps(s)[:120] + ("…" if len(json.dumps(s)) > 120 else "")
            else:
                s = str(s)
                if len(s) > 120:
                    s = s[:120] + "…"
            smpls.append(s)

        # small-cardinality hint (only if ≤ 20 distinct)
        vc = value_counts_small[k]
        small_card_hint = "-"
        if 0 < len(vc) <= 20:
            common = ", ".join(f"{str(val)[:30]}({cnt})" for val, cnt in vc.most_common(5))
            small_card_hint = f"{len(vc)} distinct | top: {common}"

        rows.append({
            "key": k,
            "coverage_%": round(coverage, 2),
            "present_count": present,
            "null_count": nulls,
            "types": types,
            "numeric_stats": num_summary,
            "small_cardinality": small_card_hint,
            "samples": " | ".join(smpls),
        })

    df = pd.DataFrame(rows).sort_values(["coverage_%", "key"], ascending=[False, True])
    display(df)

    # Convenience: highlight common fields of interest if present
    for fld in ["owner_id", "user_id", "listing_id", "score", "timestamp", "state", "price", "bedrooms", "available_from"]:
        if fld in key_counter:
            print(f"• Field '{fld}' → coverage {round(key_counter[fld]/n*100,2)}%")

    return df


In [4]:
# --- Run profiles ---
dfs = {}
for col in COLLECTIONS:
    try:
        dfs[col] = profile_collection(col, max_scan=MAX_SCAN)
    except qexc.UnexpectedResponse as e:
        print(f"Error accessing {col}: {e}")


=== owner_agent_listings ===
Total points: 1012
Scanned: 1012 points (max_scan=2000)
ID lengths: min=36 max=36 median=36
Sample IDs: ['000f06c2-872e-96e6-dd72-c50e8e92923e', '008c41e9-ce04-57bd-e7df-d85c5844b56c', '009d35b9-ca73-94e2-0dff-b40bea252941']


Unnamed: 0,key,coverage_%,present_count,null_count,types,numeric_stats,small_cardinality,samples
0,available_from,100.0,1012,0,"null:604, str:408",-,"12 distinct | top: October(50), August(43), Ma...",None | None | May | None | None
1,bedrooms,100.0,1012,0,int:1012,"[min=0, max=6, count=1012]","7 distinct | top: 1(732), 2(175), 3(74), 4(23)...",1 | 1 | 1 | 2 | 2
2,listing_id,100.0,1012,0,str:1012,-,-,000f06c2872e96e6dd72c50e8e92923e | 008c41e9ce0...
3,picture_url,100.0,1012,0,"null:1002, str:10",-,6 distinct | top: https://example.com/apt-phot...,None | None | None | None | None
4,price,100.0,1012,0,"int:522, null:490","[min=800, max=4500, count=522]","17 distinct | top: 2500(79), 3000(75), 800(70)...",None | None | 800 | None | None
5,soft_attributes,100.0,1012,0,str:1012,-,-,"newly renovated, updated appliances, hardwood ..."
7,state,100.0,1012,0,list:1012,-,-,"[""Bed-Stuy"", ""Brooklyn"", ""New York City""] | [""..."
6,source,0.1,1,1011,str:1,-,1 distinct | top: user_query(1),user_query


• Field 'listing_id' → coverage 100.0%
• Field 'state' → coverage 100.0%
• Field 'price' → coverage 100.0%
• Field 'bedrooms' → coverage 100.0%
• Field 'available_from' → coverage 100.0%

=== user_agent_listings ===
Total points: 1007
Scanned: 1007 points (max_scan=2000)
ID lengths: min=36 max=36 median=36
Sample IDs: ['00235b8e-07f6-72f4-98d8-51a511449524', '0025d524-935f-6e94-0e53-ee262d848ce6', '0052d54b-39dc-bc06-adb5-38eb13efeac0']


Unnamed: 0,key,coverage_%,present_count,null_count,types,numeric_stats,small_cardinality,samples
0,available_from,100.0,1007,0,"null:567, str:440",-,"16 distinct | top: October(75), March(42), Aug...",December | October | None | None | January
1,bedrooms,100.0,1007,0,int:1007,"[min=1, max=6, count=1007]","6 distinct | top: 1(732), 2(171), 3(74), 4(23)...",1 | 1 | 1 | 1 | 1
2,listing_id,100.0,1007,0,str:1007,-,-,00235b8e07f672f498d851a511449524 | 0025d524935...
3,picture_url,100.0,1007,0,null:1007,-,-,None | None | None | None | None
4,price,100.0,1007,0,"int:603, null:404","[min=99, max=5000, count=603]",-,2400 | 3000 | None | None | 3000
5,soft_attributes,100.0,1007,0,str:1007,-,-,"near downtown, near Union Square | close to pu..."
7,state,100.0,1007,0,list:1007,-,-,"[""San Francisco""] | [""New York City""] | [""New ..."
6,source,99.9,1006,1,str:1006,-,1 distinct | top: user_query(1006),user_query | user_query | user_query | user_qu...


• Field 'listing_id' → coverage 100.0%
• Field 'state' → coverage 100.0%
• Field 'price' → coverage 100.0%
• Field 'bedrooms' → coverage 100.0%
• Field 'available_from' → coverage 100.0%

=== similarity_collection ===
Total points: 30077
Scanned: 2000 points (max_scan=2000)
ID lengths: min=36 max=36 median=36
Sample IDs: ['00002dfd-2619-8928-90fc-a885f9bf7e7c', '00018f3a-6b1b-7701-9322-70b25d96fae7', '00034545-0053-1cfc-2e2e-670af297d33d']


Unnamed: 0,key,coverage_%,present_count,null_count,types,numeric_stats,small_cardinality,samples
0,filter_used,100.0,2000,0,dict:2000,-,-,"{""collection"": ""user_agent_listings"", ""state"":..."
1,owner_id,100.0,2000,0,str:2000,-,-,d9e08424-bc77-656e-72f3-44c488406268 | 5fe3c73...
2,score,100.0,2000,0,float:2000,"[min=0.1694, max=0.9001, count=2000]",-,0.55240315 | 0.6045309 | 0.5314168 | 0.5395528...
3,timestamp,100.0,2000,0,float:2000,"[min=1.755e+09, max=1.755e+09, count=2000]",-,1754906840.3216486 | 1754906437.3464315 | 1754...
4,user_id,100.0,2000,0,str:2000,-,-,b4c4af9a-95cf-3b26-d0d3-6cb6285752e8 | 3e91e30...


• Field 'owner_id' → coverage 100.0%
• Field 'user_id' → coverage 100.0%
• Field 'score' → coverage 100.0%
• Field 'timestamp' → coverage 100.0%

=== sampled_owner_agent_listings ===
Total points: 50
Scanned: 50 points (max_scan=2000)
ID lengths: min=36 max=36 median=36
Sample IDs: ['044e9ae9-c6a7-c53d-a570-2a26b4e455ed', '04cc4a5f-1b97-b3e0-be73-94dc559d189b', '06e4884b-aa37-22a6-e9bd-1fd6a76c3f8a']


Unnamed: 0,key,coverage_%,present_count,null_count,types,numeric_stats,small_cardinality,samples
0,available_from,100.0,50,0,"null:31, str:19",-,"10 distinct | top: October(4), December(4), Ja...",March | January | None | None | None
1,bedrooms,100.0,50,0,int:50,"[min=1, max=5, count=50]","5 distinct | top: 1(35), 2(10), 4(2), 3(2), 5(1)",1 | 1 | 1 | 2 | 1
2,listing_id,100.0,50,0,str:50,-,-,044e9ae9c6a7c53da5702a26b4e455ed | 04cc4a5f1b9...
3,picture_url,100.0,50,0,"null:48, str:2",-,2 distinct | top: https://example.com/image99....,None | None | None | None | None
4,price,100.0,50,0,"int:28, null:22","[min=800, max=3000, count=28]","9 distinct | top: 2500(6), 3000(4), 1200(4), 2...",1600 | 3000 | None | None | None
5,soft_attributes,100.0,50,0,str:50,-,-,"two blocks from Central Park, convenient subwa..."
6,state,100.0,50,0,list:50,-,-,"[""New York City""] | [""Brooklyn"", ""New York Cit..."


• Field 'listing_id' → coverage 100.0%
• Field 'state' → coverage 100.0%
• Field 'price' → coverage 100.0%
• Field 'bedrooms' → coverage 100.0%
• Field 'available_from' → coverage 100.0%

=== owner_profiles ===
Total points: 1012
Scanned: 1012 points (max_scan=2000)
ID lengths: min=36 max=36 median=36
Sample IDs: ['000f06c2-872e-96e6-dd72-c50e8e92923e', '008c41e9-ce04-57bd-e7df-d85c5844b56c', '009d35b9-ca73-94e2-0dff-b40bea252941']


Unnamed: 0,key,coverage_%,present_count,null_count,types,numeric_stats,small_cardinality,samples
0,application_date,100.0,1012,0,str:1012,-,-,2025-07-01 | 2025-07-19 | 2025-07-20 | 2025-07...
1,email,100.0,1012,0,str:1012,-,"10 distinct | top: user1@example.com(119), mhm...",user3@example.com | user5@example.com | mhmod....
2,full_name,100.0,1012,0,str:1012,-,-,Mia Rodriguez | Thomas Moore | Sara Moore | Av...
3,number_of_shows,100.0,1012,0,str:1012,"[min=0, max=0, count=1012]",1 distinct | top: 0(1012),0 | 0 | 0 | 0 | 0
4,phone,100.0,1012,0,str:1012,-,-,+1-555-4320 | +1-555-7108 | +1-555-1478 | +1-5...
5,profile_id,100.0,1012,0,str:1012,-,-,000f06c2-872e-96e6-dd72-c50e8e92923e | 008c41e...
6,type,100.0,1012,0,str:1012,-,1 distinct | top: owner(1012),owner | owner | owner | owner | owner



=== user_profiles ===
Total points: 1007
Scanned: 1007 points (max_scan=2000)
ID lengths: min=36 max=36 median=36
Sample IDs: ['00235b8e-07f6-72f4-98d8-51a511449524', '0025d524-935f-6e94-0e53-ee262d848ce6', '0052d54b-39dc-bc06-adb5-38eb13efeac0']


Unnamed: 0,key,coverage_%,present_count,null_count,types,numeric_stats,small_cardinality,samples
0,application_date,100.0,1007,0,str:1007,-,-,2025-07-18 | 2025-07-08 | 2025-07-12 | 2025-07...
1,email,100.0,1007,0,str:1007,-,"10 distinct | top: user2@example.com(131), use...",user7@example.com | user5@example.com | user7@...
2,full_name,100.0,1007,0,str:1007,-,-,Daniel Williams | Daniel Martin | John Brown |...
3,number_of_shows,100.0,1007,0,str:1007,"[min=0, max=0, count=1007]",1 distinct | top: 0(1007),0 | 0 | 0 | 0 | 0
4,phone,100.0,1007,0,str:1007,-,-,+1-555-4293 | +1-555-8036 | +1-555-2249 | +1-5...
5,profile_id,100.0,1007,0,str:1007,-,-,00235b8e-07f6-72f4-98d8-51a511449524 | 0025d52...
6,type,100.0,1007,0,str:1007,-,1 distinct | top: user(1007),user | user | user | user | user


## NeXT
