## Test parser and matching

In [1]:
from agents.owner_parser_agent import invoke_owner_parser_agent
from agents.user_parser_agent import invoke_user_parser_agent
from agents.matching_agent import match_for_new_user, match_for_new_owner

In [2]:
# --- 1) Inputs (aligned) ---
# Free-text owner listing and user query that deliberately describe the same
# apartment, so the pair should surface as a strong match further down.
# NOTE(review): "San Fransisco" is misspelled in both inputs, while the recorded
# match output shows the state as "San Francisco" — presumably the parser agents
# normalise it; confirm whether the misspelling is intentional.
owner_input = """
Modern 1-bedroom apartment in San Fransisco, just 2 blocks from Central Park.
Features hardwood floors, floor-to-ceiling windows, a fully equipped kitchen, and a gym in the building.
$2,200 per month, available starting September.
Picture: https://example.com/apt-photos/centralpark1.jpg
""".strip()

user_input = """
Looking for a 1-bedroom apartment in San Fransisco, preferably close to Central Park or major subway lines.
Natural light and building amenities like a gym are important. Budget up to $2,500 per month.
Move-in date flexible, but ideally in September.
""".strip()


# --- 2) Parse + upload (each returns the new point id) ---
# The returned ids are reused by the matching, profile and summary cells below.
# Per the recorded output, each call upserts a point into the corresponding
# listings collection and uploads a profile, so re-running this cell writes to Qdrant.
print("➕ Uploading owner listing…")
owner_point_id = invoke_owner_parser_agent(owner_input)
print("Owner point id:", owner_point_id)

print("\n➕ Uploading user query…")
user_point_id = invoke_user_parser_agent(user_input)
print("User point id:", user_point_id)

➕ Uploading owner listing…
✅ Upserted 1 points into 'owner_agent_listings' without resetting the collection.
✅ Uploaded owner listing with ID: 4bed098f15fc0a936f9e1636ba9efbe1 to 'owner_agent_listings'
✅ Uploaded profile to owner_profiles: Charlotte Gonzalez
Owner point id: 4bed098f15fc0a936f9e1636ba9efbe1

➕ Uploading user query…
✅ Upserted 1 points into 'user_agent_listings' without resetting the collection.
✅ Uploaded user query with ID: 097a26be7d8ded042cb73e7e3f2ba0f4 to 'user_agent_listings'
✅ Uploaded profile to user_profiles: Robert Smith
User point id: 097a26be7d8ded042cb73e7e3f2ba0f4


In [3]:
# --- 3) Match both directions ---
# Uses the point ids produced by the upload cell; the results are kept in
# `user_matches` / `owner_matches` for the printing and summary cells below.

print("\n🔎 Matching for NEW USER → owner listings …")
user_matches = match_for_new_user(user_point_id)

print("🔎 Matching for NEW OWNER → user queries …")
owner_matches = match_for_new_owner(owner_point_id)


🔎 Matching for NEW USER → owner listings …
🔎 Matching for NEW OWNER → user queries …


In [4]:
from utils.qdrant_connection import print_owner_matches_with_details , print_user_matches_with_details

# Print the matches found in each direction, limited with top_k=5
# (the recorded output is headed "Top 5 owners for this user").
print_user_matches_with_details(user_matches, top_k=5)
print_owner_matches_with_details(owner_matches, top_k=5)


Top 5 owners for this user:
  score=0.7568  owner_id=4bed098f-15fc-0a93-6f9e-1636ba9efbe1
    [state: San Francisco | price: 2200 | bedrooms: 1 | available_from: September] soft: modern, hardwood floors, floor-to-ceiling windows, fully equipped kitchen, gym in the building, near Central Park
  score=0.5517  owner_id=b33d10a7-c117-61ac-61e0-dd37cc81a0e7
    [state: San Francisco, Russian Hill | price: 2400 | bedrooms: 2 | available_from: April] soft: prestigious neighborhood, spacious layout, updated kitchen, nearby restaurants and shops, wood library, office space
  score=0.5446  owner_id=ec4161ac-90d8-bb2a-867f-e5fe03a441b7
    [state: San Francisco | price: 2400 | bedrooms: 2 | available_from: October] soft: spacious, 5 minutes from the subway, balcony with city views, newly renovated kitchen, in-unit washer/dryer
  score=0.5322  owner_id=f9318209-9dd7-e72b-beac-df4cfa0fb33c
    [state: San Francisco | price: 2500 | bedrooms: 1 | available_from: None] soft: prime neighborhood, easy a

[{'user_id': '097a26be-7d8d-ed04-2cb7-3e7e3f2ba0f4',
  'owner_id': '4bed098f-15fc-0a93-6f9e-1636ba9efbe1',
  'score': 0.7567988,
  'filter_used': {'collection': 'user_agent_listings',
   'state': ['San Francisco'],
   'price_owner': 2200,
   'bedrooms_owner': 1,
   'available_from': 'September'},
  'timestamp': 1755167582.0638237},
 {'user_id': 'f13bf0a4-bb93-ab3b-d350-198e9d0e5ed3',
  'owner_id': '4bed098f-15fc-0a93-6f9e-1636ba9efbe1',
  'score': 0.3688518,
  'filter_used': {'collection': 'user_agent_listings',
   'state': ['San Francisco'],
   'price_owner': 2200,
   'bedrooms_owner': 1,
   'available_from': 'September'},
  'timestamp': 1755167582.0638237},
 {'user_id': '777f238a-6ce5-7ebc-94a8-c743087981fe',
  'owner_id': '4bed098f-15fc-0a93-6f9e-1636ba9efbe1',
  'score': 0.31416792,
  'filter_used': {'collection': 'user_agent_listings',
   'state': ['San Francisco'],
   'price_owner': 2200,
   'bedrooms_owner': 1,
   'available_from': 'September'},
  'timestamp': 1755167582.0638237

In [5]:
from utils.qdrant_connection import get_user_profile , get_owner_profile
# Look up the profiles stored for the two points uploaded above.
owner_profile = get_owner_profile(owner_point_id)  # -> dict or None
user_profile  = get_user_profile(user_point_id)    # -> dict or None

# NOTE(review): the printed profiles contain contact details (email, phone);
# clear this cell's output before sharing or committing the notebook.
print(owner_profile)
print(user_profile)


{'profile_id': '4bed098f15fc0a936f9e1636ba9efbe1', 'type': 'owner', 'full_name': 'Charlotte Gonzalez', 'email': 'mahmoudbj48@gmail.com', 'phone': '+1-555-9951', 'application_date': '2025-07-21', 'number_of_shows': '0'}
{'profile_id': '097a26be7d8ded042cb73e7e3f2ba0f4', 'type': 'user', 'full_name': 'Robert Smith', 'email': 'mahmoudbj48@gmail.com', 'phone': '+1-555-9951', 'application_date': '2025-07-08', 'number_of_shows': '0'}


In [6]:
from agents.matching_agent import summarize_estimated_for_user, summarize_estimated_for_owner
# Print the "estimated opportunities" / "estimated demand" previews for the
# uploaded user and owner, based on the matches computed above (check_top_k=5).
summarize_estimated_for_user(user_point_id, user_matches, check_top_k=5)
summarize_estimated_for_owner(owner_point_id, owner_matches, check_top_k=5)

=== Estimated opportunities for you ===
- You appear as the #1 candidate in ~3 listing(s).
- You appear in the top 5 for ~24 listing(s).
- You have a strong ‘hard-attribute’ fit with ~28 listing(s).
- Total listings evaluated in this preview: 49
- Your current best score: 0.7568 (owner_id=4bed098f-15fc-0a93-6f9e-1636ba9efbe1)

Note: These are early estimates based on current matches.
Final invitations depend on scheduling, fairness (giving chances to those with fewer shows),
and listing popularity. You may not be invited to all matched properties.

=== Estimated demand for your listing ===
- Your listing appears as the #1 match for ~1 user(s).
- Your listing appears in the top 5 for ~2 user(s).
- There are ~3 user(s) whose requirements strongly fit your listing.
- Total users evaluated in this preview: 3
- Best current candidate score: 0.7568 (user_id=097a26be-7d8d-ed04-2cb7-3e7e3f2ba0f4)

Note: These are early estimates to help you gauge interest.
Actual showings are scheduled by our 

## Decision

In [3]:
import os, csv
from datetime import datetime
from agents.manage_showings_agent import daily_llm_showing_decisions
from langchain_community.callbacks import get_openai_callback
from config.llm_config import llm  # not used here, but you may keep it

def run_daily_decisions():
    """Run the daily showing-decision pass and dump one CSV row per result.

    Calls ``daily_llm_showing_decisions(top_k=10, show_progress=True)`` inside an
    OpenAI callback context, then writes the results to
    ``logs/showings_decisions_<YYYYmmdd-HHMMSS>.csv`` (the directory is created
    when missing). A result dict that contains an ``"error"`` key yields a row
    with only ``owner_id``, ``error_type`` and ``error`` filled in; every other
    result yields the decision / owner-profile columns and blank error columns.
    Prints a one-line summary and returns nothing.
    """
    # Keep CSV tidy: only these columns are written, in this order.
    columns = [
        "owner_id", "show", "num", "considered", "mean_top5",
        "sample", "owner_application_date", "owner_number_of_shows",
        "error_type", "error"
    ]

    def _as_row(result):
        # Failed owners: record just the identifying and error fields.
        if "error" in result:
            return {field: result.get(field) for field in ("owner_id", "error_type", "error")}

        decision = result.get("decision", {}) or {}
        owner = result.get("owner_profile", {}) or {}
        return {
            "owner_id": result.get("owner_id"),
            "show": decision.get("show"),
            "num": decision.get("num"),
            "considered": result.get("considered"),
            "mean_top5": result.get("mean_top5"),
            "sample": result.get("sample"),
            "owner_application_date": owner.get("application_date"),
            "owner_number_of_shows": owner.get("number_of_shows"),
        }

    # The callback handle is not inspected; the context is only entered and exited.
    with get_openai_callback():
        results = daily_llm_showing_decisions(top_k=10, show_progress=True)

    stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
    out_path = f"logs/showings_decisions_{stamp}.csv"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    with open(out_path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        for result in results:
            writer.writerow(_as_row(result))

    print(f"✅ Done. {len(results)} owners processed. CSV → {out_path}")




In [5]:
# Runs the decision pass; writes a timestamped CSV under logs/ and prints a summary line.
run_daily_decisions()

ManageShowings:   0%|          | 0/50 [00:00<?, ?owner/s]

✅ Done. 50 owners processed. CSV → logs/showings_decisions_20250814-114938.csv


## Display Qdrant Collections

In [1]:
# Schema/profile explorer for Qdrant collections
# Inspects payload keys, their types, null %s, sample values, and basic stats for numerics.
# Also reports ID format/length and (optionally) whether certain fields like `score` / `timestamp` exist.

from qdrant_client import QdrantClient
from qdrant_client.http import exceptions as qexc
from collections import Counter, defaultdict
from datetime import datetime
import pandas as pd
import math, json, random

# --- Qdrant config (reuse yours or import from utils.qdrant_connection) ---
# The cluster URL and API key are read from the environment instead of being
# hardcoded, so the secret is never saved or committed with the notebook.
# NOTE(review): the key that used to be embedded in this cell should be rotated.
import os

QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_KEY = os.environ.get("QDRANT_API_KEY")
if not QDRANT_URL or not QDRANT_KEY:
    raise RuntimeError(
        "Set the QDRANT_URL and QDRANT_API_KEY environment variables before running this cell."
    )

client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_KEY)

# Collections profiled by the "Run profiles" cell, in this order.
COLLECTIONS = [
    "owner_agent_listings",
    "user_agent_listings",
    "similarity_collection",
    "sampled_owner_agent_listings12-50",
    "owner_profiles",
    "user_profiles",
]

# Defaults for _scroll_some / profile_collection. Collections larger than
# MAX_SCAN are only partially profiled (the first MAX_SCAN points returned by scroll).
MAX_SCAN = 2000      # cap how many points to scan per collection (tweak as you wish)
SCROLL_BATCH = 1000  # Qdrant page size

def _scroll_some(collection_name, max_scan=MAX_SCAN, batch=SCROLL_BATCH):
    """Collect up to ``max_scan`` records (payload only, no vectors) from a collection.

    Pages through ``client.scroll`` requesting at most ``batch`` records per call
    and stops when the cap is reached or the server reports no further page.
    """
    collected = []
    cursor = None
    remaining = max_scan
    while remaining > 0:
        page, cursor = client.scroll(
            collection_name=collection_name,
            limit=min(batch, remaining),
            offset=cursor,
            with_payload=True,
            with_vectors=False,
        )
        collected += page
        remaining = max_scan - len(collected)
        if cursor is None:
            # No next-page offset: the collection is exhausted.
            break
    return collected

def _typename(v):
    if v is None: return "null"
    t = type(v)
    if t in (int, float, bool, str): return t.__name__
    if isinstance(v, list): return "list"
    if isinstance(v, dict): return "dict"
    return t.__name__

def _try_float(v):
    try:
        f = float(v)
        if math.isnan(f): return None
        return f
    except Exception:
        return None

def profile_collection(name: str, max_scan=MAX_SCAN):
    """Print and return a per-key profile of the payloads in one Qdrant collection.

    Scans up to ``max_scan`` points (via ``_scroll_some``) and, for every payload
    key seen, reports how many scanned points carry it, which value types occur,
    min/max of the values that convert to float, the most common values when
    there are at most 20 distinct ones, and up to five sample values. Also prints
    the exact point count, ID length statistics and three sample IDs.

    Args:
        name: Collection to profile.
        max_scan: Maximum number of points to scan.

    Returns:
        pandas.DataFrame with one row per payload key, sorted by coverage
        (descending) then key; it is also rendered with ``display``. When no
        payload keys were found the frame is empty but still has all columns.
    """
    columns = [
        "key", "coverage_%", "present_count", "null_count",
        "types", "numeric_stats", "small_cardinality", "samples",
    ]
    max_samples = 5          # sample values kept per key
    preview_len = 120        # characters shown per sample value
    track_limit = 2000       # only the first N occurrences of a key feed the value counts
    small_card_max = 20      # show the value-count hint only up to this many distinct values

    # Count first for context
    total = client.count(collection_name=name, exact=True).count
    print(f"\n=== {name} ===")
    print(f"Total points: {total}")

    # Scroll some
    records = _scroll_some(name, max_scan=max_scan)
    n = len(records)
    print(f"Scanned: {n} points (max_scan={max_scan})")

    # ID profile
    id_lengths = sorted(len(str(r.id)) for r in records)
    if id_lengths:
        shortest, longest = id_lengths[0], id_lengths[-1]
        median = id_lengths[len(id_lengths) // 2]
    else:
        shortest = longest = median = "-"
    print(f"ID lengths: min={shortest} max={longest} median={median}")
    print(f"Sample IDs: {[str(r.id) for r in records[:3]]}")

    key_counter = Counter()                 # key -> number of scanned points carrying it
    type_counter = defaultdict(Counter)     # key -> type label -> occurrences
    samples = defaultdict(list)             # key -> first few raw values
    numeric_stats = defaultdict(lambda: {"min": float("inf"), "max": float("-inf"), "count": 0})
    value_counts_small = defaultdict(Counter)  # key -> scalar value -> occurrences

    for r in records:
        p = r.payload or {}
        for k, v in p.items():
            key_counter[k] += 1
            type_counter[k][_typename(v)] += 1

            if len(samples[k]) < max_samples:
                samples[k].append(v)

            # numeric stats (anything float() accepts, including numeric strings)
            fv = _try_float(v)
            if fv is not None:
                s = numeric_stats[k]
                s["min"] = min(s["min"], fv)
                s["max"] = max(s["max"], fv)
                s["count"] += 1

            # track small cardinality (scalars only)
            if isinstance(v, (str, int, float, bool)) and key_counter[k] <= track_limit:
                value_counts_small[k][v] += 1

    rows = []
    for k in sorted(key_counter.keys()):
        present = key_counter[k]
        # Points where the key is absent. Explicit None values are NOT counted
        # here; they appear under `types` as "null".
        nulls = n - present
        coverage = present / n * 100 if n else 0.0
        types = ", ".join(f"{t}:{c}" for t, c in type_counter[k].most_common())

        stat = numeric_stats[k]
        num_summary = "-"
        if stat["count"] > 0:
            num_summary = f"[min={stat['min']:.4g}, max={stat['max']:.4g}, count={stat['count']}]"

        # readable sample preview (stringify and truncate)
        smpls = []
        for v in samples[k]:
            text = json.dumps(v) if isinstance(v, (dict, list)) else str(v)
            if len(text) > preview_len:
                text = text[:preview_len] + "…"
            smpls.append(text)

        # small-cardinality hint (only if ≤ 20 distinct)
        vc = value_counts_small[k]
        small_card_hint = "-"
        if 0 < len(vc) <= small_card_max:
            common = ", ".join(f"{str(val)[:30]}({cnt})" for val, cnt in vc.most_common(5))
            small_card_hint = f"{len(vc)} distinct | top: {common}"

        rows.append({
            "key": k,
            "coverage_%": round(coverage, 2),
            "present_count": present,
            "null_count": nulls,
            "types": types,
            "numeric_stats": num_summary,
            "small_cardinality": small_card_hint,
            "samples": " | ".join(smpls),
        })

    # Passing `columns` keeps the sort below valid even when `rows` is empty
    # (an empty collection would otherwise raise KeyError on 'coverage_%').
    df = pd.DataFrame(rows, columns=columns).sort_values(["coverage_%", "key"], ascending=[False, True])
    display(df)

    # Convenience: highlight common fields of interest if present
    for fld in ["owner_id", "user_id", "listing_id", "score", "timestamp", "state", "price", "bedrooms", "available_from"]:
        if fld in key_counter:
            print(f"• Field '{fld}' → coverage {round(key_counter[fld]/n*100,2)}%")

    return df


In [2]:
# --- Run profiles ---
# dfs maps collection name -> profile DataFrame returned by profile_collection.
# A collection that raises UnexpectedResponse is reported and left out of dfs;
# any other exception type still propagates.
dfs = {}
for col in COLLECTIONS:
    try:
        dfs[col] = profile_collection(col, max_scan=MAX_SCAN)
    except qexc.UnexpectedResponse as e:
        print(f"Error accessing {col}: {e}")


=== owner_agent_listings ===
Total points: 1012
Scanned: 1012 points (max_scan=2000)
ID lengths: min=36 max=36 median=36
Sample IDs: ['000f06c2-872e-96e6-dd72-c50e8e92923e', '008c41e9-ce04-57bd-e7df-d85c5844b56c', '009d35b9-ca73-94e2-0dff-b40bea252941']


Unnamed: 0,key,coverage_%,present_count,null_count,types,numeric_stats,small_cardinality,samples
0,available_from,100.0,1012,0,"null:604, str:408",-,"12 distinct | top: October(50), August(43), Ma...",None | None | May | None | None
1,bedrooms,100.0,1012,0,int:1012,"[min=0, max=6, count=1012]","7 distinct | top: 1(732), 2(175), 3(74), 4(23)...",1 | 1 | 1 | 2 | 2
2,listing_id,100.0,1012,0,str:1012,-,-,000f06c2872e96e6dd72c50e8e92923e | 008c41e9ce0...
3,picture_url,100.0,1012,0,"null:1002, str:10",-,6 distinct | top: https://example.com/apt-phot...,None | None | None | None | None
4,price,100.0,1012,0,"int:522, null:490","[min=800, max=4500, count=522]","17 distinct | top: 2500(79), 3000(75), 800(70)...",None | None | 800 | None | None
5,soft_attributes,100.0,1012,0,str:1012,-,-,"newly renovated, updated appliances, hardwood ..."
7,state,100.0,1012,0,list:1012,-,-,"[""Bed-Stuy"", ""Brooklyn"", ""New York City""] | [""..."
6,source,0.1,1,1011,str:1,-,1 distinct | top: user_query(1),user_query


• Field 'listing_id' → coverage 100.0%
• Field 'state' → coverage 100.0%
• Field 'price' → coverage 100.0%
• Field 'bedrooms' → coverage 100.0%
• Field 'available_from' → coverage 100.0%

=== user_agent_listings ===
Total points: 1007
Scanned: 1007 points (max_scan=2000)
ID lengths: min=36 max=36 median=36
Sample IDs: ['00235b8e-07f6-72f4-98d8-51a511449524', '0025d524-935f-6e94-0e53-ee262d848ce6', '0052d54b-39dc-bc06-adb5-38eb13efeac0']


Unnamed: 0,key,coverage_%,present_count,null_count,types,numeric_stats,small_cardinality,samples
0,available_from,100.0,1007,0,"null:567, str:440",-,"16 distinct | top: October(75), March(42), Aug...",December | October | None | None | January
1,bedrooms,100.0,1007,0,int:1007,"[min=1, max=6, count=1007]","6 distinct | top: 1(732), 2(171), 3(74), 4(23)...",1 | 1 | 1 | 1 | 1
2,listing_id,100.0,1007,0,str:1007,-,-,00235b8e07f672f498d851a511449524 | 0025d524935...
3,picture_url,100.0,1007,0,null:1007,-,-,None | None | None | None | None
4,price,100.0,1007,0,"int:603, null:404","[min=99, max=5000, count=603]",-,2400 | 3000 | None | None | 3000
5,soft_attributes,100.0,1007,0,str:1007,-,-,"near downtown, near Union Square | close to pu..."
7,state,100.0,1007,0,list:1007,-,-,"[""San Francisco""] | [""New York City""] | [""New ..."
6,source,99.9,1006,1,str:1006,-,1 distinct | top: user_query(1006),user_query | user_query | user_query | user_qu...


• Field 'listing_id' → coverage 100.0%
• Field 'state' → coverage 100.0%
• Field 'price' → coverage 100.0%
• Field 'bedrooms' → coverage 100.0%
• Field 'available_from' → coverage 100.0%

=== similarity_collection ===
Total points: 30077
Scanned: 2000 points (max_scan=2000)
ID lengths: min=36 max=36 median=36
Sample IDs: ['00002dfd-2619-8928-90fc-a885f9bf7e7c', '00018f3a-6b1b-7701-9322-70b25d96fae7', '00034545-0053-1cfc-2e2e-670af297d33d']


Unnamed: 0,key,coverage_%,present_count,null_count,types,numeric_stats,small_cardinality,samples
0,filter_used,100.0,2000,0,dict:2000,-,-,"{""collection"": ""user_agent_listings"", ""state"":..."
1,owner_id,100.0,2000,0,str:2000,-,-,d9e08424-bc77-656e-72f3-44c488406268 | 5fe3c73...
2,score,100.0,2000,0,float:2000,"[min=0.1694, max=0.9001, count=2000]",-,0.55240315 | 0.6045309 | 0.5314168 | 0.5395528...
3,timestamp,100.0,2000,0,float:2000,"[min=1.755e+09, max=1.755e+09, count=2000]",-,1754906840.3216486 | 1754906437.3464315 | 1754...
4,user_id,100.0,2000,0,str:2000,-,-,b4c4af9a-95cf-3b26-d0d3-6cb6285752e8 | 3e91e30...


• Field 'owner_id' → coverage 100.0%
• Field 'user_id' → coverage 100.0%
• Field 'score' → coverage 100.0%
• Field 'timestamp' → coverage 100.0%

=== sampled_owner_agent_listings12-50 ===
Total points: 50
Scanned: 50 points (max_scan=2000)
ID lengths: min=36 max=36 median=36
Sample IDs: ['000f06c2-872e-96e6-dd72-c50e8e92923e', '0c5d2803-431d-8623-38db-94261add7a9a', '13c3d2e5-c679-0673-267a-fd10f366f8f6']


Unnamed: 0,key,coverage_%,present_count,null_count,types,numeric_stats,small_cardinality,samples
0,available_from,100.0,50,0,"null:27, str:23",-,"10 distinct | top: March(4), September(4), May...",None | None | March | January | None
1,bedrooms,100.0,50,0,int:50,"[min=1, max=4, count=50]","4 distinct | top: 1(38), 2(8), 3(3), 4(1)",1 | 1 | 1 | 1 | 1
2,listing_id,100.0,50,0,str:50,-,-,000f06c2872e96e6dd72c50e8e92923e | 0c5d2803431...
3,picture_url,100.0,50,0,"null:48, str:2",-,2 distinct | top: https://example.com/image3.j...,None | None | None | None | None
4,price,100.0,50,0,"null:25, int:25","[min=800, max=3500, count=25]","10 distinct | top: 3000(5), 1600(5), 2400(4), ...",None | None | 3000 | 1600 | None
5,soft_attributes,100.0,50,0,str:50,-,-,"newly renovated, updated appliances, hardwood ..."
6,state,100.0,50,0,list:50,-,-,"[""Bed-Stuy"", ""Brooklyn"", ""New York City""] | [""..."


• Field 'listing_id' → coverage 100.0%
• Field 'state' → coverage 100.0%
• Field 'price' → coverage 100.0%
• Field 'bedrooms' → coverage 100.0%
• Field 'available_from' → coverage 100.0%

=== owner_profiles ===
Total points: 1012
Scanned: 1012 points (max_scan=2000)
ID lengths: min=36 max=36 median=36
Sample IDs: ['000f06c2-872e-96e6-dd72-c50e8e92923e', '008c41e9-ce04-57bd-e7df-d85c5844b56c', '009d35b9-ca73-94e2-0dff-b40bea252941']


Unnamed: 0,key,coverage_%,present_count,null_count,types,numeric_stats,small_cardinality,samples
0,application_date,100.0,1012,0,str:1012,-,-,2025-07-01 | 2025-07-19 | 2025-07-20 | 2025-07...
1,email,100.0,1012,0,str:1012,-,"10 distinct | top: user1@example.com(119), mhm...",user3@example.com | user5@example.com | mhmod....
2,full_name,100.0,1012,0,str:1012,-,-,Mia Rodriguez | Thomas Moore | Sara Moore | Av...
3,number_of_shows,100.0,1012,0,str:1012,"[min=0, max=0, count=1012]",1 distinct | top: 0(1012),0 | 0 | 0 | 0 | 0
4,phone,100.0,1012,0,str:1012,-,-,+1-555-4320 | +1-555-7108 | +1-555-1478 | +1-5...
5,profile_id,100.0,1012,0,str:1012,-,-,000f06c2-872e-96e6-dd72-c50e8e92923e | 008c41e...
6,type,100.0,1012,0,str:1012,-,1 distinct | top: owner(1012),owner | owner | owner | owner | owner



=== user_profiles ===
Total points: 1007
Scanned: 1007 points (max_scan=2000)
ID lengths: min=36 max=36 median=36
Sample IDs: ['00235b8e-07f6-72f4-98d8-51a511449524', '0025d524-935f-6e94-0e53-ee262d848ce6', '0052d54b-39dc-bc06-adb5-38eb13efeac0']


Unnamed: 0,key,coverage_%,present_count,null_count,types,numeric_stats,small_cardinality,samples
0,application_date,100.0,1007,0,str:1007,-,-,2025-07-18 | 2025-07-08 | 2025-07-12 | 2025-07...
1,email,100.0,1007,0,str:1007,-,"10 distinct | top: user2@example.com(131), use...",user7@example.com | user5@example.com | user7@...
2,full_name,100.0,1007,0,str:1007,-,-,Daniel Williams | Daniel Martin | John Brown |...
3,number_of_shows,100.0,1007,0,str:1007,"[min=0, max=0, count=1007]",1 distinct | top: 0(1007),0 | 0 | 0 | 0 | 0
4,phone,100.0,1007,0,str:1007,-,-,+1-555-4293 | +1-555-8036 | +1-555-2249 | +1-5...
5,profile_id,100.0,1007,0,str:1007,-,-,00235b8e-07f6-72f4-98d8-51a511449524 | 0025d52...
6,type,100.0,1007,0,str:1007,-,1 distinct | top: user(1007),user | user | user | user | user


## Next: starvation audit


In [11]:
# Starvation audit (read-only): suggest small relaxations to improve matches
from datetime import datetime, timezone, timedelta
from typing import List, Dict, Any, Optional, Tuple
from collections import defaultdict

from qdrant_client.models import Record
from qdrant_client.http import exceptions as qexc
from qdrant_client import QdrantClient

from utils.qdrant_connection import (
    client,
    OWNER_COLLECTION,
    USER_COLLECTION,
)

# If you have these constants in your utils, import them.
# Otherwise, set the collection names explicitly here:
try:
    from utils.qdrant_connection import OWNER_PROFILES_COLLECTION, USER_PROFILES_COLLECTION
except ImportError:
    # Fall back only when the module or the names are missing; any other
    # failure is a real error and should surface instead of being masked.
    OWNER_PROFILES_COLLECTION = "owner_profiles"
    USER_PROFILES_COLLECTION  = "user_profiles"

# -------- helpers --------

def _parse_date(s: Optional[str]) -> Optional[datetime]:
    if not s or not isinstance(s, str):
        return None
    try:
        return datetime.strptime(s[:10], "%Y-%m-%d").replace(tzinfo=timezone.utc)
    except Exception:
        return None

def _days_since(d: Optional[datetime]) -> Optional[int]:
    if not d:
        return None
    return (datetime.now(timezone.utc) - d).days

def _as_list(x):
    if x is None:
        return []
    return x if isinstance(x, list) else [x]

# def _availability_ok(user_avail: Optional[str], owner_avail: Optional[str]) -> bool:
#     # basic month-prefix equality if both present; otherwise OK
#     if not user_avail or not owner_avail:
#         return True
#     return str(user_avail).strip().lower()[:3] == str(owner_avail).strip().lower()[:3]

def _hard_match_user_to_owner(user_p: dict, owner_p: dict) -> bool:
    """
    Decide whether an owner listing satisfies a user's hard requirements.

    All three checks must pass; each passes automatically when either side
    leaves the attribute unspecified:
      * state    - the two state sets overlap (compared trimmed and lower-cased)
      * price    - listing price <= user budget
      * bedrooms - listing bedrooms >= bedrooms requested by the user
    A price/bedrooms value that cannot be converted to a number also counts as
    a pass. Availability is not part of this check.
    """
    def _states(payload: dict) -> set:
        return {str(item).strip().lower() for item in _as_list(payload.get("state"))}

    def _lenient(check) -> bool:
        # Unconvertible values are treated as "no constraint".
        try:
            return check()
        except Exception:
            return True

    def _price_fits() -> bool:
        listing_price, budget = owner_p.get("price"), user_p.get("price")
        return listing_price is None or budget is None or float(listing_price) <= float(budget)

    def _bedrooms_fit() -> bool:
        offered, wanted = owner_p.get("bedrooms"), user_p.get("bedrooms")
        return offered is None or wanted is None or int(offered) >= int(wanted)

    user_states, owner_states = _states(user_p), _states(owner_p)
    if user_states and owner_states and not (user_states & owner_states):
        return False

    return bool(_lenient(_price_fits) and _lenient(_bedrooms_fit))

def _get_point(collection: str, pid: str, with_payload=True, with_vectors=True) -> Optional[Record]:
    """Fetch a single point by id from ``collection``.

    Returns the first record from ``client.retrieve``, or ``None`` when nothing
    is returned or the call raises (best-effort lookup; errors are swallowed).
    """
    try:
        found = client.retrieve(
            collection_name=collection,
            ids=[str(pid)],
            with_payload=with_payload,
            with_vectors=with_vectors,
        )
        first = found[0] if found else None
    except Exception:
        first = None
    return first

def _search_top(collection: str, vector, topn: int = 300) -> List[Record]:
    """Return up to ``topn`` nearest points to ``vector`` in ``collection``.

    Pure vector search with no server-side filter; hard attributes are
    post-filtered in Python by the callers. Returns ``[]`` when ``vector`` is
    ``None`` or when the request fails (best-effort: errors are swallowed, so
    a failed search is indistinguishable from "no candidates").
    """
    if vector is None:
        return []
    try:
        # `client.search` is deprecated in recent qdrant-client releases (it
        # emits a warning on every call); prefer `query_points` when available
        # and keep `search` only as a fallback for older clients.
        if hasattr(client, "query_points"):
            response = client.query_points(
                collection_name=collection,
                query=vector,
                limit=topn,
            )
            return response.points
        return client.search(
            collection_name=collection,
            query_vector=vector,
            limit=topn,
        )
    except Exception:
        return []

def _count_matches_user(user_payload: dict, owners: List[Record]) -> int:
    """Count the owner records whose payload passes the hard-attribute check
    against ``user_payload``; a missing payload is treated as an empty dict."""
    return sum(
        1
        for owner in owners
        if _hard_match_user_to_owner(user_payload, owner.payload or {})
    )

def _count_matches_owner(owner_payload: dict, users: List[Record]) -> int:
    """Count the user records whose payload is satisfied by ``owner_payload``.

    Reuses the user-to-owner rule with the arguments swapped into place, so the
    listing is the fixed side and each candidate user is the varying side.
    """
    return sum(
        1
        for user in users
        if _hard_match_user_to_owner(user.payload or {}, owner_payload)
    )

def _scroll_profiles(collection: str, page: int = 1000):
    """Lazily yield every record (payload only, no vectors) of ``collection``,
    fetching ``page`` records per ``client.scroll`` call until no next-page
    offset is returned."""
    cursor = None
    has_more = True
    while has_more:
        batch, cursor = client.scroll(
            collection_name=collection,
            limit=page,
            offset=cursor,
            with_payload=True,
            with_vectors=False,
        )
        yield from batch
        has_more = cursor is not None
# -------- main audit --------

def _iter_starved_listings(
    profiles_collection: str,
    listings_collection: str,
    days_threshold: int,
    shows_threshold: int,
    limit: Optional[int],
):
    """Yield ``(point_id, days, shows, payload, vector)`` for each starved profile
    that also has a point in ``listings_collection``; stops after ``limit`` items."""
    if limit is not None and limit <= 0:
        return
    produced = 0
    for prof in _scroll_profiles(profiles_collection):
        profile = prof.payload or {}
        days = _days_since(_parse_date(profile.get("application_date")))
        try:
            shows = int(profile.get("number_of_shows", 0))
        except (TypeError, ValueError):
            # Missing or malformed counter: treat as "never shown".
            shows = 0

        # Starved = applied at least `days_threshold` days ago and has had at
        # most `shows_threshold` shows. Profiles without a parseable date are skipped.
        if days is None or days < days_threshold or shows > shows_threshold:
            continue

        point_id = str(prof.id)
        rec = _get_point(listings_collection, point_id, with_payload=True, with_vectors=True)
        if not rec:
            continue

        yield point_id, days, shows, (rec.payload or {}), rec.vector
        produced += 1
        if limit is not None and produced >= limit:
            return


def _with_relaxed_field(payload: dict, field: str, adjust) -> dict:
    """Return a shallow copy of ``payload`` with ``field`` replaced by ``adjust(value)``;
    the copy is left unchanged when the field is missing/None or cannot be converted."""
    relaxed = dict(payload)
    try:
        if relaxed.get(field) is not None:
            relaxed[field] = adjust(relaxed[field])
    except (TypeError, ValueError):
        pass
    return relaxed


def audit_starved_users_and_owners(
    price_delta = 100.0,
    room_delta = 1,
    user_topn: int = 300,
    owner_topn: int = 300,
    days_threshold: int = 30,
    shows_threshold: int = 1,
    limit_users: Optional[int] = None,
    limit_owners: Optional[int] = None,
) -> Dict[str, List[Dict[str, Any]]]:
    """
    Finds starved users/owners and simulates small relaxations to see if matches improve.
    Read-only (no writes).

    A profile is "starved" when its application date is at least ``days_threshold``
    days old and its ``number_of_shows`` does not exceed ``shows_threshold``.
    For each starved entry the top-N vector neighbours from the opposite
    collection are fetched once, then hard-attribute matches are counted for the
    baseline payload and for each relaxed copy:
      * users:  bedrooms requirement lowered by ``room_delta`` (floored at 0),
                budget raised by ``price_delta``
      * owners: price lowered by ``price_delta`` (floored at 0)

    Args:
        price_delta: Amount added to a user's budget / subtracted from an owner's price.
        room_delta: Bedrooms subtracted from a user's requirement.
        user_topn: Number of owner candidates fetched per user.
        owner_topn: Number of user candidates fetched per owner.
        days_threshold: Minimum days since application to count as starved.
        shows_threshold: Maximum number of shows to count as starved.
        limit_users: Maximum starved users to evaluate (``None`` = all, ``0`` = none).
        limit_owners: Maximum starved owners to evaluate (``None`` = all, ``0`` = none).

    Returns {"users": [...], "owners": [...]} with one dict per evaluated entry;
    also prints the five largest gains for each relaxation.
    """
    results: Dict[str, List[Dict[str, Any]]] = {"users": [], "owners": []}

    # --- Users ---
    for user_id, days, shows, up, uvec in _iter_starved_listings(
        USER_PROFILES_COLLECTION, USER_COLLECTION, days_threshold, shows_threshold, limit_users
    ):
        # Search candidate owners once (vector-only), then post-filter
        owner_cands = _search_top(OWNER_COLLECTION, uvec, topn=user_topn)

        base_cnt = _count_matches_user(up, owner_cands)

        # Relaxation A: bedrooms need - room_delta
        cnt_bed = _count_matches_user(
            _with_relaxed_field(up, "bedrooms", lambda v: max(0, int(v) - room_delta)),
            owner_cands,
        )

        # Relaxation B: budget + price_delta
        cnt_price = _count_matches_user(
            _with_relaxed_field(up, "price", lambda v: float(v) + price_delta),
            owner_cands,
        )

        results["users"].append({
            "user_id": user_id,
            "days_since_application": days,
            "number_of_shows": shows,
            "baseline_matches": base_cnt,
            "relax_bedrooms_minus_matches": cnt_bed,
            "relax_budget_plus_matches": cnt_price,
            "delta_bedrooms": cnt_bed - base_cnt,
            "delta_budget": cnt_price - base_cnt,
        })

    # --- Owners ---
    for owner_id, days, shows, op, ovec in _iter_starved_listings(
        OWNER_PROFILES_COLLECTION, OWNER_COLLECTION, days_threshold, shows_threshold, limit_owners
    ):
        # Search candidate users once
        user_cands = _search_top(USER_COLLECTION, ovec, topn=owner_topn)

        base_cnt = _count_matches_owner(op, user_cands)

        # Relaxation A (owner): price - price_delta (becomes more affordable)
        cnt_price = _count_matches_owner(
            _with_relaxed_field(op, "price", lambda v: max(0.0, float(v) - price_delta)),
            user_cands,
        )

        results["owners"].append({
            "owner_id": owner_id,
            "days_since_application": days,
            "number_of_shows": shows,
            "baseline_matches": base_cnt,
            "relax_price_minus_matches": cnt_price,
            "delta_price": cnt_price - base_cnt,
        })

    # Pretty print short summary
    def _top_n(arr, key, n=5, desc=True):
        return sorted(arr, key=lambda x: x.get(key, 0), reverse=desc)[:n]

    print(f"\n=== Starved USERS: top potential gains (bedrooms -{room_delta}) ===")
    for r in _top_n(results["users"], "delta_bedrooms"):
        print(f"user={r['user_id']}  base={r['baseline_matches']}  +bed:{r['relax_bedrooms_minus_matches']}  Δ={r['delta_bedrooms']}  days={r['days_since_application']} shows={r['number_of_shows']}")

    print(f"\n=== Starved USERS: top potential gains (budget +{price_delta}) ===")
    for r in _top_n(results["users"], "delta_budget"):
        print(f"user={r['user_id']}  base={r['baseline_matches']}  +$:{r['relax_budget_plus_matches']}  Δ={r['delta_budget']}  days={r['days_since_application']} shows={r['number_of_shows']}")

    print(f"\n=== Starved OWNERS: top potential gains (price -{price_delta}) ===")
    for r in _top_n(results["owners"], "delta_price"):
        print(f"owner={r['owner_id']}  base={r['baseline_matches']}  -$:{r['relax_price_minus_matches']}  Δ={r['delta_price']}  days={r['days_since_application']} shows={r['number_of_shows']}")

    return results


In [12]:
# ---- Run it (you can tweak limits while testing) ----
# price_delta and room_delta are left at their defaults (100.0 and 1).
# NOTE(review): the stray `return client.search(` line in the recorded output
# looks like the tail of a warning raised from _search_top — presumably the
# qdrant-client deprecation warning for `search`; confirm.
results = audit_starved_users_and_owners(
    user_topn=300,
    owner_topn=300,
    days_threshold=30,
    shows_threshold=1,
    limit_users=50,   # e.g., 50 for quick test
    limit_owners=50,  # e.g., 50 for quick test
)

  return client.search(



=== Starved USERS: top potential gains (bedrooms -1) ===
user=1b3dafdd-c068-5a4a-a23c-9734244b4af8  base=66  +bed:249  Δ=183  days=43 shows=0
user=1e40f8a0-a69f-ab66-f61f-85fe66d3f641  base=73  +bed:218  Δ=145  days=31 shows=0
user=199c2b60-8897-f863-01b1-57bfa9824fcf  base=67  +bed:200  Δ=133  days=35 shows=0
user=031c6fe9-744d-3c94-196e-48997db0d8f2  base=36  +bed:127  Δ=91  days=35 shows=0
user=02d2cc3a-23b3-0361-776c-0480c369f2d6  base=22  +bed:62  Δ=40  days=31 shows=0

=== Starved USERS: top potential gains (budget +100.0) ===
user=0422535c-58ad-ac69-c236-1dec59b11dd1  base=187  +$:201  Δ=14  days=36 shows=0
user=1d106037-8b71-de31-685e-779958859bd1  base=57  +$:70  Δ=13  days=39 shows=0
user=0025d524-935f-6e94-0e53-ee262d848ce6  base=250  +$:250  Δ=0  days=37 shows=0
user=0052d54b-39dc-bc06-adb5-38eb13efeac0  base=209  +$:209  Δ=0  days=33 shows=0
user=00c74781-0965-898f-4d9b-81d17f64ee9e  base=13  +$:13  Δ=0  days=34 shows=0

=== Starved OWNERS: top potential gains (price -100