In [1]:
import re
import os
import joblib
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

In [2]:
# Root folder of the synthetic dataset, relative to this notebook.
DATA_PATH = "../data/nexkey_synthetic_dataset_v1"

# Candidate deals; filtered and scored further down in the notebook.
properties = pd.read_csv(f"{DATA_PATH}/properties.csv")

# Numeric property columns that form the property half of the model input.
PROPERTY_FEATURES = [
    "beds", "baths", "sqft",
    "purchase_price", "arv",
    "entry_fee", "estimated_monthly_payment",
]

# Query-side constraint names, listed in the same order as their
# PROPERTY_FEATURES counterparts.
QUERY_FEATURES = [
    "beds_min", "baths_min", "sqft_min",
    "purchase_price_max", "arv_min",
    "entry_fee_max", "monthly_payment_max",
]

class DealRanker(nn.Module):
    """Feed-forward scorer: input_dim -> 64 -> 32 -> 1, with ReLU between layers."""

    def __init__(self, input_dim):
        super().__init__()
        # Layer order is unchanged so the Sequential indices (and therefore
        # the state_dict keys net.0 / net.2 / net.4) stay the same.
        layers = [
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the network and squeeze dim 1 of the output."""
        scores = self.net(x)
        return scores.squeeze(1)

# Load trained model
model = DealRanker(input_dim=len(QUERY_FEATURES) + len(PROPERTY_FEATURES))
# map_location="cpu": scoring in this notebook runs on CPU tensors
# (torch.from_numpy / .numpy()), and this also lets a checkpoint that was
# saved on a GPU machine load on a CPU-only host.
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
model.load_state_dict(
    torch.load("../models/checkpoints/numeric_ranker.pt", map_location="cpu")
)
model.eval()  # switch the module to evaluation mode before scoring

# Load scaler trained in Step 4
# NOTE(review): joblib.load is pickle-based -- only load trusted files.
scaler = joblib.load("../models/checkpoints/numeric_scaler.joblib")
print("Loaded model + scaler ✅")

Loaded model + scaler ✅


In [3]:
def parse_user_prompt(prompt: str) -> dict:
    """
    Turn a free-text user message into numeric search filters.

    Every filter starts from a default and is overridden only when a matching
    phrase is found in the lower-cased prompt. Keyword-specific amounts
    (entry fee, monthly payment, ARV) are parsed first; a bare
    "under/max/<= AMOUNT" is treated as the purchase price only if that text
    was not already used by one of the specific filters.

    Parameters
    ----------
    prompt : str
        Free-text request, e.g. "3 beds, under 350k, entry under 20k".

    Returns
    -------
    dict
        Keys: beds_min, baths_min, sqft_min, purchase_price_max, arv_min,
        entry_fee_max, monthly_payment_max.
    """
    text = prompt.lower()

    # Defaults (reasonable investor defaults)
    q = {
        "beds_min": 2,
        "baths_min": 1.0,
        "sqft_min": 800,
        "purchase_price_max": 500000,
        "arv_min": 250000,
        "entry_fee_max": 40000,
        "monthly_payment_max": 3500,
    }

    # beds like "3 bed", "3 beds", "3+ bed"
    m = re.search(r"(\d+)\s*\+?\s*bed", text)
    if m:
        q["beds_min"] = int(m.group(1))

    # baths like "2 bath", "2.5 baths"
    m = re.search(r"(\d+(\.\d+)?)\s*\+?\s*bath", text)
    if m:
        q["baths_min"] = float(m.group(1))

    # sqft like "1500 sqft", "1500 sq ft", "1,500 sq. ft"
    m = re.search(r"(\d{1,3}(?:,\d{3})+|\d{3,})\s*sq\.?\s*ft", text)
    if m:
        q["sqft_min"] = int(m.group(1).replace(",", ""))

    # An amount must contain a digit, so stray punctuation after a keyword
    # (e.g. "3 beds max, 2 baths") can never be captured as a number.
    # Supports 350k, 1.2m, 350000, 350,000.
    money = r"((?:\d+(?:,\d{3})*(?:\.\d+)?|\.\d+)[km]?)"
    limit = r"(?:under|max|<=)"

    def parse_money(s):
        """Convert a captured amount such as "350k", "1.2m" or "350,000" to float."""
        s = s.replace(",", "").strip()
        if s.endswith("k"):
            return float(s[:-1]) * 1000
        if s.endswith("m"):
            return float(s[:-1]) * 1_000_000
        return float(s)

    # Character spans of the prompt already assigned to a filter, so the same
    # amount is never used for two different filters.
    claimed = []

    def find_unclaimed(pattern):
        """Return the first match of ``pattern`` that overlaps no claimed span."""
        for hit in re.finditer(pattern, text):
            start, end = hit.span()
            if all(end <= lo or start >= hi for lo, hi in claimed):
                claimed.append((start, end))
                return hit
        return None

    # entry fee like "entry under 20k", "entry fee max 20k", "down payment under 15k"
    m = find_unclaimed(
        r"(?:entry(?:\s*fee)?|down(?:\s*payment)?)\s*" + limit + r"?\s*\$?\s*" + money
    )
    if m:
        q["entry_fee_max"] = int(parse_money(m.group(1)))

    # monthly payment like "payment under 2500", "monthly payment max 2,500"
    m = find_unclaimed(
        r"(?:monthly(?:\s*payment)?|payment)\s*" + limit + r"?\s*\$?\s*" + money
    )
    if m:
        q["monthly_payment_max"] = int(parse_money(m.group(1)))

    # ARV like "arv at least 450k", "arv >= 400k"
    m = find_unclaimed(
        r"arv\s*(?:at least|>=|minimum|min)?\s*\$?\s*" + money
    )
    if m:
        q["arv_min"] = int(parse_money(m.group(1)))

    # purchase price like "under 350k", "max $400k", "<= 300000".
    # Parsed last so an "under X" that belongs to entry/payment is skipped.
    m = find_unclaimed(limit + r"\s*\$?\s*" + money)
    if m:
        q["purchase_price_max"] = int(parse_money(m.group(1)))

    return q

In [4]:
def recommend_from_prompt(prompt: str, top_k: int = 5):
    """
    Parse a free-text prompt, hard-filter the properties table, score the
    survivors with the trained ranker and return the best ``top_k``.

    Parameters
    ----------
    prompt : str
        Free-text user request, passed to ``parse_user_prompt``.
    top_k : int, default 5
        Maximum number of rows to return.

    Returns
    -------
    tuple
        ``(results, q)``. ``results`` is a DataFrame of at most ``top_k`` rows
        sorted by descending ``score``, or ``None`` when no property passes
        the hard filters. ``q`` is the parsed filter dict. A 2-tuple is
        returned in both cases so ``results, parsed = ...`` always unpacks.
    """
    q = parse_user_prompt(prompt)

    # --- hard filters (same logic as Step 4) ---
    # One combined boolean mask instead of seven successive frame copies.
    keep = (
        (properties["beds"] >= q["beds_min"])
        & (properties["baths"] >= q["baths_min"])
        & (properties["sqft"] >= q["sqft_min"])
        & (properties["purchase_price"] <= q["purchase_price_max"])
        & (properties["arv"] >= q["arv_min"])
        & (properties["entry_fee"] <= q["entry_fee_max"])
        & (properties["estimated_monthly_payment"] <= q["monthly_payment_max"])
    )
    filtered = properties[keep]

    if len(filtered) == 0:
        print("No deals match those hard filters. Try loosening price/entry/payment.")
        return None, q

    # --- build model input ---
    # Query values in QUERY_FEATURES order, followed by the property columns.
    query_vec = np.array([q[name] for name in QUERY_FEATURES], dtype=np.float32)

    property_mat = (
        filtered[PROPERTY_FEATURES]
        .apply(pd.to_numeric, errors="coerce")
        .astype(np.float32)
        .values
    )

    # Same query repeated once per candidate row.
    query_mat = np.tile(query_vec, (len(filtered), 1))
    X = np.hstack([query_mat, property_mat]).astype(np.float32)

    # --- scale exactly like training ---
    X_scaled = scaler.transform(X).astype(np.float32)
    X_t = torch.from_numpy(X_scaled)

    # --- score ---
    with torch.no_grad():
        scores = model(X_t).numpy()

    out = (
        filtered.assign(score=scores)
        .sort_values("score", ascending=False)
        .head(top_k)
    )

    # nice display
    cols = ["deal_type","city","state","beds","baths","sqft","purchase_price","arv","entry_fee","estimated_monthly_payment","score"]
    return out[cols], q

In [5]:
user_prompt = "Looking for 3 beds, under 350k, entry under 20k, payment under 2500"

# recommend_from_prompt can return a bare None when nothing passes the hard
# filters; guard the unpacking so this cell does not raise in that case.
outcome = recommend_from_prompt(user_prompt, top_k=5)
if outcome is None:
    results, parsed = None, parse_user_prompt(user_prompt)
else:
    results, parsed = outcome

print("USER PROMPT:", user_prompt)
print("PARSED FILTERS:", parsed)
results

USER PROMPT: Looking for 3 beds, under 350k, entry under 20k, payment under 2500
PARSED FILTERS: {'beds_min': 3, 'baths_min': 1.0, 'sqft_min': 800, 'purchase_price_max': 350000, 'arv_min': 250000, 'entry_fee_max': 20000, 'monthly_payment_max': 2500}


Unnamed: 0,deal_type,city,state,beds,baths,sqft,purchase_price,arv,entry_fee,estimated_monthly_payment,score
6153,Hybrid,Charlotte,NC,3,2.0,1785,168730.0,290192.0,5674.0,1191.05,1.829119
1057,Subto,Riverton,LA,3,3.5,1948,175373.0,303625.0,6847.0,1346.37,1.816079
2997,DSCR Carryback,Springfield,OK,3,3.0,1708,148324.0,264185.0,8025.0,1001.78,1.814215
4568,Subto,Springfield,UT,4,3.0,1555,243398.0,426858.0,7966.0,1729.83,1.801673
12395,Land Deal,Fairview,OR,3,3.0,2305,242582.0,441058.0,8485.0,1789.8,1.800506
