In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

In [2]:
import joblib

In [3]:
DATA_PATH = "../data/nexkey_synthetic_dataset_v1"

queries = pd.read_csv(f"{DATA_PATH}/queries.csv")
properties = pd.read_csv(f"{DATA_PATH}/properties.csv")
interactions = pd.read_csv(f"{DATA_PATH}/interactions.csv")

In [4]:
class DealRanker(nn.Module):
    def __init__(self, input_dim):
        super().__init__()

        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.net(x).squeeze(1)

In [5]:
FEATURE_DIM = 14  # 7 query + 7 property features

model = DealRanker(FEATURE_DIM)
model.load_state_dict(
    torch.load("../models/checkpoints/numeric_ranker.pt")
)

model.eval()  # important: inference mode

DealRanker(
  (net): Sequential(
    (0): Linear(in_features=14, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [6]:
scaler = joblib.load("../models/checkpoints/numeric_scaler.joblib")
print("Scaler loaded!")

Scaler loaded!


In [7]:
QUERY_FEATURES = [
    "beds_min",
    "baths_min",
    "sqft_min",
    "purchase_price_max",
    "arv_min",
    "entry_fee_max",
    "monthly_payment_max",
]

PROPERTY_FEATURES = [
    "beds",
    "baths",
    "sqft",
    "purchase_price",
    "arv",
    "entry_fee",
    "estimated_monthly_payment",
]

In [8]:
# Pick a random query
query_row = queries.sample(1).iloc[0]

print("USER QUERY:")
print(query_row["query_text"])

USER QUERY:
Show me hybrid deals in TX, UT with at least 1+ beds and 1.5+ baths under $200,902.


In [9]:
# --- Hard filters: remove clearly impossible deals before ML scoring ---
filtered = properties.copy()

filtered = filtered[filtered["beds"] >= query_row["beds_min"]]
filtered = filtered[filtered["baths"] >= query_row["baths_min"]]
filtered = filtered[filtered["sqft"] >= query_row["sqft_min"]]

filtered = filtered[filtered["purchase_price"] <= query_row["purchase_price_max"]]
filtered = filtered[filtered["arv"] >= query_row["arv_min"]]

filtered = filtered[filtered["entry_fee"] <= query_row["entry_fee_max"]]
filtered = filtered[filtered["estimated_monthly_payment"] <= query_row["monthly_payment_max"]]

print("Properties before filter:", len(filtered))
print("Properties after filter:", len(filtered))

Properties before filter: 6
Properties after filter: 6


In [10]:
# --- SAFETY: force numeric types, and handle missing values ---

# 1) Make sure query values are numeric float32
query_vec = (
    pd.to_numeric(query_row[QUERY_FEATURES], errors="coerce")
    .astype(np.float32)
    .values
)

# 2) Make sure property features are numeric float32
property_mat = (
    filtered[PROPERTY_FEATURES]
    .apply(pd.to_numeric, errors="coerce")
    .astype(np.float32)
    .values
)

# 3) Replace NaNs if any (important for torch)
query_vec = np.nan_to_num(query_vec, nan=0.0)
property_mat = np.nan_to_num(property_mat, nan=0.0)

# 4) Repeat query vector for each property row
query_mat = np.repeat(query_vec.reshape(1, -1), len(filtered), axis=0).astype(np.float32)

# 5) Combine into model input matrix
X = np.hstack([query_mat, property_mat]).astype(np.float32)

# 6) Convert to torch tensor
X = torch.from_numpy(X)

In [11]:
print("X dtype:", X.dtype)
print("X shape:", X.shape)
print("Any NaNs?", torch.isnan(X).any().item())

X dtype: torch.float32
X shape: torch.Size([6, 14])
Any NaNs? False


In [12]:
# Convert tensor -> numpy for scaler -> back to torch
X_np = X.numpy()
X_scaled = scaler.transform(X_np).astype(np.float32)
X = torch.from_numpy(X_scaled)

In [13]:
with torch.no_grad():
    scores = model(X)

In [14]:
filtered = filtered.copy()
filtered["score"] = scores.numpy()

top_5 = filtered.sort_values("score", ascending=False).head(5)

In [15]:
top_5[[
    "deal_type",
    "city",
    "state",
    "beds",
    "baths",
    "purchase_price",
    "arv",
    "entry_fee",
    "estimated_monthly_payment",
    "score"
]]

Unnamed: 0,deal_type,city,state,beds,baths,purchase_price,arv,entry_fee,estimated_monthly_payment,score
8801,DSCR Carryback,Franklin,AL,3,2.0,145065.0,246200.0,6312.0,1041.08,2.207043
6153,Hybrid,Charlotte,NC,3,2.0,168730.0,290192.0,5674.0,1191.05,2.196916
1057,Subto,Riverton,LA,3,3.5,175373.0,303625.0,6847.0,1346.37,2.180548
14536,DSCR Carryback,Knoxville,TN,4,3.0,176660.0,307933.0,6181.0,1309.9,2.17999
1198,Subto,Franklin,MI,5,4.0,194122.0,339742.0,7559.0,1307.48,2.080359
