In [1]:
# Basic data analysis libraries
import pandas as pd
import numpy as np

# Make tables display nicely in Jupyter
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 200)

In [2]:
# Path to your dataset folder (relative to notebooks/)
DATA_PATH = "../data/nexkey_synthetic_dataset_v1"

# Load CSV files
queries = pd.read_csv(f"{DATA_PATH}/queries.csv")
properties = pd.read_csv(f"{DATA_PATH}/properties.csv")
interactions = pd.read_csv(f"{DATA_PATH}/interactions.csv")

# Quick sanity check
print("Queries:", queries.shape)
print("Properties:", properties.shape)
print("Interactions:", interactions.shape)

Queries: (30000, 16)
Properties: (15000, 27)
Interactions: (480000, 4)


In [3]:
# Show 5 example user queries
queries.head()

Unnamed: 0,query_id,query_text,states,deal_type_pref,exit_strategy_pref,property_type_pref,beds_min,baths_min,sqft_min,year_built_min,condition_pref,occupancy_pref,purchase_price_max,arv_min,entry_fee_max,monthly_payment_max
0,1,Any seller finance or Co-Living deals in IL? E...,IL,Seller Finance,Co-Living,Manufactured,1,1.5,801,1967,Any,Tenant-Occupied,181969.0,163774.0,14785.0,2931.0
1,2,"Find me cash opportunities in TN with 1+ beds,...",TN,Cash,Any,Townhouse,1,1.5,878,1931,Heavy Rehab,Vacant,221361.0,303820.0,29468.0,1198.0
2,3,"Show me hybrid deals in MO, SC with at least 1...",MO|SC,Hybrid,LTR,Any,1,1.5,839,1995,Turnkey,Any,168685.0,219890.0,17730.0,800.0
3,4,"Show me hybrid deals in CA, AL with at least 3...",CA|AL,Hybrid,Fix & Flip,Single Family,3,1.0,788,1946,Light Rehab,Tenant-Occupied,325748.0,471854.0,6544.0,1569.0
4,5,"Need Multi-Family in CO, OR (turnkey condition...",CO|OR,Cash,Any,Multi-Family,1,1.5,1292,1938,Turnkey,Any,209787.0,194441.0,26328.0,2055.0


In [4]:
# Show 5 example properties (deals)
properties.head()

Unnamed: 0,property_id,deal_type,exit_strategy_hint,property_type,address,city,state,zip,county,beds,baths,sqft,year_built,condition,occupancy,purchase_price,arv,est_repairs,entry_fee,interest_rate,term_months,estimated_monthly_payment,mao_70_rule,days_on_market,latitude,longitude,source
0,1,Subto,Fix & Flip,Single Family,6952 Magnolia Blvd,Raleigh,NC,61750,Wake,4,1.0,3150,1974,Medium Rehab,Vacant,795154.0,1076754.0,303434.0,11202.0,6.0,360,5551.01,450294.0,29,34.142587,-120.241628,synthetic
1,2,Hybrid,Wholesale,Single Family,864 Pine Ln,Sacramento,CA,17747,Sacramento,5,1.5,2599,1996,Medium Rehab,Vacant,344961.0,487065.0,72413.0,13474.0,6.67,240,2859.29,268533.0,14,46.819081,-116.409342,synthetic
2,3,Cash,STR,Condo,1129 Mason Blvd,Charleston,SC,25439,Charleston,4,2.5,3132,1988,Light Rehab,Vacant,582686.0,816415.0,111704.0,582686.0,0.0,0,0.0,459786.0,8,36.457628,-103.144248,synthetic
3,4,Subto,LTR,Manufactured,9512 Magnum Way,Greenville,AL,66045,County,4,2.0,1737,1995,Turnkey,Tenant-Occupied,226715.0,302119.0,5708.0,18215.0,5.56,180,1971.98,205775.0,19,33.888426,-97.8317,synthetic
4,5,Seller Finance,Wholesale,Single Family,1372 Sikes Cir,Fairview,AL,68829,County,3,4.0,1892,2020,Turnkey,Owner-Occupied,349695.0,570318.0,95140.0,12371.0,7.88,180,3629.96,304083.0,9,25.034245,-118.741046,synthetic


In [5]:
# Show example interactions
interactions.head(10)

Unnamed: 0,query_id,property_id,relevance,match_score
0,1,7401,3,5.6761
1,1,10231,3,5.3172
2,1,5068,2,5.1669
3,1,11268,2,5.1313
4,1,14528,2,4.8833
5,1,11416,1,4.3003
6,1,6894,1,4.2206
7,1,12111,1,3.7474
8,1,10993,0,3.6462
9,1,10183,0,3.6108


In [6]:
# Pick a random query
sample_query_id = interactions.sample(1)["query_id"].iloc[0]

# Get the query text
query_text = queries.loc[
    queries["query_id"] == sample_query_id, "query_text"
].iloc[0]

print("USER QUERY:")
print(query_text)
print("\n" + "="*80 + "\n")

# Get top 5 matching deals for that query
top_matches = (
    interactions[interactions["query_id"] == sample_query_id]
    .sort_values("relevance", ascending=False)
    .head(5)
)

# Merge with property info
top_matches = top_matches.merge(
    properties, on="property_id", how="left"
)

# Show important deal info
top_matches[[
    "relevance",
    "deal_type",
    "city",
    "state",
    "beds",
    "baths",
    "purchase_price",
    "arv",
    "entry_fee",
    "estimated_monthly_payment"
]]

USER QUERY:
Any cash or Fix & Flip deals in CA? Entry fee under $12,883 and purchase under $118,092.




Unnamed: 0,relevance,deal_type,city,state,beds,baths,purchase_price,arv,entry_fee,estimated_monthly_payment
0,3,Cash,Sacramento,CA,3,2.0,352133.0,510331.0,352133.0,0.0
1,3,Cash,Riverside,CA,3,2.0,214652.0,324175.0,214652.0,0.0
2,2,Hybrid,Riverside,CA,3,2.0,271514.0,431986.0,12307.0,1793.65
3,2,Cash,Los Angeles,CA,5,2.0,448129.0,515469.0,448129.0,0.0
4,2,Cash,Riverside,CA,2,1.5,481654.0,660021.0,481654.0,0.0
