In [1]:
import json, math, random
import pandas as pd

# CSV file read by the loading cell.
DATASET = "dataset_embeddings_compact.csv"

# Labels for the 12-dim baseline vector, in the same order the dimensions
# were built; a label's position in the list is its dimension index.
DIM_LABELS = [
    # dim 0
    "count_pois",
    # dims 1-5: *_distance entries
    "mean_distance", "min_distance", "max_distance",
    "median_distance", "std_distance",
    # dims 6-8: *_inverse_distance entries
    "mean_inverse_distance", "max_inverse_distance", "sum_inverse_distance",
    # dims 9-11: ratio within near (<= R1), mid (<= R2) and far (<= R3) radius
    "ratio_within_near_radius", "ratio_within_mid_radius", "ratio_within_far_radius",
]

# Embedding columns, one per POI class.
POI_KEYS = [
    "emb_sport_and_leisure",
    "emb_medical",
    "emb_education_prim",
    "emb_veterinary",
    "emb_food_and_drink_stores",
    "emb_arts_and_entertainment",
    "emb_food_and_drink",
    "emb_park_like",
    "emb_security",
    "emb_religion",
    "emb_education_sup",
]

# Embedding columns for Metro and Bus.
MB_KEYS = [
    "emb_metro",
    "emb_bus",
]

def parse_json_list(v):
    """Decode a cell value into a Python list, or None if that is not possible.

    Parameters
    ----------
    v : Any
        An already-decoded list, JSON text (str/bytes), or a missing marker
        (None or float NaN).

    Returns
    -------
    list or None
        ``v`` itself when it is already a list, the decoded list when ``v`` is
        JSON text for an array, and None for missing values, undecodable
        input, or JSON that decodes to anything other than a list.
    """
    if v is None:
        return None
    if isinstance(v, list):
        return v
    if isinstance(v, float) and math.isnan(v):
        return None
    try:
        parsed = json.loads(v)
    except (TypeError, ValueError, RecursionError):
        # TypeError: v is not str/bytes; ValueError: malformed JSON
        # (JSONDecodeError subclasses it); RecursionError: absurdly deep nesting.
        return None
    # json.loads happily returns scalars and dicts ("3", "NaN", '{"a": 1}');
    # callers expect a list or None, so reject everything else.
    return parsed if isinstance(parsed, list) else None

def print_vector(name, vec):
    """Pretty-print one vector with a label per dimension.

    Parameters
    ----------
    name : str
        Heading printed for the vector (the callers pass the column name).
    vec : sequence or None
        The values to print. None prints ``<name>: None``; a vector whose
        length differs from ``len(DIM_LABELS)`` is printed raw together with
        its length instead of being labelled.

    Notes
    -----
    Dimension 0 is shown as an int and every other dimension as a float with
    six decimals. A value that cannot be converted (e.g. None or text) is
    printed as-is rather than raising.
    """
    if vec is None:
        print(f"  {name}: None")
        return

    # Derive the expected size from the labels so the two cannot drift apart.
    expected = len(DIM_LABELS)
    if len(vec) != expected:
        print(f"  {name}: length={len(vec)} (expected {expected}) -> {vec}")
        return

    print(f"  {name}:")
    for i, (label, val) in enumerate(zip(DIM_LABELS, vec)):
        try:
            # Count as int; rest as float
            shown = f"{int(val)}" if i == 0 else f"{float(val):.6f}"
        except (TypeError, ValueError, OverflowError):
            # Unconvertible entry (None, NaN/inf count, text): show it verbatim.
            shown = f"{val}"
        print(f"    dim{i:02d}: {label:<25} = {shown}")


In [2]:
# Read the CSV and normalise `id` in a single chain: values that cannot be
# parsed as numbers become <NA> (errors="coerce") in a nullable Int64 column.
df = pd.read_csv(DATASET).assign(
    id=lambda frame: pd.to_numeric(frame["id"], errors="coerce").astype("Int64")
)
print("Rows:", df.shape[0], "Cols:", df.shape[1])
display(df.head(2))


Rows: 25215 Cols: 40


Unnamed: 0,id,monto,superficie_t,dormitorios,dormitorios_faltante,banos,banos_faltante,antiguedad,antiguedad_faltante,Or_N,...,emb_veterinary,emb_food_and_drink_stores,emb_arts_and_entertainment,emb_food_and_drink,emb_park_like,emb_security,emb_religion,emb_education_sup,emb_metro,emb_bus
0,1548097259,11200,92.0,2,0,2,0,1,0,0,...,"[1.0, 579.3739013671875, 579.3739013671875, 57...","[12.0, 528.0105369885763, 26.096040725708008, ...","[25.0, 1581.808360595703, 255.53167724609375, ...","[25.0, 419.6531335449219, 172.73153686523438, ...","[6.0, 493.31689453125, 265.3938903808594, 599....","[7.0, 1969.0926862444196, 1354.3077392578125, ...","[3.0, 504.3206278483073, 455.2417297363281, 58...","[7.0, 1452.850838797433, 781.4993896484375, 18...",,"[8.0, 252.7959213256836, 136.25413513183594, 3..."
1,2732119230,17490,275.0,3,0,4,0,2,0,0,...,"[1.0, 761.529296875, 761.529296875, 761.529296...","[10.0, 491.62200317382815, 150.35333251953125,...","[24.0, 1546.2690048217773, 284.37957763671875,...","[24.0, 340.82357088724774, 109.64118957519531,...","[3.0, 441.74968973795575, 251.0115203857422, 5...","[7.0, 2130.9099644252233, 1559.4659423828125, ...","[1.0, 297.7435607910156, 297.7435607910156, 29...","[7.0, 1511.268345424107, 979.7745971679688, 18...",,"[11.0, 270.19501426003194, 152.1919708251953, ..."


In [3]:
# Pick a random apartment. No random_state is given, so the pick is not
# reproducible across runs.
row = df.sample(1).iloc[0]
# `id` was coerced with errors="coerce" when the frame was loaded, so it may be
# <NA>; int(<NA>) raises, hence the guard.
aid = int(row["id"]) if pd.notna(row["id"]) else None
print(f"Apartment ID: {aid}\n")

# Print each POI class, then a blank line, then Metro & Bus.
for group_no, keys in enumerate((POI_KEYS, MB_KEYS)):
    if group_no:
        print()
    for col in keys:
        if col in df.columns:
            vec = parse_json_list(row.get(col))
            print_vector(col, vec)
        else:
            print(f"  {col}: (column missing)")


Apartment ID: 1592118339

  emb_sport_and_leisure:
    dim00: count_pois                = 152
    dim01: mean_distance             = 656.081117
    dim02: min_distance              = 123.261864
    dim03: max_distance              = 2302.822266
    dim04: median_distance           = 645.479095
    dim05: std_distance              = 325.021337
    dim06: mean_inverse_distance     = 0.002081
    dim07: max_inverse_distance      = 0.008113
    dim08: sum_inverse_distance      = 0.316258
    dim09: ratio_within_near_radius  = 0.434211
    dim10: ratio_within_mid_radius   = 0.980263
    dim11: ratio_within_far_radius   = 1.000000
  emb_medical:
    dim00: count_pois                = 29
    dim01: mean_distance             = 1166.072811
    dim02: min_distance              = 314.228333
    dim03: max_distance              = 2324.206787
    dim04: median_distance           = 920.794678
    dim05: std_distance              = 719.595367
    dim06: mean_inverse_distance     = 0.001312
    dim07:

In [None]:
APT_ID = None  # put an integer id here to force a specific apt (e.g., 1540798899)

if APT_ID is not None:
    # `id` is a nullable Int64 column, so the comparison can yield <NA>;
    # treat those rows as non-matching before using the mask.
    hit = df.loc[(df["id"] == APT_ID).fillna(False)]
    if hit.empty:
        print(f"id {APT_ID} not found")
    else:
        # If several rows share the id, only the first one is shown.
        row = hit.iloc[0]
        aid = int(row["id"])
        print(f"Apartment ID: {aid}\n")
        # POI classes first, then a blank line, then Metro & Bus. A column
        # absent from the frame is reported as such instead of as "None".
        for group_no, keys in enumerate((POI_KEYS, MB_KEYS)):
            if group_no:
                print()
            for col in keys:
                if col in df.columns:
                    vec = parse_json_list(row.get(col))
                    print_vector(col, vec)
                else:
                    print(f"  {col}: (column missing)")
