In [2]:
import matplotlib
import sklearn
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import tensorflow as tf
import os


HDB_model

In [3]:
import xgboost as xgb
import pandas as pd
import numpy as np
import re
import json

def predict_hdb_price(house_info, model, X_columns_hdb):
    df_input = pd.DataFrame([house_info])

    storey_order = [f"{i:02d} TO {i+2:02d}" for i in range(1, 100, 3)]
    storey_map = {v: i for i, v in enumerate(storey_order)}
    df_input["storey_range"] = df_input["storey_range"].map(storey_map).fillna(-1).astype(int)

    df_input["month"] = pd.to_datetime(df_input["month"], format="%Y-%m", errors="coerce")
    df_input["year"] = df_input["month"].dt.year
    df_input["month_num"] = df_input["month"].dt.month
    df_input = df_input.drop(columns=["month"])

    def lease_to_months(x):
        if isinstance(x, str):
            years = int(re.search(r"(\d+)\s*year", x).group(1)) if re.search(r"(\d+)\s*year", x) else 0
            months = int(re.search(r"(\d+)\s*month", x).group(1)) if re.search(r"(\d+)\s*month", x) else 0
            return years * 12 + months
        return np.nan

    df_input["remaining_lease_months"] = df_input["remaining_lease"].apply(lease_to_months)
    df_input = df_input.drop(columns=["remaining_lease"])


    df_input = pd.get_dummies(df_input, columns=["flat_type"], drop_first=True)


    df_input = df_input.reindex(columns=X_columns_hdb, fill_value=0)

    y_pred = model.predict(df_input)[0]
    return y_pred


with open("hdb_features.json", "r") as f:
    X_columns_hdb = json.load(f)

model_hdb = xgb.XGBRegressor()
model_hdb.load_model("hdb_model.json")

house_info = {
    "month": "2021-11",
    "flat_type": "EXECUTIVE",
    "storey_range": "04 TO 06",
    "floor_area_sqm": 140,
    "lease_commence_date": 1997,
    "remaining_lease": "74 years 11 months",
    "postal": 650627
}

pred_price = predict_hdb_price(house_info, model_hdb, X_columns_hdb)
print(f"预测房价: ${pred_price:,.0f}")


预测房价: $685,930


Condo model

In [None]:
import xgboost as xgb
import pandas as pd
import numpy as np
from datetime import datetime
import pickle
import json
import category_encoders

def predict_price(house_info, model, te, X_columns, current_year=datetime.now().year):
    df_input = pd.DataFrame([house_info])

    df_input["building_age"] = current_year - df_input["year_completed"]
    df_input["is_year_missing"] = df_input["year_completed"].isna().astype(int)

    df_input["project_name_enc"] = te.transform(df_input[["project_name"]])

    df_input = pd.get_dummies(df_input, columns=["type_of_sale", "market_segment"], drop_first=True)

    df_input = df_input.reindex(columns=X_columns, fill_value=0)

    y_pred_log = model.predict(df_input)
    y_pred_psf = np.expm1(y_pred_log)[0]

    price = y_pred_psf * df_input["area_sqft"].iloc[0]
    return price

with open("target_encoder_condo.pkl", "rb") as f:
    te = pickle.load(f)

with open("condo_features.json", "r") as f:
    X_columns = json.load(f)


model_condo = xgb.XGBRegressor()
model_condo.load_model("condo_model.json")

house_info = {'project_name': 'NORMANTON PARK', 'area_sqft': 721.19, 'type_of_sale': 'Resale', 'tenure': np.int64(99), 'year_completed': np.float64(2018.0), 'postal_district': np.int64(19), 'market_segment': 'Outside Central Region', 'sale_year': 2077, 'sale_month': 12, 'floor_level_num': 13.0}

pred_price = predict_price(house_info, model_condo, te, X_columns)
print(f"预测总价: ${pred_price:,.2f}")


预测总价: $849,540.08
