In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import StackingRegressor
from sklearn.metrics import root_mean_squared_error

In [None]:
# Load the raw housing dataset from the CSV next to the notebook.
# pandas is already imported as `pd` in the imports cell at the top of the
# notebook, so it is not imported a second time here.
df = pd.read_csv('Housing.csv')
df

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,1820000,3000,2,1,1,yes,no,yes,no,no,2,no,unfurnished
541,1767150,2400,3,1,1,no,no,no,no,no,0,no,semi-furnished
542,1750000,3620,2,1,1,yes,no,no,no,no,0,no,unfurnished
543,1750000,2910,3,1,1,no,no,no,no,no,0,no,furnished


In [3]:
# Encode the categorical columns of `df` and split it into features (x) and
# target (y). The cell is written so that running it more than once on the
# same kernel gives the same result as running it once.
binary_cols = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
yes_no_codes = {'yes': 1, 'no': 0}

for col in binary_cols:
    # A column that is already numeric was encoded by a previous run of this
    # cell; mapping 0/1 through the yes/no table again would turn it into NaN.
    if pd.api.types.is_numeric_dtype(df[col]):
        continue
    encoded = df[col].map(yes_no_codes)
    # Series.map yields NaN for any label missing from the table; fail loudly
    # here instead of letting NaN leak into the models.
    if encoded.isna().any():
        raise ValueError(f"Column {col!r} contains values other than 'yes'/'no'")
    df[col] = encoded

# get_dummies removes 'furnishingstatus' from the frame, so only expand it
# while the original column is still present. drop_first=True omits the
# indicator column of the first category level.
if 'furnishingstatus' in df.columns:
    df = pd.get_dummies(df, columns=['furnishingstatus'], drop_first=True)

# Features are every column except the target; the target is the price.
x = df.drop('price', axis=1)
y = df['price']

In [4]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
# Show the shapes of the four resulting pieces as the cell output.
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

((436, 13), (109, 13), (436,), (109,))

In [5]:
# Fit the scaler on the training features only, then apply that same fitted
# transform to both the training and the test features.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [6]:
# Final estimator: a linear regression that combines the base models' outputs.
meta_model = LinearRegression()

# Base estimators as (name, estimator) pairs for the stacking ensemble.
base_models = [
    ('knn', KNeighborsRegressor(n_neighbors=5)),
    ('dt', DecisionTreeRegressor(random_state=42)),
]

# Stacking ensemble built from the base estimators and the final estimator.
stacking_model = StackingRegressor(estimators=base_models, final_estimator=meta_model)

In [7]:
# Regressors to compare, as (display name, estimator) pairs, in report order.
candidates = [
    ('KNN', KNeighborsRegressor(n_neighbors=5)),
    ('Decision Tree', DecisionTreeRegressor(random_state=42)),
    ('Stacking', stacking_model),
]

# Fit each candidate on the scaled training features and record its RMSE on
# the scaled test features, keyed by display name.
results = {}
for name, model in candidates:
    model.fit(x_train_scaled, y_train)
    y_pred = model.predict(x_test_scaled)
    results[name] = root_mean_squared_error(y_test, y_pred)

# Report one line per model, in insertion order.
for name, rmse in results.items():
    print(f"RMSE của {name}: {rmse:,.2f}")

RMSE của KNN: 1,398,656.73
RMSE của Decision Tree: 1,625,669.90
RMSE của Stacking: 1,331,034.37
