In [39]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_percentage_error
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

In [40]:
# Load the raw house-price dataset (the file name indicates it contains missing values).
DATASET_PATH = "../Dataset/house_prices_with_missing.csv"
df = pd.read_csv(DATASET_PATH)

In [41]:
# Drop every row that has at least one missing value.
# The explicit .copy() makes the result an independent frame: on pandas
# versions without copy-on-write, the object returned by dropna() is flagged
# as a possible slice, and the later `df[col] = ...` assignments would emit
# SettingWithCopyWarning.
df = df.dropna().copy()

In [42]:
# Display the DataFrame as it stands after rows with missing values were dropped.
df

Unnamed: 0,ID,Status,Gender,Married,Education,Self_Employed,Area,Coapplicant,Dependents,Income,Loan_Amount,Property_Age,Bedrooms,Bathrooms,Area_SqFt,Price
0,1,Y,Female,Yes,Not Graduate,No,Urban,No,1,67034.0,200940.0,11,1.0,2.0,1794.0,913919
3,4,Y,Male,Yes,Graduate,No,Rural,No,0,40871.0,294864.0,42,5.0,3.0,1395.0,844871
5,6,Y,Male,No,Graduate,No,Semiurban,Yes,0,34151.0,251176.0,35,1.0,3.0,1658.0,793236
6,7,Y,Female,No,Graduate,No,Semiurban,No,1,68346.0,208764.0,14,4.0,2.0,1244.0,922017
7,8,N,Male,Yes,Graduate,No,Semiurban,No,0,46117.0,133163.0,14,5.0,3.0,2588.0,1106206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
488,489,N,Male,No,Graduate,Yes,Rural,No,2,48327.0,226881.0,19,5.0,2.0,834.0,722523
491,492,Y,Male,No,Not Graduate,Yes,Urban,No,2,43795.0,136048.0,42,5.0,3.0,4091.0,1382792
493,494,Y,Female,No,Graduate,No,Urban,No,0,31172.0,158285.0,9,5.0,1.0,3500.0,1320534
494,495,Y,Female,No,Graduate,No,Semiurban,No,0,48085.0,309204.0,17,1.0,1.0,2358.0,946032


In [43]:
# Integer codes for every categorical column, keyed by column name.
# NOTE(review): 'Area' looks nominal; the codes 1/2/3 impose an ordering on the
# distance computation used by KNN. Confirm this is intended (vs. one-hot).
CATEGORY_CODES = {
    'Status': {'Y': 1, 'N': 0},
    'Gender': {'Male': 1, 'Female': 0},
    'Married': {'Yes': 1, 'No': 0},
    'Education': {'Graduate': 1, 'Not Graduate': 0},
    'Self_Employed': {'Yes': 1, 'No': 0},
    'Area': {'Urban': 1, 'Semiurban': 2, 'Rural': 3},
    'Coapplicant': {'Yes': 1, 'No': 0},
    'Dependents': {'0': 0, '1': 1, '2': 2, '3+': 3},
}

# Work on an explicit copy so the column assignments below never write into
# a frame that pandas has flagged as a slice of another one.
df = df.copy()

for column, codes in CATEGORY_CODES.items():
    # A numeric column is already encoded (e.g. this cell was run before).
    # Mapping it again would replace every value with NaN, so leave it alone.
    if pd.api.types.is_numeric_dtype(df[column]):
        continue

    encoded = df[column].map(codes)

    # .map() silently yields NaN for labels missing from the table; report
    # them here instead of failing later with an opaque NaN error.
    unknown_labels = df.loc[encoded.isna() & df[column].notna(), column].unique()
    if len(unknown_labels) > 0:
        raise ValueError(
            f"Unexpected values in column {column!r}: {list(unknown_labels)}"
        )

    df[column] = encoded

In [44]:
# Define the target and the feature matrix.
# Target: the house price column.
y = df['Price']
# Features: every column except 'ID' and the target itself.
X = df.drop(['ID', 'Price'], axis=1)

In [45]:
# Split the data into train/test sets (80% train, 20% test); the fixed
# random_state keeps the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [46]:
# Standardize the features so they share a common scale. The scaler is fitted
# on the training split only and then applied to both splits.
# NOTE(review): X_train / X_test are overwritten with their scaled versions, so
# running this cell a second time would fit the scaler on already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [47]:
# Build and train the KNN regressor: 5 neighbours, each weighted by distance.
knn_params = {'n_neighbors': 5, 'weights': 'distance'}
knr = KNeighborsRegressor(**knn_params)
knr.fit(X_train, y_train)

In [48]:
# Predict prices for the held-out test features.
y_pred = knr.predict(X=X_test)

In [49]:
# Compute MAPE (Mean Absolute Percentage Error) on the test split.
mape = mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred)

# Express the result as a percentage.
# NOTE: the printed "Accuracy" is simply 100 - MAPE (in percent); it is a
# regression error complement, not a classification accuracy.
mape_percent = mape * 100
accuracy_percent = 100 - mape_percent

print(f"Accuracy: {accuracy_percent:.2f}%")


Accuracy: 87.30%


In [50]:
# Persist the trained KNN regressor to disk.
with open('modelKnr.pkl', mode='wb') as model_file:
    pickle.dump(knr, model_file)

In [51]:
# Persist the fitted scaler alongside the model so that new inputs can be
# transformed with the same scaler object before prediction.
with open("scalerKNR.pkl", mode="wb") as scaler_file:
    pickle.dump(scaler, scaler_file)