In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer,StandardScaler
from sklearn.metrics import r2_score,mean_absolute_error,root_mean_squared_error

In [2]:
# Load the dataset from the working directory.
df = pd.read_csv('Encoded_House.csv')

# Features: every column except the two price columns.
# Target: log_price, kept as an (n, 1) column vector.
x = df.drop(columns=['log_price', 'price']).values
y = df['log_price'].values.reshape(-1, 1)

# ====== Train/Test Split ======
# Hold out 20% of the rows; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# ====== Standardization ======
# Fit the scaler on the training rows only, then apply those same statistics
# to both splits so no test-set information leaks into training.
scaler_x = StandardScaler().fit(x_train)
x_train = scaler_x.transform(x_train)
x_test = scaler_x.transform(x_test)

#### SVM

In [9]:
from sklearn.svm import SVR

# Support vector regressor with scikit-learn's default settings.
# fit() returns the estimator itself, so construction and training are chained.
# The target is flattened to 1-D because y_train is an (n, 1) column vector.
sv = SVR().fit(x_train, y_train.ravel())
y_pred_svm = sv.predict(x_test)

#### Decision Tree

In [10]:
from sklearn.tree import DecisionTreeRegressor

# Decision tree regressor with default hyperparameters.
# random_state is pinned because the tree randomly permutes candidate features
# at each split; without a seed, ties between equally good splits can resolve
# differently from run to run and the reported scores drift.
dt = DecisionTreeRegressor(random_state=42)
# Flatten the (n, 1) target to 1-D, as the SVR and Random Forest cells do.
dt.fit(x_train, y_train.ravel())
y_pred_dt = dt.predict(x_test)

#### Random Forest

In [12]:
from sklearn.ensemble import RandomForestRegressor

# Random forest of 100 trees. Each tree is grown on a random bootstrap sample
# of the training rows, so the seed is fixed to make the fitted forest (and
# therefore the reported metrics) repeatable across kernel restarts.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
# The target is flattened to 1-D because y_train is an (n, 1) column vector.
rf.fit(x_train, y_train.ravel())
y_pred_rf = rf.predict(x_test)

#### XGBoost

In [13]:
from xgboost import XGBRegressor

# Gradient-boosted trees: 100 boosting rounds of depth-5 trees with a 0.1
# learning rate. Each round sees 80% of the rows (subsample) and each tree
# 80% of the columns (colsample_bytree); random_state makes that sampling
# repeatable.
xgb = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
)
# Pass the target as a 1-D array, matching the SVR and Random Forest cells,
# rather than the (n, 1) column vector y_train is stored as.
xgb.fit(x_train, y_train.ravel())
y_pred_xgb = xgb.predict(x_test)

#### Evaluation

In [14]:
# ====== Evaluation ======
def evaluate(y_true, y_pred, label="Model"):
    """Print the R² score and RMSE of a set of predictions on one line.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, one per entry of ``y_true``.
    label : str, default "Model"
        Name shown at the start of the printed line.

    Returns
    -------
    None
        The metrics are only printed; both are on the same scale as the
        values passed in.
    """
    r2, rmse = (
        r2_score(y_true, y_pred),
        root_mean_squared_error(y_true, y_pred),
    )
    print(f"{label} [ R²: {r2:.6f}, RMSE: {rmse:.5f} ]")

# Score every model against the same held-out targets, in a fixed order.
for model_label, model_pred in (
    ("SVR", y_pred_svm),
    ("Decision Tree", y_pred_dt),
    ("Random Forest", y_pred_rf),
    ("XGBoost", y_pred_xgb),
):
    evaluate(y_test, model_pred, model_label)

SVR [ R²: 0.625968, RMSE: 0.43844 ]
Decision Tree [ R²: 0.560552, RMSE: 0.47523 ]
Random Forest [ R²: 0.774021, RMSE: 0.34079 ]
XGBoost [ R²: 0.760819, RMSE: 0.35060 ]
