In [7]:
!pip install xgboost



In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBRegressor  
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error

df = pd.read_csv('D:/house_data.csv')
print(df.columns)

imputer = SimpleImputer(strategy='mean')  
df_imputed = pd.DataFrame(imputer.fit_transform(df.select_dtypes(include=['float64', 'int64'])))
df[df_imputed.columns] = df_imputed

df = df.drop(columns=['Date'])

encoder = LabelEncoder()
df['Postal Code'] = encoder.fit_transform(df['Postal Code'].astype(str))

X = df.drop('Price', axis=1)  
y = df['Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = XGBRegressor()
model.fit(X_train, y_train)

param_grid = {
    'max_depth': [3, 5],
    'learning_rate': [0.01, 0.1],
    'n_estimators': [100, 200]
}

grid_search = GridSearchCV(estimator=XGBRegressor(), param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_

y_pred_default = model.predict(X_test)
y_pred_tuned = best_model.predict(X_test)

mse_default = mean_squared_error(y_test, y_pred_default)
mse_tuned = mean_squared_error(y_test, y_pred_tuned)

print(f"Best Parameters: {grid_search.best_params_}")
print(f"MSE (Default Model): {mse_default}")
print(f"MSE (Tuned Model): {mse_tuned}")


Index(['id', 'Date', 'number of bedrooms', 'number of bathrooms',
       'living area', 'lot area', 'number of floors', 'waterfront present',
       'number of views', 'condition of the house', 'grade of the house',
       'Area of the house(excluding basement)', 'Area of the basement',
       'Built Year', 'Renovation Year', 'Postal Code', 'Lattitude',
       'Longitude', 'living_area_renov', 'lot_area_renov',
       'Number of schools nearby', 'Distance from the airport', 'Price'],
      dtype='object')
Best Parameters: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 200}
MSE (Default Model): 4431736909.993363
MSE (Tuned Model): 6293635066.762872
