In [None]:
import numpy as np
import pandas as pd

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import cross_val_score


In [None]:
# Read the training and test tables from CSV files in the working directory.
tr, ts = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

In [None]:
# Features: every training column except the target.
X = tr.drop(columns="SalePrice")
# Target: log1p of the sale price, so the model is fitted on a log scale.
y = np.log1p(tr["SalePrice"])

In [None]:
# Keep the test-set "Id" column; it becomes the "Id" column of the submission.
ids = ts.loc[:, "Id"]

In [None]:
# Stack the training features on top of the test rows. In this notebook the
# combined frame feeds the dtype-based column selection.
frames = [X, ts]
all_data = pd.concat(frames, axis=0)

In [None]:
# Split the feature columns into a numeric group and a non-numeric group for
# the preprocessing ColumnTransformer.
#
# - The "number" selector matches every numeric dtype, not only int64/float64.
# - The categorical group is the complement of the numeric one, so every column
#   lands in exactly one group; a column listed in neither group would be
#   dropped by ColumnTransformer (its default is remainder="drop").
# - "Id" is the row identifier written to the submission file, not a property
#   of the house, so it is removed from the model features. errors="ignore"
#   keeps this line working if the column is absent.
num_cols = all_data.select_dtypes(include="number").columns.drop("Id", errors="ignore")
cat_cols = all_data.select_dtypes(exclude="number").columns

In [None]:


# Numeric branch: replace missing values with the column median.
num_pipe = Pipeline(steps=[("imp", SimpleImputer(strategy="median"))])

# Categorical branch: replace missing values with the most frequent value,
# then one-hot encode. handle_unknown="ignore" keeps transform from raising on
# categories that were not present when the encoder was fitted.
cat_steps = [
    ("imp", SimpleImputer(strategy="most_frequent")),
    ("oh", OneHotEncoder(handle_unknown="ignore")),
]
cat_pipe = Pipeline(steps=cat_steps)

# Send each column group through its own branch.
prep = ColumnTransformer(
    transformers=[
        ("num", num_pipe, num_cols),
        ("cat", cat_pipe, cat_cols),
    ]
)

In [None]:

# Hyperparameters of the gradient-boosted tree regressor, collected in one
# place. random_state is fixed so repeated runs use the same seed.
gbr_params = {
    "n_estimators": 300,
    "learning_rate": 0.05,
    "max_depth": 4,
    "min_samples_split": 5,
    "min_samples_leaf": 2,
    "random_state": 42,
}
gbr = GradientBoostingRegressor(**gbr_params)


In [None]:

# Full model: preprocessing followed by the regressor, so the preprocessing is
# fitted together with the model on whatever data the pipeline is fitted on.
pipe = Pipeline(steps=[("prep", prep), ("gbr", gbr)])


In [None]:

# 5-fold cross-validated R² of the whole pipeline, measured on the
# log1p-transformed target.
r2 = cross_val_score(estimator=pipe, X=X, y=y, scoring="r2", cv=5)
print("R² scores:", r2)
print("Mean R²:", np.mean(r2))


In [None]:

# Fit the preprocessing and the regressor on the full training set.
pipe.fit(X, y=y)

In [None]:
# The pipeline was fitted on log1p(SalePrice), so its predictions are on that
# scale; expm1 maps them back to prices.
log_pred = pipe.predict(ts)
pred = np.expm1(log_pred)

In [None]:

# Build the two-column submission table (Id, SalePrice) and write it to disk
# without the row index.
sub = pd.DataFrame({"Id": ids}).assign(SalePrice=pred)
sub.to_csv("submission.csv", index=False)
print("submission.csv saved")


R² scores: [0.9117784  0.89109411 0.89872119 0.91325599 0.89703327]
Mean R²: 0.9023765926038998
submission.csv saved
