In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.svm import SVR

In [None]:
# Read the training table from the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='train.csv')

In [None]:
# Preview the first five rows.
data.head(n=5)

# EDA & cleaning

In [None]:
# Print the frame summary (column dtypes and non-null counts).
data.info()

In [None]:
# Display descriptive statistics for the loaded frame.
data.describe()

In [None]:
# Count nulls per column, then report only the columns that have any,
# largest count first.
missing_values = data.isna().sum()
columns_with_gaps = missing_values.loc[missing_values > 0]
print("Missing values:\n", columns_with_gaps.sort_values(ascending=False))

In [None]:
# Names of every column with a numeric dtype.
numeric_columns = data.select_dtypes(include=[np.number]).columns
numeric_features = list(numeric_columns)
print(f"Numeric features: {len(numeric_features)}")

In [None]:
# Sanity check: the target column must be among the numeric columns,
# because it is later read from the numeric-only frame.
'SalePrice' in numeric_features

In [None]:
# Work on an independent copy of the numeric columns so `data` stays untouched.
data_numeric = data.loc[:, numeric_features].copy()

In [None]:
# Replace missing values with each column's median. The medians are computed
# over every row of data_numeric, i.e. before any train/test split.
column_medians = data_numeric.median()
data_numeric = data_numeric.fillna(value=column_medians)

# split the data

In [None]:
# Feature matrix and target. X and y keep their original definitions so any
# other cell that uses them is unaffected.
X = data_numeric.drop("SalePrice", axis=1)
y = data_numeric["SalePrice"]

# Split the *un-imputed* feature columns rather than data_numeric: the latter
# was filled with medians computed over all rows, which would let held-out rows
# influence the values the model is trained on. For a fixed random_state and
# row count train_test_split picks the same rows, so the split is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    data[X.columns], y, test_size=0.2, random_state=42
)

# Learn the fill values from the training rows only, then apply them to both parts.
train_medians = X_train_raw.median()
X_train = X_train_raw.fillna(train_medians)
X_test = X_test_raw.fillna(train_medians)

print("Training Set:", X_train.shape)
print("Test Set:", X_test.shape)

# build the stacked regressor

In [None]:
# Base learners; StackingRegressor feeds their cross-validated predictions
# to the final estimator.
base_models = [
    ('rf', RandomForestRegressor(n_estimators=50, random_state=42)),
    ('gbr', GradientBoostingRegressor(n_estimators=50, random_state=42)),
    # SVR's default C=1.0 and epsilon=0.1 are expressed in target units, so the
    # fit depends on the target's scale. Standardising y for this learner only
    # removes that dependence; predictions are mapped back to the original
    # units by TransformedTargetRegressor.
    ('svr', TransformedTargetRegressor(regressor=SVR(), transformer=StandardScaler())),
]

# Final estimator that combines the base learners' predictions.
meta_model = LinearRegression()

stacked_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5,
    n_jobs=-1
)

model_pipeline = Pipeline([
    # Fill values are learned in fit(), so predict() also accepts rows that
    # still contain missing values.
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('stacked_regressor', stacked_regressor),
])

# train the model and evaluate metrics

In [None]:
# Fit every pipeline step on the training split.
model_pipeline.fit(X=X_train, y=y_train)

In [None]:
# Predictions of the fitted pipeline for the held-out test features.
y_pred = model_pipeline.predict(X_test)

# metrics

In [None]:
# Mean absolute error: average magnitude of the residuals.
abs_errors = np.abs(y_test - y_pred)
mae = abs_errors.mean()
print(f"Test MAE: {mae:.2f}")

In [None]:
# Mean squared error on the test split.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Test MSE: {mse:.2f}")

In [None]:
# Root mean squared error: square root of the MSE computed above.
rmse = np.sqrt(mse)
# Print the value like the other metric cells do; a bare f-string expression
# is only echoed as a quoted repr by the notebook and is silent in a script.
print(f"Test RMSE: {rmse:.2f}")

In [None]:
# Coefficient of determination on the test split.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"Test R2: {r2:.2f}")

In [None]:
# Take the first test row as a one-row DataFrame (list indexer keeps it 2-D).
first_row_position = [0]
sample_input = X_test.iloc[first_row_position, :]
sample_input

In [None]:
# Run the fitted pipeline on the single-row sample and show the raw output.
sample_prediction = model_pipeline.predict(X=sample_input)
print(f"Prediction: {sample_prediction}")