In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor

from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, cross_val_score
from sklearn.metrics import root_mean_squared_error, r2_score

import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

import numpy as np
import math
from scipy.stats import skew

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
PATH_TRAIN = r"..\datasets\train.csv"
PATH_TEST = r"..\datasets\test.csv"

In [None]:
df_train = pd.read_csv(PATH_TRAIN)
df_test = pd.read_csv(PATH_TEST)

In [None]:
print("Shape Train:", df_train.shape)
print("Shape Test:", df_test.shape)

In [None]:
DROPPED_COL = ["Id", "Utilities"]
X = df_train.drop(["SalePrice", *DROPPED_COL], axis=1)
y = df_train["SalePrice"]

In [None]:
X, X_temp, y, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.543, random_state=42)

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin


class GroupMedianImputer(BaseEstimator, TransformerMixin):
    def __init__(self, group_col, target_col):
        self.group_col = group_col
        self.target_col = target_col

    def fit(self, X, y=None):
        self.median_values = X.groupby(self.group_col)[
            self.target_col].median()
        self.global_median_ = X[self.target_col].median()
        return self

    def transform(self, X):
        df = X.copy()
        df[self.target_col] = df[self.target_col].fillna(
            df[self.group_col].map(self.median_values))
        df[self.target_col] = df[self.target_col].fillna(self.global_median_)
        return df

In [None]:
ORDINAL_MAPS = [

    # -------------------- Quality (Premium features) --------------------
    {
        "columns": [
            "ExterQual", "KitchenQual", "HeatingQC"
        ],
        "ordinalMap": {
            "Ex": 5,
            "Gd": 4,
            "TA": 3,
            "Fa": 2,
            "Po": 1,
            np.nan: 0
        }
    },

    # -------------------- Condition (Depreciation factors) --------------------
    {
        "columns": [
            "ExterCond", "BsmtCond", "GarageCond"
        ],
        "ordinalMap": {
            "Ex": 5,
            "Gd": 4,
            "TA": 3,
            "Fa": 2,
            "Po": 1,
            np.nan: 0
        }
    },

    # -------------------- Optional Feature Quality --------------------
    {
        "columns": [
            "BsmtQual", "FireplaceQu", "GarageQual", "PoolQC"
        ],
        "ordinalMap": {
            "Ex": 5,
            "Gd": 4,
            "TA": 3,
            "Fa": 2,
            "Po": 1,
            np.nan: 0  # No feature present
        }
    },

    # -------------------- Basement Finish / Exposure --------------------
    {
        "columns": ["BsmtExposure"],
        "ordinalMap": {
            "Gd": 4,
            "Av": 3,
            "Mn": 2,
            "No": 1,
            np.nan: 0
        }
    },
    {
        "columns": ["BsmtFinType1", "BsmtFinType2"],
        "ordinalMap": {
            "GLQ": 6,
            "ALQ": 5,
            "BLQ": 4,
            "Rec": 3,
            "LwQ": 2,
            "Unf": 1,
            np.nan: 0
        }
    },

    # -------------------- Garage --------------------
    {
        "columns": ["GarageFinish"],
        "ordinalMap": {
            "Fin": 3,
            "RFn": 2,
            "Unf": 1,
            np.nan: 0
        }
    },

    # -------------------- Functional Rating (NEW) --------------------
    {
        "columns": ["Functional"],
        "ordinalMap": {
            "Typ": 8,   # Typical
            "Min1": 7,  # Minor Deductions 1
            "Min2": 6,  # Minor Deductions 2
            "Mod": 5,   # Moderate Deductions
            "Maj1": 4,  # Major Deductions 1
            "Maj2": 3,  # Major Deductions 2
            "Sev": 2,   # Severely Damaged
            "Sal": 1    # Salvage only
        }
    },

    # -------------------- Access Features --------------------
    {
        "columns": ["PavedDrive"],
        "ordinalMap": {
            "Y": 2,
            "P": 1,
            "N": 0
        }
    },
    {
        "columns": ["Street"],
        "ordinalMap": {
            "Pave": 1,
            "Grvl": 0
        }
    },
    {
        "columns": ["Alley"],
        "ordinalMap": {
            "Pave": 2,
            "Grvl": 1,
            np.nan: 0  # No alley access
        }
    },

    # -------------------- Binary Features --------------------
    {
        "columns": ["CentralAir"],
        "ordinalMap": {
            "Y": 1,
            "N": 0
        }
    },

    # -------------------- Lot Characteristics --------------------
    {
        "columns": ["LotShape"],
        "ordinalMap": {
            "Reg": 3,  # Regular
            "IR1": 2,  # Slightly irregular
            "IR2": 1,  # Moderately irregular
            "IR3": 0   # Irregular
        }
    },
    {
        "columns": ["LandContour"],
        "ordinalMap": {
            "Lvl": 3,  # Near Flat/Level
            "Bnk": 2,  # Banked
            "HLS": 1,  # Hillside
            "Low": 0   # Depression
        }
    },
    {
        "columns": ["LandSlope"],
        "ordinalMap": {
            "Gtl": 2,  # Gentle slope
            "Mod": 1,  # Moderate slope
            "Sev": 0   # Severe slope
        }
    },

    # -------------------- Utilities / Electrical --------------------
    {
        "columns": ["Electrical"],
        "ordinalMap": {
            "SBrkr": 5,  # Standard Circuit Breakers
            "FuseA": 4,  # Fuse Box over 60 AMP
            "FuseF": 3,  # 60 AMP Fuse Box
            "FuseP": 2,  # 60 AMP Fuse Box (Poor)
            "Mix": 1,    # Mixed
            np.nan: 0
        }
    },

    # -------------------- Fence --------------------
    {
        "columns": ["Fence"],
        "ordinalMap": {
            "GdPrv": 4,  # Good Privacy
            "MnPrv": 3,  # Minimum Privacy
            "GdWo": 2,   # Good Wood
            "MnWw": 1,   # Minimum Wood/Wire
            np.nan: 0    # No Fence
        }
    }
]


def map_ordinal(X, verbose=False):
    """
    Apply ordinal mappings with comprehensive error handling.

    Args:
        X: DataFrame to transform
        verbose: If True, prints mapping statistics

    Returns:
        DataFrame with ordinal mappings applied
    """
    X = X.copy()

    for item in ORDINAL_MAPS:
        ordinal_map = item["ordinalMap"]

        for col in item["columns"]:
            if col not in X.columns:
                if verbose:
                    print(f"⚠️  Column '{col}' not found - skipping")
                continue

            # Get original unique values
            original_values = set(X[col].dropna().unique())
            mapped_values = set(ordinal_map.keys()) - {np.nan}

            # Check for unmapped values (excluding NaN)
            unmapped = original_values - mapped_values
            if unmapped and verbose:
                print(f"⚠️  Column '{col}' has unmapped values: {unmapped}")

            # Apply mapping
            X[col] = X[col].map(ordinal_map)

            if verbose:
                print(
                    f"✓ Mapped '{col}': {len(original_values)} unique → numeric")

    return X


X = map_ordinal(X)
X_val = map_ordinal(X_val)

In [None]:
X.isna().sum()[lambda x:x > 0].sort_values(ascending=False)

In [None]:
plt.figure(figsize=(50, 50))
sns.heatmap(X.corr(numeric_only=True), annot=True)

In [None]:
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')


def encode_OHE(X, fit=True):
    categorical_columns = X.select_dtypes("object").columns

    one_hot_encoded = encoder.fit_transform(
        X[categorical_columns]) if fit else encoder.transform(X[categorical_columns])

    one_hot_X = pd.DataFrame(
        one_hot_encoded, columns=encoder.get_feature_names_out(categorical_columns))

    X_encoded = pd.concat([X.reset_index(
        drop=True), one_hot_X.reset_index(drop=True)], axis=1)

    X_encoded = X_encoded.drop(categorical_columns, axis=1)
    return X_encoded


X_encoded = encode_OHE(X)
X_val_encoded = encode_OHE(X_val, fit=False)

In [None]:
xgbr = XGBRegressor(eval_metric="rmse", early_stopping_rounds=100)

# fit model no training data
eval_set = [(X_encoded, y), (X_val_encoded, y_val)]
xgbr.fit(X_encoded, y,
         eval_set=eval_set,
         verbose=False)
# make predictions for test data
y_pred = xgbr.predict(X_val_encoded)
# evaluate predictions
rmse = root_mean_squared_error(y_val, y_pred)
print(f"RMSE: {rmse}")


results = xgbr.evals_result()
epochs = len(results['validation_0']['rmse'])
x_axis = range(0, epochs)
# plot rmse
fig, ax = plt.subplots()
ax.plot(x_axis, results['validation_0']['rmse'], label='Train')
ax.plot(x_axis, results['validation_1']['rmse'], label='Test')
ax.legend()

plt.ylabel('RMSE')
plt.title('XGBoost RMSE')
plt.show()

Prepare df_test

In [None]:
id_test = df_test["Id"]
df_test = df_test.drop(DROPPED_COL, axis=1)
df_test = map_ordinal(df_test)
df_test_encoded = encode_OHE(df_test, fit=False)

In [None]:
df_submision = pd.DataFrame()
df_submision["Id"] = id_test
df_submision["SalePrice"] = xgbr.predict(df_test_encoded)

In [None]:
df_submision.to_csv("sub4_test.csv", index=False)

In [11]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
import xgboost as xgb
import lightgbm as lgb
from scipy.stats import skew
from scipy.special import boxcox1p
import warnings
warnings.filterwarnings('ignore')

# Load data
train = pd.read_csv(
    r'D:\dev\python\ai_ds_3projects\house-prices\datasets\train_augmented_4x_rule_aware.csv')
test = pd.read_csv(
    r'D:\dev\python\ai_ds_3projects\house-prices\datasets\test.csv')

# Save test IDs
test_ID = test['Id']

# Remove outliers - keep only the most extreme
train = train.drop(train[(train['GrLivArea'] > 4000) &
                   (train['SalePrice'] < 300000)].index)

# Log transform target
y_train = np.log1p(train['SalePrice'].values)

# Combine train and test
all_data = pd.concat((train, test)).reset_index(drop=True)
all_data.drop(['SalePrice'], axis=1, inplace=True)

# Handle missing values FIRST


def handle_missing(df):
    # Fill numeric with 0
    for col in ['GarageYrBlt', 'GarageArea', 'GarageCars', 'BsmtFinSF1', 'BsmtFinSF2',
                'BsmtUnfSF', 'TotalBsmtSF', 'BsmtFullBath', 'BsmtHalfBath', 'MasVnrArea']:
        df[col].fillna(0, inplace=True)

    # LotFrontage by neighborhood median
    df['LotFrontage'] = df.groupby('Neighborhood')['LotFrontage'].transform(
        lambda x: x.fillna(x.median()))

    # Categorical with None
    for col in ['PoolQC', 'MiscFeature', 'Alley', 'Fence', 'FireplaceQu', 'GarageType',
                'GarageFinish', 'GarageQual', 'GarageCond', 'BsmtQual', 'BsmtCond',
                'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'MasVnrType', 'MSSubClass']:
        df[col].fillna('None', inplace=True)

    # Mode for these
    for col in ['Functional', 'Electrical', 'KitchenQual', 'Exterior1st', 'Exterior2nd',
                'SaleType', 'MSZoning', 'Utilities']:
        if col in df.columns:
            df[col].fillna(df[col].mode()[0] if len(
                df[col].mode()) > 0 else 'None', inplace=True)

    return df


all_data = handle_missing(all_data)

# Label encoding for ordinal features
ordinal_map = {
    'ExterQual': {'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    'ExterCond': {'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    'BsmtQual': {'None': 0, 'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    'BsmtCond': {'None': 0, 'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    'BsmtExposure': {'None': 0, 'No': 1, 'Mn': 2, 'Av': 3, 'Gd': 4},
    'BsmtFinType1': {'None': 0, 'Unf': 1, 'LwQ': 2, 'Rec': 3, 'BLQ': 4, 'ALQ': 5, 'GLQ': 6},
    'BsmtFinType2': {'None': 0, 'Unf': 1, 'LwQ': 2, 'Rec': 3, 'BLQ': 4, 'ALQ': 5, 'GLQ': 6},
    'HeatingQC': {'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    'KitchenQual': {'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    'Functional': {'Sal': 1, 'Sev': 2, 'Maj2': 3, 'Maj1': 4, 'Mod': 5, 'Min2': 6, 'Min1': 7, 'Typ': 8},
    'FireplaceQu': {'None': 0, 'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    'GarageQual': {'None': 0, 'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    'GarageCond': {'None': 0, 'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    'PoolQC': {'None': 0, 'Fa': 1, 'TA': 2, 'Gd': 3, 'Ex': 4},
    'Fence': {'None': 0, 'MnWw': 1, 'GdWo': 2, 'MnPrv': 3, 'GdPrv': 4}
}

for col, mapping in ordinal_map.items():
    all_data[col] = all_data[col].map(mapping)

# Feature Engineering


def add_features(df):
    # Total square footage
    df['TotalSF'] = df['TotalBsmtSF'] + df['1stFlrSF'] + df['2ndFlrSF']

    # Total bathrooms
    df['TotalBath'] = df['FullBath'] + 0.5*df['HalfBath'] + \
        df['BsmtFullBath'] + 0.5*df['BsmtHalfBath']

    # Total porch area
    df['TotalPorchSF'] = df['OpenPorchSF'] + df['3SsnPorch'] + \
        df['EnclosedPorch'] + df['ScreenPorch'] + df['WoodDeckSF']

    # Binary features
    df['HasPool'] = (df['PoolArea'] > 0).astype(int)
    df['Has2ndFloor'] = (df['2ndFlrSF'] > 0).astype(int)
    df['HasGarage'] = (df['GarageArea'] > 0).astype(int)
    df['HasBsmt'] = (df['TotalBsmtSF'] > 0).astype(int)
    df['HasFireplace'] = (df['Fireplaces'] > 0).astype(int)

    # Age features
    df['HouseAge'] = df['YrSold'] - df['YearBuilt']
    df['RemodAge'] = df['YrSold'] - df['YearRemodAdd']
    df['IsNew'] = (df['YearBuilt'] >= 2000).astype(int)
    df['IsRemodeled'] = (df['YearBuilt'] != df['YearRemodAdd']).astype(int)

    # Key quality interactions
    df['OverallQual_TotalSF'] = df['OverallQual'] * df['TotalSF']
    df['OverallQual_GrLivArea'] = df['OverallQual'] * df['GrLivArea']
    df['OverallQual_TotalBath'] = df['OverallQual'] * df['TotalBath']
    df['OverallQual_GarageCars'] = df['OverallQual'] * df['GarageCars']
    df['ExterQual_TotalSF'] = df['ExterQual'] * df['TotalSF']
    df['KitchenQual_TotalSF'] = df['KitchenQual'] * df['TotalSF']
    df['BsmtQual_TotalBsmtSF'] = df['BsmtQual'] * df['TotalBsmtSF']

    # Area ratios
    df['LivingArea_Ratio'] = df['GrLivArea'] / (df['TotalSF'] + 1)
    df['Bsmt_Ratio'] = df['TotalBsmtSF'] / (df['TotalSF'] + 1)
    df['GarageArea_Ratio'] = df['GarageArea'] / (df['TotalSF'] + 1)

    # Quality scores
    df['TotalQual'] = df['OverallQual'] + df['OverallCond']
    df['QualityScore'] = df['ExterQual'] + \
        df['KitchenQual'] + df['BsmtQual'] + df['GarageQual']

    # Polynomial features for key variables
    df['GrLivArea_Squared'] = df['GrLivArea'] ** 2
    df['TotalSF_Squared'] = df['TotalSF'] ** 2
    df['OverallQual_Squared'] = df['OverallQual'] ** 2
    df['OverallQual_Cubed'] = df['OverallQual'] ** 3

    # Total living area
    df['TotalLivingArea'] = df['GrLivArea'] + df['TotalBsmtSF']

    # Room size
    df['AvgRoomSize'] = df['GrLivArea'] / (df['TotRmsAbvGrd'] + 1)

    # Bathroom to room ratio
    df['Bath_Room_Ratio'] = df['TotalBath'] / (df['TotRmsAbvGrd'] + 1)

    # Garage interaction
    df['GarageCars_Area'] = df['GarageCars'] * df['GarageArea']

    # Year sold features
    df['SoldRecent'] = (df['YrSold'] >= 2008).astype(int)

    return df


# Apply feature engineering
all_data = add_features(all_data)

# Convert to categorical
all_data['MSSubClass'] = all_data['MSSubClass'].astype(str)
all_data['OverallCond'] = all_data['OverallCond'].astype(str)
all_data['YrSold'] = all_data['YrSold'].astype(str)
all_data['MoSold'] = all_data['MoSold'].astype(str)

# Get dummy variables
all_data = pd.get_dummies(all_data)
all_data.drop(['Id'], axis=1, inplace=True)

# Fix skewness with more aggressive threshold
numeric_feats = all_data.dtypes[all_data.dtypes != "object"].index
skewed_feats = all_data[numeric_feats].apply(
    lambda x: skew(x.dropna())).sort_values(ascending=False)
skewness = pd.DataFrame({'Skew': skewed_feats})
skewness = skewness[abs(skewness) > 0.75]

skewed_features = skewness.index
lam = 0.15
for feat in skewed_features:
    all_data[feat] = boxcox1p(all_data[feat], lam)

# Split back and ensure no NaN
X_train = all_data[:len(y_train)].fillna(0)
X_test = all_data[len(y_train):].fillna(0)

print(f"Features: {X_train.shape[1]}")
print(f"Training samples: {X_train.shape[0]}")

# Advanced Stacking Class


class StackingAveragedModels(BaseEstimator, RegressorMixin, TransformerMixin):
    def __init__(self, base_models, meta_model, n_folds=10):
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_folds = n_folds

    def fit(self, X, y):
        self.base_models_ = [list() for x in self.base_models]
        self.meta_model_ = clone(self.meta_model)
        kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=42)

        out_of_fold_predictions = np.zeros((X.shape[0], len(self.base_models)))
        for i, model in enumerate(self.base_models):
            for train_index, holdout_index in kfold.split(X, y):
                instance = clone(model)
                self.base_models_[i].append(instance)
                instance.fit(X.iloc[train_index], y[train_index])
                y_pred = instance.predict(X.iloc[holdout_index])
                out_of_fold_predictions[holdout_index, i] = y_pred

        self.meta_model_.fit(out_of_fold_predictions, y)
        return self

    def predict(self, X):
        meta_features = np.column_stack([
            np.column_stack([model.predict(X)
                            for model in base_models]).mean(axis=1)
            for base_models in self.base_models_])
        return self.meta_model_.predict(meta_features)


# Define models with even better hyperparameters
lasso = make_pipeline(RobustScaler(), Lasso(
    alpha=0.00045, random_state=1, max_iter=50000))
elasticnet = make_pipeline(RobustScaler(), ElasticNet(
    alpha=0.00045, l1_ratio=0.85, random_state=3, max_iter=50000))
ridge = make_pipeline(RobustScaler(), Ridge(alpha=12, random_state=1))
krr = KernelRidge(alpha=0.65, kernel='polynomial', degree=2, coef0=2.5)

# XGBoost with stronger regularization
xgb1 = xgb.XGBRegressor(
    colsample_bytree=0.4, gamma=0.045, learning_rate=0.04,
    max_depth=2, min_child_weight=1.5, n_estimators=3000,
    reg_alpha=0.6, reg_lambda=0.95, subsample=0.5,
    random_state=7, n_jobs=-1, verbosity=0
)

xgb2 = xgb.XGBRegressor(
    colsample_bytree=0.45, gamma=0.05, learning_rate=0.035,
    max_depth=3, min_child_weight=2, n_estimators=3500,
    reg_alpha=0.55, reg_lambda=1.0, subsample=0.52,
    random_state=42, n_jobs=-1, verbosity=0
)

# LightGBM with better parameters
lgb1 = lgb.LGBMRegressor(
    objective='regression', num_leaves=4, learning_rate=0.04,
    n_estimators=900, max_bin=55, bagging_fraction=0.75,
    bagging_freq=5, feature_fraction=0.22,
    min_data_in_leaf=5, min_sum_hessian_in_leaf=10,
    reg_alpha=0.6, reg_lambda=0.8,
    random_state=7, verbosity=-1
)

lgb2 = lgb.LGBMRegressor(
    objective='regression', num_leaves=5, learning_rate=0.035,
    n_estimators=1200, max_bin=200, bagging_fraction=0.7,
    bagging_freq=7, feature_fraction=0.23,
    min_data_in_leaf=6, min_sum_hessian_in_leaf=11,
    reg_alpha=0.65, reg_lambda=0.85,
    random_state=42, verbosity=-1
)

# GradientBoosting with deeper trees
gbr = GradientBoostingRegressor(
    n_estimators=4000, learning_rate=0.04, max_depth=3,
    max_features='sqrt', min_samples_leaf=12, min_samples_split=8,
    loss='huber', random_state=5, subsample=0.8
)

# SVR
svr_model = make_pipeline(RobustScaler(), SVR(
    C=25, epsilon=0.009, gamma=0.0004))

# Create multiple stacked ensembles
stacked_model1 = StackingAveragedModels(
    base_models=(elasticnet, krr, xgb1, lgb1),
    meta_model=ridge
)

stacked_model2 = StackingAveragedModels(
    base_models=(lasso, gbr, xgb2, lgb2),
    meta_model=elasticnet
)

# Train all models
print("\nTraining models...")
models_to_train = {
    'lasso': lasso,
    'elasticnet': elasticnet,
    'ridge': ridge,
    'krr': krr,
    'svr': svr_model,
    'xgb1': xgb1,
    'xgb2': xgb2,
    'lgb1': lgb1,
    'lgb2': lgb2,
    'gbr': gbr
}

trained = {}
for name, model in models_to_train.items():
    print(f"Training {name}...")
    model.fit(X_train, y_train)
    trained[name] = model

print("Training stacked models...")
stacked_model1.fit(X_train, y_train)
stacked_model2.fit(X_train, y_train)

# Make predictions
print("\nMaking predictions...")
preds = {name: model.predict(X_test) for name, model in trained.items()}
stacked_pred1 = stacked_model1.predict(X_test)
stacked_pred2 = stacked_model2.predict(X_test)

# Multi-level ensemble with optimized weights
final_pred = (
    0.30 * stacked_pred1 +
    0.25 * stacked_pred2 +
    0.12 * preds['xgb1'] +
    0.12 * preds['lgb1'] +
    0.08 * preds['xgb2'] +
    0.08 * preds['lgb2'] +
    0.05 * preds['ridge']
)

final_pred = np.expm1(final_pred)

# Create submission
submission = pd.DataFrame({'Id': test_ID, 'SalePrice': final_pred})
submission.to_csv('submission.csv', index=False)

print("\n" + "="*60)
print("Submission created!")
print(f"Predicted prices: ${final_pred.min():.2f} to ${final_pred.max():.2f}")
print(f"Mean: ${final_pred.mean():.2f}")
print("="*60)

Features: 4684
Training samples: 5834

Training models...
Training lasso...
Training elasticnet...
Training ridge...
Training krr...
Training svr...
Training xgb1...
Training xgb2...
Training lgb1...
Training lgb2...
Training gbr...
Training stacked models...

Making predictions...

Submission created!
Predicted prices: $42275.01 to $678536.77
Mean: $178671.93
