In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor,StackingRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
from lightgbm import LGBMRegressor
from sklearn.model_selection import cross_val_score
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from pytorch_tabnet.tab_model import TabNetRegressor
from sklearn.neural_network import MLPRegressor
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv('price_list_data.csv')
df.drop(columns='Unnamed: 0', inplace=True)

In [3]:
df['Gu_encoding'] = df['Gu'].astype('category').cat.codes + 1
df['Gu_encoding'] = df['Gu_encoding'].astype('int64')

Actual Price Index는 2024년 데이터에 전부 없으므로 생략합니다.

In [4]:
X = df[['Longitude',
       'Latitude', 'major', 'Building Year', 'Floor', 'Interest Rate', 'Jeonse Index', 'Total Population',
       'Children', 'Youth', 'Old Age',
       'Consumption', 'Distance to MC', 'Distance to NS', 'Elementary Schools Num',
       'High Schools Num', 'Market Num', 'Hospital Num', 'Park Presence',
       'Nearby Hangang', 'Bus Station', 'Gu_encoding']]
y = df['Price per Area']

테스트셋은 2024년 데이터에 존재하므로, Train셋과 Validation 셋으로 나눠줍니다.


In [6]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

스케일을 진행합니다.

In [36]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

사용할 모델입니다. 모델의 선정은 정형데이터의 회귀문제를 다룰만한 모델을 선정하였습니다.

In [17]:
models = {
    "KNN": KNeighborsRegressor(),
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(),
    "XGB": XGBRegressor(),
    "CatBoost": CatBoostRegressor(verbose=0), 
    "Gradient Boosting": GradientBoostingRegressor(),
    "Decision Tree": DecisionTreeRegressor(),
    "LightGBM": LGBMRegressor()
}

In [18]:
results = {}
for name, model in models.items():
    if name in ["KNN", "Linear Regression"]:
        model.fit(X_train_scaled, y_train)
        predictions = model.predict(X_val_scaled)
    else:
        model.fit(X_train, y_train)
        predictions = model.predict(X_val)
    
    r2 = r2_score(y_val, predictions)
    mae = mean_absolute_error(y_val, predictions)
    mse = mean_squared_error(y_val, predictions)
    rmse = np.sqrt(mse)
    
    results[name] = {"R2": r2, "MAE": mae, "RMSE": rmse}

for name, metrics in results.items():
    print(f"{name} - R²: {metrics['R2']:.4f}, MAE: {metrics['MAE']:.4f}, RMSE: {metrics['RMSE']:.4f}")

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003962 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2251
[LightGBM] [Info] Number of data points in the train set: 162930, number of used features: 22
[LightGBM] [Info] Start training from score 1261.632846
KNN - R²: 0.8765, MAE: 146.5663, RMSE: 217.2874
Linear Regression - R²: 0.4727, MAE: 335.8102, RMSE: 448.9306
Random Forest - R²: 0.9278, MAE: 105.0667, RMSE: 166.1151
XGB - R²: 0.8962, MAE: 143.6255, RMSE: 199.1470
CatBoost - R²: 0.8971, MAE: 143.5425, RMSE: 198.3046
Gradient Boosting - R²: 0.7429, MAE: 227.5855, RMSE: 313.4634
Decision Tree - R²: 0.8818, MAE: 126.3858, RMSE: 212.5834
LightGBM - R²: 0.8548, MAE: 173.6649, RMSE: 235.6139


딥러닝도 사용합니다.

In [23]:
regressor = TabNetRegressor()

X_train_np = X_train.values
X_val_np = X_val.values
y_train_np = y_train.values.reshape(-1, 1)
y_val_np = y_val.values.reshape(-1, 1)

regressor.fit(
  X_train_np, y_train_np,
  eval_set=[(X_val_np, y_val_np)],
  eval_name=['test'],
  eval_metric=['rmse'], 
  max_epochs=200,
  patience=10,  
  batch_size=2048,  
  virtual_batch_size=1028  
)

# Predicting
predictions = regressor.predict(X_val)

r2 = r2_score(y_val, predictions)
n = X_val.shape[0]  # 테스트 샘플의 수
p = X_val.shape[1]  # 변수의 수
adjusted_r2 = 1 - (1-r2) * (n - 1) / (n - p - 1)
mse = mean_squared_error(y_val, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_val, predictions)

# 성능 지표 출력
print(f'Adjusted R^2: {adjusted_r2:.4f}')
print(f'MSE: {mse:.4f}')
print(f'RMSE: {rmse:.4f}')
print(f'MAE: {mae:.4f}')



epoch 0  | loss: 1883605.86234| test_rmse: 1280.60488|  0:00:04s
epoch 1  | loss: 1342121.63845| test_rmse: 1068.45842|  0:00:07s
epoch 2  | loss: 519807.15803| test_rmse: 671.70356|  0:00:11s
epoch 3  | loss: 167951.96519| test_rmse: 633.67254|  0:00:14s
epoch 4  | loss: 128750.22913| test_rmse: 671.85304|  0:00:18s
epoch 5  | loss: 119754.9288| test_rmse: 723.75041|  0:00:21s
epoch 6  | loss: 112977.5891| test_rmse: 656.51792|  0:00:25s
epoch 7  | loss: 108636.92751| test_rmse: 560.52215|  0:00:29s
epoch 8  | loss: 103528.26681| test_rmse: 412.02595|  0:00:32s
epoch 9  | loss: 99805.02927| test_rmse: 338.20714|  0:00:35s
epoch 10 | loss: 98294.71084| test_rmse: 339.89167|  0:00:39s
epoch 11 | loss: 97098.36748| test_rmse: 312.89027|  0:00:42s
epoch 12 | loss: 94615.71588| test_rmse: 303.74825|  0:00:46s
epoch 13 | loss: 92159.46974| test_rmse: 302.07259|  0:00:49s
epoch 14 | loss: 89920.78076| test_rmse: 302.63019|  0:00:53s
epoch 15 | loss: 88312.45827| test_rmse: 297.75301|  0:00:5



KeyError: 0

In [27]:
mlp = MLPRegressor(hidden_layer_sizes=(100,50,50), activation='relu', max_iter=200, 
                   alpha=0.01, solver='adam', verbose=10, 
                   random_state=42, tol=0.0000001)
mlp.fit(X_train, y_train)

predictions = mlp.predict(X_val)

r2 = r2_score(y_val, predictions)
n = X_val.shape[0]  # 테스트 샘플의 수
p = X_val.shape[1]  # 변수의 수
adjusted_r2 = 1 - (1-r2) * (n - 1) / (n - p - 1)
mse = mean_squared_error(y_val, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_val, predictions)

# 성능 지표 출력
print(f'Adjusted R^2: {adjusted_r2:.4f}')
print(f'MSE: {mse:.4f}')
print(f'RMSE: {rmse:.4f}')
print(f'MAE: {mae:.4f}')

Iteration 1, loss = 335871.23633885
Iteration 2, loss = 191510.97870511
Iteration 3, loss = 179238.76904116
Iteration 4, loss = 172438.47812721
Iteration 5, loss = 166541.99608373
Iteration 6, loss = 161495.13234442
Iteration 7, loss = 157627.12801344
Iteration 8, loss = 156005.27408859
Iteration 9, loss = 151114.59214871
Iteration 10, loss = 149346.73105267
Iteration 11, loss = 147204.96789657
Iteration 12, loss = 145430.17196603
Iteration 13, loss = 139845.07356732
Iteration 14, loss = 142581.29280327
Iteration 15, loss = 140217.15448124
Iteration 16, loss = 138715.57038100
Iteration 17, loss = 135459.04467169
Iteration 18, loss = 132932.93848618
Iteration 19, loss = 133805.46938341
Iteration 20, loss = 130993.65841433
Iteration 21, loss = 132249.62344525
Iteration 22, loss = 127922.55170426
Iteration 23, loss = 127759.18410649
Iteration 24, loss = 125665.01255011
Iteration 25, loss = 125175.77006408
Iteration 26, loss = 124986.86426296
Iteration 27, loss = 125657.26141173
Iteration 



상위 5개 모델을 선정합니다.

RF, catBoost, XGB, DT, KNN

optuna 라이브러리를 이용해 최적화 합니다.

In [28]:
def objective(trial):
    # 탐색할 하이퍼파라미터 설정
    param = {
        'iterations': trial.suggest_int('iterations', 50, 300),  
        'depth': trial.suggest_int('depth', 4, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'random_strength': trial.suggest_int('random_strength', 1, 10),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0, 1),
        'border_count': trial.suggest_int('border_count', 1, 255),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-2, 10, log=True),
        'loss_function': 'RMSE',
        'verbose': 1
    }

    # CatBoost 모델 생성 및 훈련
    model = CatBoostRegressor(**param)
    model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)

    # 예측 및 평가
    preds = model.predict(X_val)
    mae = mean_absolute_error(y_val, preds)

    return mae

# Optuna 최적화
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=150)

# 최적의 하이퍼파라미터 출력
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[I 2024-03-04 20:36:03,997] A new study created in memory with name: no-name-15de9501-7f05-4697-a6f0-6c7acc2715b8
[I 2024-03-04 20:36:07,499] Trial 0 finished with value: 135.77328574906727 and parameters: {'iterations': 218, 'depth': 9, 'learning_rate': 0.22756849363756257, 'random_strength': 7, 'bagging_temperature': 0.2859503065770391, 'border_count': 127, 'l2_leaf_reg': 1.2489236635767564}. Best is trial 0 with value: 135.77328574906727.
[I 2024-03-04 20:36:10,715] Trial 1 finished with value: 172.05006360303614 and parameters: {'iterations': 238, 'depth': 8, 'learning_rate': 0.05745528362270814, 'random_strength': 1, 'bagging_temperature': 0.526493420035919, 'border_count': 119, 'l2_leaf_reg': 0.0111369208312368}. Best is trial 0 with value: 135.77328574906727.
[I 2024-03-04 20:36:13,922] Trial 2 finished with value: 153.43291602986966 and parameters: {'iterations': 299, 'depth': 6, 'learning_rate': 0.21935062130629882, 'random_strength': 10, 'bagging_temperature': 0.6897788340094

Number of finished trials: 150
Best trial: {'iterations': 299, 'depth': 10, 'learning_rate': 0.295302636073099, 'random_strength': 7, 'bagging_temperature': 0.023086661064806616, 'border_count': 130, 'l2_leaf_reg': 0.042969943710471}


In [30]:
def objective(trial):
    param = {
        'booster': 'gbtree',
        'objective': 'reg:squarederror',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'subsample': trial.suggest_float('subsample', 0.5, 1),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 5),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 10, log=True)
    }

    model = XGBRegressor(**param)
    model.fit(X_train, y_train)

    preds = model.predict(X_val)
    mae = mean_absolute_error(y_val, preds)

    return mae

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=150)

print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[I 2024-03-04 21:00:30,251] A new study created in memory with name: no-name-dacca43d-b9c0-4831-93ae-540bd72863eb
[I 2024-03-04 21:00:31,397] Trial 0 finished with value: 126.951464325448 and parameters: {'n_estimators': 215, 'max_depth': 7, 'learning_rate': 0.21330782765360226, 'min_child_weight': 9, 'gamma': 2.528189192497759, 'subsample': 0.712050567032092, 'colsample_bytree': 0.8724645442235903, 'reg_alpha': 0.8222614493856523, 'reg_lambda': 0.018126749879536982}. Best is trial 0 with value: 126.951464325448.
[I 2024-03-04 21:00:33,297] Trial 1 finished with value: 139.56751343965635 and parameters: {'n_estimators': 669, 'max_depth': 4, 'learning_rate': 0.24052347770173688, 'min_child_weight': 8, 'gamma': 4.578881814695973, 'subsample': 0.5688266065919444, 'colsample_bytree': 0.5639548747835157, 'reg_alpha': 0.44757825870693324, 'reg_lambda': 0.0002017876702936407}. Best is trial 0 with value: 126.951464325448.
[I 2024-03-04 21:00:34,783] Trial 2 finished with value: 119.2787371175

Number of finished trials: 150
Best trial: {'n_estimators': 895, 'max_depth': 10, 'learning_rate': 0.11341709334175387, 'min_child_weight': 2, 'gamma': 1.2044585629128735, 'subsample': 0.9588896509526419, 'colsample_bytree': 0.8822700351401253, 'reg_alpha': 1.6237099018352437, 'reg_lambda': 0.0025671428314621735}


RF와 KNN, DT는 수동으로 하는게 더 나아서 수동으로 하였습니다.

In [38]:
models = {
    "KNN": KNeighborsRegressor(n_neighbors=5, p=1, n_jobs=-1),
    "Random Forest": RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1),
    "XGB": XGBRegressor(n_estimators=895, max_depth=10, learning_rate=0.11341, min_child_weight=2, gamma=1.204458 ,subsample=0.9588896, colsample_bytree=0.8822700, reg_alpha=1.6237099, reg_lambda=0.0025671, random_state=42),
    "CatBoost": CatBoostRegressor(iterations=299, depth=10, learning_rate=0.2953026, random_strength=7, bagging_temperature=0.02308666, border_count=130, l2_leaf_reg=0.042969, random_state=42), 
    "Decision Tree": DecisionTreeRegressor(random_state=42)
}

results = {}
for name, model in models.items():
    if name == "KNN":
        model.fit(X_train_scaled, y_train)
        predictions = model.predict(X_val_scaled)
    else:
        model.fit(X_train, y_train)
        predictions = model.predict(X_val)
    
    r2 = r2_score(y_val, predictions)
    n = X_val.shape[0]  # 테스트 샘플의 수
    p = X_val.shape[1]  # 변수의 수
    adjusted_r2 = 1 - (1-r2) * (n - 1) / (n - p - 1)
    mae = mean_absolute_error(y_val, predictions)
    mse = mean_squared_error(y_val, predictions)
    rmse = np.sqrt(mse)
    
    results[name] = {"R2": adjusted_r2, "MAE": mae, "RMSE": rmse}

for name, metrics in results.items():
    print(f"{name} - Adjusted R²: {metrics['R2']:.4f}, MAE: {metrics['MAE']:.4f}, RMSE: {metrics['RMSE']:.4f}")

0:	learn: 520.2724458	total: 30.4ms	remaining: 9.05s
1:	learn: 449.2429216	total: 60.5ms	remaining: 8.98s
2:	learn: 401.6691581	total: 88.8ms	remaining: 8.77s
3:	learn: 367.6918408	total: 119ms	remaining: 8.77s
4:	learn: 340.1354400	total: 148ms	remaining: 8.68s
5:	learn: 323.6280168	total: 178ms	remaining: 8.67s
6:	learn: 312.3987644	total: 209ms	remaining: 8.71s
7:	learn: 303.9837311	total: 241ms	remaining: 8.75s
8:	learn: 298.4547357	total: 271ms	remaining: 8.72s
9:	learn: 293.0161216	total: 299ms	remaining: 8.65s
10:	learn: 287.7950950	total: 329ms	remaining: 8.62s
11:	learn: 283.3305044	total: 359ms	remaining: 8.58s
12:	learn: 280.1885896	total: 387ms	remaining: 8.51s
13:	learn: 276.6559447	total: 416ms	remaining: 8.47s
14:	learn: 273.1138096	total: 444ms	remaining: 8.41s
15:	learn: 270.7411957	total: 476ms	remaining: 8.42s
16:	learn: 267.2644718	total: 510ms	remaining: 8.46s
17:	learn: 263.8545043	total: 537ms	remaining: 8.38s
18:	learn: 261.7115590	total: 566ms	remaining: 8.34s


In [39]:
knn_pipeline = Pipeline([
    ('scaler', StandardScaler()),  # 데이터 스케일링
    ('knn', KNeighborsRegressor(n_neighbors=5))  # KNN 모델
])

스태킹 모델도 만들어봅니다. KNN은 스케일이 필요하여 따로 정의합니다.

In [41]:
base_models = [
    ('Random Forest', RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)),
    ('CatBoost', CatBoostRegressor(iterations=299, depth=10, learning_rate=0.2953026, random_strength=7, bagging_temperature=0.02308666, border_count=130, l2_leaf_reg=0.042969, random_state=42)),
    ('KNN', knn_pipeline),
    ('Decision Tree', DecisionTreeRegressor(random_state=42))
]

meta_model = XGBRegressor(n_estimators=563, max_depth=3, learning_rate=0.0151, min_child_weight=1, subsample=0.8967, colsample_bytree=0.9831, reg_alpha=3.029,reg_lambda=0.9275, random_state=42)

stacking_model = StackingRegressor(estimators=base_models, final_estimator=meta_model)

stacking_model.fit(X_train, y_train)

y_pred = stacking_model.predict(X_val)
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_val, y_pred)
r2 = r2_score(y_val, y_pred)

print(f'Stacking Model: R²={r2:.4f}, MSE={mse:.4f}, RMSE={rmse:.4f}, MAE={mae:.4f}')

0:	learn: 520.2724458	total: 26.5ms	remaining: 7.89s
1:	learn: 449.2429216	total: 53.6ms	remaining: 7.95s
2:	learn: 401.6691581	total: 80ms	remaining: 7.89s
3:	learn: 367.6918408	total: 107ms	remaining: 7.85s
4:	learn: 340.1354400	total: 133ms	remaining: 7.81s
5:	learn: 323.6280168	total: 159ms	remaining: 7.76s
6:	learn: 312.3987644	total: 189ms	remaining: 7.88s
7:	learn: 303.9837311	total: 216ms	remaining: 7.86s
8:	learn: 298.4547357	total: 243ms	remaining: 7.82s
9:	learn: 293.0161216	total: 272ms	remaining: 7.85s
10:	learn: 287.7950950	total: 302ms	remaining: 7.9s
11:	learn: 283.3305044	total: 331ms	remaining: 7.92s
12:	learn: 280.1885896	total: 361ms	remaining: 7.94s
13:	learn: 276.6559447	total: 390ms	remaining: 7.93s
14:	learn: 273.1138096	total: 419ms	remaining: 7.93s
15:	learn: 270.7411957	total: 449ms	remaining: 7.93s
16:	learn: 267.2644718	total: 479ms	remaining: 7.94s
17:	learn: 263.8545043	total: 507ms	remaining: 7.92s
18:	learn: 261.7115590	total: 536ms	remaining: 7.89s
19:

In [42]:
import joblib

joblib.dump(stacking_model, 'price_model_stacking.pkl')

['price_model_stacking.pkl']

In [8]:
import joblib

rf = RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)
rf.fit(X_train, y_train)

joblib.dump(rf, 'price_model_rf.pkl')

['price_model_rf.pkl']

In [44]:
cat = CatBoostRegressor(iterations=299, depth=10, learning_rate=0.2953026, random_strength=7, bagging_temperature=0.02308666, border_count=130, l2_leaf_reg=0.042969, random_state=42)
cat.fit(X_train, y_train)

joblib.dump(cat, 'price_model_cat.pkl')

0:	learn: 520.2724458	total: 29.3ms	remaining: 8.74s
1:	learn: 449.2429216	total: 57.7ms	remaining: 8.57s
2:	learn: 401.6691581	total: 85.5ms	remaining: 8.44s
3:	learn: 367.6918408	total: 113ms	remaining: 8.33s
4:	learn: 340.1354400	total: 142ms	remaining: 8.33s
5:	learn: 323.6280168	total: 172ms	remaining: 8.41s
6:	learn: 312.3987644	total: 204ms	remaining: 8.53s
7:	learn: 303.9837311	total: 238ms	remaining: 8.65s
8:	learn: 298.4547357	total: 270ms	remaining: 8.69s
9:	learn: 293.0161216	total: 302ms	remaining: 8.72s
10:	learn: 287.7950950	total: 331ms	remaining: 8.65s
11:	learn: 283.3305044	total: 360ms	remaining: 8.61s
12:	learn: 280.1885896	total: 389ms	remaining: 8.57s
13:	learn: 276.6559447	total: 419ms	remaining: 8.52s
14:	learn: 273.1138096	total: 447ms	remaining: 8.45s
15:	learn: 270.7411957	total: 477ms	remaining: 8.44s
16:	learn: 267.2644718	total: 507ms	remaining: 8.41s
17:	learn: 263.8545043	total: 536ms	remaining: 8.37s
18:	learn: 261.7115590	total: 565ms	remaining: 8.33s


['price_model_cat.pkl']

In [10]:
xgb = XGBRegressor(n_estimators=895, max_depth=10, learning_rate=0.11341, min_child_weight=2, gamma=1.204458 ,subsample=0.9588896, colsample_bytree=0.8822700, reg_alpha=1.6237099, reg_lambda=0.0025671, random_state=42)
xgb.fit(X_train, y_train)

joblib.dump(xgb, 'price_model_xgb.pkl')

['price_model_xgb.pkl']

In [51]:
base_models = [
    ('Random Forest', RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)),
    ('KNN', knn_pipeline),
    ('XGB', XGBRegressor(n_estimators=895, max_depth=10, learning_rate=0.11341, min_child_weight=2, gamma=1.204458 ,subsample=0.9588896, colsample_bytree=0.8822700, reg_alpha=1.6237099, reg_lambda=0.0025671, random_state=42)),
    ('Decision Tree', DecisionTreeRegressor(random_state=42))
]

meta_model = CatBoostRegressor(iterations=299, depth=10, learning_rate=0.2953026, random_strength=7, bagging_temperature=0.02308666, border_count=130, l2_leaf_reg=0.042969, random_state=42)

stacking_model = StackingRegressor(estimators=base_models, final_estimator=meta_model)

stacking_model.fit(X_train, y_train)

y_pred = stacking_model.predict(X_val)
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_val, y_pred)
r2 = r2_score(y_val, y_pred)

print(f'Stacking Model: R²={r2:.4f}, MSE={mse:.4f}, RMSE={rmse:.4f}, MAE={mae:.4f}')

0:	learn: 466.4214851	total: 22.6ms	remaining: 6.74s
1:	learn: 359.1226473	total: 44.6ms	remaining: 6.62s
2:	learn: 287.5569892	total: 55.2ms	remaining: 5.45s
3:	learn: 242.6301411	total: 76.3ms	remaining: 5.63s
4:	learn: 214.6638343	total: 96.7ms	remaining: 5.69s
5:	learn: 197.7607627	total: 119ms	remaining: 5.81s
6:	learn: 188.7834852	total: 140ms	remaining: 5.85s
7:	learn: 183.9392069	total: 162ms	remaining: 5.88s
8:	learn: 180.7765070	total: 183ms	remaining: 5.9s
9:	learn: 179.0125118	total: 206ms	remaining: 5.95s
10:	learn: 178.2443829	total: 228ms	remaining: 5.96s
11:	learn: 177.6616660	total: 250ms	remaining: 5.97s
12:	learn: 177.1590732	total: 272ms	remaining: 5.97s
13:	learn: 176.8658432	total: 293ms	remaining: 5.96s
14:	learn: 176.5091123	total: 315ms	remaining: 5.96s
15:	learn: 176.2726276	total: 337ms	remaining: 5.96s
16:	learn: 176.0219423	total: 358ms	remaining: 5.95s
17:	learn: 175.6830852	total: 381ms	remaining: 5.94s
18:	learn: 175.4694000	total: 402ms	remaining: 5.93s

In [52]:
import joblib

joblib.dump(stacking_model, 'price_model_stacking_cat.pkl')

['price_model_stacking_cat.pkl']