In [1]:
from google.colab import files
uploaded = files.upload()

Saving catboost_valid_preds_2021.csv to catboost_valid_preds_2021.csv
Saving knn_valid_preds_2021.csv to knn_valid_preds_2021.csv
Saving lgbm_valid_preds_2021.csv to lgbm_valid_preds_2021.csv
Saving randomforest_valid_preds_2021.csv to randomforest_valid_preds_2021.csv
Saving train.csv to train.csv


In [2]:
import pandas as pd
import numpy as np

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

from xgboost import XGBRegressor

In [3]:
# 데이터 불러오기
train_df = pd.read_csv("train.csv")

# 날짜 컬럼 처리 및 파생 변수 생성
train_df['일시'] = pd.to_datetime(train_df['일시'].astype(str), format="%Y%m%d")
train_df['year'] = train_df['일시'].dt.year
train_df['month'] = train_df['일시'].dt.month
train_df['day'] = train_df['일시'].dt.day
train_df['weekday'] = train_df['일시'].dt.weekday  # 0: 월요일, 6: 일요일
train_df['is_weekend'] = (train_df['weekday'] >= 5).astype(int)

# 컬럼명 영문화
train_df = train_df.rename(columns={
    '광진구': 'Gwangjin',
    '동대문구': 'Dongdaemun',
    '성동구': 'Seongdong',
    '중랑구': 'Jungnang'
})

In [4]:
# 각 모델별 예측 결과 로드
catboost_preds = pd.read_csv('catboost_valid_preds_2021.csv')
lgbm_preds = pd.read_csv('lgbm_valid_preds_2021.csv')
randomforest_preds = pd.read_csv('randomforest_valid_preds_2021.csv')
knn_preds = pd.read_csv('knn_valid_preds_2021.csv')



# 일시 컬럼 제거
catboost_preds = catboost_preds.drop(columns=['catboost_일시'], errors='ignore')
lgbm_preds = lgbm_preds.drop(columns=['lgbm_일시'], errors='ignore')
randomforest_preds = randomforest_preds.drop(columns=['randomforest_일시'], errors='ignore')
knn_preds = knn_preds.drop(columns=['knn_일시'], errors='ignore')



# catboost 컬럼 매핑
rename_map_catboost = {
    '광진구': 'catboost_Gwangjin',
    '동대문구': 'catboost_Dongdaemun',
    '성동구': 'catboost_Seongdong',
    '중랑구': 'catboost_Jungnang'
}
catboost_preds = catboost_preds.rename(columns=rename_map_catboost)

# lgbm 컬럼 매핑
rename_map_lgbm = {
    '광진구': 'lgbm_Gwangjin',
    '동대문구': 'lgbm_Dongdaemun',
    '성동구': 'lgbm_Seongdong',
    '중랑구': 'lgbm_Jungnang'
}
lgbm_preds = lgbm_preds.rename(columns=rename_map_lgbm)

# randomforest 컬럼 매핑
rename_map_rf = {
    'Gwangjin': 'randomforest_Gwangjin',
    'Dongdaemun': 'randomforest_Dongdaemun',
    'Seongdong': 'randomforest_Seongdong',
    'Jungnang': 'randomforest_Jungnang'
}
randomforest_preds = randomforest_preds.rename(columns=rename_map_rf)

# knn 컬럼 매핑
rename_map_knn = {
    '광진구': 'knn_Gwangjin',
    '동대문구': 'knn_Dongdaemun',
    '성동구': 'knn_Seongdong',
    '중랑구': 'knn_Jungnang'
}
knn_preds = knn_preds.rename(columns=rename_map_knn)


In [5]:
# 두 예측 DataFrame 합치기
X_meta_all = pd.concat([catboost_preds, lgbm_preds, randomforest_preds, knn_preds], axis=1)


y_valid = train_df.loc[train_df['year'] == 2021, ['Gwangjin', 'Dongdaemun', 'Seongdong', 'Jungnang']]



# 모든 조합을 딕셔너리로 관리
X_meta_dict = {
    "all": X_meta_all
}


In [8]:
### XGBoost

from joblib import dump

results_xgb = {}
xgb_meta = {}

for combo_name, X_meta in X_meta_dict.items():
    print(f"=== {combo_name} ===")
    results_xgb[combo_name] = {}


    for gu in ['Gwangjin', 'Dongdaemun', 'Seongdong', 'Jungnang']:
        gu_cols = [col for col in X_meta.columns if col.endswith(f"_{gu}")]
        X_meta_gu = X_meta[gu_cols].reset_index(drop=True)
        y_gu = y_valid[gu].reset_index(drop=True)

        meta_model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42, verbosity=0)
        meta_model.fit(X_meta_gu, y_gu)

        pred_gu = meta_model.predict(X_meta_gu)
        rmse = np.sqrt(mean_squared_error(y_gu, pred_gu))
        mae = mean_absolute_error(y_gu, pred_gu)

        xgb_meta[gu] = meta_model


        print(f"{gu} MAE: {mae:.4f}, RMSE: {rmse:.4f}")
        results_xgb[combo_name][gu] = {
            'MAE': mae,
            'RMSE': rmse
        }


        dump(meta_model, f"xgb_meta_{gu}.joblib")



    print("")


=== all ===
Gwangjin MAE: 0.6110, RMSE: 0.8376
Dongdaemun MAE: 0.4975, RMSE: 0.6778
Seongdong MAE: 0.4954, RMSE: 0.6907
Jungnang MAE: 0.3554, RMSE: 0.4754



In [15]:
from joblib import dump

save_dir = '/content/drive/MyDrive/stacking_models/'

for gu, model in xgb_meta.items():
    dump(model, save_dir + f"xgb_meta_{gu}.joblib")


In [16]:
!mkdir -p /content/drive/MyDrive/stacking_models


In [17]:
import os
print(os.listdir('/content/drive/MyDrive/stacking_models'))


['xgb_meta_Gwangjin.joblib', 'xgb_meta_Dongdaemun.joblib', 'xgb_meta_Seongdong.joblib', 'xgb_meta_Jungnang.joblib']
