In [1]:
# Colab-only step: open the upload widget so the prediction CSVs can be placed
# in the working directory. The loading cell further down reads
# catboost_2022.csv, lgbm_2022.csv, randomforest_2022.csv and knn_2022.csv.
from google.colab import files
# `uploaded` keeps whatever files.upload() returns; nothing below reads it.
uploaded = files.upload()

Saving catboost_2022.csv to catboost_2022.csv
Saving knn_2022.csv to knn_2022.csv
Saving lgbm_2022.csv to lgbm_2022.csv
Saving randomforest_2022.csv to randomforest_2022.csv


In [3]:
import pandas as pd
import numpy as np

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

from xgboost import XGBRegressor

In [4]:
# District columns: Korean header -> romanized name used in the stacked
# feature names (<model>_<District>).
district_names = {
    '광진구': 'Gwangjin',
    '동대문구': 'Dongdaemun',
    '성동구': 'Seongdong',
    '중랑구': 'Jungnang',
}

# Per-model rename maps. The catboost, lgbm and knn maps are keyed by the
# Korean headers; the random forest map is keyed by the romanized names.
# NOTE(review): this presumably mirrors the headers in each CSV -- a key that
# does not match any column is silently skipped by DataFrame.rename.
rename_map_catboost = {ko: f'catboost_{en}' for ko, en in district_names.items()}
rename_map_lgbm = {ko: f'lgbm_{en}' for ko, en in district_names.items()}
rename_map_rf = {en: f'randomforest_{en}' for en in district_names.values()}
rename_map_knn = {ko: f'knn_{en}' for ko, en in district_names.items()}

# For every base model: read its prediction file, drop its date/time column
# ('<model>_일시') when present (errors='ignore' makes the drop a no-op
# otherwise), then rename the district columns with the model prefix.
catboost_preds = (
    pd.read_csv('catboost_2022.csv')
    .drop(columns=['catboost_일시'], errors='ignore')
    .rename(columns=rename_map_catboost)
)

lgbm_preds = (
    pd.read_csv('lgbm_2022.csv')
    .drop(columns=['lgbm_일시'], errors='ignore')
    .rename(columns=rename_map_lgbm)
)

randomforest_preds = (
    pd.read_csv('randomforest_2022.csv')
    .drop(columns=['randomforest_일시'], errors='ignore')
    .rename(columns=rename_map_rf)
)

knn_preds = (
    pd.read_csv('knn_2022.csv')
    .drop(columns=['knn_일시'], errors='ignore')
    .rename(columns=rename_map_knn)
)


In [5]:
# Put the four base-model prediction frames side by side. With axis=1 pandas
# aligns rows on the index, so the frames are matched by row label.
base_model_frames = [catboost_preds, lgbm_preds, randomforest_preds, knn_preds]
X_meta_all = pd.concat(base_model_frames, axis=1)


# Registry of feature-set combinations keyed by name; only "all" is defined.
X_meta_dict = dict(all=X_meta_all)


In [31]:
import os

from google.colab import drive
from joblib import load

# Mount Google Drive (forcing a fresh mount) and list the saved-model folder
# so the available files are visible in the cell output.
drive.mount('/content/drive', force_remount=True)
print(os.listdir('/content/drive/MyDrive/stacking_models'))

load_dir = '/content/drive/MyDrive/stacking_models/'

# One persisted meta model per district, stored as xgb_meta_<District>.joblib.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files from a trusted source.
xgb_meta_loaded = {}
for gu in ['Gwangjin', 'Dongdaemun', 'Seongdong', 'Jungnang']:
    model_file = f"xgb_meta_{gu}.joblib"
    xgb_meta_loaded[gu] = load(load_dir + model_file)


print(xgb_meta_loaded.keys())

Mounted at /content/drive
['xgb_meta_Seongdong.joblib', 'xgb_meta_Jungnang.joblib', 'xgb_meta_Gwangjin.joblib', 'xgb_meta_Dongdaemun.joblib']
dict_keys(['Gwangjin', 'Dongdaemun', 'Seongdong', 'Jungnang'])


In [32]:
### XGBoost

# Layout: results_xgb[combo_name][gu]['predictions'] holds the output of the
# loaded meta model for that district.
results_xgb = {}

for combo_name, X_meta in X_meta_dict.items():
    print(f"=== {combo_name} ===")
    combo_results = {}
    results_xgb[combo_name] = combo_results

    for gu in ['Gwangjin', 'Dongdaemun', 'Seongdong', 'Jungnang']:
        # Features for a district are all columns named <something>_<District>,
        # kept in the order they appear in the combined frame.
        suffix = f"_{gu}"
        district_cols = [name for name in X_meta.columns if name.endswith(suffix)]
        district_features = X_meta[district_cols].reset_index(drop=True)

        district_preds = xgb_meta_loaded[gu].predict(district_features)
        combo_results[gu] = {'predictions': district_preds}

        # Show the first few predictions as a quick sanity check.
        print(f"{gu} 예측값 샘플: {district_preds[:5]}")

    print("")




=== all ===
Gwangjin 예측값 샘플: [3.7363133 3.9426272 3.3618972 2.2463562 2.2627232]
Dongdaemun 예측값 샘플: [2.7128289 2.4369464 2.9592023 2.5290236 3.2210143]
Seongdong 예측값 샘플: [2.3181784 2.5540264 2.7322361 2.2461927 2.688431 ]
Jungnang 예측값 샘플: [2.7311277 2.4777446 2.3430114 2.7158337 2.1854036]



In [33]:
# Colab-only step: upload sample_submission.csv, which the next cell reads.
from google.colab import files
# Rebinds `uploaded`, replacing the value stored by the first upload cell.
uploaded = files.upload()

Saving sample_submission.csv to sample_submission.csv


In [35]:
# Load the submission template. Original author's note: it has 334 rows,
# covering January 1 through November 30.
submission = pd.read_csv('sample_submission.csv')

n = len(submission)  # 334 per the original note

# Submission column (Korean header) -> district key in results_xgb['all'].
submission_columns = {
    '광진구': 'Gwangjin',
    '동대문구': 'Dongdaemun',
    '성동구': 'Seongdong',
    '중랑구': 'Jungnang',
}

# Keep only the first n predictions of each district (the original note says
# there are 365 predictions, of which the first 334 are used).
# NOTE(review): this is a positional slice -- it assumes prediction row i lines
# up with template row i; confirm against the prediction files' dates.
for column, gu in submission_columns.items():
    submission[column] = results_xgb['all'][gu]['predictions'][:n]

submission.to_csv('submission.csv', index=False)
print("submission.csv 파일이 생성되었습니다 (11월 30일까지)")


submission.csv 파일이 생성되었습니다 (11월 30일까지)
