In [90]:
import pandas as pd
import random
import os
import numpy as np

from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

def seed_everything(seed):
    """Seed the random sources used by this script so runs are repeatable.

    Writes ``str(seed)`` to the ``PYTHONHASHSEED`` environment variable and
    seeds both the standard-library ``random`` module and NumPy's global
    random generator with the same value.

    Args:
        seed: Value handed to ``random.seed`` and ``np.random.seed``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Both seeders take the seed as their only argument.
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

# Fix the random seeds before any data handling.
seed_everything(42)

# Load the training and test tables from the working directory.
train, test = (pd.read_csv(path) for path in ('train.csv', 'test.csv'))

# The target is 'Calories_Burned'; 'ID' is dropped from the feature tables.
train_y = train['Calories_Burned']
train_x = train.drop(columns=['ID', 'Calories_Burned'])
test_x = test.drop(columns='ID')

# Columns that are converted to integer codes with LabelEncoder.
ordinal_features = ['Weight_Status', 'Gender']

for feature in ordinal_features:
    # Fit the encoder on the training column only, then encode that column.
    le = LabelEncoder().fit(train_x[feature])
    train_x[feature] = le.transform(train_x[feature])

    # Labels found in the test column but not among the fitted classes are
    # appended to the encoder's classes before the test column is encoded.
    unseen = [
        label
        for label in np.unique(test_x[feature])
        if label not in le.classes_
    ]
    for label in unseen:
        le.classes_ = np.append(le.classes_, label)
    test_x[feature] = le.transform(test_x[feature])

# Hyperparameters for the CatBoost regressor.
cat_params = dict(
    learning_rate=0.20909079092170735,
    depth=6,
    od_pval=0.236844398775451,
    model_size_reg=0.30614059763442997,
    l2_leaf_reg=5.535171839105427,
    loss_function="RMSE",
    random_seed=42,
    verbose=1,
)

model = CatBoostRegressor(**cat_params)

# Split the training data into a fitting part and a validation part
# (40% of the rows go to validation).
X_train, X_val, y_train, y_val = train_test_split(
    train_x,
    train_y,
    test_size=0.4,
    random_state=91,
)

# Train the model, passing the validation split as the evaluation set.
model.fit(
    X_train,
    y_train,
    eval_set=(X_val, y_val),
    use_best_model=True,
)

# Predict on the validation split and report its RMSE.
y_val_pred = model.predict(X_val)
val_mse = mean_squared_error(y_val, y_val_pred)
val_rmse = np.sqrt(val_mse)
print(f'Validation RMSE: {val_rmse:.4f}')

# Predict on the test features.
test_y_pred = model.predict(test_x)

# Save the predictions in the submission format: one row per test ID.
submission = pd.DataFrame(
    {
        'ID': test['ID'],
        'Calories_Burned': test_y_pred,
    }
)
submission.to_csv('n_submission.csv', index=False)

0:	learn: 51.6750460	test: 51.9785879	best: 51.9785879 (0)	total: 4.3ms	remaining: 4.29s
1:	learn: 42.6779354	test: 43.0941645	best: 43.0941645 (1)	total: 7.15ms	remaining: 3.57s
2:	learn: 35.3428100	test: 35.8976790	best: 35.8976790 (2)	total: 27.8ms	remaining: 9.25s
3:	learn: 29.5308986	test: 30.1244561	best: 30.1244561 (3)	total: 33.7ms	remaining: 8.4s
4:	learn: 25.0071207	test: 25.7294390	best: 25.7294390 (4)	total: 39.3ms	remaining: 7.82s
5:	learn: 20.9770242	test: 21.6280166	best: 21.6280166 (5)	total: 44.4ms	remaining: 7.35s
6:	learn: 17.7432972	test: 18.4297892	best: 18.4297892 (6)	total: 63.1ms	remaining: 8.95s
7:	learn: 15.2122186	test: 15.8605080	best: 15.8605080 (7)	total: 70.5ms	remaining: 8.75s
8:	learn: 13.3240434	test: 13.9411673	best: 13.9411673 (8)	total: 74.6ms	remaining: 8.21s
9:	learn: 11.5463319	test: 12.0995764	best: 12.0995764 (9)	total: 87.6ms	remaining: 8.67s
10:	learn: 10.2463856	test: 10.7424167	best: 10.7424167 (10)	total: 111ms	remaining: 9.94s
11:	learn: 

96:	learn: 2.0040989	test: 2.4673366	best: 2.4673366 (96)	total: 1.18s	remaining: 11s
97:	learn: 1.9705676	test: 2.4303677	best: 2.4303677 (97)	total: 1.18s	remaining: 10.9s
98:	learn: 1.9545392	test: 2.4190752	best: 2.4190752 (98)	total: 1.2s	remaining: 10.9s
99:	learn: 1.9452494	test: 2.4085497	best: 2.4085497 (99)	total: 1.2s	remaining: 10.8s
100:	learn: 1.9366120	test: 2.4021349	best: 2.4021349 (100)	total: 1.22s	remaining: 10.8s
101:	learn: 1.9249184	test: 2.3919186	best: 2.3919186 (101)	total: 1.22s	remaining: 10.8s
102:	learn: 1.9099643	test: 2.3778314	best: 2.3778314 (102)	total: 1.26s	remaining: 11s
103:	learn: 1.8923755	test: 2.3636957	best: 2.3636957 (103)	total: 1.26s	remaining: 10.9s
104:	learn: 1.8861716	test: 2.3570576	best: 2.3570576 (104)	total: 1.28s	remaining: 10.9s
105:	learn: 1.8727263	test: 2.3441480	best: 2.3441480 (105)	total: 1.29s	remaining: 10.9s
106:	learn: 1.8633624	test: 2.3368688	best: 2.3368688 (106)	total: 1.3s	remaining: 10.8s
107:	learn: 1.8506221	tes

194:	learn: 1.2342438	test: 1.7717174	best: 1.7717174 (194)	total: 2.39s	remaining: 9.86s
195:	learn: 1.2341436	test: 1.7713344	best: 1.7713344 (195)	total: 2.4s	remaining: 9.86s
196:	learn: 1.2287736	test: 1.7672875	best: 1.7672875 (196)	total: 2.41s	remaining: 9.82s
197:	learn: 1.2265603	test: 1.7654842	best: 1.7654842 (197)	total: 2.41s	remaining: 9.77s
198:	learn: 1.2235465	test: 1.7636460	best: 1.7636460 (198)	total: 2.41s	remaining: 9.72s
199:	learn: 1.2198998	test: 1.7603498	best: 1.7603498 (199)	total: 2.42s	remaining: 9.67s
200:	learn: 1.2115175	test: 1.7530075	best: 1.7530075 (200)	total: 2.42s	remaining: 9.62s
201:	learn: 1.2044351	test: 1.7466911	best: 1.7466911 (201)	total: 2.42s	remaining: 9.57s
202:	learn: 1.2020462	test: 1.7443417	best: 1.7443417 (202)	total: 2.45s	remaining: 9.63s
203:	learn: 1.1939191	test: 1.7373715	best: 1.7373715 (203)	total: 2.46s	remaining: 9.6s
204:	learn: 1.1879639	test: 1.7324016	best: 1.7324016 (204)	total: 2.47s	remaining: 9.59s
205:	learn: 

303:	learn: 0.8943860	test: 1.5016873	best: 1.5016873 (303)	total: 3.19s	remaining: 7.3s
304:	learn: 0.8929276	test: 1.5007202	best: 1.5007202 (304)	total: 3.19s	remaining: 7.28s
305:	learn: 0.8928981	test: 1.5002979	best: 1.5002979 (305)	total: 3.19s	remaining: 7.25s
306:	learn: 0.8891088	test: 1.4970389	best: 1.4970389 (306)	total: 3.2s	remaining: 7.22s
307:	learn: 0.8876697	test: 1.4956155	best: 1.4956155 (307)	total: 3.2s	remaining: 7.19s
308:	learn: 0.8860675	test: 1.4947612	best: 1.4947612 (308)	total: 3.2s	remaining: 7.17s
309:	learn: 0.8841438	test: 1.4938341	best: 1.4938341 (309)	total: 3.21s	remaining: 7.14s
310:	learn: 0.8814169	test: 1.4912939	best: 1.4912939 (310)	total: 3.21s	remaining: 7.11s
311:	learn: 0.8783456	test: 1.4889399	best: 1.4889399 (311)	total: 3.21s	remaining: 7.08s
312:	learn: 0.8768943	test: 1.4881574	best: 1.4881574 (312)	total: 3.21s	remaining: 7.05s
313:	learn: 0.8750450	test: 1.4860698	best: 1.4860698 (313)	total: 3.21s	remaining: 7.02s
314:	learn: 0.

406:	learn: 0.7277700	test: 1.3882476	best: 1.3882476 (406)	total: 3.61s	remaining: 5.26s
407:	learn: 0.7256650	test: 1.3875883	best: 1.3875883 (407)	total: 3.62s	remaining: 5.25s
408:	learn: 0.7245273	test: 1.3866349	best: 1.3866349 (408)	total: 3.63s	remaining: 5.24s
409:	learn: 0.7230193	test: 1.3858713	best: 1.3858713 (409)	total: 3.63s	remaining: 5.22s
410:	learn: 0.7217548	test: 1.3854955	best: 1.3854955 (410)	total: 3.64s	remaining: 5.22s
411:	learn: 0.7186350	test: 1.3828927	best: 1.3828927 (411)	total: 3.65s	remaining: 5.21s
412:	learn: 0.7173560	test: 1.3820734	best: 1.3820734 (412)	total: 3.65s	remaining: 5.19s
413:	learn: 0.7164136	test: 1.3814989	best: 1.3814989 (413)	total: 3.68s	remaining: 5.2s
414:	learn: 0.7162968	test: 1.3810007	best: 1.3810007 (414)	total: 3.68s	remaining: 5.18s
415:	learn: 0.7148251	test: 1.3800781	best: 1.3800781 (415)	total: 3.69s	remaining: 5.17s
416:	learn: 0.7131575	test: 1.3793501	best: 1.3793501 (416)	total: 3.69s	remaining: 5.16s
417:	learn:

513:	learn: 0.6011308	test: 1.3099667	best: 1.3099667 (513)	total: 4.84s	remaining: 4.58s
514:	learn: 0.5997224	test: 1.3098919	best: 1.3098919 (514)	total: 4.84s	remaining: 4.56s
515:	learn: 0.5980978	test: 1.3088808	best: 1.3088808 (515)	total: 4.86s	remaining: 4.55s
516:	learn: 0.5969339	test: 1.3080148	best: 1.3080148 (516)	total: 4.86s	remaining: 4.54s
517:	learn: 0.5953271	test: 1.3068292	best: 1.3068292 (517)	total: 4.86s	remaining: 4.52s
518:	learn: 0.5940677	test: 1.3056844	best: 1.3056844 (518)	total: 4.88s	remaining: 4.52s
519:	learn: 0.5930631	test: 1.3056873	best: 1.3056844 (518)	total: 4.88s	remaining: 4.51s
520:	learn: 0.5923526	test: 1.3052571	best: 1.3052571 (520)	total: 4.89s	remaining: 4.5s
521:	learn: 0.5909024	test: 1.3041892	best: 1.3041892 (521)	total: 4.93s	remaining: 4.51s
522:	learn: 0.5897733	test: 1.3041426	best: 1.3041426 (522)	total: 4.94s	remaining: 4.5s
523:	learn: 0.5891474	test: 1.3036702	best: 1.3036702 (523)	total: 4.94s	remaining: 4.49s
524:	learn: 

606:	learn: 0.5241387	test: 1.2698799	best: 1.2698799 (606)	total: 5.84s	remaining: 3.78s
607:	learn: 0.5235141	test: 1.2695601	best: 1.2695601 (607)	total: 5.84s	remaining: 3.77s
608:	learn: 0.5231612	test: 1.2692065	best: 1.2692065 (608)	total: 5.85s	remaining: 3.76s
609:	learn: 0.5224351	test: 1.2688641	best: 1.2688641 (609)	total: 5.87s	remaining: 3.75s
610:	learn: 0.5216120	test: 1.2678398	best: 1.2678398 (610)	total: 5.88s	remaining: 3.74s
611:	learn: 0.5209001	test: 1.2675742	best: 1.2675742 (611)	total: 5.88s	remaining: 3.73s
612:	learn: 0.5202571	test: 1.2668494	best: 1.2668494 (612)	total: 5.88s	remaining: 3.71s
613:	learn: 0.5195117	test: 1.2663752	best: 1.2663752 (613)	total: 5.89s	remaining: 3.71s
614:	learn: 0.5182040	test: 1.2659689	best: 1.2659689 (614)	total: 5.9s	remaining: 3.69s
615:	learn: 0.5177947	test: 1.2655700	best: 1.2655700 (615)	total: 5.9s	remaining: 3.68s
616:	learn: 0.5171798	test: 1.2653025	best: 1.2653025 (616)	total: 5.91s	remaining: 3.67s
617:	learn: 

747:	learn: 0.4425428	test: 1.2336895	best: 1.2336560 (745)	total: 6.6s	remaining: 2.22s
748:	learn: 0.4416645	test: 1.2334154	best: 1.2334154 (748)	total: 6.61s	remaining: 2.22s
749:	learn: 0.4409542	test: 1.2328457	best: 1.2328457 (749)	total: 6.63s	remaining: 2.21s
750:	learn: 0.4403279	test: 1.2327420	best: 1.2327420 (750)	total: 6.64s	remaining: 2.2s
751:	learn: 0.4401753	test: 1.2327236	best: 1.2327236 (751)	total: 6.64s	remaining: 2.19s
752:	learn: 0.4396716	test: 1.2323715	best: 1.2323715 (752)	total: 6.67s	remaining: 2.19s
753:	learn: 0.4390159	test: 1.2321893	best: 1.2321893 (753)	total: 6.69s	remaining: 2.18s
754:	learn: 0.4386044	test: 1.2321870	best: 1.2321870 (754)	total: 6.72s	remaining: 2.18s
755:	learn: 0.4381189	test: 1.2321695	best: 1.2321695 (755)	total: 6.73s	remaining: 2.17s
756:	learn: 0.4376511	test: 1.2319303	best: 1.2319303 (756)	total: 6.74s	remaining: 2.16s
757:	learn: 0.4372456	test: 1.2321410	best: 1.2319303 (756)	total: 6.76s	remaining: 2.16s
758:	learn: 

839:	learn: 0.4016408	test: 1.2172625	best: 1.2171640 (837)	total: 7.76s	remaining: 1.48s
840:	learn: 0.4011264	test: 1.2169154	best: 1.2169154 (840)	total: 7.76s	remaining: 1.47s
841:	learn: 0.4009770	test: 1.2167496	best: 1.2167496 (841)	total: 7.77s	remaining: 1.46s
842:	learn: 0.4005090	test: 1.2165384	best: 1.2165384 (842)	total: 7.77s	remaining: 1.45s
843:	learn: 0.4001817	test: 1.2164355	best: 1.2164355 (843)	total: 7.77s	remaining: 1.44s
844:	learn: 0.3997640	test: 1.2159297	best: 1.2159297 (844)	total: 7.79s	remaining: 1.43s
845:	learn: 0.3995207	test: 1.2158964	best: 1.2158964 (845)	total: 7.79s	remaining: 1.42s
846:	learn: 0.3990528	test: 1.2157629	best: 1.2157629 (846)	total: 7.8s	remaining: 1.41s
847:	learn: 0.3986854	test: 1.2154225	best: 1.2154225 (847)	total: 7.8s	remaining: 1.4s
848:	learn: 0.3983547	test: 1.2152981	best: 1.2152981 (848)	total: 7.82s	remaining: 1.39s
849:	learn: 0.3979302	test: 1.2152129	best: 1.2152129 (849)	total: 7.82s	remaining: 1.38s
850:	learn: 0

935:	learn: 0.3672257	test: 1.2020167	best: 1.2020167 (935)	total: 8.73s	remaining: 597ms
936:	learn: 0.3668943	test: 1.2016785	best: 1.2016785 (936)	total: 8.75s	remaining: 588ms
937:	learn: 0.3666335	test: 1.2015868	best: 1.2015868 (937)	total: 8.76s	remaining: 579ms
938:	learn: 0.3661810	test: 1.2014964	best: 1.2014964 (938)	total: 8.76s	remaining: 569ms
939:	learn: 0.3658704	test: 1.2014725	best: 1.2014725 (939)	total: 8.77s	remaining: 560ms
940:	learn: 0.3653001	test: 1.2011583	best: 1.2011583 (940)	total: 8.78s	remaining: 550ms
941:	learn: 0.3647530	test: 1.2009418	best: 1.2009418 (941)	total: 8.79s	remaining: 541ms
942:	learn: 0.3644913	test: 1.2008936	best: 1.2008936 (942)	total: 8.8s	remaining: 532ms
943:	learn: 0.3643963	test: 1.2006984	best: 1.2006984 (943)	total: 8.84s	remaining: 525ms
944:	learn: 0.3640105	test: 1.2007639	best: 1.2006984 (943)	total: 8.86s	remaining: 516ms
945:	learn: 0.3637361	test: 1.2007099	best: 1.2006984 (943)	total: 8.87s	remaining: 506ms
946:	learn: