In [3]:
import pandas as pd
from catboost import CatBoostRegressor, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error
import numpy as np

# Load the training data from the CSV file.
df = pd.read_csv('EDA/train_newfeature.csv')

# Column holding the row identifier and column holding the regression target;
# neither is used as a model input.
id_col = 'id'
target = 'Rings'

# Every other column, in its original file order, is treated as a feature.
features = [
    name
    for name in df.columns
    if name != id_col and name != target
]

# Columns that CatBoost should handle as categorical rather than numeric.
cat_features = ['Sex']


# Hold out 20% of the rows for validation; the fixed seed makes the split
# reproducible between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    df[features],
    df[target],
    random_state=42,
    test_size=0.2,
)

# Wrap each partition in a CatBoost Pool so that the categorical columns are
# declared together with the data they belong to.
train_pool = Pool(data=X_train, label=y_train, cat_features=cat_features)
valid_pool = Pool(data=X_valid, label=y_valid, cat_features=cat_features)

# Hyperparameters for the regressor, kept in one place so they are easy to
# inspect or log alongside the results.
model_params = {
    'iterations': 1000,
    'learning_rate': 0.05,
    'depth': 6,
    # Optimise and monitor the same quantity: RMSE on the untransformed target.
    'loss_function': 'RMSE',
    'eval_metric': 'RMSE',
    'random_seed': 42,
    'early_stopping_rounds': 50,
    # Report progress every 100 iterations.
    'verbose': 100,
}
model = CatBoostRegressor(**model_params)

# Fit on the training pool while tracking the eval metric on the validation pool.
model.fit(train_pool, eval_set=valid_pool)

# Raw model predictions for the held-out rows.
y_pred = model.predict(X_valid)

# The model is trained with an RMSE objective on the untransformed target, so
# nothing bounds its output below by zero. mean_squared_log_error raises
# ValueError when it receives a negative value, so floor the predictions at
# zero for scoring only; y_pred itself is left untouched for later use.
y_pred_for_score = np.clip(y_pred, 0, None)
rmsle = np.sqrt(mean_squared_log_error(y_valid, y_pred_for_score))
print(f'Validation RMSLE: {rmsle:.4f}')

# Look up the identifier of every validation row via the index that the split
# preserved from the original frame.
id_series = df.loc[X_valid.index, id_col]

# Side-by-side table of predictions and ground truth, keyed by row id.
id_valid = pd.DataFrame(
    {
        id_col: id_series,
        'Predicted_Rings': y_pred,
        'Actual_Rings': y_valid.values,
    }
)

# Show the first few rows in a fixed column order.
preview_columns = [id_col, 'Predicted_Rings', 'Actual_Rings']
print(id_valid[preview_columns].head())

0:	learn: 2.9868817	test: 2.9993436	best: 2.9993436 (0)	total: 13.2ms	remaining: 13.2s
100:	learn: 1.8491700	test: 1.8869201	best: 1.8869201 (100)	total: 880ms	remaining: 7.84s
200:	learn: 1.8273758	test: 1.8758804	best: 1.8758804 (200)	total: 1.73s	remaining: 6.87s
300:	learn: 1.8099467	test: 1.8704558	best: 1.8704558 (300)	total: 2.54s	remaining: 5.9s
400:	learn: 1.7925098	test: 1.8659244	best: 1.8659244 (400)	total: 3.35s	remaining: 5s
500:	learn: 1.7764142	test: 1.8624273	best: 1.8623805 (497)	total: 4.19s	remaining: 4.17s
600:	learn: 1.7621509	test: 1.8589707	best: 1.8589707 (600)	total: 5.05s	remaining: 3.36s
700:	learn: 1.7493883	test: 1.8570924	best: 1.8570876 (698)	total: 5.88s	remaining: 2.51s
800:	learn: 1.7366888	test: 1.8547470	best: 1.8547028 (799)	total: 6.72s	remaining: 1.67s
900:	learn: 1.7251750	test: 1.8538870	best: 1.8537164 (894)	total: 7.55s	remaining: 830ms
999:	learn: 1.7148417	test: 1.8528457	best: 1.8527150 (995)	total: 8.36s	remaining: 0us

bestTest = 1.85271