In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import log_loss


In [2]:
dataset_df = pd.read_csv(r"C:\Users\ghibl\ICR\data\input\train.csv")
dataset_df.drop("Id",axis = 1, inplace = True)

In [3]:
le = LabelEncoder()
dataset_df["EJ"] = le.fit_transform(dataset_df["EJ"])

In [4]:
X_df = dataset_df.drop("Class",axis =1)
y_df = dataset_df["Class"].copy()
skf = StratifiedKFold(n_splits = 4, shuffle = True, random_state = 42)
scores = []

In [8]:
for tr_idx, va_idx in skf.split(X_df, y_df):
    X_train, X_valid = X_df.iloc[tr_idx], X_df.iloc[va_idx]
    y_train, y_valid = y_df.iloc[tr_idx], y_df.iloc[va_idx]

    dtrain = xgb.DMatrix(X_train, label = y_train)
    dvalid = xgb.DMatrix(X_valid, label = y_valid)

    params = {'objective': 'binary:logistic','silent': 1, 'random_state': 42}
    num_round = 500

    watchlist = [(dtrain, 'train'),(dvalid, 'eval')]
    model = xgb.train(params, dtrain, num_round, evals = watchlist, early_stopping_rounds = 20)

    y_pred = model.predict(dvalid)
    score = log_loss(y_valid, y_pred)
    scores.append(score)
    print(f'logloss: {score:.4f}')

score_CV = np.mean(scores)
print(score_CV)

Parameters: { "silent" } are not used.

[0]	train-logloss:0.47814	eval-logloss:0.53211
[1]	train-logloss:0.36030	eval-logloss:0.44204
[2]	train-logloss:0.27592	eval-logloss:0.39559
[3]	train-logloss:0.21535	eval-logloss:0.34956
[4]	train-logloss:0.16904	eval-logloss:0.31908
[5]	train-logloss:0.13712	eval-logloss:0.29670
[6]	train-logloss:0.11311	eval-logloss:0.27594
[7]	train-logloss:0.09359	eval-logloss:0.27098
[8]	train-logloss:0.07870	eval-logloss:0.25791
[9]	train-logloss:0.06733	eval-logloss:0.24985
[10]	train-logloss:0.05860	eval-logloss:0.24020
[11]	train-logloss:0.05157	eval-logloss:0.23039
[12]	train-logloss:0.04600	eval-logloss:0.22480
[13]	train-logloss:0.04142	eval-logloss:0.22488
[14]	train-logloss:0.03742	eval-logloss:0.21955
[15]	train-logloss:0.03419	eval-logloss:0.20881
[16]	train-logloss:0.03151	eval-logloss:0.20326
[17]	train-logloss:0.02885	eval-logloss:0.20064
[18]	train-logloss:0.02705	eval-logloss:0.19640
[19]	train-logloss:0.02531	eval-logloss:0.19080
[20]	train

[28]	train-logloss:0.01488	eval-logloss:0.30841
[29]	train-logloss:0.01439	eval-logloss:0.30897
[30]	train-logloss:0.01385	eval-logloss:0.31193
logloss: 0.3119
Parameters: { "silent" } are not used.

[0]	train-logloss:0.47855	eval-logloss:0.51794
[1]	train-logloss:0.35502	eval-logloss:0.41902
[2]	train-logloss:0.26778	eval-logloss:0.35898
[3]	train-logloss:0.20624	eval-logloss:0.32586
[4]	train-logloss:0.16400	eval-logloss:0.30129
[5]	train-logloss:0.13349	eval-logloss:0.27756
[6]	train-logloss:0.11023	eval-logloss:0.25446
[7]	train-logloss:0.09328	eval-logloss:0.24512
[8]	train-logloss:0.07918	eval-logloss:0.23258
[9]	train-logloss:0.06825	eval-logloss:0.22172
[10]	train-logloss:0.05986	eval-logloss:0.21227
[11]	train-logloss:0.05290	eval-logloss:0.20560
[12]	train-logloss:0.04687	eval-logloss:0.20407
[13]	train-logloss:0.04255	eval-logloss:0.20883
[14]	train-logloss:0.03796	eval-logloss:0.20478
[15]	train-logloss:0.03488	eval-logloss:0.20232
[16]	train-logloss:0.03234	eval-logloss:0.