In [1]:
import pandas as pd 
import numpy as np 

# Classifiers
from catboost import CatBoostClassifier

# Model selection
from sklearn.model_selection import StratifiedKFold, PredefinedSplit

# Metrics
from sklearn.metrics import f1_score, accuracy_score
from sklearn.metrics import make_scorer


In [2]:
tr_df = pd.read_csv('../../../data/feature/cba_train.csv')

val_df = pd.read_csv('../../../data/feature/cba_validation.csv').sample(frac=1)
cv_size = val_df.shape[0]//2
full_stack = [tr_df, val_df.iloc[:cv_size, :]]
full_df = pd.concat(full_stack)

ind_list = [-1,]*tr_df.shape[0]+[0,]*cv_size
prd = PredefinedSplit(ind_list)

ts_df = val_df.iloc[cv_size:, :]

In [3]:
model = CatBoostClassifier()

In [4]:
grid = {'learning_rate': [0.01, 0.05, 0.1],
        'depth': [4, 10, 12],
        'l2_leaf_reg': [ 3, 7, 12],
        }

In [None]:
grid_search_result = model.grid_search(grid, 
                                       X=full_df.iloc[:, :-1], 
                                       y=full_df.iloc[:,-1], 
                                       plot=True)

In [4]:
model.fit(tr_df.iloc[:, :-1], tr_df.iloc[:, -1], eval_set=(val_df.iloc[:cv_size, :-1],val_df.iloc[:cv_size, -1]), plot=False)

9.29s	remaining: 7.76s
545:	learn: 0.5126311	test: 1.0959518	best: 0.9631423 (7)	total: 9.31s	remaining: 7.74s
546:	learn: 0.5124791	test: 1.0958805	best: 0.9631423 (7)	total: 9.32s	remaining: 7.72s
547:	learn: 0.5122355	test: 1.0959277	best: 0.9631423 (7)	total: 9.34s	remaining: 7.7s
548:	learn: 0.5119895	test: 1.0959734	best: 0.9631423 (7)	total: 9.35s	remaining: 7.68s
549:	learn: 0.5118252	test: 1.0962676	best: 0.9631423 (7)	total: 9.37s	remaining: 7.66s
550:	learn: 0.5115666	test: 1.0963691	best: 0.9631423 (7)	total: 9.38s	remaining: 7.64s
551:	learn: 0.5114309	test: 1.0961397	best: 0.9631423 (7)	total: 9.4s	remaining: 7.63s
552:	learn: 0.5111675	test: 1.0959026	best: 0.9631423 (7)	total: 9.41s	remaining: 7.61s
553:	learn: 0.5109290	test: 1.0958255	best: 0.9631423 (7)	total: 9.43s	remaining: 7.59s
554:	learn: 0.5108508	test: 1.0959930	best: 0.9631423 (7)	total: 9.45s	remaining: 7.58s
555:	learn: 0.5106254	test: 1.0960654	best: 0.9631423 (7)	total: 9.47s	remaining: 7.56s
556:	learn:

<catboost.core.CatBoostClassifier at 0x1b9b124f608>

In [5]:
y_pred = model.predict(ts_df.iloc[:, :-1])
print(f1_score(ts_df.iloc[:, -1], y_pred,average='macro'))

0.2484793187347932
