In [5]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
import joblib

In [8]:
data = pd.read_csv('appl_score_sample.csv', delimiter=';')

x = data.drop(columns=['CLIENT_ID', 'Target'])
y = data['Target']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

cat_features = ['SEX', 'CHILD_FLAG', 'REALTY_FLAG', 'ACCOUNTS_FLAG', 'E_MAIL_FLAG', 'DCI', 'FOREIGN_PASSPORT_FLAG', 'EDUCATION']


model = XGBClassifier(
        iterations=100, 
        depth=6, 
        learning_rate=0.1, 
        loss_function='Logloss', 
        cat_features=cat_features)
    
model.fit(x_train, y_train)

model.save_model('catboost_model.cbm')
model.predict(x_test)

Parameters: { "cat_features", "depth", "iterations", "loss_function" } are not used.



array([1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1,
       1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
       1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,

In [14]:


def load_and_preprocess_data(file_path):

    data = pd.read_csv(file_path, delimiter=';')

    x = data.drop(columns=['CLIENT_ID', 'Target'])
    y = data['Target']

    cat_features = ['SEX', 'CHILD_FLAG', 'REALTY_FLAG', 'ACCOUNTS_FLAG', 'E_MAIL_FLAG', 'DCI', 'FOREIGN_PASSPORT_FLAG', 'EDUCATION']
    return x, y, cat_features

def objective(params):
    clf = XGBClassifier(**params, cat_features=cat_features, verbose=0)
    clf.fit(x_train, y_train)
    loss = -clf.score(x_test, y_test)
    return {'loss': loss, 'status': STATUS_OK}

if __name__ == "__main__":
    c, y, cat_features = load_and_preprocess_data('appl_score_sample.csv')
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    param_space = {
        'iterations': hp.choice('iterations', range(50, 500)),
        'depth': hp.choice('depth', range(3, 10)),
        'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
        'l2_leaf_reg': hp.uniform('l2_leaf_reg', 1, 10),
        'border_count': hp.choice('border_count', range(32, 255)),
    }


    trials = Trials()
    best_params = fmin(
        fn=objective,
        space=param_space,
        algo=tpe.suggest,
        max_evals=100,
        trials=trials
    )

    print(f"Best parameters: {best_params}")

    best_params['iterations'] = range(50, 500)[best_params['iterations']]
    best_params['depth'] = range(3, 10)[best_params['depth']]
    best_params['border_count'] = range(32, 255)[best_params['border_count']]

    final_model = XGBClassifier(**best_params, cat_features=cat_features)
    final_model.fit(x_train, y_train)

    final_model.save_model('catboost_model_v1.cbm')


  1%|          | 1/100 [00:00<00:14,  6.69trial/s, best loss: -0.605080831408776]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




  4%|▍         | 4/100 [00:00<00:12,  7.57trial/s, best loss: -0.628175519630485]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




  5%|▌         | 5/100 [00:00<00:22,  4.24trial/s, best loss: -0.628175519630485]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




  8%|▊         | 8/100 [00:01<00:13,  6.85trial/s, best loss: -0.628175519630485]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 10%|█         | 10/100 [00:01<00:11,  7.83trial/s, best loss: -0.628175519630485]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 12%|█▏        | 12/100 [00:01<00:09,  9.46trial/s, best loss: -0.628175519630485]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 14%|█▍        | 14/100 [00:02<00:13,  6.46trial/s, best loss: -0.6327944572748267]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 17%|█▋        | 17/100 [00:02<00:10,  8.23trial/s, best loss: -0.6327944572748267]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 19%|█▉        | 19/100 [00:02<00:08,  9.42trial/s, best loss: -0.6327944572748267]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 23%|██▎       | 23/100 [00:03<00:10,  7.69trial/s, best loss: -0.6466512702078522]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 25%|██▌       | 25/100 [00:03<00:09,  8.12trial/s, best loss: -0.6466512702078522]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 27%|██▋       | 27/100 [00:03<00:08,  8.71trial/s, best loss: -0.6466512702078522]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 30%|███       | 30/100 [00:03<00:07,  9.31trial/s, best loss: -0.651270207852194] 

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 31%|███       | 31/100 [00:04<00:11,  6.14trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 34%|███▍      | 34/100 [00:04<00:08,  7.76trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 36%|███▌      | 36/100 [00:04<00:07,  8.75trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 38%|███▊      | 38/100 [00:04<00:07,  8.73trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 40%|████      | 40/100 [00:05<00:08,  6.75trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 41%|████      | 41/100 [00:05<00:08,  7.34trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 43%|████▎     | 43/100 [00:05<00:06,  8.64trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 47%|████▋     | 47/100 [00:05<00:05,  9.96trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 50%|█████     | 50/100 [00:06<00:06,  7.61trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 54%|█████▍    | 54/100 [00:06<00:05,  8.73trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 55%|█████▌    | 55/100 [00:06<00:05,  8.93trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 56%|█████▌    | 56/100 [00:07<00:06,  6.61trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 58%|█████▊    | 58/100 [00:07<00:05,  7.43trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 61%|██████    | 61/100 [00:07<00:04,  8.17trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 63%|██████▎   | 63/100 [00:08<00:04,  9.05trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 65%|██████▌   | 65/100 [00:08<00:04,  7.35trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 67%|██████▋   | 67/100 [00:08<00:04,  7.28trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 70%|███████   | 70/100 [00:08<00:03,  8.54trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 72%|███████▏  | 72/100 [00:09<00:03,  9.32trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 73%|███████▎  | 73/100 [00:09<00:03,  6.88trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 76%|███████▌  | 76/100 [00:09<00:02,  8.14trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 77%|███████▋  | 77/100 [00:09<00:02,  8.57trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 80%|████████  | 80/100 [00:10<00:02,  9.58trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 82%|████████▏ | 82/100 [00:10<00:02,  7.13trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 85%|████████▌ | 85/100 [00:10<00:01,  7.72trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 87%|████████▋ | 87/100 [00:11<00:01,  8.47trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 89%|████████▉ | 89/100 [00:11<00:01,  8.82trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 90%|█████████ | 90/100 [00:11<00:01,  5.96trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 92%|█████████▏| 92/100 [00:11<00:01,  7.15trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 96%|█████████▌| 96/100 [00:12<00:00,  8.98trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 98%|█████████▊| 98/100 [00:12<00:00,  9.08trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




 99%|█████████▉| 99/100 [00:12<00:00,  6.32trial/s, best loss: -0.651270207852194]

Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg", "verbose" } are not used.




100%|██████████| 100/100 [00:12<00:00,  7.76trial/s, best loss: -0.651270207852194]
Best parameters: {'border_count': np.int64(147), 'depth': np.int64(4), 'iterations': np.int64(10), 'l2_leaf_reg': np.float64(6.837205168985412), 'learning_rate': np.float64(0.013748121731017806)}


Parameters: { "border_count", "cat_features", "depth", "iterations", "l2_leaf_reg" } are not used.

