In [23]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import math
import os
import pyarrow.feather as feather
from lightgbm import LGBMClassifier, log_evaluation
from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
from colorama import Fore, Back, Style
import optuna.integration.lightgbm as lgb
import optuna

import warnings
warnings.filterwarnings("ignore")

In [2]:
#skip make_data
data_use=pd.read_feather("../Kaggle/AMEX/train_use.ftr")
train_labels=pd.read_csv("../Kaggle/AMEX/train_labels.csv")
print(data_use.shape)
print(train_labels.shape)

def categorization(data):

    num_cols=data._get_numeric_data().columns
    cat_cols=list(set(data.columns) - set(num_cols))

    for column in cat_cols:
        target_column = data[column]
        le = preprocessing.LabelEncoder()
        le.fit(target_column)
        label_encoded_column = le.transform(target_column)
        data[column] = pd.Series(label_encoded_column).astype('category')
    
    return data

data_use=categorization(data_use)

(458913, 2033)
(458913, 2)


In [3]:
data_use=data_use.iloc[0:1000,0:40]
train_labels=train_labels.iloc[0:1000,:]


In [19]:
#lightGBM
def amex_metric(y_true: np.array, y_pred: np.array) -> float:

    y_true=np.array(y_true)
    y_pred=np.array(y_pred)

    # count of positives and negatives
    n_pos = y_true.sum()
    n_neg = y_true.shape[0] - n_pos

    # sorting by descring prediction values
    indices = np.argsort(y_pred)[::-1]
    preds, target = y_pred[indices], y_true[indices]

    # filter the top 4% by cumulative row weights
    weight = 20.0 - target * 19.0
    cum_norm_weight = (weight / weight.sum()).cumsum()
    four_pct_filter = cum_norm_weight <= 0.04

    # default rate captured at 4%
    d = target[four_pct_filter].sum() / n_pos

    # weighted gini coefficient
    lorentz = (target / n_pos).cumsum()
    gini = ((lorentz - cum_norm_weight) * weight).sum()

    # max weighted gini coefficient
    gini_max = 10 * n_neg * (1 - 19 / (n_pos + 20 * n_neg))

    # normalized weighted gini coefficient
    g = gini / gini_max

    return 0.5 * (g + d)

def lgb_amex_metric(y_true, y_pred):
    """The competition metric with lightgbm's calling convention"""
    return ('amex',
            amex_metric(y_true, y_pred),
            True)

In [5]:
def amex_metric_optuna(preds: np.ndarray, data: lgb.Dataset) -> float:

    y_pred = preds
    y_true = data.get_label()

    # count of positives and negatives
    n_pos = y_true.sum()
    n_neg = y_true.shape[0] - n_pos

    # sorting by descring prediction values
    indices = np.argsort(y_pred)[::-1]
    preds, target = y_pred[indices], y_true[indices]

    # filter the top 4% by cumulative row weights
    weight = 20.0 - target * 19.0
    cum_norm_weight = (weight / weight.sum()).cumsum()
    four_pct_filter = cum_norm_weight <= 0.04

    # default rate captured at 4%
    d = target[four_pct_filter].sum() / n_pos

    # weighted gini coefficient
    lorentz = (target / n_pos).cumsum()
    gini = ((lorentz - cum_norm_weight) * weight).sum()

    # max weighted gini coefficient
    gini_max = 10 * n_neg * (1 - 19 / (n_pos + 20 * n_neg))

    # normalized weighted gini coefficient
    g = gini / gini_max

    return "AMEX score", 0.5 * (g + d), True

In [25]:
def fit_lgbm(trial, train, val, devices=(-1,),  cat_features=None, num_rounds=1500):
    """Train Light GBM model"""
    X_train, y_train = train
    X_valid, y_valid = val
    
    
    params = {
        #type
        'objective': 'binary',
        "boosting": "gbdt",
        "metric": "AMEX score",
        "verbose": -1,
        #tree structure
        'num_leaves': trial.suggest_int('num_leaves', 2, 300),
        'max_depth': trial.suggest_int('max_depth', 2, 12),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 5, 400),
        #accuracy
        "learning_rate": 0.1,
        #regularization
        'lambda_l1': trial.suggest_float('lambda_l1', 0, 0.98),
        'lambda_l2': trial.suggest_float('lambda_l1', 0, 0.98),
        'min_gain_to_split': trial.suggest_float('min_gain_to_split', 0, 15),
        #bagging
        "bagging_freq": 1,
        "bagging_fraction": trial.suggest_float('bagging_fraction', 0, 0.98),
        "feature_fraction": trial.suggest_float('feature_fraction', 0, 0.98),
        'seed':123,
    }

    device = devices[0]
    if device == -1:
        # use cpu
        pass
    else:
        # use gpu
        print(f'using gpu device_id {device}...')
        params.update({'device': 'gpu', 'gpu_device_id': device})


    d_train = lgb.Dataset(X_train, label=y_train, categorical_feature=cat_features)
    d_valid = lgb.Dataset(X_valid, label=y_valid, categorical_feature=cat_features)
    watchlist = [d_train, d_valid]
    
    #pruning_callback = optuna.integration.LightGBMPruningCallback(trial, 'auc', valid_name='valid_1')  

    FIT_PARAMS_LGB = {"num_boost_round": 10000, "early_stopping_rounds": 10, "verbose_eval":0}

    callbacks = [
    lgb.log_evaluation(0),
    lgb.early_stopping(10)]

    model = lgb.train(params, **FIT_PARAMS_LGB,
                categorical_feature=cat_features,
                train_set=d_train,
                valid_sets=watchlist,
                callbacks=callbacks, # コマンドライン出力用コールバック関数
                feval=amex_metric_optuna,
                     )

    # predictions
    y_pred_valid = model.predict(X_valid, num_iteration=model.best_iteration)
    
    #print('best_score', model.best_score)
    log = {'valid/amex': amex_metric(y_valid, y_pred_valid)}
    return model, y_pred_valid, log

In [7]:
def objective(trial, fast_check=True, return_info=False):
        folds = 5
        seed = 666
        shuffle = True
        kf = StratifiedKFold(n_splits=folds, shuffle=shuffle, random_state=seed)
    
        y_valid_pred_total = np.zeros(data_use.shape[0])
        models = []
        valid_score = 0

        for train_idx, valid_idx in kf.split(data_use, train_labels["target"]):
            train_data = data_use.iloc[train_idx,:], train_labels["target"].iloc[train_idx]
            valid_data = data_use.iloc[valid_idx,:], train_labels["target"].iloc[valid_idx]
            
            #print('train', len(train_idx), 'valid', len(valid_idx))
    
            model, y_pred_valid, log = fit_lgbm(trial, train_data, valid_data, num_rounds=1200)
            
            y_valid_pred_total[valid_idx] = y_pred_valid
            models.append(model)
            
            valid_score += log["valid/amex"]
            if fast_check:
                break
                
        valid_score /= len(models)
        
        if return_info:
            return valid_score, models, y_valid_pred_total, train_labels["target"]
        else:
            return valid_score
   

In [27]:
optuna.logging.set_verbosity(optuna.logging.ERROR)
study = optuna.create_study()
study.optimize(objective, timeout=60*0.5)

train 800 valid 200


feature_fraction, val_score: 0.079425:  14%|#4        | 1/7 [00:00<00:00, 15.10it/s]

Training until validation scores don't improve for 10 rounds


feature_fraction, val_score: 0.079425: 100%|##########| 7/7 [00:00<00:00, 12.82it/s]
num_leaves, val_score: 0.079425:  20%|##        | 4/20 [00:00<00:00, 17.06it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425:  55%|#####5    | 11/20 [00:00<00:00, 17.51it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425: 100%|##########| 20/20 [00:01<00:00, 15.60it/s]
bagging, val_score: 0.079425: 100%|##########| 10/10 [00:00<00:00, 14.27it/s]
feature_fraction_stage2, val_score: 0.079425: 100%|##########| 6/6 [00:00<00:00, 14.63it/s]
regularization_factors, val_score: 0.079425: 100%|##########| 20/20 [00:01<00:00, 15.08it/s]
min_data_in_leaf, val_score: 0.079425: 100%|##########| 5/5 [00:00<00:00, 16.60it/s]


best_score defaultdict(<class 'collections.OrderedDict'>, {'valid_0': OrderedDict([('AMEX score', 0.0392607846601031)]), 'valid_1': OrderedDict([('AMEX score', 0.0794246904371299)])})
train 800 valid 200


feature_fraction, val_score: 0.079425:  43%|####2     | 3/7 [00:00<00:00, 16.80it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


feature_fraction, val_score: 0.079425: 100%|##########| 7/7 [00:00<00:00, 16.62it/s]


Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425:   5%|5         | 1/20 [00:00<00:01, 14.80it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425:  20%|##        | 4/20 [00:00<00:01, 15.88it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425:  30%|###       | 6/20 [00:00<00:00, 15.90it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425:  40%|####      | 8/20 [00:00<00:00, 15.57it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425:  55%|#####5    | 11/20 [00:00<00:00, 15.90it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425:  85%|########5 | 17/20 [00:01<00:00, 15.36it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425: 100%|##########| 20/20 [00:01<00:00, 15.05it/s]


Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


bagging, val_score: 0.079425:  30%|###       | 3/10 [00:00<00:00, 10.95it/s]

Early stopping, best iteration is:
[12]	valid_0's AMEX score: 0.617068	valid_1's AMEX score: 0.544918
Early stopping, best iteration is:
[12]	valid_0's AMEX score: 0.617068	valid_1's AMEX score: 0.544918
Early stopping, best iteration is:
[12]	valid_0's AMEX score: 0.617068	valid_1's AMEX score: 0.544918


bagging, val_score: 0.079425:  70%|#######   | 7/10 [00:00<00:00, 13.24it/s]

Early stopping, best iteration is:
[12]	valid_0's AMEX score: 0.617068	valid_1's AMEX score: 0.544918
Early stopping, best iteration is:
[12]	valid_0's AMEX score: 0.617068	valid_1's AMEX score: 0.544918


bagging, val_score: 0.079425: 100%|##########| 10/10 [00:00<00:00, 12.97it/s]


Early stopping, best iteration is:
[12]	valid_0's AMEX score: 0.617068	valid_1's AMEX score: 0.544918
Early stopping, best iteration is:
[12]	valid_0's AMEX score: 0.617068	valid_1's AMEX score: 0.544918


feature_fraction_stage2, val_score: 0.079425: 100%|##########| 6/6 [00:00<00:00, 16.49it/s]
regularization_factors, val_score: 0.079425: 100%|##########| 20/20 [00:01<00:00, 14.22it/s]
min_data_in_leaf, val_score: 0.079425: 100%|##########| 5/5 [00:00<00:00, 16.51it/s]


best_score defaultdict(<class 'collections.OrderedDict'>, {'valid_0': OrderedDict([('AMEX score', 0.0392607846601031)]), 'valid_1': OrderedDict([('AMEX score', 0.0794246904371299)])})
train 800 valid 200


feature_fraction, val_score: 0.524108:  29%|##8       | 2/7 [00:00<00:00,  9.06it/s]

Training until validation scores don't improve for 10 rounds


feature_fraction, val_score: 0.522566: 100%|##########| 7/7 [00:00<00:00, 11.79it/s]
num_leaves, val_score: 0.522566: 100%|##########| 20/20 [00:01<00:00, 11.24it/s]
bagging, val_score: 0.514549:  50%|#####     | 5/10 [00:00<00:00, 11.08it/s]

Early stopping, best iteration is:
[64]	valid_0's AMEX score: 0.744886	valid_1's AMEX score: 0.618301


bagging, val_score: 0.427473:  90%|######### | 9/10 [00:00<00:00, 11.59it/s]

Early stopping, best iteration is:
[64]	valid_0's AMEX score: 0.744886	valid_1's AMEX score: 0.618301


bagging, val_score: 0.427473: 100%|##########| 10/10 [00:00<00:00, 11.28it/s]
feature_fraction_stage2, val_score: 0.427473: 100%|##########| 6/6 [00:00<00:00, 15.30it/s]
regularization_factors, val_score: 0.409639: 100%|##########| 20/20 [00:01<00:00, 13.56it/s]
min_data_in_leaf, val_score: 0.409639: 100%|##########| 5/5 [00:00<00:00, 13.11it/s]


best_score defaultdict(<class 'collections.OrderedDict'>, {'valid_0': OrderedDict([('AMEX score', 0.5364975103285803)]), 'valid_1': OrderedDict([('AMEX score', 0.4096386650857902)])})
train 800 valid 200


feature_fraction, val_score: 0.079425:  29%|##8       | 2/7 [00:00<00:00, 14.00it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


feature_fraction, val_score: 0.079425: 100%|##########| 7/7 [00:00<00:00, 15.09it/s]


Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425:   0%|          | 0/20 [00:00<?, ?it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425:  15%|#5        | 3/20 [00:00<00:01, 13.39it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425:  70%|#######   | 14/20 [00:00<00:00, 15.95it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425: 100%|##########| 20/20 [00:01<00:00, 15.47it/s]


Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


bagging, val_score: 0.079425: 100%|##########| 10/10 [00:00<00:00, 11.88it/s]
feature_fraction_stage2, val_score: 0.079425: 100%|##########| 6/6 [00:00<00:00, 14.62it/s]
regularization_factors, val_score: 0.079425: 100%|##########| 20/20 [00:01<00:00, 13.30it/s]
min_data_in_leaf, val_score: 0.079425: 100%|##########| 5/5 [00:00<00:00, 15.33it/s]


best_score defaultdict(<class 'collections.OrderedDict'>, {'valid_0': OrderedDict([('AMEX score', 0.0392607846601031)]), 'valid_1': OrderedDict([('AMEX score', 0.0794246904371299)])})
train 800 valid 200


feature_fraction, val_score: 0.079425:  43%|####2     | 3/7 [00:00<00:00, 15.69it/s]

Training until validation scores don't improve for 10 rounds


feature_fraction, val_score: 0.079425: 100%|##########| 7/7 [00:00<00:00, 16.59it/s]
num_leaves, val_score: 0.079425:  20%|##        | 4/20 [00:00<00:00, 16.67it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425:  90%|######### | 18/20 [00:01<00:00, 15.45it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.0392608	valid_1's AMEX score: 0.0794247


num_leaves, val_score: 0.079425: 100%|##########| 20/20 [00:01<00:00, 15.72it/s]
bagging, val_score: 0.079425: 100%|##########| 10/10 [00:00<00:00, 15.07it/s]


Early stopping, best iteration is:
[18]	valid_0's AMEX score: 0.535954	valid_1's AMEX score: 0.450475


feature_fraction_stage2, val_score: 0.079425: 100%|##########| 6/6 [00:00<00:00, 16.43it/s]
regularization_factors, val_score: 0.079425: 100%|##########| 20/20 [00:01<00:00, 14.43it/s]
min_data_in_leaf, val_score: 0.079425: 100%|##########| 5/5 [00:00<00:00, 15.58it/s]


best_score defaultdict(<class 'collections.OrderedDict'>, {'valid_0': OrderedDict([('AMEX score', 0.0392607846601031)]), 'valid_1': OrderedDict([('AMEX score', 0.0794246904371299)])})
train 800 valid 200


feature_fraction, val_score: 0.497110:  29%|##8       | 2/7 [00:00<00:00, 11.70it/s]

Training until validation scores don't improve for 10 rounds


feature_fraction, val_score: 0.497110: 100%|##########| 7/7 [00:00<00:00, 11.26it/s]


Early stopping, best iteration is:
[23]	valid_0's AMEX score: 0.694228	valid_1's AMEX score: 0.579928
Early stopping, best iteration is:
[23]	valid_0's AMEX score: 0.694228	valid_1's AMEX score: 0.579928
Early stopping, best iteration is:
[23]	valid_0's AMEX score: 0.694228	valid_1's AMEX score: 0.579928


num_leaves, val_score: 0.497110: 100%|##########| 20/20 [00:01<00:00, 12.64it/s]
bagging, val_score: 0.497110:  30%|###       | 3/10 [00:00<00:00, 11.05it/s]

Early stopping, best iteration is:
[23]	valid_0's AMEX score: 0.694228	valid_1's AMEX score: 0.579928


bagging, val_score: 0.486135: 100%|##########| 10/10 [00:00<00:00, 10.74it/s]
feature_fraction_stage2, val_score: 0.486135: 100%|##########| 6/6 [00:00<00:00,  9.53it/s]
regularization_factors, val_score: 0.486135: 100%|##########| 20/20 [00:01<00:00, 12.27it/s]
min_data_in_leaf, val_score: 0.486135: 100%|##########| 5/5 [00:00<00:00, 14.09it/s]


best_score defaultdict(<class 'collections.OrderedDict'>, {'valid_0': OrderedDict([('AMEX score', 0.48383354336153206)]), 'valid_1': OrderedDict([('AMEX score', 0.4861351434907132)])})
train 800 valid 200


feature_fraction, val_score: 0.531212:  29%|##8       | 2/7 [00:00<00:00, 12.41it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[22]	valid_0's AMEX score: 0.657851	valid_1's AMEX score: 0.531212


feature_fraction, val_score: 0.510041:  71%|#######1  | 5/7 [00:00<00:00, 12.07it/s]

Early stopping, best iteration is:
[22]	valid_0's AMEX score: 0.657851	valid_1's AMEX score: 0.531212
Early stopping, best iteration is:
[22]	valid_0's AMEX score: 0.657851	valid_1's AMEX score: 0.531212


feature_fraction, val_score: 0.510041: 100%|##########| 7/7 [00:00<00:00, 11.56it/s]
num_leaves, val_score: 0.494764:  15%|#5        | 3/20 [00:00<00:01, 11.44it/s]

Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764


num_leaves, val_score: 0.494764:  60%|######    | 12/20 [00:01<00:00, 10.58it/s]

Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764
Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764


num_leaves, val_score: 0.494764:  75%|#######5  | 15/20 [00:01<00:00, 10.07it/s]

Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764


num_leaves, val_score: 0.494764:  90%|######### | 18/20 [00:01<00:00, 10.91it/s]

Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764


num_leaves, val_score: 0.494764: 100%|##########| 20/20 [00:01<00:00, 11.11it/s]


Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764


bagging, val_score: 0.494764:  30%|###       | 3/10 [00:00<00:00, 10.67it/s]

Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764
Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764
Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764


bagging, val_score: 0.494764:  60%|######    | 6/10 [00:00<00:00, 10.07it/s]

Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764
Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764


bagging, val_score: 0.429015:  90%|######### | 9/10 [00:00<00:00, 10.55it/s]

Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764
Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764


bagging, val_score: 0.429015: 100%|##########| 10/10 [00:00<00:00, 10.33it/s]


Early stopping, best iteration is:
[32]	valid_0's AMEX score: 0.692095	valid_1's AMEX score: 0.494764


feature_fraction_stage2, val_score: 0.404988: 100%|##########| 3/3 [00:00<00:00, 12.09it/s]
regularization_factors, val_score: 0.404988: 100%|##########| 20/20 [00:01<00:00, 12.36it/s]
min_data_in_leaf, val_score: 0.404988: 100%|##########| 5/5 [00:00<00:00, 12.86it/s]

best_score defaultdict(<class 'collections.OrderedDict'>, {'valid_0': OrderedDict([('AMEX score', 0.5263168550862001)]), 'valid_1': OrderedDict([('AMEX score', 0.40498840957362614)])})





In [11]:


params_last = {
        #type
        'objective': 'binary',
        "boosting": "gbdt",
        "metric": "AMEX score",
        "verbose": -1,
        #tree structure
        'num_leaves': 33,
        'max_depth': 5,
        'min_data_in_leaf': 25,
        #accuracy
        "learning_rate": 0.1,
        #regularization
        'lambda_l1': 0.0009192401584865857,
        'lambda_l2': 0.011545006628700807,
        'min_gain_to_split': 3,
        #bagging
        "bagging_freq": 1,
        "bagging_fraction": 0.46461405788976207,
        "feature_fraction": 0.5,
        'seed':123,
    }

In [13]:
#valid_score, models0, y_pred_valid, y_train = objective(optuna.trial.FixedTrial(study.best_params), fast_check=False, return_info=True)
valid_score, models0, y_pred_valid, y_train = objective(optuna.trial.FixedTrial(params_last), fast_check=False, return_info=True)

[32m[I 2022-06-23 08:24:06,590][0m A new study created in memory with name: no-name-b124e6fe-b7e8-4b2a-aaa7-300d00ec12b0[0m


train 800 valid 200
training LGB:


feature_fraction, val_score: 0.549688:  14%|#4        | 1/7 [00:00<00:00,  9.48it/s][32m[I 2022-06-23 08:24:06,702][0m Trial 0 finished with value: 0.5496883012040759 and parameters: {'feature_fraction': 1.0}. Best is trial 0 with value: 0.5496883012040759.[0m
feature_fraction, val_score: 0.549688:  14%|#4        | 1/7 [00:00<00:00,  9.48it/s]

Training until validation scores don't improve for 10 rounds
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[10]	valid_0's AMEX score: 0.730371	valid_1's AMEX score: 0.549688
Training until validation scores don't improve for 10 rounds


feature_fraction, val_score: 0.549688:  29%|##8       | 2/7 [00:00<00:00,  7.36it/s][32m[I 2022-06-23 08:24:06,858][0m Trial 1 finished with value: 0.5769524510453115 and parameters: {'feature_fraction': 0.7}. Best is trial 0 with value: 0.5496883012040759.[0m
feature_fraction, val_score: 0.549688:  29%|##8       | 2/7 [00:00<00:00,  7.36it/s][32m[I 2022-06-23 08:24:06,952][0m Trial 2 finished with value: 0.5769524510453115 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 0 with value: 0.5496883012040759.[0m
feature_fraction, val_score: 0.549688:  57%|#####7    | 4/7 [00:00<00:00,  9.80it/s][32m[I 2022-06-23 08:24:07,023][0m Trial 3 finished with value: 0.5822528734013862 and parameters: {'feature_fraction': 0.4}. Best is trial 0 with value: 0.5496883012040759.[0m
feature_fraction, val_score: 0.549688:  57%|#####7    | 4/7 [00:00<00:00,  9.80it/s]

Early stopping, best iteration is:
[15]	valid_0's AMEX score: 0.724935	valid_1's AMEX score: 0.576952
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[15]	valid_0's AMEX score: 0.724935	valid_1's AMEX score: 0.576952
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[6]	valid_0's AMEX score: 0.652872	valid_1's AMEX score: 0.582253


feature_fraction, val_score: 0.549688:  57%|#####7    | 4/7 [00:00<00:00,  9.80it/s][32m[I 2022-06-23 08:24:07,108][0m Trial 4 finished with value: 0.5822528734013862 and parameters: {'feature_fraction': 0.5}. Best is trial 0 with value: 0.5496883012040759.[0m
feature_fraction, val_score: 0.549688:  86%|########5 | 6/7 [00:00<00:00,  9.11it/s][32m[I 2022-06-23 08:24:07,260][0m Trial 5 finished with value: 0.6198551955189255 and parameters: {'feature_fraction': 0.6}. Best is trial 0 with value: 0.5496883012040759.[0m
feature_fraction, val_score: 0.549688:  86%|########5 | 6/7 [00:00<00:00,  9.11it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[6]	valid_0's AMEX score: 0.652872	valid_1's AMEX score: 0.582253
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855


feature_fraction, val_score: 0.549688: 100%|##########| 7/7 [00:00<00:00,  9.30it/s][32m[I 2022-06-23 08:24:07,360][0m Trial 6 finished with value: 0.6198551955189255 and parameters: {'feature_fraction': 0.8}. Best is trial 0 with value: 0.5496883012040759.[0m
feature_fraction, val_score: 0.549688: 100%|##########| 7/7 [00:00<00:00,  9.12it/s]


Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855




Training until validation scores don't improve for 10 rounds


num_leaves, val_score: 0.549688:   5%|5         | 1/20 [00:00<00:03,  5.80it/s][32m[I 2022-06-23 08:24:07,541][0m Trial 7 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 31}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:   5%|5         | 1/20 [00:00<00:03,  5.80it/s][32m[I 2022-06-23 08:24:07,635][0m Trial 8 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 8}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  15%|#5        | 3/20 [00:00<00:01,  9.51it/s][32m[I 2022-06-23 08:24:07,705][0m Trial 9 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 14}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  15%|#5        | 3/20 [00:00<00:01,  9.51it/s]

Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855


num_leaves, val_score: 0.549688:  15%|#5        | 3/20 [00:00<00:01,  9.51it/s][32m[I 2022-06-23 08:24:07,798][0m Trial 10 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 30}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  25%|##5       | 5/20 [00:00<00:01,  9.12it/s][32m[I 2022-06-23 08:24:07,933][0m Trial 11 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 23}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  25%|##5       | 5/20 [00:00<00:01,  9.12it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855


num_leaves, val_score: 0.549688:  30%|###       | 6/20 [00:00<00:01,  9.00it/s][32m[I 2022-06-23 08:24:08,048][0m Trial 12 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 31}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  30%|###       | 6/20 [00:00<00:01,  9.00it/s][32m[I 2022-06-23 08:24:08,126][0m Trial 13 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 5}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  40%|####      | 8/20 [00:00<00:01, 10.38it/s][32m[I 2022-06-23 08:24:08,202][0m Trial 14 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 22}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  40%|####      | 8/20 [00:00<00:01, 10.38it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855


num_leaves, val_score: 0.549688:  40%|####      | 8/20 [00:00<00:01, 10.38it/s][32m[I 2022-06-23 08:24:08,296][0m Trial 15 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 28}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  50%|#####     | 10/20 [00:01<00:00, 10.61it/s][32m[I 2022-06-23 08:24:08,384][0m Trial 16 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 22}. Best is trial 7 with value: 0.6198551955189255.[0m


Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds


num_leaves, val_score: 0.549688:  50%|#####     | 10/20 [00:01<00:00, 10.61it/s][32m[I 2022-06-23 08:24:08,475][0m Trial 17 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 15}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  60%|######    | 12/20 [00:01<00:00, 10.81it/s][32m[I 2022-06-23 08:24:08,562][0m Trial 18 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 5}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  60%|######    | 12/20 [00:01<00:00, 10.81it/s][32m[I 2022-06-23 08:24:08,647][0m Trial 19 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 10}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  65%|######5   | 13/20 [00:01<00:00, 10.81it/s]

Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855


num_leaves, val_score: 0.549688:  70%|#######   | 14/20 [00:01<00:00, 11.16it/s][32m[I 2022-06-23 08:24:08,730][0m Trial 20 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 9}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  70%|#######   | 14/20 [00:01<00:00, 11.16it/s][32m[I 2022-06-23 08:24:08,812][0m Trial 21 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 18}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  80%|########  | 16/20 [00:01<00:00, 11.54it/s][32m[I 2022-06-23 08:24:08,891][0m Trial 22 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 2}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  80%|########  | 16/20 [00:01<00:00, 11.54it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855


num_leaves, val_score: 0.549688:  80%|########  | 16/20 [00:01<00:00, 11.54it/s][32m[I 2022-06-23 08:24:09,027][0m Trial 23 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 26}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  90%|######### | 18/20 [00:01<00:00, 10.26it/s][32m[I 2022-06-23 08:24:09,132][0m Trial 24 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 19}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688:  90%|######### | 18/20 [00:01<00:00, 10.26it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855


num_leaves, val_score: 0.549688:  90%|######### | 18/20 [00:01<00:00, 10.26it/s][32m[I 2022-06-23 08:24:09,228][0m Trial 25 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 2}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688: 100%|##########| 20/20 [00:01<00:00, 10.50it/s][32m[I 2022-06-23 08:24:09,313][0m Trial 26 finished with value: 0.6198551955189255 and parameters: {'num_leaves': 26}. Best is trial 7 with value: 0.6198551955189255.[0m
num_leaves, val_score: 0.549688: 100%|##########| 20/20 [00:01<00:00, 10.26it/s]


Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855


bagging, val_score: 0.525399:   0%|          | 0/10 [00:00<?, ?it/s][32m[I 2022-06-23 08:24:09,402][0m Trial 27 finished with value: 0.5253994459628332 and parameters: {'bagging_fraction': 0.9211160956703538, 'bagging_freq': 2}. Best is trial 27 with value: 0.5253994459628332.[0m
bagging, val_score: 0.525399:  10%|#         | 1/10 [00:00<00:00, 12.10it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


bagging, val_score: 0.525399:  20%|##        | 2/10 [00:00<00:00, 11.72it/s][32m[I 2022-06-23 08:24:09,493][0m Trial 28 finished with value: 0.6198551955189255 and parameters: {'bagging_fraction': 0.6572764460515179, 'bagging_freq': 6}. Best is trial 27 with value: 0.5253994459628332.[0m
bagging, val_score: 0.525399:  20%|##        | 2/10 [00:00<00:00, 11.72it/s][32m[I 2022-06-23 08:24:09,575][0m Trial 29 finished with value: 0.6198551955189255 and parameters: {'bagging_fraction': 0.5816852146543796, 'bagging_freq': 2}. Best is trial 27 with value: 0.5253994459628332.[0m
bagging, val_score: 0.525399:  30%|###       | 3/10 [00:00<00:00, 11.72it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855




Training until validation scores don't improve for 10 rounds


bagging, val_score: 0.525399:  40%|####      | 4/10 [00:00<00:00, 11.55it/s][32m[I 2022-06-23 08:24:09,668][0m Trial 30 finished with value: 0.6198551955189255 and parameters: {'bagging_fraction': 0.6406906478569289, 'bagging_freq': 5}. Best is trial 27 with value: 0.5253994459628332.[0m
bagging, val_score: 0.525399:  40%|####      | 4/10 [00:00<00:00, 11.55it/s][32m[I 2022-06-23 08:24:09,767][0m Trial 31 finished with value: 0.6198551955189255 and parameters: {'bagging_fraction': 0.5295071380797092, 'bagging_freq': 6}. Best is trial 27 with value: 0.5253994459628332.[0m


Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds


bagging, val_score: 0.525399:  60%|######    | 6/10 [00:00<00:00, 11.32it/s][32m[I 2022-06-23 08:24:09,849][0m Trial 32 finished with value: 0.6198551955189255 and parameters: {'bagging_fraction': 0.40519132128665014, 'bagging_freq': 7}. Best is trial 27 with value: 0.5253994459628332.[0m
bagging, val_score: 0.525399:  60%|######    | 6/10 [00:00<00:00, 11.32it/s]

Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855


bagging, val_score: 0.525399:  60%|######    | 6/10 [00:00<00:00, 11.32it/s][32m[I 2022-06-23 08:24:09,933][0m Trial 33 finished with value: 0.6198551955189255 and parameters: {'bagging_fraction': 0.706442725234733, 'bagging_freq': 6}. Best is trial 27 with value: 0.5253994459628332.[0m
bagging, val_score: 0.525399:  80%|########  | 8/10 [00:00<00:00, 11.45it/s][32m[I 2022-06-23 08:24:10,021][0m Trial 34 finished with value: 0.6198551955189255 and parameters: {'bagging_fraction': 0.7184093749791811, 'bagging_freq': 1}. Best is trial 27 with value: 0.5253994459628332.[0m
bagging, val_score: 0.525399:  80%|########  | 8/10 [00:00<00:00, 11.45it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855




Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855


bagging, val_score: 0.525399:  80%|########  | 8/10 [00:00<00:00, 11.45it/s][32m[I 2022-06-23 08:24:10,111][0m Trial 35 finished with value: 0.6198551955189255 and parameters: {'bagging_fraction': 0.4346967531086753, 'bagging_freq': 1}. Best is trial 27 with value: 0.5253994459628332.[0m
bagging, val_score: 0.525399: 100%|##########| 10/10 [00:00<00:00, 11.38it/s][32m[I 2022-06-23 08:24:10,198][0m Trial 36 finished with value: 0.6198551955189255 and parameters: {'bagging_fraction': 0.9408405491218916, 'bagging_freq': 3}. Best is trial 27 with value: 0.5253994459628332.[0m
bagging, val_score: 0.525399: 100%|##########| 10/10 [00:00<00:00, 11.37it/s]


Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855




Training until validation scores don't improve for 10 rounds


feature_fraction_stage2, val_score: 0.525399:   0%|          | 0/3 [00:00<?, ?it/s][32m[I 2022-06-23 08:24:10,291][0m Trial 37 finished with value: 0.5253994459628332 and parameters: {'feature_fraction': 0.9840000000000001}. Best is trial 37 with value: 0.5253994459628332.[0m
feature_fraction_stage2, val_score: 0.525399:  67%|######6   | 2/3 [00:00<00:00, 12.64it/s][32m[I 2022-06-23 08:24:10,365][0m Trial 38 finished with value: 0.5253994459628332 and parameters: {'feature_fraction': 0.9520000000000001}. Best is trial 37 with value: 0.5253994459628332.[0m
feature_fraction_stage2, val_score: 0.525399:  67%|######6   | 2/3 [00:00<00:00, 12.64it/s]

Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


feature_fraction_stage2, val_score: 0.525399:  67%|######6   | 2/3 [00:00<00:00, 12.64it/s][32m[I 2022-06-23 08:24:10,460][0m Trial 39 finished with value: 0.6198551955189255 and parameters: {'feature_fraction': 0.92}. Best is trial 37 with value: 0.5253994459628332.[0m
feature_fraction_stage2, val_score: 0.525399: 100%|##########| 3/3 [00:00<00:00, 11.72it/s]


Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855


regularization_factors, val_score: 0.525399:   0%|          | 0/20 [00:00<?, ?it/s][32m[I 2022-06-23 08:24:10,551][0m Trial 40 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 3.577963803473706e-06, 'lambda_l2': 0.0013382651101707325}. Best is trial 40 with value: 0.5253994459628332.[0m


Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399
Training until validation scores don't improve for 10 rounds


regularization_factors, val_score: 0.525399:  10%|#         | 2/20 [00:00<00:01, 12.04it/s][32m[I 2022-06-23 08:24:10,633][0m Trial 41 finished with value: 0.6198551955189255 and parameters: {'lambda_l1': 2.3438052622785692, 'lambda_l2': 8.877328486353691e-06}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399:  10%|#         | 2/20 [00:00<00:01, 12.04it/s][32m[I 2022-06-23 08:24:10,712][0m Trial 42 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 0.02883088029483542, 'lambda_l2': 2.4723631637129668e-08}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399:  15%|#5        | 3/20 [00:00<00:01, 12.04it/s]

Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


regularization_factors, val_score: 0.525399:  20%|##        | 4/20 [00:00<00:01, 12.29it/s][32m[I 2022-06-23 08:24:10,793][0m Trial 43 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 0.006351787668195321, 'lambda_l2': 0.03841723869215282}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399:  20%|##        | 4/20 [00:00<00:01, 12.29it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


regularization_factors, val_score: 0.525399:  20%|##        | 4/20 [00:00<00:01, 12.29it/s][32m[I 2022-06-23 08:24:10,899][0m Trial 44 finished with value: 0.6198551955189255 and parameters: {'lambda_l1': 8.643096782911203e-08, 'lambda_l2': 1.8927266496698316}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399:  25%|##5       | 5/20 [00:00<00:01, 12.29it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


regularization_factors, val_score: 0.525399:  30%|###       | 6/20 [00:00<00:01, 11.69it/s][32m[I 2022-06-23 08:24:10,975][0m Trial 45 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 0.0001292778387450747, 'lambda_l2': 0.03037869639306319}. Best is trial 40 with value: 0.5253994459628332.[0m


Training until validation scores don't improve for 10 rounds


regularization_factors, val_score: 0.525399:  30%|###       | 6/20 [00:00<00:01, 11.69it/s][32m[I 2022-06-23 08:24:11,061][0m Trial 46 finished with value: 0.6198551955189255 and parameters: {'lambda_l1': 0.06260450598377865, 'lambda_l2': 9.666126473323825e-08}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399:  40%|####      | 8/20 [00:00<00:01, 11.71it/s][32m[I 2022-06-23 08:24:11,145][0m Trial 47 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 0.000979742148843642, 'lambda_l2': 0.0031162996198207207}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399:  40%|####      | 8/20 [00:00<00:01, 11.71it/s][32m[I 2022-06-23 08:24:11,224][0m Trial 48 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 2.3480932075254175e-07, 'lambda_l2': 0.0009379405149942972}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.52539

Early stopping, best iteration is:
[9]	valid_0's AMEX score: 0.694211	valid_1's AMEX score: 0.619855
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


regularization_factors, val_score: 0.525399:  50%|#####     | 10/20 [00:00<00:00, 11.81it/s][32m[I 2022-06-23 08:24:11,312][0m Trial 49 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 0.0010677510337170019, 'lambda_l2': 0.0010077188720362}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399:  50%|#####     | 10/20 [00:00<00:00, 11.81it/s][32m[I 2022-06-23 08:24:11,399][0m Trial 50 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 1.757791580988018e-05, 'lambda_l2': 8.132182099315107e-06}. Best is trial 40 with value: 0.5253994459628332.[0m


Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


regularization_factors, val_score: 0.525399:  60%|######    | 12/20 [00:01<00:00, 11.74it/s][32m[I 2022-06-23 08:24:11,484][0m Trial 51 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 4.924918977611444e-06, 'lambda_l2': 8.261062366572641e-08}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399:  60%|######    | 12/20 [00:01<00:00, 11.74it/s][32m[I 2022-06-23 08:24:11,568][0m Trial 52 finished with value: 0.5283703937899764 and parameters: {'lambda_l1': 0.24334912162921435, 'lambda_l2': 8.884270259610777e-06}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399:  70%|#######   | 14/20 [00:01<00:00, 11.75it/s][32m[I 2022-06-23 08:24:11,654][0m Trial 53 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 3.2022833187502557e-06, 'lambda_l2': 1.5121765406666526e-08}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645333	valid_1's AMEX score: 0.52837
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


regularization_factors, val_score: 0.525399:  80%|########  | 16/20 [00:01<00:00, 11.56it/s][32m[I 2022-06-23 08:24:11,833][0m Trial 55 finished with value: 0.5415590625046491 and parameters: {'lambda_l1': 4.802747737299935, 'lambda_l2': 1.107838380850703}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399:  80%|########  | 16/20 [00:01<00:00, 11.56it/s][32m[I 2022-06-23 08:24:11,922][0m Trial 56 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 1.2262966634096892e-08, 'lambda_l2': 4.6741635946809726e-07}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399:  90%|######### | 18/20 [00:01<00:00, 11.50it/s][32m[I 2022-06-23 08:24:12,009][0m Trial 57 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 7.913854962165843e-05, 'lambda_l2': 7.337961491368467e-05}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[2]	valid_0's AMEX score: 0.55079	valid_1's AMEX score: 0.541559
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


regularization_factors, val_score: 0.525399:  90%|######### | 18/20 [00:01<00:00, 11.50it/s][32m[I 2022-06-23 08:24:12,098][0m Trial 58 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 8.322132214848151e-07, 'lambda_l2': 9.376467916856212e-05}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399: 100%|##########| 20/20 [00:01<00:00, 11.44it/s][32m[I 2022-06-23 08:24:12,186][0m Trial 59 finished with value: 0.5253994459628332 and parameters: {'lambda_l1': 1.0643327407727111e-08, 'lambda_l2': 0.02708806496407863}. Best is trial 40 with value: 0.5253994459628332.[0m
regularization_factors, val_score: 0.525399: 100%|##########| 20/20 [00:01<00:00, 11.61it/s]


Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


min_data_in_leaf, val_score: 0.525399:   0%|          | 0/5 [00:00<?, ?it/s][32m[I 2022-06-23 08:24:12,272][0m Trial 60 finished with value: 0.5253994459628332 and parameters: {'min_child_samples': 5}. Best is trial 60 with value: 0.5253994459628332.[0m
min_data_in_leaf, val_score: 0.525399:  20%|##        | 1/5 [00:00<00:00, 12.28it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


min_data_in_leaf, val_score: 0.525399:  40%|####      | 2/5 [00:00<00:00, 12.00it/s][32m[I 2022-06-23 08:24:12,360][0m Trial 61 finished with value: 0.5253994459628332 and parameters: {'min_child_samples': 10}. Best is trial 60 with value: 0.5253994459628332.[0m
min_data_in_leaf, val_score: 0.525399:  40%|####      | 2/5 [00:00<00:00, 12.00it/s][32m[I 2022-06-23 08:24:12,437][0m Trial 62 finished with value: 0.5253994459628332 and parameters: {'min_child_samples': 50}. Best is trial 60 with value: 0.5253994459628332.[0m
min_data_in_leaf, val_score: 0.525399:  60%|######    | 3/5 [00:00<00:00, 12.00it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


min_data_in_leaf, val_score: 0.525399:  80%|########  | 4/5 [00:00<00:00, 11.97it/s]

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399


[32m[I 2022-06-23 08:24:12,528][0m Trial 63 finished with value: 0.5253994459628332 and parameters: {'min_child_samples': 100}. Best is trial 60 with value: 0.5253994459628332.[0m
min_data_in_leaf, val_score: 0.525399:  80%|########  | 4/5 [00:00<00:00, 11.97it/s][32m[I 2022-06-23 08:24:12,608][0m Trial 64 finished with value: 0.5253994459628332 and parameters: {'min_child_samples': 25}. Best is trial 60 with value: 0.5253994459628332.[0m
min_data_in_leaf, val_score: 0.525399: 100%|##########| 5/5 [00:00<00:00, 11.98it/s]


Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's AMEX score: 0.645585	valid_1's AMEX score: 0.525399
best_score defaultdict(<class 'collections.OrderedDict'>, {'valid_0': OrderedDict([('AMEX score', 0.6455854468508382)]), 'valid_1': OrderedDict([('AMEX score', 0.5253994459628332)])})


KeyError: '[105, 154, 14, 191, 29, 10, 18, 74, 158, 162, 188, 3, 42, 87, 25, 100, 122, 4, 33, 138, 160, 126, 133, 75, 60, 58, 140, 24, 169, 2, 94, 21, 135, 117, 52, 128, 92, 19, 43, 69, 81, 145, 13, 198, 66, 118, 182, 193, 35, 116, 164, 132, 67, 166, 86, 22, 23, 111, 172, 88, 175, 143, 178, 93, 65, 70, 68, 150, 121, 152, 147, 190, 196, 192, 64, 59, 56, 16, 12, 11, 9, 20, 63, 7, 45, 44, 26, 46, 47, 41, 40, 49, 50, 51, 39, 38, 53, 54, 37, 31, 199, 99, 71, 170, 137, 139, 142, 148, 149, 151, 156, 159, 161, 163, 165, 167, 173, 134, 174, 177, 179, 180, 183, 185, 186, 187, 189, 194, 195, 197, 136, 72, 102, 73, 77, 78, 79, 80, 82, 83, 84, 85, 96, 98, 101, 130, 104, 107, 108, 112, 113, 115, 119, 123, 124, 125, 127, 129, 0] not in index'

In [None]:
num_cols=data_use._get_numeric_data().columns
cat_features=list(set(data_use.columns) - set(num_cols))    


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1)


#timeout = 60 * 30

#study = optuna.create_study(
#    pruner=optuna.pruners.SuccessiveHalvingPruner(min_resource=2, reduction_factor=4, min_early_stopping_rate=1))
#study.optimize(objective, timeout=timeout)

In [40]:
study.best_params

ValueError: No trials are completed yet.

In [223]:
#ensemble
test=pd.read_feather("../Kaggle/AMEX/test_data.ftr")
sub1 = pd.read_csv('../Kaggle/AMEX/submission_ens.csv')

In [278]:
#CatBoost prediction
col=data_use.columns
test_ID=test['customer_ID'].unique()
pred_catboost= clf.predict_proba(test_use[col])[:, 1]

In [280]:
sub2= pd.DataFrame({'customer_ID':test_ID, 'prediction2':pred_catboost})

In [283]:
#Ensemble
sub1 = pd.read_csv('../Kaggle/AMEX/submission_ens.csv')

blend = pd.merge(sub1, sub2, how='inner', on='customer_ID')
blend.prediction = (blend.prediction * 0.95 + blend.prediction2 * 0.05)
blend[['customer_ID', 'prediction']].to_csv('../Kaggle/AMEX/0613submission2.csv', index=False)

In [None]:
#lightgbm fit
ONLY_FIRST_FOLD = True
INFERENCE = True


model=lgb.train(params, 
                categorical_feature=cat_features,
                train_set=d_train,
                valid_sets=watchlist,
                num_boost_round=10000,  # 最大学習サイクル数。early_stopping使用時は大きな値を入力
                callbacks=callbacks, # コマンドライン出力用コールバック関数
                feval=amex_metric_optuna,
                )
      
score_list = []
y_pred_list = []
kf = StratifiedKFold(n_splits=5)
for fold, (idx_tr, idx_va) in enumerate(kf.split(data_use, train_labels["target"])):
    X_tr, X_va, y_tr, y_va, model = None, None, None, None, None

    X_tr = data_use.iloc[idx_tr]
    X_va = data_use.iloc[idx_va]
    y_tr = train_labels["target"][idx_tr]
    y_va = train_labels["target"][idx_va]
    
    model = my_booster()

    model.fit(X_tr, y_tr,
            eval_set = [(X_va, y_va)], 
            eval_metric=[lgb_amex_metric],
            callbacks=[log_evaluation(100)])
    X_tr, y_tr = None, None
    y_va_pred = model.predict_proba(X_va, raw_score=True)
    score = amex_metric(y_va.to_numpy(), y_va_pred)
    n_trees = model.best_iteration_
    if n_trees is None: n_trees = model.n_estimators
    print(f"{Fore.GREEN}{Style.BRIGHT}Fold {fold} "
          f"                Score = {score:.5f}{Style.RESET_ALL}")
    score_list.append(score)
    
    if INFERENCE:
        y_pred_list.append(model.predict_proba(test, raw_score=True))
        
    if ONLY_FIRST_FOLD: break # we only want the first fold
    
print(f"{Fore.GREEN}{Style.BRIGHT}OOF Score:   {np.mean(score_list):.5f}{Style.RESET_ALL}")