In [14]:
import sys
from pathlib import Path
import os
import gc
import datetime
from glob import glob
import numpy as np
import pandas as pd
import polars as pl

from sklearn.model_selection import StratifiedGroupKFold
from sklearn.metrics import roc_auc_score

import warnings
warnings.filterwarnings('ignore')

# Data root directory. Set the HOME_CREDIT_ROOT environment variable to point at
# your own copy of the competition data; the previous hard-coded location is
# kept as the fallback so existing runs behave the same.
ROOT = os.environ.get(
    "HOME_CREDIT_ROOT",
    '/Users/wuqianran/Desktop/bigdata_finalproject/final',
)

from sklearn.model_selection import TimeSeriesSplit, GroupKFold, StratifiedGroupKFold
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.metrics import roc_auc_score
import lightgbm as lgb

# from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import KNNImputer


In [15]:
class Pipeline:
    """Stateless polars preprocessing steps, intended for use with ``DataFrame.pipe``.

    All methods are static: they take a polars DataFrame and return a new one.
    """

    @staticmethod
    def set_table_dtypes(df):
        """Cast columns to canonical dtypes based on their name.

        * ``case_id``, ``WEEK_NUM``, ``num_group1``, ``num_group2`` -> Int64
        * ``date_decision`` and names ending in ``D`` -> Date
        * names ending in ``P`` or ``A`` -> Float64
        * names ending in ``M`` -> String

        Columns matching none of the rules keep their dtype.
        """
        casts = []
        for col in df.columns:
            if col in ("case_id", "WEEK_NUM", "num_group1", "num_group2"):
                casts.append(pl.col(col).cast(pl.Int64))
            elif col == "date_decision":
                casts.append(pl.col(col).cast(pl.Date))
            elif col[-1] in ("P", "A"):
                casts.append(pl.col(col).cast(pl.Float64))
            elif col[-1] == "M":
                casts.append(pl.col(col).cast(pl.String))
            elif col[-1] == "D":
                casts.append(pl.col(col).cast(pl.Date))
        # One with_columns call instead of one per column: same result, a single pass.
        return df.with_columns(casts)

    @staticmethod
    def handle_dates(df):
        """Replace every ``*D`` column by its offset in days from ``date_decision``.

        The offset is ``column - date_decision``. Afterwards the ``date_decision``
        and ``MONTH`` columns are dropped, so both must be present in ``df``.
        """
        date_cols = [col for col in df.columns if col[-1] == "D"]
        df = df.with_columns(
            [
                (pl.col(col) - pl.col("date_decision")).dt.total_days()
                for col in date_cols
            ]
        )
        return df.drop("date_decision", "MONTH")

    @staticmethod
    def filter_cols(df, null_threshold=0.95, max_cardinality=200):
        """Drop uninformative feature columns.

        ``target``, ``case_id`` and ``WEEK_NUM`` are never dropped. Among the
        other columns this removes:

        1. columns whose null fraction is greater than ``null_threshold``;
        2. String columns that have exactly one distinct value or more than
           ``max_cardinality`` distinct values.

        The defaults reproduce the previously hard-coded thresholds.
        """
        protected = ("target", "case_id", "WEEK_NUM")

        too_sparse = [
            col
            for col in df.columns
            if col not in protected and df[col].is_null().mean() > null_threshold
        ]
        df = df.drop(too_sparse)

        bad_cardinality = []
        for col in df.columns:
            if col not in protected and df[col].dtype == pl.String:
                freq = df[col].n_unique()
                if freq == 1 or freq > max_cardinality:
                    bad_cardinality.append(col)

        return df.drop(bad_cardinality)



class Aggregator:
    """Builds polars aggregation expressions, selected by column-name convention.

    Every ``*_expr`` method returns the ``max_<col>`` expressions followed by the
    ``mean_<col>`` expressions for the columns it selects.
    """
    # Please add or subtract features yourself, be aware that too many features will take up too much space.

    @staticmethod
    def _max_and_mean(cols):
        """Return ``max_<col>`` expressions for all ``cols``, then ``mean_<col>`` ones."""
        expr_max = [pl.max(col).alias(f"max_{col}") for col in cols]
        expr_mean = [pl.mean(col).alias(f"mean_{col}") for col in cols]
        return expr_max + expr_mean

    @staticmethod
    def num_expr(df):
        """Aggregations for columns whose name ends in ``P`` or ``A``."""
        cols = [col for col in df.columns if col[-1] in ("P", "A")]
        return Aggregator._max_and_mean(cols)

    @staticmethod
    def date_expr(df):
        """Aggregations for columns whose name ends in ``D``."""
        cols = [col for col in df.columns if col[-1] == "D"]
        return Aggregator._max_and_mean(cols)

    @staticmethod
    def str_expr(df):
        """Aggregations for columns whose name ends in ``M``.

        NOTE(review): ``set_table_dtypes`` casts ``*M`` columns to String, so the
        ``mean_*`` expressions here are computed on strings -- confirm that this
        is intended; behavior is kept as it was.
        """
        cols = [col for col in df.columns if col[-1] == "M"]
        return Aggregator._max_and_mean(cols)

    @staticmethod
    def other_expr(df):
        """Aggregations for columns whose name ends in ``T`` or ``L``."""
        cols = [col for col in df.columns if col[-1] in ("T", "L")]
        return Aggregator._max_and_mean(cols)

    @staticmethod
    def count_expr(df):
        """Aggregations for columns whose name contains ``num_group``."""
        cols = [col for col in df.columns if "num_group" in col]
        return Aggregator._max_and_mean(cols)

    @staticmethod
    def get_exprs(df):
        """Concatenate the numeric, date, string, other and count expressions, in that order."""
        return (
            Aggregator.num_expr(df)
            + Aggregator.date_expr(df)
            + Aggregator.str_expr(df)
            + Aggregator.other_expr(df)
            + Aggregator.count_expr(df)
        )


In [16]:
def read_file(path, depth=None):
    """Load one parquet file and normalise its dtypes.

    Args:
        path: Location of the parquet file.
        depth: When 1 or 2, the table is aggregated to one row per ``case_id``
            using ``Aggregator.get_exprs``; any other value returns the table
            as read.

    Returns:
        A polars DataFrame.
    """
    table = Pipeline.set_table_dtypes(pl.read_parquet(path))
    if depth not in (1, 2):
        return table
    return table.group_by("case_id").agg(Aggregator.get_exprs(table))


def read_files(regex_path, depth=None):
    """Load every parquet file matching a glob pattern and stack the results.

    Each file goes through ``read_file`` (dtype normalisation, plus per-``case_id``
    aggregation when ``depth`` is 1 or 2). The pieces are concatenated vertically
    and reduced to one row per ``case_id``.

    Args:
        regex_path: Glob pattern (despite the name, not a regular expression).
        depth: Forwarded to ``read_file``.

    Returns:
        A polars DataFrame with unique ``case_id`` values.

    Raises:
        FileNotFoundError: If the pattern matches no files.
    """
    # Sort so the concatenation order (and therefore which duplicate row
    # ``unique`` sees first) does not depend on filesystem listing order.
    paths = sorted(glob(str(regex_path)))
    if not paths:
        raise FileNotFoundError(f"No files match pattern: {regex_path}")

    chunks = [read_file(path, depth) for path in paths]

    df = pl.concat(chunks, how="vertical_relaxed")
    # NOTE(review): with depth 1/2 each file is aggregated on its own, so a
    # case_id present in several files keeps only one file's aggregate here --
    # confirm that the files never share case_ids.
    df = df.unique(subset=["case_id"])
    return df

In [17]:
def feature_eng(df_base, depth_0, depth_1, depth_2):
    """Assemble the modelling table from the base table and the side tables.

    Adds ``month_decision`` and ``weekday_decision`` derived from
    ``date_decision``, left-joins every table from ``depth_0``, ``depth_1`` and
    ``depth_2`` (in that order) on ``case_id``, then applies
    ``Pipeline.handle_dates``. Clashing column names from the i-th joined table
    receive the suffix ``_i``.
    """
    decision_date = pl.col("date_decision")
    merged = df_base.with_columns(
        month_decision=decision_date.dt.month(),
        weekday_decision=decision_date.dt.weekday(),
    )

    side_tables = depth_0 + depth_1 + depth_2
    for position, side in enumerate(side_tables):
        merged = merged.join(side, how="left", on="case_id", suffix=f"_{position}")

    return Pipeline.handle_dates(merged)


def to_pandas(df_data, cat_cols=None):
    """Convert to pandas and make the categorical columns ``category`` dtype.

    Args:
        df_data: Object exposing ``to_pandas()`` (a polars DataFrame in this notebook).
        cat_cols: Columns to convert. When ``None``, every ``object`` column of
            the converted frame is used.

    Returns:
        Tuple ``(pandas_frame, cat_cols)``.
    """
    frame = df_data.to_pandas()
    if cat_cols is None:
        object_columns = frame.select_dtypes("object").columns
        cat_cols = list(object_columns)
    as_category = frame[cat_cols].astype("category")
    frame[cat_cols] = as_category
    return frame, cat_cols

In [18]:
def reduce_mem_usage(df, use_float16=True):
    """Downcast numeric columns of a pandas DataFrame in place to save memory.

    Only plain NumPy integer and float columns are touched:

    * signed / unsigned integers are narrowed to the smallest integer type of
      the same signedness whose range contains the column's min and max;
    * floats are narrowed to float16 (if ``use_float16``) or float32 when the
      min and max fit the type's range.

    Every other dtype (category, object, bool, datetime, pandas extension
    dtypes) is left unchanged, and a column is never widened.

    Args:
        df: DataFrame to shrink; it is modified in place.
        use_float16: Allow float16 as a target. The check is on *range* only,
            so float16/float32 can lose precision; pass ``False`` to stop at
            float32.

    Returns:
        The same DataFrame object, for chaining.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    int_candidates = {
        'i': (np.int8, np.int16, np.int32, np.int64),
        'u': (np.uint8, np.uint16, np.uint32, np.uint64),
    }
    float_candidates = (np.float16, np.float32) if use_float16 else (np.float32,)

    for col in df.columns:
        col_type = df[col].dtype
        # Skip extension dtypes (category, nullable ints, ...) and anything that
        # is not a plain integer / float, e.g. bool, object, datetime.
        if not isinstance(col_type, np.dtype) or col_type.kind not in ('i', 'u', 'f'):
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if col_type.kind == 'f':
            candidates, limits_of = float_candidates, np.finfo
        else:
            candidates, limits_of = int_candidates[col_type.kind], np.iinfo

        for candidate in candidates:
            # Candidates are ordered by size: stop once no saving is possible.
            if np.dtype(candidate).itemsize >= col_type.itemsize:
                break
            limits = limits_of(candidate)
            # Inclusive bounds so boundary values (e.g. 127 for int8) still fit.
            # All-NaN / empty columns give NaN min/max, fail every comparison,
            # and are therefore left as they are.
            if c_min >= limits.min and c_max <= limits.max:
                df[col] = df[col].astype(candidate)
                break

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    if start_mem > 0:
        print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return df

In [19]:
%%time

# Load every training table into memory. Tables read with depth 1 or 2 are
# aggregated to one row per case_id by read_file / read_files.
ROOT            = Path(ROOT)

TRAIN_DIR       = ROOT / "parquet_files" / "train"
# TEST_DIR is defined here but not used in the cells shown in this notebook.
TEST_DIR        = ROOT / "parquet_files" / "test"

# Keys match the parameter names of feature_eng, which is called as
# feature_eng(**data_store). feature_eng joins depth_0 + depth_1 + depth_2 in
# list order and uses the position as the clash suffix, so reordering entries
# changes the resulting column names.
data_store = {
    "df_base": read_file(TRAIN_DIR / "train_base.parquet"),
    "depth_0": [
        read_file(TRAIN_DIR / "train_static_cb_0.parquet"),
        read_files(TRAIN_DIR / "train_static_0_*.parquet"),
    ],
    "depth_1": [
        read_files(TRAIN_DIR / "train_applprev_1_*.parquet", 1),
        read_file(TRAIN_DIR / "train_tax_registry_a_1.parquet", 1),
        read_file(TRAIN_DIR / "train_tax_registry_b_1.parquet", 1),
        read_file(TRAIN_DIR / "train_tax_registry_c_1.parquet", 1),
        read_files(TRAIN_DIR / "train_credit_bureau_a_1_*.parquet", 1),
        read_file(TRAIN_DIR / "train_credit_bureau_b_1.parquet", 1),
        read_file(TRAIN_DIR / "train_other_1.parquet", 1),
        read_file(TRAIN_DIR / "train_person_1.parquet", 1),
        read_file(TRAIN_DIR / "train_deposit_1.parquet", 1),
        read_file(TRAIN_DIR / "train_debitcard_1.parquet", 1),
    ],
    "depth_2": [
        read_file(TRAIN_DIR / "train_credit_bureau_b_2.parquet", 2),
        read_files(TRAIN_DIR / "train_credit_bureau_a_2_*.parquet", 2),
    ]
}

CPU times: user 2min 4s, sys: 1min 50s, total: 3min 55s
Wall time: 1min 15s


In [20]:
%%time

# Join all tables into one frame, then release the per-table frames.
df_train = feature_eng(**data_store)
print("train data shape:\t", df_train.shape)
del data_store
# Drop mostly-null columns and String columns with degenerate cardinality
# (see Pipeline.filter_cols for the exact rules).
df_train = df_train.pipe(Pipeline.filter_cols)
print("train data shape:\t", df_train.shape)
# As the last expression of the cell, the return value of gc.collect() is displayed.
gc.collect()

train data shape:	 (1526659, 720)
train data shape:	 (1526659, 491)
CPU times: user 13.1 s, sys: 8.5 s, total: 21.6 s
Wall time: 14.7 s


569

In [8]:
!pip install --upgrade polars



In [None]:
def add_count_encoding(df, suffix='_cnt'):
    """Append a frequency-encoded copy of every String/Boolean/Categorical column.

    For each such column ``c`` a new column ``c + suffix`` is added holding the
    number of rows in ``df`` that share the row's value. Nulls are encoded as
    -2 and values missing from the mapping as -1.

    Returns a new polars DataFrame; ``df`` itself is returned when there is
    nothing to encode.
    """
    encode_cols = df.select(
        pl.selectors.by_dtype([pl.String, pl.Boolean, pl.Categorical])
    ).columns

    exprs = []
    for col in encode_cols:
        # group_by(col).len() yields (value, row_count) pairs.
        remapping = dict(df.group_by(col).len().rows())
        remapping[None] = -2
        # NOTE(review): recent polars releases may prefer `replace_strict` for
        # the `default=` form -- confirm against the installed version.
        exprs.append(
            pl.col(col).replace(remapping, default=-1).alias(col + suffix)
        )

    if not exprs:
        return df
    # The new columns are addressed by explicit name; the previous regex
    # selector "^*cnt$" was malformed.
    return df.with_columns(exprs)


df_train = add_count_encoding(df_train)
print("train data shape:\t", df_train.shape)

gc.collect()

0

In [10]:
# Convert to pandas, shrink dtypes, then bucket the non-categorical columns by
# their number of missing values: nans_groups maps NaN count -> column names.
df_train, cat_cols = to_pandas(df_train)
df_train = reduce_mem_usage(df_train)
print("train data shape:\t", df_train.shape)
nums=df_train.select_dtypes(exclude='category').columns
from itertools import combinations, permutations
nans_df = df_train[nums].isna()
nans_groups={}
for col in nums:
    cur_group = nans_df[col].sum()
    # setdefault replaces the previous bare `except:` fallback, which would
    # also have swallowed unrelated errors.
    nans_groups.setdefault(cur_group, []).append(col)
del nans_df
gc.collect()

def reduce_group(grps, df=None):
    """Pick one representative column from each group of columns.

    The representative is the column with the most distinct non-null values;
    ties go to the column that appears first in the group.

    Args:
        grps: Iterable of lists of column names.
        df: DataFrame holding those columns. Defaults to the notebook-level
            ``df_train`` so existing ``reduce_group(grps)`` calls keep working.

    Returns:
        List with one column name per non-empty group, in group order.
    """
    if df is None:
        df = df_train
    use = []
    for g in grps:
        if not g:
            # Nothing to choose from; skip instead of failing on g[0].
            continue
        # max() returns the first maximal element, matching the tie-breaking
        # of the previous strict `>` scan.
        use.append(max(g, key=lambda name: df[name].nunique()))
    print('Use these',use)
    return use

def group_columns_by_correlation(matrix, threshold=0.8):
    """Greedily cluster the columns of ``matrix`` by pairwise correlation.

    The first unassigned column becomes the anchor of a new group; every other
    unassigned column whose correlation with the anchor is ``>= threshold``
    joins that group. This repeats until every column is assigned.

    NOTE(review): the comparison uses the signed correlation, so strongly
    negatively correlated columns are not grouped together -- confirm intended.

    Returns:
        List of groups, each a list of column names starting with its anchor.
    """
    # Pairwise correlation between all columns.
    corr = matrix.corr()

    # Assign columns to groups, anchor by anchor.
    pending = list(matrix.columns)
    clusters = []
    while pending:
        anchor, *rest = pending
        members = [anchor]
        members.extend(
            name for name in rest if corr.loc[anchor, name] >= threshold
        )
        clusters.append(members)
        pending = [name for name in rest if name not in members]

    return clusters

# Within each NaN-count bucket keep one representative per correlation group.
uses=[]
for k,v in nans_groups.items():
    if len(v)>1:
        grps = group_columns_by_correlation(df_train[v], threshold=0.8)
        uses = uses + reduce_group(grps)
    else:
        uses = uses + v
    print('####### NAN count =',k)

# The label and split keys take part in the grouping above; make sure they can
# never be pruned as "redundant", since later cells read them from df_train.
for key_col in ("target", "WEEK_NUM", "case_id"):
    if key_col in df_train.columns and key_col not in uses:
        uses.insert(0, key_col)

print(uses)
print(len(uses))
uses=uses+list(df_train.select_dtypes(include='category').columns)
print(len(uses))
df_train=df_train[uses]

Memory usage of dataframe is 5809.23 MB
Memory usage after optimization is: 2137.36 MB
Decreased by 63.2%
train data shape:	 (1526659, 564)
Use these ['case_id', 'WEEK_NUM', 'target', 'month_decision', 'weekday_decision', 'credamount_770A', 'applicationcnt_361L', 'applications30d_658L', 'applicationscnt_1086L', 'applicationscnt_464L', 'applicationscnt_867L', 'clientscnt_1022L', 'clientscnt_100L', 'clientscnt_1071L', 'clientscnt_1130L', 'clientscnt_157L', 'clientscnt_257L', 'clientscnt_304L', 'clientscnt_360L', 'clientscnt_493L', 'clientscnt_533L', 'clientscnt_887L', 'clientscnt_946L', 'deferredmnthsnum_166L', 'disbursedcredamount_1113A', 'downpmt_116A', 'homephncnt_628L', 'isbidproduct_1095L', 'mobilephncnt_593L', 'numactivecreds_622L', 'numactivecredschannel_414L', 'numactiverelcontr_750L', 'numcontrs3months_479L', 'numnotactivated_1143L', 'numpmtchanneldd_318L', 'numrejects9m_859L', 'sellerplacecnt_915L', 'max_mainoccupationinc_384A', 'max_birth_259D', 'mean_persontype_1072L', 'descr

In [11]:
# Separate the label and the grouping key from the features. The guard makes
# the cell safe to re-run after "target" has already been removed.
if "target" in df_train.columns:
    y = df_train["target"]
    weeks = df_train["WEEK_NUM"]
    df_train = df_train.drop(columns=["target", "case_id", "WEEK_NUM"])
cv = StratifiedGroupKFold(n_splits=5, shuffle=False)

In [12]:
# LightGBM hyper-parameters.
# NOTE(review): the previous `"sample_weight": "balanced"` entry is removed.
# It is not an LGBMClassifier argument, so it only reached LightGBM as an
# unknown parameter and should have had no effect. If class balancing is
# wanted, `"class_weight": "balanced"` is the supported spelling -- enabling
# it changes the fitted models.
params = {
    "boosting_type": "gbdt",
    "objective": "binary",
    "metric": "auc",
    "max_depth": 8,
    "learning_rate": 0.01,
    "n_estimators": 10000,
    "colsample_bytree": 0.8,
    "colsample_bynode": 0.8,
    "random_state": 42,
    "reg_alpha": 0.3,
    "reg_lambda": 8,
    "extra_trees": True,
    "num_leaves": 32,
    "device": "gpu",
    "verbose": -1,
}

fitted_models = []
cv_scores = []

# One model per fold; folds are grouped by WEEK_NUM so a week never appears in
# both the training and the validation part.
for idx_train, idx_valid in cv.split(df_train, y, groups=weeks):
    X_train, y_train = df_train.iloc[idx_train], y.iloc[idx_train]
    X_valid, y_valid = df_train.iloc[idx_valid], y.iloc[idx_valid]
    model = lgb.LGBMClassifier(**params)
    model.fit(
        X_train, y_train,
        eval_set = [(X_valid, y_valid)],
        callbacks = [lgb.log_evaluation(200), lgb.early_stopping(100)] )
    fitted_models.append(model)
    y_pred_valid = model.predict_proba(X_valid)[:,1]
    auc_score = roc_auc_score(y_valid, y_pred_valid)
    cv_scores.append(auc_score)

print("CV AUC scores: ", cv_scores)
print("AVG CV AUC score: ", np.mean(cv_scores))
print("Maximum CV AUC score: ", max(cv_scores))

Training until validation scores don't improve for 100 rounds
[200]	valid_0's auc: 0.817627
[400]	valid_0's auc: 0.833834
[600]	valid_0's auc: 0.840857
[800]	valid_0's auc: 0.844994
[1000]	valid_0's auc: 0.847601
[1200]	valid_0's auc: 0.849445
[1400]	valid_0's auc: 0.850762
[1600]	valid_0's auc: 0.851817
[1800]	valid_0's auc: 0.852649
[2000]	valid_0's auc: 0.85335
[2200]	valid_0's auc: 0.853904
[2400]	valid_0's auc: 0.854428
[2600]	valid_0's auc: 0.854818
[2800]	valid_0's auc: 0.855165
[3000]	valid_0's auc: 0.85547
[3200]	valid_0's auc: 0.855783
[3400]	valid_0's auc: 0.856009
[3600]	valid_0's auc: 0.856214
[3800]	valid_0's auc: 0.856374
[4000]	valid_0's auc: 0.856529
[4200]	valid_0's auc: 0.856633
[4400]	valid_0's auc: 0.856781
[4600]	valid_0's auc: 0.856892
[4800]	valid_0's auc: 0.856958
[5000]	valid_0's auc: 0.857019
[5200]	valid_0's auc: 0.857114
[5400]	valid_0's auc: 0.85722
[5600]	valid_0's auc: 0.857285
[5800]	valid_0's auc: 0.857364
[6000]	valid_0's auc: 0.857435
[6200]	valid_0'

In [11]:
# Separate the label and the grouping key from the features. The guard lets
# this cell run even when an earlier cell already removed "target" (in which
# case `y` and `weeks` from that cell are reused).
if "target" in df_train.columns:
    y = df_train["target"]
    weeks = df_train["WEEK_NUM"]
    df_train = df_train.drop(columns=["target", "case_id", "WEEK_NUM"])
# The categorical features are handed to CatBoost as plain strings.
df_train[cat_cols] = df_train[cat_cols].astype(str)

In [12]:
from catboost import CatBoostClassifier, Pool

# Logging period and early-stopping patience are passed to fit() below.
# `params` previously also carried "verbose": 500 and
# "early_stopping_rounds": 100, which contradicted these fit() arguments;
# the contradictory entries are removed.
# NOTE(review): the captured log prints every 100 iterations, which suggests
# the fit() values are the ones in effect -- confirm.
VERBOSE_EVERY = 100
EARLY_STOPPING_ROUNDS = 50

params = {
    "eval_metric": "AUC",
    # "depth": 10,
    "learning_rate": 0.03,
    "iterations": 6000,  # 4000
    # "random_seed": 3107,
    # "l2_leaf_reg": 10,
    # "border_count": 254,
    "task_type": "GPU",
}

n_splits = 5
fitted_models = []
cv_scores = []

cv = StratifiedGroupKFold(n_splits=n_splits, shuffle=False)

# One model per fold; folds are grouped by WEEK_NUM so a week never appears in
# both the training and the validation part.
for step, (idx_train, idx_valid) in enumerate(cv.split(df_train, y, groups=weeks), start=1):
    print(f'current step: {step}')

    X_train, y_train = df_train.iloc[idx_train], y.iloc[idx_train]
    X_valid, y_valid = df_train.iloc[idx_valid], y.iloc[idx_valid]

    train_pool = Pool(X_train, y_train,cat_features=cat_cols)
    val_pool = Pool(X_valid, y_valid,cat_features=cat_cols)

    model = CatBoostClassifier(**params)
    model.fit(
        train_pool,
        eval_set=val_pool,
        verbose=VERBOSE_EVERY,
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
    )

    fitted_models.append(model)
    y_pred_valid = model.predict_proba(X_valid)[:,1]
    auc_score = roc_auc_score(y_valid, y_pred_valid)
    cv_scores.append(auc_score)

print("CV AUC scores: ", cv_scores)
print("AVG CV AUC score: ", np.mean(cv_scores))
print("Maximum CV AUC score: ", max(cv_scores))

current step: 1


Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.7103287	best: 0.7103287 (0)	total: 1.31s	remaining: 2h 11m 12s
100:	test: 0.8171400	best: 0.8171400 (100)	total: 50.3s	remaining: 48m 56s
200:	test: 0.8291014	best: 0.8291014 (200)	total: 1m 38s	remaining: 47m 13s
300:	test: 0.8354989	best: 0.8354989 (300)	total: 2m 25s	remaining: 45m 46s
400:	test: 0.8387885	best: 0.8387885 (400)	total: 3m 11s	remaining: 44m 37s
500:	test: 0.8410038	best: 0.8410038 (500)	total: 3m 58s	remaining: 43m 33s
600:	test: 0.8424864	best: 0.8424864 (600)	total: 4m 44s	remaining: 42m 32s
700:	test: 0.8436189	best: 0.8436189 (700)	total: 5m 30s	remaining: 41m 34s
800:	test: 0.8444410	best: 0.8444410 (800)	total: 6m 16s	remaining: 40m 42s
900:	test: 0.8451725	best: 0.8451725 (900)	total: 7m 2s	remaining: 39m 52s
1000:	test: 0.8457574	best: 0.8457574 (1000)	total: 7m 49s	remaining: 39m 2s
1100:	test: 0.8463077	best: 0.8463077 (1100)	total: 8m 34s	remaining: 38m 9s
1200:	test: 0.8467637	best: 0.8467637 (1200)	total: 9m 21s	remaining: 37m 23s
1300:	test: 

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.7172238	best: 0.7172238 (0)	total: 661ms	remaining: 1h 6m 5s
100:	test: 0.8171117	best: 0.8171117 (100)	total: 48.8s	remaining: 47m 32s
200:	test: 0.8300095	best: 0.8300095 (200)	total: 1m 36s	remaining: 46m 36s
300:	test: 0.8363650	best: 0.8363650 (300)	total: 2m 24s	remaining: 45m 42s
400:	test: 0.8398783	best: 0.8398783 (400)	total: 3m 12s	remaining: 44m 52s
500:	test: 0.8418818	best: 0.8418818 (500)	total: 4m	remaining: 43m 54s
600:	test: 0.8431682	best: 0.8431682 (600)	total: 4m 46s	remaining: 42m 58s
700:	test: 0.8443520	best: 0.8443520 (700)	total: 5m 34s	remaining: 42m 5s
800:	test: 0.8453304	best: 0.8453304 (800)	total: 6m 20s	remaining: 41m 12s
900:	test: 0.8460149	best: 0.8460149 (900)	total: 7m 7s	remaining: 40m 20s
1000:	test: 0.8466437	best: 0.8466437 (1000)	total: 7m 54s	remaining: 39m 27s
1100:	test: 0.8472021	best: 0.8472021 (1100)	total: 8m 40s	remaining: 38m 37s
1200:	test: 0.8476754	best: 0.8476754 (1200)	total: 9m 27s	remaining: 37m 46s
1300:	test: 0.848

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.7202511	best: 0.7202511 (0)	total: 755ms	remaining: 1h 15m 28s
100:	test: 0.8229858	best: 0.8229858 (100)	total: 48.6s	remaining: 47m 19s
200:	test: 0.8348711	best: 0.8348711 (200)	total: 1m 36s	remaining: 46m 19s
300:	test: 0.8408800	best: 0.8408800 (300)	total: 2m 23s	remaining: 45m 19s
400:	test: 0.8445008	best: 0.8445008 (400)	total: 3m 10s	remaining: 44m 25s
500:	test: 0.8466592	best: 0.8466592 (500)	total: 3m 57s	remaining: 43m 29s
600:	test: 0.8483769	best: 0.8483769 (600)	total: 5m 3s	remaining: 45m 29s
700:	test: 0.8495355	best: 0.8495355 (700)	total: 5m 48s	remaining: 43m 55s
800:	test: 0.8504559	best: 0.8504559 (800)	total: 6m 33s	remaining: 42m 37s
900:	test: 0.8511820	best: 0.8511820 (900)	total: 7m 19s	remaining: 41m 29s
1000:	test: 0.8518360	best: 0.8518360 (1000)	total: 8m 5s	remaining: 40m 25s
1100:	test: 0.8523176	best: 0.8523176 (1100)	total: 8m 50s	remaining: 39m 23s
1200:	test: 0.8527910	best: 0.8527910 (1200)	total: 9m 36s	remaining: 38m 24s
1300:	test:

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.7167139	best: 0.7167139 (0)	total: 649ms	remaining: 1h 4m 54s
100:	test: 0.8219712	best: 0.8219712 (100)	total: 47.8s	remaining: 46m 32s
200:	test: 0.8337694	best: 0.8337694 (200)	total: 1m 34s	remaining: 45m 40s
300:	test: 0.8403461	best: 0.8403461 (300)	total: 2m 21s	remaining: 44m 46s
400:	test: 0.8440646	best: 0.8440646 (400)	total: 3m 9s	remaining: 43m 58s
500:	test: 0.8460551	best: 0.8460551 (500)	total: 3m 55s	remaining: 43m
600:	test: 0.8474609	best: 0.8474609 (600)	total: 4m 41s	remaining: 42m 8s
700:	test: 0.8485481	best: 0.8485481 (700)	total: 5m 27s	remaining: 41m 14s
800:	test: 0.8494735	best: 0.8494735 (800)	total: 6m 12s	remaining: 40m 19s
900:	test: 0.8502097	best: 0.8502097 (900)	total: 6m 58s	remaining: 39m 28s
1000:	test: 0.8508018	best: 0.8508018 (1000)	total: 7m 43s	remaining: 38m 36s
1100:	test: 0.8512865	best: 0.8512865 (1100)	total: 8m 29s	remaining: 37m 47s
1200:	test: 0.8517420	best: 0.8517420 (1200)	total: 9m 14s	remaining: 36m 57s
1300:	test: 0.85

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.7115291	best: 0.7115291 (0)	total: 750ms	remaining: 1h 14m 56s
100:	test: 0.8168172	best: 0.8168172 (100)	total: 48s	remaining: 46m 41s
200:	test: 0.8290993	best: 0.8290993 (200)	total: 1m 35s	remaining: 45m 43s
300:	test: 0.8357348	best: 0.8357348 (300)	total: 2m 22s	remaining: 44m 56s
400:	test: 0.8390297	best: 0.8390297 (400)	total: 3m 9s	remaining: 43m 58s
500:	test: 0.8413104	best: 0.8413104 (500)	total: 3m 55s	remaining: 43m 7s
600:	test: 0.8429710	best: 0.8429710 (600)	total: 4m 48s	remaining: 43m 7s
700:	test: 0.8441104	best: 0.8441104 (700)	total: 5m 38s	remaining: 42m 42s
800:	test: 0.8451152	best: 0.8451152 (800)	total: 6m 26s	remaining: 41m 49s
900:	test: 0.8459381	best: 0.8459381 (900)	total: 7m 17s	remaining: 41m 18s
1000:	test: 0.8465739	best: 0.8465739 (1000)	total: 8m 8s	remaining: 40m 40s
1100:	test: 0.8470997	best: 0.8470997 (1100)	total: 8m 54s	remaining: 39m 37s
1200:	test: 0.8475794	best: 0.8475807 (1199)	total: 9m 39s	remaining: 38m 37s
1300:	test: 0.8