In [35]:
import warnings
import pandas as pd
import os
import sys
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import datetime
import math
import re
from pathlib import Path

from sklearn.model_selection import GroupKFold

In [2]:
from tqdm import tqdm

In [3]:
import lightgbm



In [4]:
# Names of the 18 regression targets: x/y/z values for indices 0..5.
target_col = ['x_0', 'y_0', 'z_0', 'x_1', 'y_1', 'z_1', 'x_2', 'y_2',  'z_2', 'x_3', 'y_3', 'z_3', 'x_4', 'y_4', 'z_4', 'x_5', 'y_5', 'z_5']

In [5]:
warnings.filterwarnings('ignore')
import random
import torch
def set_seed(seed=None, cudnn_deterministic=True):
    """Seed the Python, NumPy and PyTorch RNGs and configure cuDNN flags.

    Args:
        seed: Integer seed. ``None`` falls back to 42.
        cudnn_deterministic: Value assigned to
            ``torch.backends.cudnn.deterministic``.
    """
    seed = 42 if seed is None else seed

    # Only the environment variable is written here.
    os.environ['PYTHONHASHSEED'] = str(seed)

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = cudnn_deterministic
# Notebook-wide seed; also read by get_model() for the LightGBM seed parameters.
seed = 77
set_seed(seed)

In [6]:
class AverageMeter:
    """Tracks the latest value and the running weighted mean of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the mean, the weighted sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += n * val
        self.avg = self.sum / self.count


In [7]:
def mae(gt: np.ndarray, pred: np.ndarray):
    """Mean absolute error over every element of ``gt`` and ``pred``.

    Both inputs are converted with ``np.asarray`` first, so NumPy arrays,
    pandas objects and nested lists of matching shape are all accepted.

    Args:
        gt: Ground-truth values.
        pred: Predicted values, broadcast-compatible with ``gt``.

    Returns:
        The mean of ``|gt - pred|`` taken over all elements.
    """
    abs_diff = np.abs(np.asarray(gt) - np.asarray(pred))
    # np.mean without an axis averages over the flattened array.
    return np.mean(abs_diff)

In [8]:
# Project root is taken as the third ancestor of the current working directory,
# so this cell depends on where the notebook kernel was started.
ROOT_DIR = Path.cwd().parents[2]
DATA_DIR = ROOT_DIR / 'data'
# Name of the current working directory; used as the sub-folder of SAVE_DIR.
gbdt_name = Path.cwd().stem

# NOTE(review): this name is re-bound by the `for oof_exp_name in oof_exp_names`
# loop in a later cell — confirm which value is intended downstream.
oof_exp_name = 'exp0001'

# Raw competition tables.
ORIGINAL_DATA_DIR = DATA_DIR / 'original_data/atmaCup#18_dataset'
train_df = pd.read_csv(ORIGINAL_DATA_DIR / 'train_features.csv')
test_df = pd.read_csv(ORIGINAL_DATA_DIR / 'test_features.csv')

OUTPUT_DIR = ROOT_DIR / 'outputs'

# Output directory for this notebook's artefacts; created if missing.
SAVE_DIR = OUTPUT_DIR / 'gbdt' / gbdt_name
SAVE_DIR.mkdir(exist_ok=True, parents=True)

In [9]:
def common_preprocess(target_df):
    """Apply the preprocessing shared by the train and test tables, in place.

    Casts the four flag columns to int and derives ``scene`` / ``scene_sec``
    from the two '_'-separated parts of ``ID``, plus ``scene_count`` (number
    of rows that share the same scene).

    Args:
        target_df: Frame containing ``ID`` and the four flag columns.

    Returns:
        The same ``target_df`` object, with the columns above added/updated.
    """
    # Flag columns to cast to int
    flag_columns = ['brakePressed', 'gasPressed', 'leftBlinker', 'rightBlinker']
    print('bool_cols', flag_columns)
    target_df[flag_columns] = target_df[flag_columns].astype(int)

    # Split the ID once and reuse both parts.
    id_parts = target_df['ID'].str.split('_')
    target_df['scene'] = id_parts.str[0]
    target_df['scene_sec'] = id_parts.str[1].astype(int)

    rows_per_scene = target_df['scene'].value_counts()
    target_df['scene_count'] = target_df['scene'].map(rows_per_scene)
    return target_df

In [10]:
# common_preprocess mutates its argument and returns it, so these calls update
# the frames in place; each call prints the list of flag columns once.
train_df = common_preprocess(train_df)
test_df = common_preprocess(test_df)

bool_cols ['brakePressed', 'gasPressed', 'leftBlinker', 'rightBlinker']
bool_cols ['brakePressed', 'gasPressed', 'leftBlinker', 'rightBlinker']


In [11]:
train_df.columns

Index(['ID', 'vEgo', 'aEgo', 'steeringAngleDeg', 'steeringTorque', 'brake',
       'brakePressed', 'gas', 'gasPressed', 'gearShifter', 'leftBlinker',
       'rightBlinker', 'x_0', 'y_0', 'z_0', 'x_1', 'y_1', 'z_1', 'x_2', 'y_2',
       'z_2', 'x_3', 'y_3', 'z_3', 'x_4', 'y_4', 'z_4', 'x_5', 'y_5', 'z_5',
       'scene', 'scene_sec', 'scene_count'],
      dtype='object')

In [12]:
import os, json

# Directory holding one JSON file per sample ID.
tl_dir = ORIGINAL_DATA_DIR / 'traffic_lights'

traffic_lights = []   # parsed content of each JSON file, in glob order
id_class_list = []    # one (ID, class) tuple per entry across all files
for json_path in tqdm(tl_dir.glob('*.json')):
    id_ = json_path.stem
    # A context manager closes each file promptly instead of leaving
    # tens of thousands of handles for the garbage collector.
    with open(json_path, encoding='utf-8') as f:
        detections = json.load(f)

    traffic_lights.append(detections)

    # A distinct loop name avoids shadowing the list being iterated.
    for detection in detections:
        id_class_list.append((id_.split('.')[0], detection['class']))

# Number of entries per file, in the same order as `traffic_lights`.
counts = [len(detections) for detections in traffic_lights]

45098it [03:13, 233.37it/s]


In [13]:
# One row per (ID, class) tuple collected in id_class_list; the second line
# displays how often each class occurs.
traffic_lights_df = pd.DataFrame(id_class_list, columns=['ID', 'class'])
traffic_lights_df['class'].value_counts()

class
green       5879
red         4915
empty       1352
yellow       682
straight     647
left         576
right        306
other         60
Name: count, dtype: int64

In [14]:
traffic_lights_df

Unnamed: 0,ID,class
0,000fb056f97572d384bae4f5fc1e0f28_20,green
1,000fb056f97572d384bae4f5fc1e0f28_220,green
2,000fb056f97572d384bae4f5fc1e0f28_320,green
3,000fb056f97572d384bae4f5fc1e0f28_320,green
4,000fb056f97572d384bae4f5fc1e0f28_420,green
...,...,...
14412,ffe89d836ecae270d82b1903e6245149_120,red
14413,ffeff0e119ff5eb6885d834ea6a9667a_220,green
14414,ffeff0e119ff5eb6885d834ea6a9667a_220,green
14415,ffeff0e119ff5eb6885d834ea6a9667a_220,yellow


In [15]:
# NOTE(review): `ids` comes from a second glob over tl_dir while `counts` was
# built from an earlier glob; the pairing below relies on both enumerations
# yielding the files in the same order — confirm, or collect the IDs in the
# same loop that builds `counts`.
ids = [json_path.stem for json_path in tl_dir.glob('*.json')]

# Re-binds traffic_lights_df (previously the ID/class table) to an ID -> count table.
traffic_lights_df = pd.DataFrame({
    'ID': ids,
    'traffic_lights_counts': counts
})

# Left joins keep every train/test row; IDs absent from traffic_lights_df get NaN.
train_df = pd.merge(train_df, traffic_lights_df, on='ID', how='left')
test_df = pd.merge(test_df, traffic_lights_df, on='ID', how='left')

In [16]:
train_df

Unnamed: 0,ID,vEgo,aEgo,steeringAngleDeg,steeringTorque,brake,brakePressed,gas,gasPressed,gearShifter,...,x_4,y_4,z_4,x_5,y_5,z_5,scene,scene_sec,scene_count,traffic_lights_counts
0,00066be8e20318869c38c66be466631a_320,5.701526e+00,1.538456e+00,-2.165777,-139.0,0.0,0,0.250,1,drive,...,17.574227,0.174289,0.406331,21.951269,0.199503,0.485079,00066be8e20318869c38c66be466631a,320,3,0
1,00066be8e20318869c38c66be466631a_420,1.117629e+01,2.798807e-01,-11.625697,-44.0,0.0,0,0.000,0,drive,...,26.316489,0.843124,0.065000,31.383814,1.425070,0.073083,00066be8e20318869c38c66be466631a,420,3,0
2,00066be8e20318869c38c66be466631a_520,1.047255e+01,2.310992e-01,-2.985105,-132.0,0.0,0,0.180,1,drive,...,25.677387,-0.576985,0.102859,30.460033,-0.841894,0.152889,00066be8e20318869c38c66be466631a,520,3,0
3,000fb056f97572d384bae4f5fc1e0f28_120,6.055565e+00,-1.177754e-01,7.632668,173.0,0.0,0,0.000,0,drive,...,15.703514,0.960717,0.043479,19.311182,1.374655,0.058754,000fb056f97572d384bae4f5fc1e0f28,120,6,0
4,000fb056f97572d384bae4f5fc1e0f28_20,3.316744e+00,1.276733e+00,-31.725477,-114.0,0.0,0,0.255,1,drive,...,11.619313,-0.554488,0.011393,14.657048,-0.778800,0.044243,000fb056f97572d384bae4f5fc1e0f28,20,6,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43366,fff88cca5f8a012427b96bdde66011e3_20,-0.000000e+00,-0.000000e+00,16.102747,-1.0,0.0,1,0.000,0,drive,...,0.034200,0.012502,-0.040583,0.043196,0.015644,-0.051309,fff88cca5f8a012427b96bdde66011e3,20,6,0
43367,fff88cca5f8a012427b96bdde66011e3_220,1.594968e+00,3.501294e-01,-5.791823,-97.0,0.0,0,0.000,0,drive,...,4.106655,-0.023314,-0.024449,4.708509,-0.027916,-0.026821,fff88cca5f8a012427b96bdde66011e3,220,6,0
43368,fff88cca5f8a012427b96bdde66011e3_320,-4.207162e-03,-3.783329e-02,-2.089301,0.0,0.0,1,0.000,0,drive,...,0.091362,-0.000041,-0.039632,0.106230,-0.001123,-0.047890,fff88cca5f8a012427b96bdde66011e3,320,6,0
43369,fff88cca5f8a012427b96bdde66011e3_420,-2.162400e-10,-1.943228e-09,-2.170106,1.0,0.0,1,0.000,0,drive,...,0.054023,-0.005685,-0.020612,0.067738,-0.008169,-0.026279,fff88cca5f8a012427b96bdde66011e3,420,6,0


In [19]:
# Experiments whose out-of-fold (train) and submission (test) predictions are
# added as stacking features.
oof_exp_names = ['exp0000']
oof_feat_cols = []

for oof_exp_name in oof_exp_names:
    _oof_feat_cols = [f'{oof_exp_name}_{c}' for c in target_col]
    pred_cols = [f'pred_{i}' for i in target_col]

    path = OUTPUT_DIR / 'exp' / oof_exp_name / 'oof.csv'
    cnn_train_df = pd.read_csv(path, index_col=0)
    # Sort by ID and reset the index so row i is the i-th ID in sorted order.
    # Without the reset, an index-aligned assignment would ignore the sort.
    cnn_train_df = cnn_train_df.sort_values(by='ID').reset_index(drop=True)

    # Enforce the alignment instead of only printing it: a silent mismatch
    # would attach predictions to the wrong rows.
    train_ids = train_df['ID'].to_numpy()
    oof_ids = cnn_train_df['ID'].to_numpy()
    if len(train_ids) != len(oof_ids) or not (train_ids == oof_ids).all():
        raise ValueError(
            f'{path}: OOF rows are not aligned with train_df by ID')

    # Positional assignment is safe now that the IDs are verified.
    train_df[_oof_feat_cols] = cnn_train_df[pred_cols].to_numpy()
    print(_oof_feat_cols)
    print((train_ids == oof_ids).sum() / len(train_df))

    submit_path = OUTPUT_DIR / 'exp' / oof_exp_name / 'submission.csv'
    cnn_test_df = pd.read_csv(submit_path)

    # NOTE(review): assumes submission.csv rows are in the same order as
    # test_df — confirm against the code that writes it. Only the row count
    # can be checked here.
    if len(cnn_test_df) != len(test_df):
        raise ValueError(
            f'{submit_path}: {len(cnn_test_df)} rows, expected {len(test_df)}')
    test_df[_oof_feat_cols] = cnn_test_df[target_col].to_numpy()

    oof_feat_cols.extend(_oof_feat_cols)

['exp0000_x_0', 'exp0000_y_0', 'exp0000_z_0', 'exp0000_x_1', 'exp0000_y_1', 'exp0000_z_1', 'exp0000_x_2', 'exp0000_y_2', 'exp0000_z_2', 'exp0000_x_3', 'exp0000_y_3', 'exp0000_z_3', 'exp0000_x_4', 'exp0000_y_4', 'exp0000_z_4', 'exp0000_x_5', 'exp0000_y_5', 'exp0000_z_5']
1.0


In [20]:
def make_shift_feature(target_df, use_feat_cols, shift_count=1):
    """Add within-scene lag/lead features and their differences.

    Rows are ordered by ``scene_sec`` inside each ``scene``. For every
    non-zero shift ``s`` in ``[-shift_count, shift_count]`` and every column
    ``c`` in ``use_feat_cols``, two columns are added:

    * ``{c}_shift{s}``: the value of ``c`` shifted by ``s`` rows within the scene
      (NaN where no such row exists);
    * ``{c}_diff{s}``: ``c`` minus that shifted value.

    The input frame is not modified.

    Args:
        target_df: Frame containing ``scene``, ``scene_sec`` and ``use_feat_cols``.
        use_feat_cols: Columns to shift.
        shift_count: Largest absolute shift to generate (default 1).

    Returns:
        ``(frame, shift_feat_cols)``: the frame in the caller's original row
        order with a fresh 0..n-1 index, and the names of the added columns.
    """
    shifts = [s for s in range(-shift_count, shift_count + 1) if s != 0]

    # Remember the original row position on a copy, so the caller's frame
    # does not gain a helper column.
    ordered = (
        target_df
        .assign(ori_idx=target_df.index)
        .sort_values(['scene', 'scene_sec'])
        .reset_index(drop=True)
    )

    shift_feat_cols = []
    for shift in shifts:
        for col in use_feat_cols:
            shift_col = f'{col}_shift{shift}'
            ordered[shift_col] = ordered.groupby('scene')[col].shift(shift)
            shift_feat_cols.append(shift_col)

            diff_col = f'{col}_diff{shift}'
            ordered[diff_col] = ordered[col] - ordered[shift_col]
            shift_feat_cols.append(diff_col)

    # Restore the caller's row order and drop the helper column.
    restored = (
        ordered
        .sort_values('ori_idx')
        .reset_index(drop=True)
        .drop('ori_idx', axis=1)
    )
    return restored, shift_feat_cols

In [21]:
# Columns for which neighbouring-row (shift/diff) features are generated.
use_cols = ['vEgo', 'aEgo', 'steeringAngleDeg', 'steeringTorque', 'brake',
       'brakePressed', 'gas', 'gasPressed',  'leftBlinker',
       'rightBlinker']

# Both calls return the same list of new column names; the second assignment
# simply overwrites the first.
train_df, shift_feat_cols = make_shift_feature(train_df, use_cols)
test_df, shift_feat_cols = make_shift_feature(test_df, use_cols)

In [22]:
# Visual check of the shift/diff columns derived from vEgo.
# NOTE(review): in '^vEgo*' the '*' applies only to the final 'o', so the
# pattern matches any name starting with 'vEg' — presumably '^vEgo' was meant.
check_columns = ['ID'] + [c for c in train_df.columns if re.search('^vEgo*', c)]
train_df[check_columns]

Unnamed: 0,ID,vEgo,vEgo_shift-1,vEgo_diff-1,vEgo_shift1,vEgo_diff1
0,00066be8e20318869c38c66be466631a_320,5.701526e+00,1.117629e+01,-5.474767e+00,,
1,00066be8e20318869c38c66be466631a_420,1.117629e+01,1.047255e+01,7.037439e-01,5.701526e+00,5.474767e+00
2,00066be8e20318869c38c66be466631a_520,1.047255e+01,,,1.117629e+01,-7.037439e-01
3,000fb056f97572d384bae4f5fc1e0f28_120,6.055565e+00,8.214018e+00,-2.158453e+00,3.316744e+00,2.738821e+00
4,000fb056f97572d384bae4f5fc1e0f28_20,3.316744e+00,6.055565e+00,-2.738821e+00,,
...,...,...,...,...,...,...
43366,fff88cca5f8a012427b96bdde66011e3_20,-0.000000e+00,-0.000000e+00,0.000000e+00,,
43367,fff88cca5f8a012427b96bdde66011e3_220,1.594968e+00,-4.207162e-03,1.599175e+00,-0.000000e+00,1.594968e+00
43368,fff88cca5f8a012427b96bdde66011e3_320,-4.207162e-03,-2.162400e-10,-4.207162e-03,1.594968e+00,-1.599175e+00
43369,fff88cca5f8a012427b96bdde66011e3_420,-2.162400e-10,-1.110231e-17,-2.162400e-10,-4.207162e-03,4.207162e-03


In [23]:
train_df.columns

Index(['ID', 'vEgo', 'aEgo', 'steeringAngleDeg', 'steeringTorque', 'brake',
       'brakePressed', 'gas', 'gasPressed', 'gearShifter', 'leftBlinker',
       'rightBlinker', 'x_0', 'y_0', 'z_0', 'x_1', 'y_1', 'z_1', 'x_2', 'y_2',
       'z_2', 'x_3', 'y_3', 'z_3', 'x_4', 'y_4', 'z_4', 'x_5', 'y_5', 'z_5',
       'scene', 'scene_sec', 'scene_count', 'traffic_lights_counts',
       'exp0000_x_0', 'exp0000_y_0', 'exp0000_z_0', 'exp0000_x_1',
       'exp0000_y_1', 'exp0000_z_1', 'exp0000_x_2', 'exp0000_y_2',
       'exp0000_z_2', 'exp0000_x_3', 'exp0000_y_3', 'exp0000_z_3',
       'exp0000_x_4', 'exp0000_y_4', 'exp0000_z_4', 'exp0000_x_5',
       'exp0000_y_5', 'exp0000_z_5', 'vEgo_shift-1', 'vEgo_diff-1',
       'aEgo_shift-1', 'aEgo_diff-1', 'steeringAngleDeg_shift-1',
       'steeringAngleDeg_diff-1', 'steeringTorque_shift-1',
       'steeringTorque_diff-1', 'brake_shift-1', 'brake_diff-1',
       'brakePressed_shift-1', 'brakePressed_diff-1', 'gas_shift-1',
       'gas_diff-1', 'gasPres

In [None]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from typing import List

class AbstractBaseBlock:
    """Interface shared by the feature blocks: ``fit`` and ``transform``.

    Both methods raise ``NotImplementedError`` here and are meant to be
    overridden by subclasses.

    Reference:
    https://www.guruguru.science/competitions/16/discussions/95b7f8ec-a741-444f-933a-94c33b9e66be/
    """

    def __init__(self) -> None:
        """No state is kept on the base class."""

    def fit(self, input_df: pd.DataFrame, y=None) -> pd.DataFrame:
        """Fit on ``input_df`` and return its features (to be overridden)."""
        raise NotImplementedError

    def transform(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """Return features for ``input_df`` (to be overridden)."""
        raise NotImplementedError


def run_block(input_df: pd.DataFrame, blocks: List["AbstractBaseBlock"], is_fit):
    """Run every block on ``input_df`` and join the outputs column-wise.

    Args:
        input_df: Frame passed unchanged to each block.
        blocks: Objects exposing ``fit(df)`` and ``transform(df)``, each
            returning a DataFrame.
        is_fit: When truthy call ``block.fit``; otherwise ``block.transform``.

    Returns:
        The block outputs concatenated along ``axis=1`` in block order, or an
        empty DataFrame when ``blocks`` is empty.
    """
    feature_frames = [
        block.fit(input_df) if is_fit else block.transform(input_df)
        for block in blocks
    ]
    # pd.concat rejects an empty list, so keep the empty-frame result explicit.
    if not feature_frames:
        return pd.DataFrame()
    # One concat at the end instead of re-concatenating a growing frame.
    return pd.concat(feature_frames, axis=1)

class NumericBlock(AbstractBaseBlock):
    """Passes a single column through unchanged."""

    def __init__(self, col: str) -> None:
        super().__init__()
        self.col = col

    def fit(self, input_df):
        """Nothing to learn; same as ``transform``."""
        return self.transform(input_df)

    def transform(self, input_df):
        """Return a one-column frame holding a copy of ``input_df[self.col]``."""
        return pd.DataFrame({self.col: input_df[self.col].copy()})

class LabelEncodingBlock(AbstractBaseBlock):
    """Label-encodes a single column with a ``LabelEncoder``."""

    def __init__(self, col: str) -> None:
        super().__init__()
        self.col = col
        self.encoder = LabelEncoder()

    def fit(self, input_df):
        """Fit the encoder on ``input_df[self.col]`` and return the encoded column."""
        self.encoder.fit(input_df[self.col])
        return self.transform(input_df)

    def transform(self, input_df):
        """Encode ``input_df[self.col]`` with the fitted encoder.

        Returns a one-column frame named ``'<col>@le'``.
        """
        encoded = self.encoder.transform(input_df[self.col])
        return pd.DataFrame({f'{self.col}@le': encoded})

class CountEncodingBlock(AbstractBaseBlock):
    """Replaces each value of a column by its frequency in the fitted data."""

    def __init__(self, col: str) -> None:
        super().__init__()
        self.col = col

    def fit(self, input_df):
        """Store the value counts of ``input_df[self.col]`` and return the encoding."""
        # Not read anywhere in this class; kept so the instance attributes are unchanged.
        self.val_count_dict = {}
        self.val_count = input_df[self.col].value_counts()
        return self.transform(input_df)

    def transform(self, input_df):
        """Map each value to its stored count.

        Returns a one-column frame named ``'<col>@ce'``.
        """
        counts = input_df[self.col].map(self.val_count)
        return pd.DataFrame({f'{self.col}@ce': counts})

In [25]:
# ======= Processing shared by train_df and test_df =======

# Columns passed through unchanged as numeric features.
num_cols = ['vEgo', 'aEgo', 'steeringAngleDeg', 'steeringTorque', 'brake',
            'brakePressed', 'gas', 'gasPressed', 'leftBlinker',
            'rightBlinker']
num_cols += ['scene_sec']
num_cols += oof_feat_cols
num_cols += shift_feat_cols
num_cols += ['scene_count']

# Only referenced by the commented-out AggBlock below.
agg_num_cols = ['vEgo', 'aEgo', 'steeringAngleDeg', 'steeringTorque', 'brake', 'gas']

cat_label_cols = ['gearShifter']
cat_count_cols = []
cat_te_cols = []

# Stack train and test so the fitted blocks see both tables.
train_num = len(train_df)
whole_df = pd.concat([train_df, test_df], axis=0, ignore_index=True)

blocks = [
    *[NumericBlock(col) for col in num_cols],
    *[LabelEncodingBlock(col) for col in cat_label_cols],
    *[CountEncodingBlock(col) for col in cat_count_cols],
    # *[AggBlock(col, target_cols=agg_num_cols,
    #            agg_cols=['mean', 'max', 'min', 'std']) for col in ['scene']],
]
whole_feat_df = run_block(whole_df, blocks, is_fit=True)


# ======= Processing applied to train_df and test_df separately =======

# Explicit copies: train_df later receives a new 'fold' column, and writing
# into an iloc slice of whole_df would trigger SettingWithCopyWarning.
train_df = whole_df.iloc[:train_num].copy()
test_df = whole_df.iloc[train_num:].drop(
    columns=target_col).reset_index(drop=True)
train_feat = whole_feat_df.iloc[:train_num].copy()
test_feat = whole_feat_df.iloc[train_num:].reset_index(drop=True)

# NOTE(review): TargetEncodingBlock is not defined in the visible part of the
# notebook; it is never evaluated while cat_te_cols is empty.
blocks = [
    *[TargetEncodingBlock(col, target_col) for col in cat_te_cols]
]

_df = run_block(train_df, blocks, is_fit=True)
train_feat = pd.concat([train_feat, _df], axis=1)
_df = run_block(test_df, blocks, is_fit=False)
test_feat = pd.concat([test_feat, _df], axis=1)

print('use_col len', len(train_feat.columns))

use_col len 71


In [26]:
train_feat.columns

Index(['vEgo', 'aEgo', 'steeringAngleDeg', 'steeringTorque', 'brake',
       'brakePressed', 'gas', 'gasPressed', 'leftBlinker', 'rightBlinker',
       'scene_sec', 'exp0000_x_0', 'exp0000_y_0', 'exp0000_z_0', 'exp0000_x_1',
       'exp0000_y_1', 'exp0000_z_1', 'exp0000_x_2', 'exp0000_y_2',
       'exp0000_z_2', 'exp0000_x_3', 'exp0000_y_3', 'exp0000_z_3',
       'exp0000_x_4', 'exp0000_y_4', 'exp0000_z_4', 'exp0000_x_5',
       'exp0000_y_5', 'exp0000_z_5', 'vEgo_shift-1', 'vEgo_diff-1',
       'aEgo_shift-1', 'aEgo_diff-1', 'steeringAngleDeg_shift-1',
       'steeringAngleDeg_diff-1', 'steeringTorque_shift-1',
       'steeringTorque_diff-1', 'brake_shift-1', 'brake_diff-1',
       'brakePressed_shift-1', 'brakePressed_diff-1', 'gas_shift-1',
       'gas_diff-1', 'gasPressed_shift-1', 'gasPressed_diff-1',
       'leftBlinker_shift-1', 'leftBlinker_diff-1', 'rightBlinker_shift-1',
       'rightBlinker_diff-1', 'vEgo_shift1', 'vEgo_diff1', 'aEgo_shift1',
       'aEgo_diff1', 'steeringAn

In [27]:
n_folds = 3
def split_data(df, n_splits=None):
    """Assign a scene-grouped cross-validation fold id to every row, in place.

    The group of a row is the part of ``ID`` before the first '_', so all
    rows of one scene fall into the same validation fold.

    Args:
        df: Frame with an ``ID`` column; a ``fold`` column is added or overwritten.
        n_splits: Number of folds; ``None`` uses the module-level ``n_folds``.

    Returns:
        The same ``df`` object, with an integer ``fold`` column.
    """
    if n_splits is None:
        n_splits = n_folds

    scene_ser = df['ID'].str.split('_').str[0]

    # GroupKFold yields positional indices, so fill a positional array rather
    # than using them as labels in df.loc (which is only equivalent for a
    # default RangeIndex). The integer array also keeps the column integral.
    fold_ids = np.full(len(df), -1, dtype=int)
    group_kfold = GroupKFold(n_splits=n_splits)
    for ifold, (_, valid_pos) in enumerate(group_kfold.split(df, groups=scene_ser)):
        fold_ids[valid_pos] = ifold

    df['fold'] = fold_ids
    return df

In [28]:
# Adds the 'fold' column to train_df in place; the returned frame is shown as the cell output.
split_data(train_df)

Unnamed: 0,ID,vEgo,aEgo,steeringAngleDeg,steeringTorque,brake,brakePressed,gas,gasPressed,gearShifter,...,brakePressed_diff1,gas_shift1,gas_diff1,gasPressed_shift1,gasPressed_diff1,leftBlinker_shift1,leftBlinker_diff1,rightBlinker_shift1,rightBlinker_diff1,fold
0,00066be8e20318869c38c66be466631a_320,5.701526e+00,1.538456e+00,-2.165777,-139.0,0.0,0,0.250,1,drive,...,,,,,,,,,,2.0
1,00066be8e20318869c38c66be466631a_420,1.117629e+01,2.798807e-01,-11.625697,-44.0,0.0,0,0.000,0,drive,...,0.0,0.250,-0.250,1.0,-1.0,0.0,0.0,0.0,1.0,2.0
2,00066be8e20318869c38c66be466631a_520,1.047255e+01,2.310992e-01,-2.985105,-132.0,0.0,0,0.180,1,drive,...,0.0,0.000,0.180,0.0,1.0,0.0,0.0,1.0,-1.0,2.0
3,000fb056f97572d384bae4f5fc1e0f28_120,6.055565e+00,-1.177754e-01,7.632668,173.0,0.0,0,0.000,0,drive,...,0.0,0.255,-0.255,1.0,-1.0,0.0,0.0,0.0,0.0,2.0
4,000fb056f97572d384bae4f5fc1e0f28_20,3.316744e+00,1.276733e+00,-31.725477,-114.0,0.0,0,0.255,1,drive,...,,,,,,,,,,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43366,fff88cca5f8a012427b96bdde66011e3_20,-0.000000e+00,-0.000000e+00,16.102747,-1.0,0.0,1,0.000,0,drive,...,,,,,,,,,,0.0
43367,fff88cca5f8a012427b96bdde66011e3_220,1.594968e+00,3.501294e-01,-5.791823,-97.0,0.0,0,0.000,0,drive,...,-1.0,0.000,0.000,0.0,0.0,0.0,0.0,1.0,0.0,0.0
43368,fff88cca5f8a012427b96bdde66011e3_320,-4.207162e-03,-3.783329e-02,-2.089301,0.0,0.0,1,0.000,0,drive,...,1.0,0.000,0.000,0.0,0.0,0.0,0.0,1.0,0.0,0.0
43369,fff88cca5f8a012427b96bdde66011e3_420,-2.162400e-10,-1.943228e-09,-2.170106,1.0,0.0,1,0.000,0,drive,...,0.0,0.000,0.000,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [29]:
# Target matrix and per-row fold ids used by the training loop.
# NOTE(review): relies on target_col still being the list of target names here.
y = train_df[target_col]
folds = train_df['fold']

In [30]:
train_feat.shape, test_feat.shape

((43371, 71), (1727, 71))

In [31]:
import lightgbm as lgb

class LightGBM:
    """Thin wrapper around ``lgb.train`` with early stopping and model saving.

    Args:
        lgb_params: Parameter dict forwarded to ``lgb.train``.
        save_dir: Directory for saved models (str or Path); ``None`` means
            the current working directory.
        imp_dir: Stored on the instance; not used by the methods below.
        categorical_feature: Forwarded to ``lgb.Dataset``.
        model_name: Suffix used in the saved file name.
        stopping_rounds: Patience passed to ``lgb.early_stopping``.
    """

    def __init__(self, lgb_params, save_dir=None, imp_dir=None, categorical_feature=None,
                 model_name='lgb',
                 stopping_rounds=50) -> None:
        self.save_dir = save_dir
        self.imp_dir = imp_dir
        self.lgb_params = lgb_params
        self.categorical_feature = categorical_feature

        # Distinguishes the saved files of different models
        self.model_name = model_name

        self.stopping_rounds = stopping_rounds

    def fit(self, x_train, y_train, **fit_params) -> None:
        """Train with early stopping on the first ``eval_set`` pair.

        ``fit_params`` must contain ``eval_set=[(X_val, y_val), ...]``; the
        remaining entries are forwarded to ``lgb.train``. The trained model
        is stored in ``self.model``.
        """
        # Only the first validation pair is used.
        X_val, y_val = fit_params.pop('eval_set')[0]

        train_dataset = lgb.Dataset(
            x_train, y_train, categorical_feature=self.categorical_feature)

        val_dataset = lgb.Dataset(
            X_val, y_val, categorical_feature=self.categorical_feature)

        callbacks = [
            lgb.early_stopping(stopping_rounds=self.stopping_rounds, verbose=True),
            lgb.log_evaluation(500),
        ]
        self.model = lgb.train(params=self.lgb_params,
                               train_set=train_dataset,
                               valid_sets=[train_dataset, val_dataset],
                               callbacks=callbacks,
                               **fit_params
                               )

    def save(self, fold):
        """Save the model as ``<save_dir>/lgb_fold_<fold>_<model_name>.txt``.

        The path is joined with ``Path`` so a directory given without a
        trailing separator still yields a file inside it. The directory is
        created if missing.
        """
        directory = Path(self.save_dir) if self.save_dir is not None else Path('.')
        directory.mkdir(parents=True, exist_ok=True)
        save_to = directory / f'lgb_fold_{fold}_{self.model_name}.txt'
        self.model.save_model(str(save_to))

    def predict(self, x):
        """Return ``self.model.predict(x)``."""
        return self.model.predict(x)

    def predict_proba(self, x):
        """Return class probabilities.

        ``lgb.train`` returns a ``Booster``, which has no ``predict_proba``;
        for classification objectives its ``predict`` already yields
        probabilities, so this delegates to ``predict``.
        """
        return self.model.predict(x)

def get_model(model_name):
    """Build a ``LightGBM`` wrapper configured for MAE regression.

    Args:
        model_name: Passed to the wrapper, which uses it in saved file names.

    Returns:
        A new, untrained ``LightGBM`` instance.
    """
    lgb_params = dict(
        objective='regression',
        boosting_type='gbdt',
        verbose=-1,
        n_jobs=8,
        seed=seed,
        learning_rate=0.01,
        # num_class=CFG.num_class,  # required for multiclass
        metric='mae',
        num_leaves=64,
        max_depth=5,
        bagging_seed=seed,
        feature_fraction_seed=seed,
        drop_seed=seed,
    )
    return LightGBM(
        lgb_params=lgb_params,
        imp_dir=OUTPUT_DIR,
        save_dir=OUTPUT_DIR,
        model_name=model_name,
    )

def get_fit_params(model_name):
    """Return the extra keyword arguments passed to ``model.fit``.

    ``model_name`` is accepted but not used; every model gets the same
    boosting-round cap.
    """
    return {'num_boost_round': 100000}

In [32]:
# Targets are modelled independently: one LightGBM model per (target, fold).
TARGET_COL = [
    'x_0', 'y_0', 'z_0',
    'x_1', 'y_1', 'z_1',
    'x_2', 'y_2', 'z_2',
    'x_3', 'y_3', 'z_3',
    'x_4', 'y_4', 'z_4',
    'x_5', 'y_5', 'z_5',
]
# Direct reference instead of eval('mae').
eval_func = mae

oof_predictions = np.zeros((train_feat.shape[0], len(TARGET_COL)))
test_predictions = np.zeros((test_feat.shape[0], len(TARGET_COL)))

# `target_name` (not `target_col`) is used as the loop variable so the
# module-level list `target_col` is not re-bound to a string, which would
# break re-running the earlier cells that index frames with it.
for target_idx, target_name in enumerate(TARGET_COL):
    print(f'target {target_idx}')

    for fold in tqdm(range(n_folds)):
        print(f'Training fold {fold + 1}')

        model_name = f'lgb_{target_name}'
        model = get_model(model_name)
        fit_params = get_fit_params(model_name)

        # Boolean row masks for this fold.
        trn_ind = folds != fold
        val_ind = folds == fold

        x_train, x_val = train_feat.loc[trn_ind], train_feat.loc[val_ind]
        y_train, y_val = y.loc[trn_ind, target_name], y.loc[val_ind, target_name]
        eval_set = [(x_val, y_val)]

        fit_params_fold = fit_params.copy()
        fit_params_fold['eval_set'] = eval_set

        model.fit(x_train, y_train, **fit_params_fold)

        if hasattr(model, 'save'):
            model.save(fold)
        if hasattr(model, 'plot_importance'):
            model.plot_importance(fold)

        oof_predictions[val_ind, target_idx] = model.predict(x_val)

        # NOTE(review): fold predictions are summed, not averaged, here —
        # confirm that a later cell divides test_predictions by n_folds.
        test_predictions[:, target_idx] += model.predict(test_feat)

target 0



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.0645992	valid_1's l1: 0.0670017



 33% 1/3 [00:02<00:04,  2.13s/it]

Early stopping, best iteration is:
[801]	training's l1: 0.0594191	valid_1's l1: 0.0635375
Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.0646191	valid_1's l1: 0.0672477



 67% 2/3 [00:04<00:02,  2.12s/it]

Early stopping, best iteration is:
[791]	training's l1: 0.0596658	valid_1's l1: 0.0632534
Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.064756	valid_1's l1: 0.0674704


100% 3/3 [00:06<00:00,  2.08s/it]


Early stopping, best iteration is:
[738]	training's l1: 0.0598765	valid_1's l1: 0.0641148
target 1



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.0309196	valid_1's l1: 0.0339182
[1000]	training's l1: 0.0298136	valid_1's l1: 0.0336938
[1500]	training's l1: 0.0289918	valid_1's l1: 0.0336005
[2000]	training's l1: 0.0283583	valid_1's l1: 0.0335428
Early stopping, best iteration is:
[1974]	training's l1: 0.0283857	valid_1's l1: 0.0335386



 33% 1/3 [00:03<00:06,  3.19s/it]

Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.0321123	valid_1's l1: 0.0337486



 67% 2/3 [00:04<00:02,  2.00s/it]

Early stopping, best iteration is:
[618]	training's l1: 0.0318061	valid_1's l1: 0.0336761
Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.0325428	valid_1's l1: 0.0329109


100% 3/3 [00:05<00:00,  1.94s/it]


Early stopping, best iteration is:
[857]	training's l1: 0.0317793	valid_1's l1: 0.0328078
target 2



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds



 33% 1/3 [00:00<00:01,  1.59it/s]

Early stopping, best iteration is:
[237]	training's l1: 0.0250215	valid_1's l1: 0.0259226
Training fold 2
Training until validation scores don't improve for 50 rounds



 67% 2/3 [00:01<00:00,  1.18it/s]

[500]	training's l1: 0.0248779	valid_1's l1: 0.0254374
Early stopping, best iteration is:
[483]	training's l1: 0.0248996	valid_1's l1: 0.0254348
Training fold 3
Training until validation scores don't improve for 50 rounds


100% 3/3 [00:02<00:00,  1.18it/s]


Early stopping, best iteration is:
[423]	training's l1: 0.024848	valid_1's l1: 0.0256177
target 3



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.143343	valid_1's l1: 0.15025



 33% 1/3 [00:02<00:04,  2.34s/it]

[1000]	training's l1: 0.130114	valid_1's l1: 0.141683
Early stopping, best iteration is:
[1000]	training's l1: 0.130114	valid_1's l1: 0.141683
Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.142539	valid_1's l1: 0.150584
[1000]	training's l1: 0.129148	valid_1's l1: 0.140673



 67% 2/3 [00:04<00:02,  2.49s/it]

Early stopping, best iteration is:
[1169]	training's l1: 0.127889	valid_1's l1: 0.14062
Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.14295	valid_1's l1: 0.147172


100% 3/3 [00:06<00:00,  2.31s/it]


Early stopping, best iteration is:
[722]	training's l1: 0.132042	valid_1's l1: 0.14117
target 4



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.0705332	valid_1's l1: 0.0775491
[1000]	training's l1: 0.0673444	valid_1's l1: 0.0763859
[1500]	training's l1: 0.0650165	valid_1's l1: 0.0755629
[2000]	training's l1: 0.0629341	valid_1's l1: 0.0749054
[2500]	training's l1: 0.0611034	valid_1's l1: 0.0745079
[3000]	training's l1: 0.0595403	valid_1's l1: 0.0743357
Early stopping, best iteration is:
[3066]	training's l1: 0.0593388	valid_1's l1: 0.0743079



 33% 1/3 [00:04<00:09,  4.68s/it]

Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.0717579	valid_1's l1: 0.076408
[1000]	training's l1: 0.0688367	valid_1's l1: 0.0756173
[1500]	training's l1: 0.06632	valid_1's l1: 0.0749826
[2000]	training's l1: 0.0641027	valid_1's l1: 0.0743787
[2500]	training's l1: 0.0624637	valid_1's l1: 0.0741921
Early stopping, best iteration is:
[2776]	training's l1: 0.0615732	valid_1's l1: 0.0741225



 67% 2/3 [00:08<00:04,  4.39s/it]

Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.0730133	valid_1's l1: 0.0748126
[1000]	training's l1: 0.0700257	valid_1's l1: 0.0740291
[1500]	training's l1: 0.0675994	valid_1's l1: 0.0734507
[2000]	training's l1: 0.0653988	valid_1's l1: 0.0729641
Early stopping, best iteration is:
[2419]	training's l1: 0.0638888	valid_1's l1: 0.0727468


100% 3/3 [00:12<00:00,  4.18s/it]


target 5



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds



 33% 1/3 [00:00<00:01,  1.18it/s]

Early stopping, best iteration is:
[374]	training's l1: 0.0511764	valid_1's l1: 0.0535326
Training fold 2
Training until validation scores don't improve for 50 rounds



 67% 2/3 [00:01<00:00,  1.02it/s]

[500]	training's l1: 0.0512245	valid_1's l1: 0.0525484
Early stopping, best iteration is:
[515]	training's l1: 0.0511837	valid_1's l1: 0.0525427
Training fold 3
Training until validation scores don't improve for 50 rounds


100% 3/3 [00:02<00:00,  1.01it/s]


[500]	training's l1: 0.051061	valid_1's l1: 0.052886
Early stopping, best iteration is:
[522]	training's l1: 0.051002	valid_1's l1: 0.0528822
target 6



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.245592	valid_1's l1: 0.259589
[1000]	training's l1: 0.222295	valid_1's l1: 0.245409



 33% 1/3 [00:02<00:04,  2.45s/it]

Early stopping, best iteration is:
[1121]	training's l1: 0.220228	valid_1's l1: 0.245244
Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.245657	valid_1's l1: 0.259347
[1000]	training's l1: 0.222588	valid_1's l1: 0.24186
[1500]	training's l1: 0.215669	valid_1's l1: 0.241418
[2000]	training's l1: 0.209396	valid_1's l1: 0.241081
Early stopping, best iteration is:
[2147]	training's l1: 0.207552	valid_1's l1: 0.240949



 67% 2/3 [00:06<00:03,  3.39s/it]

Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.245336	valid_1's l1: 0.251966
[1000]	training's l1: 0.222529	valid_1's l1: 0.243034


100% 3/3 [00:09<00:00,  3.09s/it]


Early stopping, best iteration is:
[1286]	training's l1: 0.218494	valid_1's l1: 0.24271
target 7



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.124423	valid_1's l1: 0.137144
[1000]	training's l1: 0.117888	valid_1's l1: 0.134555
[1500]	training's l1: 0.113035	valid_1's l1: 0.132836
[2000]	training's l1: 0.109123	valid_1's l1: 0.131626
[2500]	training's l1: 0.105511	valid_1's l1: 0.130527
[3000]	training's l1: 0.10242	valid_1's l1: 0.129982
[3500]	training's l1: 0.0996879	valid_1's l1: 0.129637
[4000]	training's l1: 0.0972968	valid_1's l1: 0.12948
Early stopping, best iteration is:
[3967]	training's l1: 0.097439	valid_1's l1: 0.129466



 33% 1/3 [00:05<00:11,  5.92s/it]

Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.124849	valid_1's l1: 0.133658
[1000]	training's l1: 0.118759	valid_1's l1: 0.131323
[1500]	training's l1: 0.114165	valid_1's l1: 0.130129
[2000]	training's l1: 0.110142	valid_1's l1: 0.129157
[2500]	training's l1: 0.106769	valid_1's l1: 0.128528
[3000]	training's l1: 0.103934	valid_1's l1: 0.128063
Early stopping, best iteration is:
[3244]	training's l1: 0.10271	valid_1's l1: 0.127931



 67% 2/3 [00:10<00:05,  5.23s/it]

Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.126457	valid_1's l1: 0.131324
[1000]	training's l1: 0.120661	valid_1's l1: 0.129685
[1500]	training's l1: 0.115035	valid_1's l1: 0.127593
[2000]	training's l1: 0.110916	valid_1's l1: 0.126557
[2500]	training's l1: 0.107658	valid_1's l1: 0.126079
Early stopping, best iteration is:
[2713]	training's l1: 0.106284	valid_1's l1: 0.125941


100% 3/3 [00:14<00:00,  4.92s/it]


target 8



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds



 33% 1/3 [00:00<00:01,  1.31it/s]

Early stopping, best iteration is:
[316]	training's l1: 0.078414	valid_1's l1: 0.0821477
Training fold 2
Training until validation scores don't improve for 50 rounds



 67% 2/3 [00:01<00:00,  1.13it/s]

[500]	training's l1: 0.0783913	valid_1's l1: 0.0801255
Early stopping, best iteration is:
[471]	training's l1: 0.0785175	valid_1's l1: 0.0801235
Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.078002	valid_1's l1: 0.0807476
Early stopping, best iteration is:
[575]	training's l1: 0.0776986	valid_1's l1: 0.0807425


100% 3/3 [00:02<00:00,  1.02it/s]


target 9



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.381741	valid_1's l1: 0.404841
[1000]	training's l1: 0.346197	valid_1's l1: 0.383192
[1500]	training's l1: 0.333931	valid_1's l1: 0.38173
Early stopping, best iteration is:
[1927]	training's l1: 0.324925	valid_1's l1: 0.381335



 33% 1/3 [00:03<00:07,  3.63s/it]

Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.381236	valid_1's l1: 0.406003
[1000]	training's l1: 0.34526	valid_1's l1: 0.3766
[1500]	training's l1: 0.332832	valid_1's l1: 0.374757
[2000]	training's l1: 0.32274	valid_1's l1: 0.373647
[2500]	training's l1: 0.313412	valid_1's l1: 0.373028
Early stopping, best iteration is:
[2853]	training's l1: 0.30721	valid_1's l1: 0.372571



 67% 2/3 [00:08<00:04,  4.52s/it]

Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.381345	valid_1's l1: 0.390585
[1000]	training's l1: 0.346296	valid_1's l1: 0.376681


100% 3/3 [00:11<00:00,  3.75s/it]


Early stopping, best iteration is:
[1126]	training's l1: 0.343016	valid_1's l1: 0.376437
target 10



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.197884	valid_1's l1: 0.218774
[1000]	training's l1: 0.188069	valid_1's l1: 0.215583
[1500]	training's l1: 0.180283	valid_1's l1: 0.213006
[2000]	training's l1: 0.173389	valid_1's l1: 0.210474
[2500]	training's l1: 0.167555	valid_1's l1: 0.208727
[3000]	training's l1: 0.162638	valid_1's l1: 0.207719
[3500]	training's l1: 0.158266	valid_1's l1: 0.207287
[4000]	training's l1: 0.154207	valid_1's l1: 0.206886
Early stopping, best iteration is:
[4038]	training's l1: 0.15391	valid_1's l1: 0.206856



 33% 1/3 [00:06<00:12,  6.15s/it]

Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.199665	valid_1's l1: 0.213289
[1000]	training's l1: 0.189306	valid_1's l1: 0.209858
[1500]	training's l1: 0.180983	valid_1's l1: 0.207522
[2000]	training's l1: 0.174764	valid_1's l1: 0.206032
[2500]	training's l1: 0.169972	valid_1's l1: 0.205516
[3000]	training's l1: 0.165203	valid_1's l1: 0.204818
[3500]	training's l1: 0.160703	valid_1's l1: 0.204182
Early stopping, best iteration is:
[3766]	training's l1: 0.15845	valid_1's l1: 0.203956



 67% 2/3 [00:11<00:05,  5.73s/it]

Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.199788	valid_1's l1: 0.209894
[1000]	training's l1: 0.190183	valid_1's l1: 0.20749
[1500]	training's l1: 0.181615	valid_1's l1: 0.205018
[2000]	training's l1: 0.174963	valid_1's l1: 0.203313
[2500]	training's l1: 0.169201	valid_1's l1: 0.202291
[3000]	training's l1: 0.164318	valid_1's l1: 0.201708
Early stopping, best iteration is:
[3377]	training's l1: 0.160883	valid_1's l1: 0.201313


100% 3/3 [00:16<00:00,  5.55s/it]


target 11



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds



 33% 1/3 [00:00<00:01,  1.19it/s]

Early stopping, best iteration is:
[356]	training's l1: 0.105939	valid_1's l1: 0.111798
Training fold 2
Training until validation scores don't improve for 50 rounds



 67% 2/3 [00:01<00:00,  1.09it/s]

Early stopping, best iteration is:
[439]	training's l1: 0.10675	valid_1's l1: 0.108599
Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.105822	valid_1's l1: 0.10963
Early stopping, best iteration is:
[560]	training's l1: 0.105492	valid_1's l1: 0.109617


100% 3/3 [00:02<00:00,  1.00it/s]


target 12



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.551831	valid_1's l1: 0.582653
[1000]	training's l1: 0.496664	valid_1's l1: 0.549386
[1500]	training's l1: 0.476712	valid_1's l1: 0.545793
[2000]	training's l1: 0.460495	valid_1's l1: 0.543891
[2500]	training's l1: 0.446892	valid_1's l1: 0.542255
Early stopping, best iteration is:
[2587]	training's l1: 0.444642	valid_1's l1: 0.541995



 33% 1/3 [00:04<00:09,  4.70s/it]

Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.548479	valid_1's l1: 0.585588
[1000]	training's l1: 0.496669	valid_1's l1: 0.542716
[1500]	training's l1: 0.477099	valid_1's l1: 0.53803
[2000]	training's l1: 0.461164	valid_1's l1: 0.535733
Early stopping, best iteration is:
[2255]	training's l1: 0.453834	valid_1's l1: 0.535156



 67% 2/3 [00:08<00:04,  4.44s/it]

Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.549612	valid_1's l1: 0.564398
[1000]	training's l1: 0.498779	valid_1's l1: 0.541513
[1500]	training's l1: 0.480607	valid_1's l1: 0.538256
[2000]	training's l1: 0.46535	valid_1's l1: 0.535582
[2500]	training's l1: 0.450791	valid_1's l1: 0.53407
Early stopping, best iteration is:
[2709]	training's l1: 0.445406	valid_1's l1: 0.533675


100% 3/3 [00:13<00:00,  4.62s/it]


target 13



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.295293	valid_1's l1: 0.325911
[1000]	training's l1: 0.281696	valid_1's l1: 0.322341
[1500]	training's l1: 0.269945	valid_1's l1: 0.318691
[2000]	training's l1: 0.259435	valid_1's l1: 0.315327
[2500]	training's l1: 0.250878	valid_1's l1: 0.313509
[3000]	training's l1: 0.243	valid_1's l1: 0.311784
[3500]	training's l1: 0.235863	valid_1's l1: 0.310814
Early stopping, best iteration is:
[3685]	training's l1: 0.233558	valid_1's l1: 0.310659



 33% 1/3 [00:05<00:11,  5.57s/it]

Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.297682	valid_1's l1: 0.315903
[1000]	training's l1: 0.283382	valid_1's l1: 0.311989
[1500]	training's l1: 0.271435	valid_1's l1: 0.309038
[2000]	training's l1: 0.261813	valid_1's l1: 0.307188
[2500]	training's l1: 0.253223	valid_1's l1: 0.305886
[3000]	training's l1: 0.245457	valid_1's l1: 0.305198
Early stopping, best iteration is:
[2979]	training's l1: 0.245719	valid_1's l1: 0.305142



 67% 2/3 [00:10<00:05,  5.09s/it]

Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.298117	valid_1's l1: 0.316768
[1000]	training's l1: 0.283676	valid_1's l1: 0.313249
[1500]	training's l1: 0.270755	valid_1's l1: 0.310202
[2000]	training's l1: 0.260313	valid_1's l1: 0.308172
[2500]	training's l1: 0.251633	valid_1's l1: 0.306737
[3000]	training's l1: 0.243432	valid_1's l1: 0.305753
Early stopping, best iteration is:
[3239]	training's l1: 0.239827	valid_1's l1: 0.305352


100% 3/3 [00:15<00:00,  5.08s/it]


target 14



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds



 33% 1/3 [00:01<00:02,  1.08s/it]

[500]	training's l1: 0.133291	valid_1's l1: 0.142399
Early stopping, best iteration is:
[481]	training's l1: 0.133439	valid_1's l1: 0.142384
Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.135189	valid_1's l1: 0.138135



 67% 2/3 [00:02<00:01,  1.23s/it]

Early stopping, best iteration is:
[669]	training's l1: 0.133957	valid_1's l1: 0.138083
Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.134426	valid_1's l1: 0.139288


100% 3/3 [00:03<00:00,  1.22s/it]


Early stopping, best iteration is:
[615]	training's l1: 0.133551	valid_1's l1: 0.139245
target 15



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.746361	valid_1's l1: 0.784917
[1000]	training's l1: 0.669906	valid_1's l1: 0.73614
[1500]	training's l1: 0.640429	valid_1's l1: 0.729609
[2000]	training's l1: 0.617809	valid_1's l1: 0.725408
[2500]	training's l1: 0.599472	valid_1's l1: 0.724007
Early stopping, best iteration is:
[2848]	training's l1: 0.587836	valid_1's l1: 0.722999



 33% 1/3 [00:05<00:10,  5.13s/it]

Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.742819	valid_1's l1: 0.79546
[1000]	training's l1: 0.668633	valid_1's l1: 0.733448
[1500]	training's l1: 0.641191	valid_1's l1: 0.724414
[2000]	training's l1: 0.619223	valid_1's l1: 0.720248
[2500]	training's l1: 0.60098	valid_1's l1: 0.71817
[3000]	training's l1: 0.584505	valid_1's l1: 0.717024
[3500]	training's l1: 0.569409	valid_1's l1: 0.716156
[4000]	training's l1: 0.553849	valid_1's l1: 0.715228
Early stopping, best iteration is:
[4429]	training's l1: 0.542278	valid_1's l1: 0.714704



 67% 2/3 [00:12<00:06,  6.55s/it]

Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.741896	valid_1's l1: 0.767162
[1000]	training's l1: 0.669703	valid_1's l1: 0.730863
[1500]	training's l1: 0.642893	valid_1's l1: 0.723892
[2000]	training's l1: 0.62155	valid_1's l1: 0.720009
[2500]	training's l1: 0.602907	valid_1's l1: 0.717821
Early stopping, best iteration is:
[2551]	training's l1: 0.601251	valid_1's l1: 0.717722


100% 3/3 [00:17<00:00,  5.75s/it]


target 16



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.418875	valid_1's l1: 0.460994
[1000]	training's l1: 0.398167	valid_1's l1: 0.455022
[1500]	training's l1: 0.382476	valid_1's l1: 0.450637
[2000]	training's l1: 0.368262	valid_1's l1: 0.447168
[2500]	training's l1: 0.356363	valid_1's l1: 0.445448
[3000]	training's l1: 0.345157	valid_1's l1: 0.443752
[3500]	training's l1: 0.335333	valid_1's l1: 0.442938
[4000]	training's l1: 0.326235	valid_1's l1: 0.441962
Early stopping, best iteration is:
[4305]	training's l1: 0.32058	valid_1's l1: 0.441377



 33% 1/3 [00:06<00:13,  6.50s/it]

Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.421421	valid_1's l1: 0.446171
[1000]	training's l1: 0.400222	valid_1's l1: 0.439652
[1500]	training's l1: 0.382899	valid_1's l1: 0.435849
[2000]	training's l1: 0.368962	valid_1's l1: 0.434054
Early stopping, best iteration is:
[2390]	training's l1: 0.359605	valid_1's l1: 0.433009



 67% 2/3 [00:10<00:04,  4.83s/it]

Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.420625	valid_1's l1: 0.450852
[1000]	training's l1: 0.399386	valid_1's l1: 0.445919
[1500]	training's l1: 0.381193	valid_1's l1: 0.441428
[2000]	training's l1: 0.366827	valid_1's l1: 0.439159
[2500]	training's l1: 0.353465	valid_1's l1: 0.437137
[3000]	training's l1: 0.342165	valid_1's l1: 0.435833
Early stopping, best iteration is:
[3045]	training's l1: 0.341261	valid_1's l1: 0.435755


100% 3/3 [00:14<00:00,  4.93s/it]


target 17



  0% 0/3 [00:00<?, ?it/s]

Training fold 1
Training until validation scores don't improve for 50 rounds



 33% 1/3 [00:01<00:02,  1.14s/it]

[500]	training's l1: 0.162588	valid_1's l1: 0.174278
Early stopping, best iteration is:
[533]	training's l1: 0.162287	valid_1's l1: 0.174272
Training fold 2
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.165102	valid_1's l1: 0.169074
Early stopping, best iteration is:
[566]	training's l1: 0.164446	valid_1's l1: 0.169004



 67% 2/3 [00:02<00:01,  1.15s/it]

Training fold 3
Training until validation scores don't improve for 50 rounds
[500]	training's l1: 0.164115	valid_1's l1: 0.169917


100% 3/3 [00:03<00:00,  1.27s/it]

Early stopping, best iteration is:
[791]	training's l1: 0.161512	valid_1's l1: 0.169757





In [33]:
def get_result(result_df):
    """Score the predictions stored in ``result_df`` against its targets with ``mae``.

    Args:
        result_df: DataFrame containing the ground-truth columns listed in
            ``TARGET_COL`` and one ``pred_{i}`` column per entry of
            ``TARGET_COL`` (``pred_0`` is compared with ``TARGET_COL[0]``,
            and so on, because both are extracted as positional arrays).

    Returns:
        The value returned by ``mae(labels, preds)``. The same value is also
        printed, formatted with four decimals.
    """
    pred_cols = [f'pred_{i}' for i in range(len(TARGET_COL))]

    preds = result_df[pred_cols].values
    labels = result_df[TARGET_COL].values

    # Call the metric directly: resolving it through eval('mae') performed the
    # same name lookup but hid the dependency from readers and tooling.
    best_score = mae(labels, preds)

    print(f'best_score: {best_score:<.4f}')
    return best_score

In [34]:
# Score the out-of-fold predictions, then persist OOF frames and the test
# submission under SAVE_DIR.
# NOTE(review): eval_func, y, oof_predictions, train_feat, test_feat,
# test_predictions and n_folds are not defined in this cell; they must come
# from earlier cells — confirm they exist after Restart & Run All.
score = eval_func(y.values, oof_predictions)
print(f'oof result {score}')

# model.plot_importance_all(n_fold=CFG.n_fold)

# One prediction column per target: pred_0 ... pred_{len(TARGET_COL) - 1}.
pred_cols = [f'pred_{i}' for i in range(len(TARGET_COL))]

# Copy of the raw training frame with predictions and targets attached.
oof = train_df.copy()
oof[pred_cols] = oof_predictions
oof[TARGET_COL] = y

# Same columns attached to a copy of the feature frame.
oof_feat = train_feat.copy()
oof_feat[pred_cols] = oof_predictions
oof_feat[TARGET_COL] = y

# Prints the score computed from the assembled frame; the return value is not used here.
get_result(oof)

# Write both OOF frames as CSV (without the index) into SAVE_DIR.
oof.to_csv(SAVE_DIR / 'oof_gbdt.csv', index=False)
oof_feat.to_csv(SAVE_DIR / 'oof_feat_gbdt.csv', index=False)

# NOTE(review): in-place division — re-running this cell divides by n_folds
# again and silently shrinks the predictions. Presumably test_predictions holds
# the sum of per-fold test predictions at this point — TODO confirm.
test_predictions /= n_folds

# Attach the test predictions to test_feat (this mutates test_feat) and write
# the full frame plus a targets-only file.
test_feat[TARGET_COL] = test_predictions
test_feat.to_csv(SAVE_DIR / 'submission_oof.csv', index=False)
# NOTE(review): the f-prefix below has no placeholders; the file name is constant.
test_feat[TARGET_COL].to_csv(SAVE_DIR / f'submission_gbdt.csv', index=False)

# Print the row counts of the sample submission and of test_feat for a manual
# comparison; nothing is asserted, so a mismatch would not stop the run.
sample_sub = pd.read_csv(ORIGINAL_DATA_DIR / 'atmaCup18__sample_submit.csv')
print('sample_sub_len: ', len(sample_sub))
print('sub_len: ', len(test_feat))

oof result 0.21350996474782025
best_score: 0.2135
sample_sub_len:  1727
sub_len:  1727


In [40]:
# Display the `model` attribute of whatever `model` object is left in the
# kernel; the recorded output is a lightgbm.basic.Booster.
# NOTE(review): `model` is not defined in this cell, so this relies on state
# from an earlier cell — presumably the last model trained in the fold loop;
# confirm, or remove this inspection cell from the final notebook.
model.model

<lightgbm.basic.Booster at 0x7f8eedf546d0>