<a href="https://colab.research.google.com/github/TomohiroYazaki/Tabular_Playground_Series_-_Mar_2021/blob/main/Tabular_Playground_Series___Mar_2021_v4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install optuna

In [None]:
#!pip install --upgrade pandas

In [None]:
!pip install pytorch-tabnet

In [None]:
!pip install catboost

In [None]:
!pip install category_encoders

In [None]:
import numpy as np
import os
import random
import sys
import statistics
import pandas as pd
from pathlib import Path
import time
from datetime import datetime
import pytz
import json
from tqdm import tqdm

#import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))
        
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from sklearn.dummy import DummyRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn import preprocessing
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import LocalOutlierFactor
from sklearn.decomposition import PCA
from sklearn.metrics import roc_auc_score
from sklearn import metrics
from sklearn.calibration import calibration_curve
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier

import xgboost as xgb
import lightgbm as lgb
import catboost as cb

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
from pytorch_tabnet.tab_model import TabNetClassifier

import optuna

import warnings
warnings.filterwarnings("ignore")
        
#input_path = Path('/kaggle/input/tabular-playground-series-jan-2021/')
input_path = Path('/content/drive/MyDrive/Kaggle/Tabular_Playground_Series_-_Mar_2021/Data')

**---------- Utilities ----------**

In [None]:
!rm -f /content/log.log
!rm -f /content/result.png
!rm -f /content/submission.csv
!rm -f /content/submission_mean.csv
!rm -f /content/submission_stack.csv
!rm -f /content/plot_optimization_history.html
!rm -f /content/plot_parallel_coordinate.html
!rm -f /content/plot_slice.html
!rm -f /content/plot_param_importances.html
!rm -f /content/plot_contour.html
!rm -f /content/best_params.json
!rm -f /content/FEATURES_SEARCH.csv
!rm -f /content/FEATURES_SEARCH.png
!rm -f /content/train.csv
!rm -f /content/test.csv
!rm -f /content/target.csv
!rm -f -r /content/TEST
!rm -f -r /content/OOF

In [None]:
def get_logger(filename='log'):
    """Return the module logger, writing bare messages to ``<filename>.log``.

    The logger is looked up by ``__name__``, so every call returns the same
    logger object. Any file handler that already targets the same log file is
    closed and replaced instead of being stacked on top, so re-running this
    cell does not make every message appear several times, and a log file that
    was deleted in the meantime is recreated.

    Args:
        filename: Path of the log file without the ``.log`` suffix.

    Returns:
        The configured ``logging.Logger`` (level ``INFO``).
    """
    import os
    from logging import getLogger, INFO, FileHandler, Formatter

    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # FileHandler stores the absolute path in ``baseFilename``; compare on that.
    log_path = os.path.abspath(f"{filename}.log")
    for handler in list(logger.handlers):
        if isinstance(handler, FileHandler) and handler.baseFilename == log_path:
            logger.removeHandler(handler)
            handler.close()

    file_handler = FileHandler(filename=log_path)
    file_handler.setFormatter(Formatter("%(message)s"))
    logger.addHandler(file_handler)
    return logger

logger = get_logger('log')

In [None]:
import logging

from lightgbm.callback import _format_eval_result


def log_evaluation(logger, period=1, show_stdv=True, level=logging.DEBUG):
    """Build a LightGBM-style callback that logs evaluation results.

    Args:
        logger: Object whose ``info`` method receives the formatted line.
        period: Log every ``period``-th iteration; values <= 0 disable logging.
        show_stdv: Forwarded to ``_format_eval_result``.
        level: Accepted for signature compatibility; the message is always
            emitted through ``logger.info``.

    Returns:
        A callable taking the callback environment, with ``order`` set to 10.
    """
    def _callback(env):
        if period <= 0 or not env.evaluation_result_list:
            return
        round_number = env.iteration + 1
        if round_number % period != 0:
            return
        formatted = [
            _format_eval_result(entry, show_stdv)
            for entry in env.evaluation_result_list
        ]
        logger.info('[{}]\t{}'.format(round_number, '\t'.join(formatted)))

    _callback.order = 10
    return _callback

In [None]:
def convDictKeyToNdarray(dict):
    """Return the keys of ``dict`` as a NumPy array wrapped in one outer row."""
    keys = list(dict.keys())
    return np.array([keys])

In [None]:
def plot_results(name, target, pred, figsize=(6,6)):
    """Plot the ROC curve of ``pred`` against ``target`` and save it.

    The title shows ``name`` and the ROC AUC to five decimals. The figure is
    displayed and written to ``result.png``.
    """
    plt.figure(0).clf()

    fig = plt.figure(figsize=figsize)
    false_pos_rate, true_pos_rate, _ = metrics.roc_curve(target, pred)
    auc = metrics.roc_auc_score(target, pred)

    # Draw on the current axes of the figure that was just created.
    axes = plt.gca()
    axes.set_title(f'{name}: {auc:0.5f}', fontsize=18)
    axes.plot(false_pos_rate, true_pos_rate, label=name)
    axes.legend(loc=0)

    plt.show()
    fig.savefig("result.png")

In [None]:
def plot_FEATURES_SEARCH(results, figsize=(20, 8)):
    """Draw a bar chart of feature-search scores and save it.

    ``results`` is converted to an array; column 0 supplies the bar labels and
    column 1 the scores (cast to float32). The y-axis is padded by 10% of the
    score range, or by 1% of the value when all scores are equal. The figure
    is displayed and written to ``FEATURES_SEARCH.png``.
    """
    plt.figure(0).clf()
    results = np.array(results)
    fig = plt.figure(figsize=figsize)

    labels = results[:, 0]
    positions = np.arange(len(labels))
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(positions, results[:, 1].astype(np.float32), tick_label=labels)

    scores = results[:, 1].astype(np.float32)
    lowest, highest = scores.min(), scores.max()
    if lowest == highest:
        bottom = lowest * 0.99
        top = highest * 1.01
    else:
        bottom = lowest - (highest - lowest) * 0.1
        top = highest + (highest - lowest) * 0.1
    ax.set_ylim(bottom=bottom, top=top)

    plt.show()
    fig.savefig("FEATURES_SEARCH.png")

In [None]:
def seed_everything(seed_value):
    """Seed Python, NumPy and PyTorch RNGs and set ``PYTHONHASHSEED``.

    When CUDA is available the CUDA generators are seeded as well and cuDNN
    is switched to deterministic, non-benchmark mode.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(21)

**---------- Data Processing ----------**

In [None]:
# Continuous column names: cont0 ... cont10.
cont_features = [f"cont{i}" for i in range(11)]
# Categorical column names: cat0 ... cat18.
cat_features = [f"cat{i}" for i in range(19)]

In [None]:
def replace_outliers(data, columns=None):
    """Clamp outliers in-place to the Tukey fences ``Q1 - 1.5*IQR`` / ``Q3 + 1.5*IQR``.

    Quartiles are computed per column from ``data`` itself.

    Args:
        data: DataFrame to modify; it is mutated and also returned.
        columns: Column names to process. Defaults to the module-level
            ``cont_features`` list.

    Returns:
        The same ``data`` object with out-of-fence values replaced by the
        nearest fence.
    """
    if columns is None:
        columns = cont_features

    for col in columns:
        q1 = data[col].quantile(0.25)
        q3 = data[col].quantile(0.75)
        iqr = q3 - q1
        lower_fence = q1 - 1.5 * iqr
        upper_fence = q3 + 1.5 * iqr
        # .loc assignment keeps the operation in-place on the caller's frame.
        data.loc[data[col] < lower_fence, col] = lower_fence
        data.loc[data[col] > upper_fence, col] = upper_fence
    return data

In [None]:
from category_encoders import LeaveOneOutEncoder
def loo_encode(train_df, test_df, column):
    """Add a leave-one-out target-encoded copy of ``column`` to both frames.

    The encoder is fitted on ``train_df[column]`` against ``train_df["target"]``.
    Both frames are modified in place: a new column ``"<column>_loo"`` is added.

    Args:
        train_df: Training frame; must contain ``column`` and ``"target"``.
        test_df: Test frame; must contain ``column``.
        column: Name of the categorical column to encode.

    Returns:
        The name of the newly added column.
    """
    new_feature = "{}_loo".format(column)
    loo = LeaveOneOutEncoder()
    loo.fit(train_df[column], train_df["target"])
    # The target must be passed when transforming the training rows; without
    # it the encoder cannot exclude each row's own target, and the feature
    # degenerates into a plain (leaky) category mean.
    train_df[new_feature] = loo.transform(train_df[column], train_df["target"])
    # Test rows have no target, so they are encoded without one.
    test_df[new_feature] = loo.transform(test_df[column])
    return new_feature

In [None]:
def one_hot_encode(train_df, test_df, column):
    """Add one-hot indicator columns for ``column`` to both frames in place.

    Train and test values are concatenated before encoding so both frames get
    the same set of indicator columns.

    Args:
        train_df: Training frame, modified in place.
        test_df: Test frame, modified in place.
        column: Column name(s) to encode, as accepted by ``pd.get_dummies``'s
            ``columns`` argument.

    Returns:
        List of the indicator column names that were added.
    """
    n_train = train_df.shape[0]
    combined = pd.concat([train_df[column], test_df[column]])
    dummies = pd.get_dummies(combined, columns=column)
    for name in dummies.columns:
        # Assign positionally: the first n_train rows belong to train_df and
        # the rest to test_df, regardless of overlapping index labels.
        values = dummies[name].to_numpy()
        train_df[name] = values[:n_train]
        test_df[name] = values[n_train:]
    return list(dummies.columns)

In [None]:
def label_encode(train, test):
    """Replace every column in ``cat_features`` with integer labels.

    For each column a ``LabelEncoder`` is fitted on the string form of the
    train and test values together, then both frames are overwritten in place.

    Returns:
        The ``(train, test)`` pair that was passed in.
    """
    for column in cat_features:
        train_values = list(train[column].astype(str))
        test_values = list(test[column].astype(str))

        encoder = preprocessing.LabelEncoder()
        encoder.fit(train_values + test_values)

        train[column] = encoder.transform(train_values)
        test[column] = encoder.transform(test_values)
    return train, test

In [None]:
def reject_outliers(df, feature, threshold=3):
    """Return a boolean mask that is True where ``|z-score| < threshold``.

    The z-score is computed from ``np.mean(df)`` and ``np.std(df)``.
    ``feature`` is accepted but not used.
    """
    center = np.mean(df)
    spread = np.std(df)
    return np.abs((df - center) / spread) < threshold
#good = reject_outliers(train['target'], 'target', threshold=4)

In [None]:
def reject_lof_outliers(df, feature):
    """Return a boolean mask that is True where ``LocalOutlierFactor.fit_predict`` exceeds 0.5.

    Uses 20 neighbours, contamination 0.001 and ``p=1``. ``feature`` is
    accepted but not used.
    """
    detector = LocalOutlierFactor(n_neighbors=20, contamination=0.001, p=1)
    labels = detector.fit_predict(df)
    # change the 0.5 cut-off to set the threshold for outliers
    return labels > 0.5
#good = reject_lof_outliers(train['target'].values.reshape(train['target'].shape[0], -1), 'target')

In [None]:
# Load the training set from input_path, indexed by the 'id' column,
# and preview its first 10 rows.
train = pd.read_csv(input_path / 'train.csv', index_col='id')
#train = pd.read_csv(input_path / 'train4.csv', index_col='id')
display(train.head(10))

In [None]:
# Load the test set from input_path, indexed by the 'id' column,
# and preview its first 10 rows.
test = pd.read_csv(input_path / 'test.csv', index_col='id')
#test = pd.read_csv(input_path / 'test4.csv', index_col='id')
display(test.head(10))

In [None]:
# Names of the "<column>_loo" columns that loo_encode adds to train and test,
# one per entry of cat_features.
loo_features = []

for feature in cat_features:
    loo_features.append(loo_encode(train, test, feature))

In [None]:
#tmp_num = train.shape[1]
#train, test = one_hot_encode(train, test, cat_features)
#one_hot_features = list(train.columns[tmp_num:])

In [None]:
# Names of the indicator columns produced by one-hot encoding all of cat_features.
one_hot_features = one_hot_encode(train, test, cat_features)

In [None]:
#target = pd.read_csv(input_path / 'target.csv', index_col='id')
#display(target.head())

In [None]:
# Overwrite the raw cat_features columns with integer labels. This runs after
# the leave-one-out and one-hot cells above, which read the raw values.
train, test = label_encode(train, test)

In [None]:
# Clamp continuous-feature outliers; each frame is clamped using quartiles
# computed from that same frame.
train = replace_outliers(train)
test = replace_outliers(test)

In [None]:
display(train.tail())

In [None]:
display(test.tail())

In [None]:
# Split the label off: removes the 'target' column from train and keeps it as its own Series.
target = train.pop('target')

In [None]:
# Load the sample submission from input_path, indexed by 'id', and preview it.
submission = pd.read_csv(input_path / 'sample_submission.csv', index_col='id')
display(submission.head())

In [None]:
# Passed to TabNetClassifier as `cat_idxs` by Model_TabNet.reset_model.
# NOTE(review): index 10 is skipped, giving 18 entries, while cat_features
# lists 19 names — confirm the omission is intentional.
#cat_idxs = [i for i in range(19)]
cat_idxs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18]

In [None]:
# Passed to TabNetClassifier as `cat_dims` by Model_TabNet.reset_model; 18
# hard-coded values, one per entry of cat_idxs.
# NOTE(review): presumably the per-column cardinalities (see the commented
# nunique() line) — verify they still match the data.
#cat_dims = [i for i in train[cat_features].nunique()]
cat_dims = [2, 15, 19, 13, 20, 84, 16, 51, 61, 19, 2, 2, 2, 2, 4, 4, 4, 4]

In [None]:
print(train.shape)
print(test.shape)
print(target.shape)

In [None]:
#train_add = pd.read_csv(input_path / 'train_add.csv', index_col='id')
#test_add = pd.read_csv(input_path / 'test_add.csv', index_col='id')

In [None]:
#train = pd.concat([train, train_add], axis=1)

In [None]:
#test = pd.concat([test, test_add], axis=1)

In [None]:
#display(train.tail())

In [None]:
#display(test.tail())

In [None]:
#print(train.shape)
#print(test.shape)
#print(target.shape)

**---------- Model ----------**

In [None]:
class Model:
    """Thin wrapper giving every estimator the same fit/predict interface.

    Subclasses replace ``self.model`` and may override ``fit``/``predict``.
    ``params`` and ``init_params`` only exist after a subclass's
    ``reset_model`` or ``set_init_params`` has been called.
    """

    def __init__(self, model=None):
        self.model = model

    def set_seed(self, seed):
        """Remember the random seed that ``reset_model`` should apply."""
        self.seed = seed

    def get_params(self):
        """Return the parameters stored by the last ``reset_model`` call."""
        return self.params

    def set_init_params(self, init_params):
        """Store the default parameter set for this model."""
        self.init_params = init_params

    def get_init_params(self):
        """Return the default parameter set stored by ``set_init_params``."""
        return self.init_params

    def set_model(self, model):
        """Replace the wrapped estimator."""
        self.model = model

    def get_model(self):
        """Return the wrapped estimator."""
        return self.model

    def fit(self, X, Y, VX=None, VY=None):
        """Fit the wrapped estimator on ``X``/``Y``.

        ``VX``/``VY`` (validation data) are accepted so all models can be
        called the same way; this base implementation ignores them. They are
        optional so the two-argument form ``fit(X, Y)`` works as well.
        """
        self.model.fit(X, Y)

    def predict(self, X):
        """Return the wrapped estimator's ``predict`` output for ``X``."""
        return self.model.predict(X)

In [None]:
class Model_DummyRegressor(Model):
    """Baseline wrapper around scikit-learn's ``DummyRegressor``."""

    def __init__(self):
        super().__init__(DummyRegressor())

    def reset_model(self, **params):
        """Discard the current estimator and build a new one from ``params``."""
        del self.model
        self.model = DummyRegressor(**params)

In [None]:
#DummyRegressor
'''
params_DummyRegressor = {
    'strategy': hp.choice('strategy', ['median', 'mean']), 
}
'''

In [None]:
class Model_LinearRegression(Model):
    """Wrapper around scikit-learn's ``LinearRegression``."""

    def __init__(self):
        self.model = LinearRegression()
        self.set_init_params(params_LinearRegression)

    def reset_model(self, **params):
        """Discard the current estimator and build a new one from ``params``."""
        del self.model
        self.model = LinearRegression(**params)

    def predict(self, X):
        """Return one prediction per row of ``X``.

        ``LinearRegression.predict`` yields a 1-D array for a single target
        and a 2-D array for several; indexing ``[:, -1]`` unconditionally
        raised IndexError in the single-target case. The last column is taken
        only when the output is actually 2-D.
        """
        predictions = np.asarray(self.model.predict(X))
        if predictions.ndim > 1:
            return predictions[:, -1]
        return predictions

In [None]:
#Model_LinearRegression
# Stored as Model_LinearRegression's init params by its constructor.
# NOTE(review): `normalize` was removed from LinearRegression in newer
# scikit-learn releases — confirm against the installed version.
params_LinearRegression = {
    "normalize": True,
}

In [None]:
class Model_Ridge(Model):
    """``RidgeClassifier`` wrapped in ``CalibratedClassifierCV`` to obtain probabilities."""

    def __init__(self):
        self.model = CalibratedClassifierCV(RidgeClassifier(), cv=3)

    def reset_model(self, **params):
        """Rebuild the calibrated classifier from ``params``.

        ``params`` must contain ``'cv'`` (the calibration fold count); the
        remaining entries go to ``RidgeClassifier``. ``random_state`` is taken
        from ``self.seed`` when a seed was set, otherwise defaults to 21
        unless supplied.
        """
        del self.model
        calibration_folds = params.pop('cv')
        if hasattr(self, 'seed'):
            params['random_state'] = self.seed
        elif 'random_state' not in params:
            params['random_state'] = 21
        self.params = params
        self.model = CalibratedClassifierCV(
            RidgeClassifier(**params),
            cv=calibration_folds,
        )

    def predict(self, X):
        """Return the last column of ``predict_proba`` for ``X``."""
        probabilities = self.model.predict_proba(X)
        return probabilities[:, -1]

    def objective(self, trial):
        """Optuna objective: out-of-fold ROC AUC on the module-level ``train``/``target``.

        Samples the hyperparameters, rebuilds the model, then collects
        predictions over ``NFOLDS`` stratified folds.
        """
        oof_preds = np.zeros(train.shape[0])

        params = {
            'alpha': trial.suggest_uniform('alpha', 0.0, 10.0),
            'normalize': trial.suggest_categorical('normalize', [True, False]),
            'cv': trial.suggest_int('cv', 2, 10, 1),
        }
        self.reset_model(**params)

        splitter = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=21)
        for fit_rows, holdout_rows in splitter.split(train, target):
            fit_x, holdout_x = train.iloc[fit_rows], train.iloc[holdout_rows]
            fit_y, holdout_y = target.iloc[fit_rows], target.iloc[holdout_rows]

            self.fit(fit_x, fit_y, holdout_x, holdout_y)
            oof_preds[holdout_rows] = self.predict(holdout_x)

        return roc_auc_score(target, oof_preds)

In [None]:
class Model_SGD(Model):
    """``SGDClassifier`` wrapped in ``CalibratedClassifierCV`` to obtain probabilities."""

    def __init__(self):
        self.model = CalibratedClassifierCV(SGDClassifier(), cv=3)

    def reset_model(self, **params):
        """Rebuild the calibrated classifier from ``params``.

        ``params`` must contain ``'cv'`` (the calibration fold count). Loss,
        ``max_iter`` and ``n_jobs`` are always overridden with fixed values.
        ``random_state`` is taken from ``self.seed`` when a seed was set,
        otherwise defaults to 21 unless supplied.
        """
        del self.model
        calibration_folds = params.pop('cv')
        params.update(loss="squared_hinge", max_iter=100000, n_jobs=-1)
        if hasattr(self, 'seed'):
            params['random_state'] = self.seed
        elif 'random_state' not in params:
            params['random_state'] = 21
        self.params = params
        self.model = CalibratedClassifierCV(
            SGDClassifier(**params),
            cv=calibration_folds,
        )

    def predict(self, X):
        """Return the last column of ``predict_proba`` for ``X``."""
        probabilities = self.model.predict_proba(X)
        return probabilities[:, -1]

    def objective(self, trial):
        """Optuna objective: out-of-fold ROC AUC on the module-level ``train``/``target``.

        Samples the hyperparameters, rebuilds the model, then collects
        predictions over ``NFOLDS`` stratified folds.
        """
        oof_preds = np.zeros(train.shape[0])

        params = {
            'alpha': trial.suggest_loguniform("alpha", 0.00005, 0.0005),
            'l1_ratio': trial.suggest_uniform('l1_ratio', 0.01, 0.8),
            'cv': trial.suggest_int('cv', 2, 6, 1),
        }
        self.reset_model(**params)

        splitter = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=21)
        for fit_rows, holdout_rows in splitter.split(train, target):
            fit_x, holdout_x = train.iloc[fit_rows], train.iloc[holdout_rows]
            fit_y, holdout_y = target.iloc[fit_rows], target.iloc[holdout_rows]

            self.fit(fit_x, fit_y, holdout_x, holdout_y)
            oof_preds[holdout_rows] = self.predict(holdout_x)

        return roc_auc_score(target, oof_preds)

In [None]:
class Model_HGB(Model):
    """Wrapper around scikit-learn's ``HistGradientBoostingClassifier``."""

    def __init__(self):
        self.model = HistGradientBoostingClassifier()

    def reset_model(self, **params):
        """Rebuild the classifier from ``params``.

        ``max_iter`` is always set to 1000. ``random_state`` is taken from
        ``self.seed`` when a seed was set, otherwise defaults to 21 unless
        supplied.
        """
        del self.model
        params['max_iter'] = 1000
        if hasattr(self, 'seed'):
            params['random_state'] = self.seed
        elif 'random_state' not in params:
            params['random_state'] = 21
        self.params = params
        self.model = HistGradientBoostingClassifier(**params)

    def predict(self, X):
        """Return the last column of ``predict_proba`` for ``X``."""
        probabilities = self.model.predict_proba(X)
        return probabilities[:, -1]

    def objective(self, trial):
        """Optuna objective: out-of-fold ROC AUC on the module-level ``train``/``target``.

        Samples the hyperparameters, rebuilds the model, then collects
        predictions over ``NFOLDS`` stratified folds.
        """
        oof_preds = np.zeros(train.shape[0])

        params = {
            'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
            'l2_regularization': trial.suggest_uniform('l2_regularization', 0.0, 5.0),
            'max_bins': trial.suggest_int('max_bins', 50, 255, 5),
            'max_depth': trial.suggest_int('max_depth', 5, 50, 1),
            'max_leaf_nodes': trial.suggest_int('max_leaf_nodes', 5, 255, 5),
        }
        self.reset_model(**params)

        splitter = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=21)
        for fit_rows, holdout_rows in splitter.split(train, target):
            fit_x, holdout_x = train.iloc[fit_rows], train.iloc[holdout_rows]
            fit_y, holdout_y = target.iloc[fit_rows], target.iloc[holdout_rows]

            self.fit(fit_x, fit_y, holdout_x, holdout_y)
            oof_preds[holdout_rows] = self.predict(holdout_x)

        return roc_auc_score(target, oof_preds)

In [None]:
class Model_XGB(Model):
    """XGBoost binary classifier trained through the native ``xgb.train`` API on GPU."""

    def __init__(self):
        self.model = xgb.XGBClassifier()

    def reset_model(self, **params):
        """Store the training parameters for the next ``fit`` call.

        GPU training, AUC evaluation and the binary-logistic objective are
        always forced. ``seed`` is taken from ``self.seed`` when a seed was
        set, otherwise defaults to 21 unless supplied.

        The booster itself is only created by ``fit``. The previous model is
        dropped by assigning ``None`` rather than deleting the attribute, so
        calling ``reset_model`` twice without a ``fit`` in between no longer
        raises AttributeError.
        """
        self.model = None
        tmp_params = params
        tmp_params['tree_method'] = 'gpu_hist'
        tmp_params['gpu_id'] = 0
        tmp_params['eval_metric'] = 'auc'
        tmp_params['objective'] = 'binary:logistic'
        if hasattr(self, 'seed'):
            tmp_params['seed'] = self.seed
        elif 'seed' not in params:
            tmp_params['seed'] = 21
        self.params = tmp_params

    def fit(self, X, Y, VX, VY):
        """Train a booster on ``X``/``Y``, early-stopping on ``VX``/``VY``.

        Runs up to 10000 rounds and stops after 150 rounds without
        improvement on the last evaluation set (the validation data).
        """
        trn_data = xgb.DMatrix(X, label=Y)
        val_data = xgb.DMatrix(VX, label=VY)
        evals = [(trn_data, 'train'), (val_data, 'eval')]
        evals_result = {}
        self.model = xgb.train(self.params, trn_data, num_boost_round=10000, evals=evals, early_stopping_rounds=150, evals_result=evals_result, verbose_eval=500)

    def predict(self, X):
        """Predict with the booster, limited to its best iteration."""
        data = xgb.DMatrix(X)
        return self.model.predict(data, ntree_limit=self.model.best_ntree_limit)

    def objective(self, trial):
        """Optuna objective: out-of-fold ROC AUC on the module-level ``train``/``target``.

        Samples the hyperparameters, stores them, then trains one booster per
        fold over ``NFOLDS`` stratified folds.
        """
        oof_preds = np.zeros(train.shape[0])

        params = {
            'max_depth': trial.suggest_int('max_depth', 5, 25, 1),
            'gamma': trial.suggest_loguniform("gamma", 0.01, 10.0),
            'min_child_weight': trial.suggest_loguniform("min_child_weight", 0.01, 100.0),
            'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.1, 1.0),
            'subsample': trial.suggest_uniform('subsample', 0.1, 1.0),
            'learning_rate': trial.suggest_uniform('learning_rate', 0.05, 1.0),
            'max_delta_step': trial.suggest_loguniform("max_delta_step", 0.01, 10.0),
            'reg_alpha': trial.suggest_loguniform("reg_alpha", 0.1, 10.0),
            'reg_lambda': trial.suggest_loguniform("reg_lambda", 0.1, 10.0),
        }

        self.reset_model(**params)

        kf = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=21)
        for tr_idx, va_idx in kf.split(train, target):
            tr_x, va_x = train.iloc[tr_idx], train.iloc[va_idx]
            tr_y, va_y = target.iloc[tr_idx], target.iloc[va_idx]

            self.fit(tr_x, tr_y, va_x, va_y)
            oof_preds[va_idx] = self.predict(va_x)

        score = roc_auc_score(target, oof_preds)
        # Explicit finalizer call kept from the original.
        # NOTE(review): presumably meant to release the booster's native/GPU
        # memory between trials — confirm it is still needed.
        self.model.__del__()
        return score

In [None]:
class Model_LightGBM(Model):
    """Wrapper around ``lgb.LGBMRegressor`` with early stopping on a validation set."""

    # Parameters that samplers may hand over as floats but must be integers.
    _INT_PARAMS = ('num_leaves', 'min_child_samples', 'min_data_per_group')

    def __init__(self):
        self.model = lgb.LGBMRegressor()

    def reset_model(self, **params):
        """Rebuild the regressor from ``params``.

        Integer-valued parameters are cast to ``int`` when present; missing
        ones are left to the library default instead of raising KeyError.
        ``n_estimators`` is always set to 100000. ``random_state`` is taken
        from ``self.seed`` when a seed was set, otherwise defaults to 21
        unless supplied.
        """
        del self.model
        tmp_params = params
        for key in self._INT_PARAMS:
            if key in tmp_params:
                tmp_params[key] = int(tmp_params[key])
        tmp_params['n_estimators'] = 100000
        if hasattr(self, 'seed'):
            tmp_params['random_state'] = self.seed
        elif 'random_state' not in params:
            tmp_params['random_state'] = 21
        self.params = tmp_params
        self.model = lgb.LGBMRegressor(**tmp_params)

    def fit(self, X, Y, VX, VY):
        """Fit on ``X``/``Y``, evaluating RMSE on ``VX``/``VY`` with 10-round early stopping."""
        eval_set = [(VX, VY)]
        self.model.fit(X, Y, eval_metric="rmse", eval_set=eval_set, verbose=False, early_stopping_rounds=10)

In [None]:
'''
LightBGM_params_space = {
    #'learning_rate':hp.loguniform('learning_rate',np.log(0.001),np.log(0.1)),
    'learning_rate':hp.loguniform('learning_rate',np.log(0.01),np.log(0.1)),
    'min_child_weight':hp.loguniform('min_child_weight',np.log(0.0001),np.log(1.0)),
    #'max_depth':hp.quniform('max_depth',3,15,1),
    #'num_leaves':hp.quniform('num_leaves',20,50,1),
    'num_leaves':hp.quniform('num_leaves',50,255,5),
    'min_child_samples':hp.quniform('min_child_samples',10,30,1),
    'reg_alpha':hp.uniform('reg_alpha',0.0,5.0),
    'reg_lambda':hp.uniform('reg_lambda',0.0,5.0),
    #'boosting_type': hp.choice('boosting_type', ['gbdt', 'dart', 'goss', 'rf']), 
    #'cat_smooth': hp.choice('cat_smooth', [200, 400, 600, 800, 1000]), 
    'min_data_per_group':hp.quniform('min_data_per_group',200,1000,200),
    #'min_data_per_group': hp.choice('min_data_per_group', [200, 400, 600, 800, 1000]), 
}
'''

In [None]:
# Fixed LightGBM hyperparameter set. It has no 'min_data_per_group' entry and
# is not referenced anywhere else in the visible part of this file.
LightBGM_params = {
    "learning_rate": 0.023138520618280357,
    "min_child_samples": 21,
    "min_child_weight": 0.007646593938515828,
    "num_leaves": 210,
    "reg_alpha": 4.841326955256633,
    "reg_lambda": 2.3605570451667712,
    "n_estimators": 10000
}

In [None]:
class Model_LightGBMwithCats(Model):
    """LightGBM booster trained via ``lgb.train`` with ``cat_features`` declared categorical."""

    def __init__(self):
        self.model = None
        self.set_init_params(LightBGMwithCats_params)

    def reset_model(self, **params):
        """Store the training parameters for the next ``fit`` call.

        ``n_estimators``, objective, boosting type and metric are always
        overridden. ``random_state`` is taken from ``self.seed`` when a seed
        was set, otherwise defaults to 21 unless supplied.
        """
        params['n_estimators'] = 10000
        if hasattr(self, 'seed'):
            params['random_state'] = self.seed
        elif 'random_state' not in params:
            params['random_state'] = 21
        params.update(objective='cross_entropy', boosting='gbdt', metric='auc')
        self.params = params

    def fit(self, X, Y, VX, VY):
        """Train a booster on ``X``/``Y`` with early stopping.

        Both the training and validation sets are evaluated; training runs up
        to 10000 rounds with a patience of 150 rounds.

        Returns:
            The dict filled by ``lgb.record_evaluation``.
        """
        # Specify categorical features for lgb
        train_set = lgb.Dataset(X, label=Y, categorical_feature=cat_features)
        valid_set = lgb.Dataset(VX, label=VY, categorical_feature=cat_features)
        history = {}
        self.model = lgb.train(
            self.params,
            train_set,
            num_boost_round=10000,
            valid_sets=(train_set, valid_set),
            verbose_eval=500,
            early_stopping_rounds=150,
            callbacks=[lgb.record_evaluation(history)],
        )
        return history

    def objective(self, trial):
        """Optuna objective: out-of-fold ROC AUC on the module-level ``train``/``target``.

        Samples the hyperparameters, stores them, then trains one booster per
        fold over ``NFOLDS`` stratified folds.
        """
        oof_preds = np.zeros(train.shape[0])

        params = {
            'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.1),
            'min_child_weight': trial.suggest_loguniform("min_child_weight", 0.0001, 1.0),
            'max_depth': trial.suggest_int('max_depth', 15, 50, 5),
            'num_leaves': trial.suggest_int('num_leaves', 10, 255, 5),
            'min_child_samples': trial.suggest_int('min_child_samples', 10, 400, 10),
            'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.0, 1.0),
            'feature_fraction': trial.suggest_uniform('feature_fraction', 0.0, 1.0),
            'reg_alpha': trial.suggest_uniform('reg_alpha', 0.0, 10.0),
            'reg_lambda': trial.suggest_uniform('reg_lambda', 0.0, 5.0),
            'cat_smooth': trial.suggest_int('cat_smooth', 20, 100, 20),
            'min_data_per_group': trial.suggest_int('min_data_per_group', 50, 400, 50),
            'max_bin': trial.suggest_int('max_bin', 256, 1024, 128),
            'bagging_freq': trial.suggest_int('bagging_freq', 0, 2, 1),
            'cat_l2': trial.suggest_uniform('cat_l2', 5.0, 20.0),
        }
        self.reset_model(**params)

        splitter = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=21)
        for fit_rows, holdout_rows in splitter.split(train, target):
            fit_x, holdout_x = train.iloc[fit_rows], train.iloc[holdout_rows]
            fit_y, holdout_y = target.iloc[fit_rows], target.iloc[holdout_rows]

            self.fit(fit_x, fit_y, holdout_x, holdout_y)
            oof_preds[holdout_rows] = self.predict(holdout_x)

        return roc_auc_score(target, oof_preds)

In [None]:
# Default hyperparameters that Model_LightGBMwithCats stores as its init
# params. The keys are the same ones sampled in its objective().
LightBGMwithCats_params = {
    "learning_rate": 0.010051841635755903,
    "min_child_weight": 0.8478873301463337,
    "max_depth": 20,
    "num_leaves": 180,
    "min_child_samples": 140,
    "bagging_fraction": 0.38894237049923747,
    "feature_fraction": 0.25427393680284716,
    "reg_alpha": 7.0525046457500205,
    "reg_lambda": 0.3816397618302956,
    "cat_smooth": 60,
    "min_data_per_group": 250,
    "max_bin": 256,
    "bagging_freq": 0,
    "cat_l2": 6.345830496649996
}

In [None]:
class Model_CatBoost(Model):
    """Wrapper around ``cb.CatBoostClassifier`` trained on GPU with ``cat_features`` as categoricals."""

    def __init__(self):
        self.model = cb.CatBoostClassifier()

    def reset_model(self, **params):
        """Rebuild the classifier from ``params``.

        A fixed block of settings (file writing, overfitting detector type,
        grow policy, verbosity, metric, loss, iteration count, GPU device) is
        always applied on top. ``random_state`` is taken from ``self.seed``
        when a seed was set, otherwise defaults to 21 unless supplied.
        """
        del self.model
        params.update({
            'allow_writing_files': False,
            'od_type': 'Iter',
            'grow_policy': 'Depthwise',
            'silent': False,
            'eval_metric': 'AUC',
            'loss_function': "Logloss",
            'iterations': 10000,
            'task_type': "GPU",
            'devices': "0",
        })
        if hasattr(self, 'seed'):
            params['random_state'] = self.seed
        elif 'random_state' not in params:
            params['random_state'] = 21
        self.params = params
        self.model = cb.CatBoostClassifier(**params)

    def fit(self, X, Y, VX, VY):
        """Fit on ``X``/``Y``, early-stopping (150 rounds) on ``VX``/``VY``."""
        self.model.fit(
            X, Y,
            cat_features=cat_features,
            eval_set=[(VX, VY)],
            verbose=500,
            early_stopping_rounds=150
        )

    def predict(self, X):
        """Return the last column of ``predict_proba`` for ``X``."""
        probabilities = self.model.predict_proba(X)
        return probabilities[:, -1]

    def objective(self, trial):
        """Optuna objective: out-of-fold ROC AUC on the module-level ``train``/``target``.

        Samples the hyperparameters, rebuilds the model, then collects
        predictions over ``NFOLDS`` stratified folds.
        """
        oof_preds = np.zeros(train.shape[0])

        params = {
            'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.1),
            'bagging_temperature': trial.suggest_uniform('bagging_temperature', 0.0, 2.0),
            'depth': trial.suggest_int('depth', 6, 10, 1),
            'od_wait': trial.suggest_int('od_wait', 10, 200, 10),
            'l2_leaf_reg': trial.suggest_uniform('l2_leaf_reg', 1.0, 10.0),
            'penalties_coefficient': trial.suggest_uniform('penalties_coefficient', 1.0, 3.0),
        }
        self.reset_model(**params)

        splitter = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=21)
        for fit_rows, holdout_rows in splitter.split(train, target):
            fit_x, holdout_x = train.iloc[fit_rows], train.iloc[holdout_rows]
            fit_y, holdout_y = target.iloc[fit_rows], target.iloc[holdout_rows]

            self.fit(fit_x, fit_y, holdout_x, holdout_y)
            oof_preds[holdout_rows] = self.predict(holdout_x)

        return roc_auc_score(target, oof_preds)

In [None]:
class Model_TabNet(Model):
    """Wrapper around ``pytorch_tabnet``'s ``TabNetClassifier``."""

    def __init__(self):
        self.model = TabNetClassifier()
        self.set_init_params(TabNet_params)

    def reset_model(self, **params):
        """Rebuild the classifier from ``params``.

        ``params`` must contain ``'n_d'``; ``n_a`` is set equal to it. The
        optimizer, scheduler, mask type, verbosity and the module-level
        ``cat_idxs``/``cat_dims`` are always applied. ``seed`` is taken from
        ``self.seed`` when a seed was set, otherwise defaults to 21 unless
        supplied.
        """
        del self.model

        tmp_params = params
        tmp_params['n_a'] = tmp_params['n_d']
        tmp_params['optimizer_fn'] = torch.optim.Adam
        tmp_params['optimizer_params'] = dict(lr=2e-2, weight_decay=1e-5)
        tmp_params['mask_type'] = 'entmax'
        tmp_params['scheduler_params'] = dict(mode="min", patience=5, min_lr=1e-5, factor=0.9)
        tmp_params['scheduler_fn'] = torch.optim.lr_scheduler.ReduceLROnPlateau
        tmp_params['verbose'] = 10
        tmp_params['cat_idxs'] = cat_idxs
        tmp_params['cat_dims'] = cat_dims
        if hasattr(self, 'seed'):
            tmp_params['seed'] = self.seed
        elif 'seed' not in params:
            tmp_params['seed'] = 21
        self.params = tmp_params
        self.model = TabNetClassifier(**tmp_params)

    def predict(self, X):
        """Return the positive-class probability for each row of ``X``.

        ``TabNetClassifier.predict`` yields hard class labels, which collapses
        the ROC AUC computed from these predictions. Like the other classifier
        wrappers in this file, return the last ``predict_proba`` column.
        """
        X = X.values
        return self.model.predict_proba(X)[:, -1]

    def fit(self, X, Y, VX, VY):
        """Fit on ``X``/``Y`` (DataFrame/Series), monitoring AUC on train and validation data.

        Trains for at most 400 epochs with a patience of 20 epochs.
        """
        X = X.values
        Y = Y.values
        VX = VX.values
        VY = VY.values

        self.model.fit(
            X_train=X,
            y_train=Y,
            eval_set=[(X, Y), (VX, VY)],
            eval_name=['train', 'valid'],
            eval_metric=["auc"],
            max_epochs=400,
            patience=20,
            batch_size=1024, virtual_batch_size=128,
            num_workers=16,
            drop_last=False
        )

    def objective(self, trial):
        """Optuna objective: out-of-fold ROC AUC on the module-level ``train``/``target``.

        Samples the hyperparameters, rebuilds the model, then collects
        predictions over ``NFOLDS`` stratified folds.
        """
        oof_preds = np.zeros(train.shape[0])

        params = {
            'n_d': trial.suggest_int('n_d', 16, 32, 2),
            'n_steps': trial.suggest_int('n_steps', 1, 3, 1),
            'gamma': trial.suggest_uniform('gamma', 1.0, 2.0),
            'lambda_sparse': trial.suggest_loguniform("lambda_sparse", 0.0001, 1.0),
        }

        self.reset_model(**params)

        kf = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=21)
        for tr_idx, va_idx in kf.split(train, target):
            tr_x, va_x = train.iloc[tr_idx], train.iloc[va_idx]
            tr_y, va_y = target.iloc[tr_idx], target.iloc[va_idx]

            self.fit(tr_x, tr_y, va_x, va_y)
            oof_preds[va_idx] = self.predict(va_x)

        score = roc_auc_score(target, oof_preds)
        return score

In [None]:
# Default hyperparameters that Model_TabNet stores as its init params.
# NOTE(review): lambda_sparse here (~3.0e-05) lies below the 1e-4 lower bound
# searched in Model_TabNet.objective — confirm where this value came from.
TabNet_params = {
    "gamma": 1.6073252457780574,
    "lambda_sparse": 3.046142378479607e-05,
    "n_d": 16,
    "n_steps": 2,
    'optimizer_fn':torch.optim.Adam,
    'optimizer_params':dict(lr=2e-2, weight_decay=1e-5),
    'mask_type':'entmax',
    'scheduler_params':dict(mode="min", patience=5, min_lr=1e-5, factor=0.9,),
    'scheduler_fn':torch.optim.lr_scheduler.ReduceLROnPlateau,
    'verbose':10,
}

In [None]:
# Keep autograd's debugging aids out of the training loop.
# Assigning False to torch.autograd.detect_anomaly / profiler.profile /
# profiler.emit_nvtx / gradcheck / gradgradcheck does not disable anything; it
# only overwrites those callables with a bool and breaks any later use of
# them. Anomaly detection is the only one with a global switch, so turn it off
# through its setter. The profiler and gradient checkers only run when they
# are invoked explicitly.
torch.autograd.set_detect_anomaly(False)

**---------- Learning ----------**

In [None]:
# Parameters
# Number of stratified CV folds used by every model's objective().
NFOLDS = 7
logger.info(f'NFOLDS : {NFOLDS}')
# Number of Optuna trials run by the hyperparameter search.
MAX_EVALS = 100
logger.info(f'MAX_EVALS : {MAX_EVALS}')
# NOTE(review): presumably the number of seeds used for seed-averaged
# prediction; its use is not visible in this part of the file.
RANDOM_SEEDS = 50
# Log the value of RANDOM_SEEDS itself (this line used to print MAX_EVALS).
logger.info(f'RANDOM_SEEDS : {RANDOM_SEEDS}')
###########################################
# Model used for this run. Alternatives that can be swapped in here:
#   Model_DummyRegressor, Model_Ridge, Model_SGD, Model_HGB, Model_XGB,
#   Model_LightGBM, Model_CatBoost, Model_TabNet
model = Model_LightGBMwithCats()
logger.info(f'MODEL : {type(model).__name__}')

# Choose the feature set from the model's class name: `cat_features` for the
# two category-aware models, `one_hot_features` for TabNet, `loo_features`
# for everything else; `cont_features` is always appended.
if type(model).__name__ in ('Model_LightGBMwithCats', 'Model_CatBoost'):
    model_features = cat_features + cont_features
elif type(model).__name__ == 'Model_TabNet':
    model_features = one_hot_features + cont_features
else:
    model_features = loo_features + cont_features

# Restrict both frames to the chosen columns.
train = train[model_features]
test = test[model_features]
###########################################
# Stage switches: each flag gates one section of the notebook, and its state
# is written to the log right after it is set.
HYPERPARAMETERS_SEARCH = False
logger.info('HYPERPARAMETERS_SEARCH : ON' if HYPERPARAMETERS_SEARCH else 'HYPERPARAMETERS_SEARCH : OFF')
###########################################
BESTPARAMETER_ESTIMATE = False
logger.info('BESTPARAMETER_ESTIMATE : ON' if BESTPARAMETER_ESTIMATE else 'BESTPARAMETER_ESTIMATE : OFF')
###########################################
FEATURES_SEARCH = False
logger.info('FEATURES_SEARCH : ON' if FEATURES_SEARCH else 'FEATURES_SEARCH : OFF')
###########################################
RANDOM_SEEDS_PREDICT = True
logger.info('RANDOM_SEEDS_PREDICT : ON' if RANDOM_SEEDS_PREDICT else 'RANDOM_SEEDS_PREDICT : OFF')
###########################################
ENSEMBLE = False
logger.info('ENSEMBLE : ON' if ENSEMBLE else 'ENSEMBLE : OFF')

**---------- HYPERPARAMETERS_SEARCH ----------**

In [None]:
if HYPERPARAMETERS_SEARCH:
    # Time the whole search.
    start_time = time.perf_counter()

    # Maximise the value returned by model.objective over MAX_EVALS trials.
    study = optuna.create_study(direction='maximize')
    study.optimize(model.objective, n_trials=MAX_EVALS)

    execution_time = time.perf_counter() - start_time
    print(f"Learning time:{execution_time/60}[min]")

    # Record which model was tuned and how long it took.
    logger.info(f'Model : {model.__class__.__name__}')
    logger.info(f'Learning time : {execution_time/60}[min]')

In [None]:
# Show the study's optimization-history plot and save it as an HTML file.
if HYPERPARAMETERS_SEARCH:
    fig = optuna.visualization.plot_optimization_history(study)
    fig.show()
    fig.write_html("plot_optimization_history.html")

In [None]:
# Show the study's parallel-coordinate plot (widened to 2000x600) and save it
# as an HTML file.
if HYPERPARAMETERS_SEARCH:
    fig = optuna.visualization.plot_parallel_coordinate(study)
    fig.update_layout(width=2000, height=600)
    fig.show()
    fig.write_html("plot_parallel_coordinate.html")

In [None]:
# Show the study's slice plot and save it as an HTML file.
if HYPERPARAMETERS_SEARCH:
    fig = optuna.visualization.plot_slice(study)
    fig.show()
    fig.write_html("plot_slice.html")

In [None]:
# Show the study's parameter-importance plot and save it as an HTML file.
if HYPERPARAMETERS_SEARCH:
    fig = optuna.visualization.plot_param_importances(study)
    fig.show()
    fig.write_html("plot_param_importances.html")

In [None]:
if HYPERPARAMETERS_SEARCH:
    # Contour plot over the first five parameters returned by
    # get_param_importances (presumably the most important ones - confirm the
    # ordering against the Optuna docs), enlarged to 1600x1200.
    fig = optuna.visualization.plot_contour(
        study,
        params=list(optuna.importance.get_param_importances(study))[:5],
    )
    fig.update_layout(width=1600, height=1200)
    fig.show()
    fig.write_html("plot_contour.html")

In [None]:
if HYPERPARAMETERS_SEARCH:
    # Persist the best trial's parameters as indented JSON.
    Path("best_params.json").write_text(json.dumps(study.best_params, indent=4))

In [None]:
# Use the searched parameters when a study was run, otherwise the model's
# own initial parameters.
best_params = study.best_params if HYPERPARAMETERS_SEARCH else model.get_init_params()

**---------- BESTPARAMETER_ESTIMATE ----------**

In [None]:
if BESTPARAMETER_ESTIMATE:
    # NOTE(review): this loop uses plain KFold while the other CV loops in this
    # notebook use StratifiedKFold - confirm that is intentional.
    model.reset_model(**best_params)

    # Out-of-fold predictions for train, fold-averaged predictions for test.
    oof_preds = np.zeros(len(train))
    test_preds = np.zeros(len(test))

    kf = KFold(n_splits=NFOLDS, shuffle=True, random_state=21)
    for tr_idx, va_idx in kf.split(train):
        tr_x, tr_y = train.iloc[tr_idx], target.iloc[tr_idx]
        va_x, va_y = train.iloc[va_idx], target.iloc[va_idx]

        model.fit(tr_x, tr_y, va_x, va_y)
        oof_preds[va_idx] = model.predict(va_x)

        # Each fold's model contributes 1/NFOLDS of the test prediction.
        test_preds += model.predict(test) / NFOLDS

In [None]:
# Log the ROC AUC of the out-of-fold predictions against the target.
if BESTPARAMETER_ESTIMATE:
    logger.info(f'AUC : {roc_auc_score(target, oof_preds)}')

In [None]:
# Plot the out-of-fold results, labelled with the model's class name.
if BESTPARAMETER_ESTIMATE:
    plot_results(model.__class__.__name__, target, oof_preds)

In [None]:
if BESTPARAMETER_ESTIMATE:
    # Calibration curve of the out-of-fold predictions, computed with 100 bins.
    prob_true, prob_pred = calibration_curve(target, oof_preds, n_bins=100)

    fig, ax1 = plt.subplots(figsize=(8.0, 8.0))
    # Draw the calibration plot.
    ax1.plot(prob_pred, prob_true, label='calibration plot', marker='.', color='skyblue')
    # Draw the 45-degree reference line.
    ax1.plot([0, 1], [0, 1], label='ideal', linestyle='--', color='limegreen')
    ax1.legend(loc='upper left', bbox_to_anchor=(1.12, 1))
    plt.show()

In [None]:
# Put the test predictions into the submission frame's `target` column and
# write it to submission.csv.
if BESTPARAMETER_ESTIMATE:
    submission['target'] = test_preds
    submission.to_csv('submission.csv')

**---------- FEATURES_SEARCH ----------**

In [None]:
# Features already selected; the FEATURES_SEARCH loop appends to this list.
select_features = [
    'cat16', 'cat1', 'cat10', 'cont5', 'cat0', 'cat15',
    'cat8', 'cat7', 'cat11', 'cont4', 'cat4', 'cat14',
    'cont6', 'cont2', 'cat18', 'cat2', 'cat6', 'cat17',
    'cont10', 'cont8', 'cat9', 'cont0', 'cont3', 'cont9',
    'cont7', 'cat13', 'cont1', 'cat5', 'cat12',
]

In [None]:
#candidate_features = cont_features + cat_features

In [None]:
#candidate_features = [i for i in candidate_features if i not in select_features]

In [None]:
# Features still to be tried by the FEATURES_SEARCH loop.
candidate_features = ['cat3']

In [None]:
# Greedy forward feature selection: every outer round tries each remaining
# candidate on top of `select_features`, scores it by out-of-fold AUC and keeps
# the best one. The search stops once the `hour` time budget is exceeded.
if FEATURES_SEARCH:
    hour = 9.5  # time budget in hours, compared against the elapsed time below

    #----- Timer Set -----#
    start_time = time.perf_counter()

    # Rebinds the notebook-level `model` to a fresh LightGBM-with-categories model.
    model = Model_LightGBMwithCats()

    results = []  # one [feature, auc] pair per completed round

    # At most one round per initial candidate; `i` itself is not used.
    for i in tqdm(range(len(candidate_features))):
        oof_preds = np.zeros(train.shape[0])

        # Best candidate seen in this round.
        max_auc = 0
        max_feature = ''

        for f in candidate_features:
            print("candidate_features : ---------------------------------------", f)
            # Temporarily add the candidate; it is removed again after scoring.
            select_features.append(f)
            print(select_features)

            tmp_train = train[select_features]

            model.reset_model(**LightBGMwithCats_params)

            #kf = KFold(n_splits=NFOLDS, shuffle=True, random_state=21)
            kf = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=21)
        
            for fold_, (tr_idx, va_idx) in enumerate(kf.split(tmp_train, target)):
                print("fold : ---------------------------------------", fold_)
                tr_x, va_x = tmp_train.iloc[tr_idx], tmp_train.iloc[va_idx]
                tr_y, va_y = target.iloc[tr_idx], target.iloc[va_idx]

                model.fit(tr_x, tr_y, va_x, va_y)
                oof_preds[va_idx]= model.predict(va_x)

            tmp_auc = roc_auc_score(target, oof_preds)
            print('auc : ' + str(tmp_auc))
            logger.info(f'auc : {tmp_auc}')
            if max_auc < tmp_auc:
                max_auc = tmp_auc
                max_feature = f
            select_features.remove(f)

            # Time budget exceeded: abandon this round (its partial result is
            # not recorded) and fall through to the outer `break`.
            if hour*60*60 < time.perf_counter() - start_time:
                print('-- BREAK inner loop')
                break
        else:
            # Runs only when the inner loop finished without `break`: commit
            # the round's best feature and move on to the next round.
            results.append([max_feature, max_auc])
            plot_FEATURES_SEARCH(results)
            select_features.append(max_feature)
            candidate_features.remove(max_feature)
            continue
        
        # Reached only when the inner loop was broken out of.
        print('BREAK outer loop')
        break

        #results.append([max_feature, max_auc])
        #plot_FEATURES_SEARCH(results)
        #select_features.append(max_feature)
        #candidate_features.remove(max_feature)

    #----- Timer Stop -----#
    execution_time = time.perf_counter() - start_time
    print("Learning time:{0}".format(execution_time/60) + "[min]")

    #----- Log -----#
    logger.info(f'Learning time : {execution_time/60}[min]')

In [None]:
# Re-plot the search results and write them, together with the current
# train / test / target data, to CSV files in the working directory.
if FEATURES_SEARCH:
    plot_FEATURES_SEARCH(results)
    pd.DataFrame(results).to_csv('FEATURES_SEARCH.csv')
    train.to_csv('train.csv')
    test.to_csv('test.csv')
    target.to_csv('target.csv')

**---------- RANDOM_SEEDS_PREDICT ----------**

In [None]:
# Create the prediction output folders. `exist_ok=True` makes the cell safe to
# re-run, whereas a plain `mkdir` reports an error for an existing folder.
os.makedirs('TEST', exist_ok=True)
os.makedirs('OOF', exist_ok=True)

In [None]:
if RANDOM_SEEDS_PREDICT:
    # One full CV run per model seed. The fold assignment is fixed
    # (random_state=21), so only the seed given to the model changes.
    seeds = list(range(RANDOM_SEEDS))

    # Make sure the output folders exist before training starts, so a missing
    # folder cannot make the save step fail after a completed CV run.
    os.makedirs('TEST', exist_ok=True)
    os.makedirs('OOF', exist_ok=True)

    #----- Timer Set -----#
    start_time = time.perf_counter()

    for i, seed in enumerate(tqdm(seeds)):
        test_preds = np.zeros(test.shape[0])
        oof_preds = np.zeros(train.shape[0])

        model.set_seed(seed)

        kf = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=21)
        for fold_, (tr_idx, va_idx) in enumerate(kf.split(train, target)):
            print("fold : ---------------------------------------", fold_)
            tr_x, va_x = train.iloc[tr_idx], train.iloc[va_idx]
            tr_y, va_y = target.iloc[tr_idx], target.iloc[va_idx]

            model.fit(tr_x, tr_y, va_x, va_y)
            oof_preds[va_idx] = model.predict(va_x)

            # Each fold's model contributes 1/NFOLDS of the test prediction.
            test_preds += model.predict(test) / NFOLDS

        tmp_auc = roc_auc_score(target, oof_preds)
        print('auc : ' + str(tmp_auc))
        logger.info(f'{i},{tmp_auc}')

        # One file per seed index, later read back by the ENSEMBLE section.
        test_preds_pass = f'TEST/{model.__class__.__name__}_test_preds[{i}].csv'
        oof_preds_pass = f'OOF/{model.__class__.__name__}_oof_preds[{i}].csv'

        np.savetxt(test_preds_pass, test_preds, delimiter=',')
        np.savetxt(oof_preds_pass, oof_preds, delimiter=',')

    #----- Timer Stop -----#
    # The timer was started but never reported; report it the same way the
    # other sections do.
    execution_time = time.perf_counter() - start_time
    print("Learning time:{0}".format(execution_time/60) + "[min]")

    #----- Log -----#
    logger.info(f'Learning time : {execution_time/60}[min]')


**---------- ENSEMBLE ----------**

In [None]:
def read_pred(directory_path):
    """Load every prediction file found in ``directory_path``.

    Args:
        directory_path: Directory containing comma-delimited text files
            readable by ``np.loadtxt``.

    Returns:
        Dict mapping each file name to its loaded array, in sorted file-name
        order.
    """
    # List the directory instead of ``os.chdir``-ing into it: changing the
    # process working directory made later relative writes land inside the
    # prediction folder and broke relative ``directory_path`` values.
    ret = {}
    for file_name in sorted(os.listdir(directory_path)):
        file_path = os.path.join(directory_path, file_name)
        if not os.path.isfile(file_path):
            # Skip sub-directories; only regular files hold predictions.
            continue
        ret[file_name] = np.loadtxt(file_path, delimiter=',')
    return ret

In [None]:
def mean_pred(pred_dict):
    """Return the element-wise average of all arrays in ``pred_dict``.

    Args:
        pred_dict: Non-empty dict of equally long 1-D prediction arrays.

    Returns:
        Array with the same length as the first entry, holding the sum of
        every array divided by the number of entries.
    """
    arrays = list(pred_dict.values())
    count = len(arrays)
    total = np.zeros(arrays[0].shape[0])
    for arr in arrays:
        total += arr / count
    return total

In [None]:
def stack_pred(oof_dict, test_dict):
    """Stack base predictions with a calibrated ridge classifier.

    Each entry of ``oof_dict`` becomes one meta-feature column for training and
    the matching entry of ``test_dict`` the same column for the test set. The
    meta-model is evaluated with an ``NFOLDS``-fold CV against the module-level
    ``target``.

    Args:
        oof_dict: Dict of out-of-fold prediction arrays (one per base run).
        test_dict: Dict of test prediction arrays, one per base run. Its keys
            are assumed to sort in the same run order as ``oof_dict``'s keys
            (true for parallel ``..._oof_preds[i]`` / ``..._test_preds[i]``
            file names).

    Returns:
        Tuple ``(oof_preds, test_preds)``: out-of-fold positive-class
        probabilities of the meta-model and its fold-averaged test
        probabilities.

    Raises:
        ValueError: If the two dicts do not hold the same number of entries.
    """
    if len(oof_dict) != len(test_dict):
        raise ValueError(
            f'oof_dict has {len(oof_dict)} entries but test_dict has {len(test_dict)}'
        )

    # Pair columns positionally by sorted key and use plain arrays. The two
    # dicts are keyed by different file names in arbitrary listing order, so
    # key-named DataFrame columns gave the meta-model mismatched feature names
    # (and possibly a different column order) between fit and test prediction.
    tmp_train = np.column_stack([oof_dict[key] for key in sorted(oof_dict)])
    tmp_test = np.column_stack([test_dict[key] for key in sorted(test_dict)])

    oof_preds = np.zeros(tmp_train.shape[0])
    test_preds = np.zeros(tmp_test.shape[0])

    model = CalibratedClassifierCV(
        RidgeClassifier(random_state=21),
        cv=3
    )

    kf = KFold(n_splits=NFOLDS, shuffle=True, random_state=21)
    for tr_idx, va_idx in kf.split(tmp_train):
        model.fit(tmp_train[tr_idx], target.iloc[tr_idx])

        # Last predict_proba column is the probability of the last class.
        oof_preds[va_idx] = model.predict_proba(tmp_train[va_idx])[:, -1]
        test_preds += model.predict_proba(tmp_test)[:, -1] / NFOLDS

    return oof_preds, test_preds

In [None]:
# Load the saved out-of-fold and test prediction files from Google Drive.
if ENSEMBLE:
    oof_dict = read_pred('/content/drive/MyDrive/Kaggle/Tabular_Playground_Series_-_Mar_2021/Data/OOF')
    test_dict = read_pred('/content/drive/MyDrive/Kaggle/Tabular_Playground_Series_-_Mar_2021/Data/TEST')

In [None]:
if ENSEMBLE:
    # AUC of every individual out-of-fold prediction file.
    for key in oof_dict:
        print(f'{key} : {roc_auc_score(target, oof_dict[key])}')

    # Simple average across all files.
    print('----------')
    oof_mean = mean_pred(oof_dict)
    test_mean = mean_pred(test_dict)
    print(f'mean : {roc_auc_score(target, oof_mean)}')

    # Stacked meta-model on top of the same files.
    print('----------')
    oof_stack, test_stack = stack_pred(oof_dict, test_dict)
    print(f'stack : {roc_auc_score(target, oof_stack)}')

In [None]:
# Write one submission file per ensemble method. The same `submission` frame
# is reused: its `target` column is overwritten between the two writes.
if ENSEMBLE:
    submission['target'] = test_mean
    submission.to_csv('submission_mean.csv')
    submission['target'] = test_stack
    submission.to_csv('submission_stack.csv')

**---------- END ----------**

In [None]:
# Destination folder for this run's artefacts:
#   <Result dir>/<timestamp in Asia/Tokyo>_<run label>
_RESULT_DIR = '/content/drive/MyDrive/Kaggle/Tabular_Playground_Series_-_Mar_2021/Result/'
_timestamp = datetime.now(pytz.timezone('Asia/Tokyo')).strftime("%Y%m%d_%H:%M")
if FEATURES_SEARCH:
    _run_label = 'FEATURES_SEARCH'
elif ENSEMBLE:
    # This branch used to append 'ENSEMBLE' without the '_' separator that
    # every other branch puts after the timestamp.
    _run_label = 'ENSEMBLE'
elif RANDOM_SEEDS_PREDICT:
    _run_label = model.__class__.__name__ + '_RANDOM_SEEDS:' + str(RANDOM_SEEDS)
else:
    _run_label = model.__class__.__name__ + '_NFOLDS:' + str(NFOLDS)
LOG_PATH = _RESULT_DIR + _timestamp + '_' + _run_label
# Create the result folder and copy the run's artefacts into it. A copy is
# attempted for every artefact regardless of which stages actually ran.
!mkdir $LOG_PATH
# Log, result plot and submission files.
!cp /content/log.log $LOG_PATH
!cp /content/result.png $LOG_PATH
!cp /content/submission.csv $LOG_PATH
!cp /content/submission_mean.csv $LOG_PATH
!cp /content/submission_stack.csv $LOG_PATH
# Hyperparameter-search outputs.
!cp /content/plot_optimization_history.html $LOG_PATH
!cp /content/plot_parallel_coordinate.html $LOG_PATH
!cp /content/plot_slice.html $LOG_PATH
!cp /content/plot_param_importances.html $LOG_PATH
!cp /content/plot_contour.html $LOG_PATH
!cp /content/best_params.json $LOG_PATH
# Feature-search outputs.
!cp /content/FEATURES_SEARCH.csv $LOG_PATH
!cp /content/FEATURES_SEARCH.png $LOG_PATH
!cp /content/train.csv $LOG_PATH
!cp /content/test.csv $LOG_PATH
!cp /content/target.csv $LOG_PATH
# Per-seed prediction folders.
!cp -r /content/TEST $LOG_PATH
!cp -r /content/OOF $LOG_PATH
#LOG_PATH = LOG_PATH.replace('\\','')