# Strategy

- Preprocessing
    - RankGauss
    - PCA + Existing Features
    - Variance Threshold (drop low-variance features)
- Model
    - Multi head ResNet
    - TabNet (pytorch)
- Learning
    - Pre-train with non-scored label + public test pseudo label
    - Optimizer: AdamW with weight_decay
    - Label smoothing
- Prediction
    - Ensemble above with weight optimization
    - With clipping

# Library

In [1]:
import warnings

# Suppress every Python warning for the rest of the notebook run.
warnings.filterwarnings("ignore")

In [2]:
import sys

sys.path.append("../input/iterative-stratification/iterative-stratification-master")
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

sys.path.append("../input/autograd")
import autograd.numpy as np
from autograd import grad

sys.path.append("../input/pytorchtabnet")
from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetRegressor

In [3]:
import datetime
import gc
import os
import random
from time import time

import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow_addons as tfa
import torch
import torch.nn.functional as F
import torch.optim as optim
from scipy.optimize import fsolve, minimize
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import QuantileTransformer
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from torch import nn
from torch.optim.lr_scheduler import ReduceLROnPlateau as torch_ReduceLROnPlateau

In [4]:
# Plain numpy stays disabled here: `np` is already bound to `autograd.numpy` by an earlier import cell.
# import numpy as np
import optuna

In [5]:
# Switches for TensorFlow acceleration features.
MIXED_PRECISION = False
XLA_ACCELERATE = True

if MIXED_PRECISION:
    from tensorflow.keras.mixed_precision import experimental as mixed_precision

    # `tpu` is not defined anywhere in this notebook; look it up defensively so that
    # enabling MIXED_PRECISION does not fail with a NameError. A truthy `tpu` defined
    # by an earlier cell still selects the bfloat16 policy.
    if globals().get("tpu"):
        policy = tf.keras.mixed_precision.experimental.Policy("mixed_bfloat16")
    else:
        policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16")
    mixed_precision.set_policy(policy)
    print("Mixed precision enabled")

if XLA_ACCELERATE:
    # Turn on XLA JIT compilation for TensorFlow graphs.
    tf.config.optimizer.set_jit(True)
    print("Accelerated Linear Algebra enabled")

Accelerated Linear Algebra enabled


In [6]:
# Ask CUDA to launch kernels synchronously (a debugging aid; presumably left on so GPU errors
# surface at the failing call — NOTE(review): this slows GPU work, confirm it is still wanted).
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Functions

In [7]:
def fix_seed(seed=2020):
    """Seed every random number generator this notebook relies on.

    Covers Python's `random`, the hash seed environment variable, `np.random`,
    TensorFlow and PyTorch. When CUDA is available the CUDA generators are seeded
    as well and cuDNN is switched to deterministic, non-benchmarking mode.

    Parameters
    ----------
    seed : int
        Seed value applied to all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


# Base seed for the notebook; per-run seeds are derived from it later.
random_seed = 22
fix_seed(random_seed)

In [8]:
# https://www.kaggle.com/c/lish-moa/discussion/189857#1043953

# Prediction clipping thresholds (in this part of the notebook they are only referenced by the
# disabled clip inside `logloss`).
p_min = 0.001
p_max = 0.999

# Evaluation metric: mean binary log loss without label smoothing.
# NOTE: the clipping step below is commented out, so predictions are used unclipped.
def logloss(y_true, y_pred):
    """Return the mean binary cross-entropy of `y_pred` against `y_true` via the Keras backend."""
    # y_pred = tf.clip_by_value(y_pred, p_min, p_max)
    return -K.mean(y_true * K.log(y_pred) + (1 - y_true) * K.log(1 - y_pred))

In [9]:
# [Fast Numpy Log Loss] https://www.kaggle.com/gogo827jz/optimise-blending-weights-4-5x-faster-log-loss
def metric(y_true, y_pred):
    """Column-averaged binary log loss.

    Predictions are clipped to [1e-7, 1 - 1e-7], the log loss is computed per
    column, and the per-column losses are averaged.

    Parameters
    ----------
    y_true, y_pred : 2-D arrays of identical shape (rows, columns).

    Returns
    -------
    float
        Mean of the per-column log losses.
    """
    clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)
    n_cols = y_pred.shape[1]

    def column_loss(j):
        truth, prob = y_true[:, j], clipped[:, j]
        return -np.mean(truth * np.log(prob) + (1 - truth) * np.log(1 - prob))

    return sum(column_loss(j) for j in range(n_cols)) / n_cols

In [10]:
def blend(size, weights, oof):
    """Weighted sum of out-of-fold prediction frames.

    Parameters
    ----------
    size : tuple
        Shape of the result array.
    weights : sequence of float
        One weight per entry of `oof`, matched by iteration order.
    oof : dict
        Maps a model name to an object exposing `.values` (an array of shape `size`).

    Returns
    -------
    ndarray of shape `size`.
    """
    total = np.zeros(size)
    for position, frame in enumerate(oof.values()):
        total += weights[position] * frame.values
    return total

In [11]:
class LogitsLogLoss(Metric):
    """
    Log loss metric for models that output raw logits: a sigmoid is applied
    to the predictions before the loss is computed.
    """

    def __init__(self):
        self._name = "logits_ll"
        self._maximize = False

    def __call__(self, y_true, y_pred):
        """
        Compute the mean binary log loss of logit predictions.

        Parameters
        ----------
        y_true: np.ndarray
            Target matrix or vector
        y_pred: np.ndarray
            Raw (pre-sigmoid) score matrix or vector

        Returns
        -------
            float
            LogLoss of predictions vs targets.
        """
        # Sigmoid turns the raw scores into probabilities.
        probabilities = 1 / (1 + np.exp(-y_pred))
        # 1e-15 keeps both logarithms finite when a probability saturates at 0 or 1.
        log_likelihood = (1 - y_true) * np.log(1 - probabilities + 1e-15) + y_true * np.log(probabilities + 1e-15)
        return np.mean(-log_likelihood)

# Load Data

In [12]:
# Competition data: train/test features, scored and non-scored targets, and the sample submission.
train_df = pd.read_csv("../input/lish-moa/train_features.csv")
test_df = pd.read_csv("../input/lish-moa/test_features.csv")
target_df = pd.read_csv("../input/lish-moa/train_targets_scored.csv")
non_target_df = pd.read_csv("../input/lish-moa/train_targets_nonscored.csv")
submit_df = pd.read_csv("../input/lish-moa/sample_submission.csv")

# Extra inputs for pseudo labelling: test features from a separate dataset and a submission file
# whose predictions are appended to the targets in a later cell.
pub_test_df = pd.read_csv("../input/moapublictest/test_features.csv")
pub_submit_df = pd.read_csv("../input/moablendblendblend/submission.csv")

In [13]:
# Work on copies so the frames loaded from disk stay untouched.
train = train_df.copy()
test = test_df.copy()
ss = submit_df.copy()

pub_test = pub_test_df.copy()
pub_ss = pub_submit_df.copy()

In [14]:
# Append the public-test rows to the training features and their predicted labels to the
# targets (pseudo labelling).
# ignore_index=True gives both frames a fresh 0..N-1 index; without it the appended rows
# repeat index labels of the original rows, which makes the boolean-mask filtering that
# follows depend on both frames carrying identical duplicated indexes.
# NOTE(review): this assumes pub_ss rows are in the same order as pub_test rows — confirm.
train = pd.concat([train, pub_test], ignore_index=True)
target_df = pd.concat([target_df, pub_ss], ignore_index=True)

In [15]:
# Preview the submission frame used as pseudo labels.
pub_ss

Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,...,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,0.001630,0.001773,0.002477,0.013613,0.017482,0.003867,0.002386,0.006600,0.001153,...,0.001653,0.002699,0.004204,0.001432,0.001138,0.001603,0.001762,0.002517,0.008489,0.002085
1,id_001897cda,0.001443,0.002091,0.002192,0.002889,0.001552,0.002944,0.003545,0.009351,0.009536,...,0.001777,0.002289,0.003843,0.000953,0.012831,0.001640,0.005801,0.002020,0.003870,0.003743
2,id_002429b5b,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,id_00276f245,0.001691,0.001618,0.002519,0.011536,0.010732,0.004092,0.003335,0.003608,0.001158,...,0.001496,0.001826,0.003506,0.019467,0.006121,0.001640,0.002532,0.002263,0.001763,0.002498
4,id_0027f1083,0.001914,0.001948,0.002404,0.011865,0.016432,0.003054,0.004709,0.002078,0.001254,...,0.001585,0.001313,0.003165,0.002510,0.001409,0.001645,0.002604,0.002245,0.000730,0.001857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3977,id_ff7004b87,0.001377,0.001558,0.001461,0.003053,0.006736,0.002350,0.001215,0.003282,0.001440,...,0.001453,0.004201,0.002171,0.118851,0.007058,0.001545,0.005873,0.001801,0.002117,0.002088
3978,id_ff925dd0d,0.003097,0.002258,0.001451,0.007406,0.021323,0.005489,0.005110,0.004447,0.001555,...,0.001218,0.001697,0.002843,0.002327,0.002278,0.001539,0.002104,0.001877,0.001059,0.002098
3979,id_ffb710450,0.001730,0.001327,0.001455,0.009516,0.032527,0.004954,0.002966,0.004133,0.000729,...,0.001056,0.001165,0.002498,0.002348,0.001464,0.001172,0.001288,0.001693,0.001100,0.001546
3980,id_ffbb869f2,0.001697,0.001459,0.001544,0.019809,0.023506,0.004427,0.004441,0.002557,0.001029,...,0.001086,0.000706,0.002753,0.001079,0.001576,0.001093,0.001453,0.001938,0.000678,0.002631


# Preprocessing

In [16]:
# Encode the dose category as an integer: D1 -> 0, D2 -> 1.
train.loc[:, "cp_dose"] = train.loc[:, "cp_dose"].map({"D1": 0, "D2": 1})
test.loc[:, "cp_dose"] = test.loc[:, "cp_dose"].map({"D1": 0, "D2": 1})

In [17]:
# Encode the treatment duration as an ordinal: 24 -> 0, 48 -> 1, 72 -> 2.
train.loc[:, "cp_time"] = train.loc[:, "cp_time"].map({24: 0, 48: 1, 72: 2})
test.loc[:, "cp_time"] = test.loc[:, "cp_time"].map({24: 0, 48: 1, 72: 2})

## cp_type が ctl_vehicle なものは MoA を持たないので、学習から除外する

In [18]:
# The row masks are derived from train["cp_type"], so the label frames are filtered first
# and `train` itself is filtered last.
target_df = target_df.loc[train["cp_type"] != "ctl_vehicle"].reset_index(drop=True)
# non_target_df was not extended with the public-test rows, so its mask only uses the first
# train_df.shape[0] rows of `train`.
non_target_df = non_target_df.loc[train[: train_df.shape[0]]["cp_type"] != "ctl_vehicle"].reset_index(drop=True)

train = train.loc[train["cp_type"] != "ctl_vehicle"].reset_index(drop=True)

In [19]:
# cp_type has served its purpose (filtering training rows above); remove it from the feature frames.
train = train.drop("cp_type", axis=1)
test = test.drop("cp_type", axis=1)

In [20]:
# Remove the sample identifier column so only numeric feature / label columns remain.
del train["sig_id"]
del target_df["sig_id"]
del non_target_df["sig_id"]
del test["sig_id"]
del ss["sig_id"]

In [21]:
# Preview the training features after filtering and column removal.
train

Unnamed: 0,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,...,c-90,c-91,c-92,c-93,c-94,c-95,c-96,c-97,c-98,c-99
0,0,0,1.0620,0.5577,-0.2479,-0.6208,-0.1944,-1.0120,-1.0220,-0.0326,...,0.2862,0.2584,0.8076,0.5523,-0.1912,0.6584,-0.3981,0.2139,0.3801,0.4176
1,2,0,0.0743,0.4087,0.2991,0.0604,1.0190,0.5207,0.2341,0.3372,...,-0.4265,0.7543,0.4708,0.0230,0.2957,0.4899,0.1522,0.1241,0.6077,0.7371
2,1,0,0.6280,0.5817,1.5540,-0.0764,-0.0323,1.2390,0.1715,0.2155,...,-0.7250,-0.6297,0.6103,0.0223,-1.3240,-0.3174,-0.6417,-0.2187,-1.4080,0.6931
3,1,0,-0.5138,-0.2491,-0.2656,0.5288,4.0620,-0.8095,-1.9590,0.1792,...,-2.0990,-0.6441,-5.6300,-1.3780,-0.8632,-1.2880,-1.6210,-0.8784,-0.3876,-0.8154
4,2,1,-0.3254,-0.4009,0.9700,0.6919,1.4180,-0.8244,-0.2800,-0.1498,...,0.0042,0.0048,0.6670,1.0690,0.5523,-0.3031,0.1094,0.2885,-0.3786,0.7125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,0,0,0.4571,-0.5743,3.3930,-0.6202,0.8557,1.6240,0.0640,-0.6316,...,-1.1790,-0.6422,-0.4367,0.0159,-0.6539,-0.4791,-1.2680,-1.1280,-0.4167,-0.6600
25568,0,0,-0.5885,-0.2548,2.5850,0.3456,0.4401,0.3107,-0.7437,-0.0143,...,0.0210,0.5780,-0.5888,0.8057,0.9312,1.2730,0.2614,-0.2790,-0.0131,-0.0934
25569,2,0,-0.3985,-0.1554,0.2677,-0.6813,0.0152,0.4791,-0.0166,0.7501,...,0.4418,0.9153,-0.1862,0.4049,0.9568,0.4666,0.0461,0.5888,-0.4205,-0.1504
25570,1,1,-1.0960,-1.7750,-0.3977,1.0160,-1.3350,-0.2207,-0.3611,-1.3020,...,0.3079,-0.4473,-0.8192,0.7785,0.3133,0.1286,-0.2618,0.5074,0.7430,-0.0484


In [22]:
# Sanity check: `train` and `target_df` should have the same number of rows.
print(train.shape)
print(target_df.shape)
print(non_target_df.shape)

# Likewise for the test features and the submission template.
print(test.shape)
print(ss.shape)

(25572, 874)
(25572, 206)
(21948, 402)
(3982, 874)
(3982, 206)


## Rank Gauss

https://www.kaggle.com/nayuts/moa-pytorch-nn-pca-rankgauss

連続値を特定の範囲の閉域に押し込めて、分布の偏りを解消する方法です。

In [23]:
# Column groups, taken from the original training frame's column names.
g_cols = [name for name in train_df.columns if name.startswith("g-")]
c_cols = [name for name in train_df.columns if name.startswith("c-")]

# RankGauss: one quantile transformer per column, fitted on the training values only and
# then applied to both the training and the test values of that column.
for col in g_cols + c_cols:
    transformer = QuantileTransformer(n_quantiles=100, random_state=random_seed, output_distribution="normal")

    # scikit-learn expects 2-D input, so each column becomes an (n_rows, 1) array.
    train_column = train[col].values.reshape(-1, 1)
    test_column = test[col].values.reshape(-1, 1)

    transformer.fit(train_column)

    train[col] = transformer.transform(train_column).ravel()
    test[col] = transformer.transform(test_column).ravel()

In [24]:
# Preview the training features after the RankGauss transform.
train

Unnamed: 0,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,...,c-90,c-91,c-92,c-93,c-94,c-95,c-96,c-97,c-98,c-99
0,0,0,1.124260,0.896698,-0.436214,-0.965311,-0.287443,-1.016437,-1.360774,-0.045876,...,0.428869,0.384250,1.300482,0.879422,-0.206096,1.046155,-0.479268,0.339234,0.583214,0.696712
1,2,0,0.117451,0.667759,0.260124,0.097531,1.204172,0.692876,0.356691,0.559630,...,-0.499745,1.147297,0.728062,0.089253,0.453665,0.770909,0.226300,0.202945,0.955497,1.219730
2,1,0,0.777229,0.935347,1.414044,-0.113563,-0.025489,1.494313,0.277364,0.357917,...,-0.800373,-0.721883,0.960080,0.088259,-1.182700,-0.358059,-0.732238,-0.253014,-1.085791,1.140342
3,1,0,-0.749489,-0.299404,-0.459100,0.774708,2.344556,-0.856449,-2.323390,0.298781,...,-1.391931,-0.736149,-1.612415,-1.219207,-0.912980,-1.194806,-1.288428,-0.950502,-0.445204,-0.884754
4,2,1,-0.460555,-0.508226,0.959313,0.984009,1.451890,-0.867329,-0.342599,-0.234770,...,0.038727,0.021330,1.056779,1.734597,0.843756,-0.341198,0.169668,0.451146,-0.434772,1.174162
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,0,0,0.599819,-0.723658,2.333805,-0.964385,1.075072,1.786462,0.131113,-0.812649,...,-1.126325,-0.734273,-0.505298,0.079622,-0.730794,-0.547424,-1.172338,-1.131847,-0.476173,-0.740089
25568,0,0,-0.865375,-0.307222,1.988666,0.523322,0.644383,0.437283,-0.989775,-0.015526,...,0.062985,0.868007,-0.660578,1.300272,1.418983,2.041320,0.385704,-0.325379,0.004373,-0.059129
25569,2,0,-0.575003,-0.170529,0.221620,-1.053665,0.049099,0.641676,0.016354,1.208632,...,0.670012,1.400519,-0.220545,0.641140,1.461002,0.734826,0.081096,0.924382,-0.479982,-0.136455
25570,1,1,-1.635710,-1.976022,-0.636890,1.330359,-1.719197,-0.259467,-0.455103,-1.308468,...,0.463639,-0.527331,-0.857951,1.256633,0.481844,0.236350,-0.320390,0.796356,1.191530,-0.001347


## PCA features (+ Existing features)

既存のカラムは残したほうがいいのだろうか？？
→ このコンペでは残したほうがいい成績が出ている。

In [25]:
# g- columns: add 50 PCA components as extra features.
n_comp = 50

# PCA is fitted on train and test rows stacked together; the projected rows are then split
# back by position (first train.shape[0] rows are train, last test.shape[0] rows are test).
data = pd.concat([pd.DataFrame(train[g_cols]), pd.DataFrame(test[g_cols])])
data2 = PCA(n_components=n_comp, random_state=random_seed).fit_transform(data[g_cols])
train2 = data2[: train.shape[0]]
test2 = data2[-test.shape[0] :]

train2 = pd.DataFrame(train2, columns=[f"pca_G-{i}" for i in range(n_comp)])
test2 = pd.DataFrame(test2, columns=[f"pca_G-{i}" for i in range(n_comp)])

# Column-wise concat: the original columns are kept and the components are appended.
train = pd.concat((train, train2), axis=1)
test = pd.concat((test, test2), axis=1)

In [26]:
# c- columns: add 15 PCA components as extra features.
n_comp = 15

# PCA is fitted on train and test rows stacked together; the projected rows are then split
# back by position (first train.shape[0] rows are train, last test.shape[0] rows are test).
data = pd.concat([pd.DataFrame(train[c_cols]), pd.DataFrame(test[c_cols])])
data2 = PCA(n_components=n_comp, random_state=random_seed).fit_transform(data[c_cols])
train2 = data2[: train.shape[0]]
test2 = data2[-test.shape[0] :]

train2 = pd.DataFrame(train2, columns=[f"pca_C-{i}" for i in range(n_comp)])
test2 = pd.DataFrame(test2, columns=[f"pca_C-{i}" for i in range(n_comp)])

# Column-wise concat: the original columns are kept and the components are appended.
train = pd.concat((train, train2), axis=1)
test = pd.concat((test, test2), axis=1)

In [27]:
# Preview the training features with the PCA components appended.
train

Unnamed: 0,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,...,pca_C-5,pca_C-6,pca_C-7,pca_C-8,pca_C-9,pca_C-10,pca_C-11,pca_C-12,pca_C-13,pca_C-14
0,0,0,1.124260,0.896698,-0.436214,-0.965311,-0.287443,-1.016437,-1.360774,-0.045876,...,1.084173,0.499893,0.361410,-0.060848,0.345115,0.430001,0.294952,0.457666,-1.104604,0.746927
1,2,0,0.117451,0.667759,0.260124,0.097531,1.204172,0.692876,0.356691,0.559630,...,-0.644772,-0.072663,0.691390,-0.915782,0.139468,1.039007,0.163256,-0.388631,-1.131308,-0.578330
2,1,0,0.777229,0.935347,1.414044,-0.113563,-0.025489,1.494313,0.277364,0.357917,...,1.014754,-0.962508,1.009455,-0.254046,-0.406054,0.674100,0.071775,0.290638,0.701243,-0.010055
3,1,0,-0.749489,-0.299404,-0.459100,0.774708,2.344556,-0.856449,-2.323390,0.298781,...,0.871548,0.699965,0.872746,-0.628334,0.962443,2.251942,1.342743,-0.423853,-0.559250,0.182839
4,2,1,-0.460555,-0.508226,0.959313,0.984009,1.451890,-0.867329,-0.342599,-0.234770,...,-0.245337,-0.583627,0.459293,0.373390,-0.289508,0.667770,-0.944482,0.211168,-0.347195,0.160099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,0,0,0.599819,-0.723658,2.333805,-0.964385,1.075072,1.786462,0.131113,-0.812649,...,1.145251,-0.052962,0.161124,0.193701,0.129756,0.576738,0.291642,-0.598477,-0.108096,1.183038
25568,0,0,-0.865375,-0.307222,1.988666,0.523322,0.644383,0.437283,-0.989775,-0.015526,...,-1.488059,0.039376,1.778791,-0.260386,0.152112,1.573422,0.499272,-1.666410,-0.593174,-1.713381
25569,2,0,-0.575003,-0.170529,0.221620,-1.053665,0.049099,0.641676,0.016354,1.208632,...,0.887606,1.007116,0.347269,0.376352,-1.302986,0.254448,0.505971,0.665237,-0.601036,-0.241065
25570,1,1,-1.635710,-1.976022,-0.636890,1.330359,-1.719197,-0.259467,-0.455103,-1.308468,...,0.258797,0.568508,-0.835311,-0.344547,0.878997,0.730981,-0.063122,-0.039643,0.344824,0.199732


In [28]:
# PCA-only views of the data: everything except the raw g- and c- columns.
# `drop` returns a new frame, so `train` and `test` themselves are left unchanged.
train_pca = train.drop(g_cols + c_cols, axis=1)
test_pca = test.drop(g_cols + c_cols, axis=1)

In [29]:
# Preview the PCA-only training frame.
train_pca

Unnamed: 0,cp_time,cp_dose,pca_G-0,pca_G-1,pca_G-2,pca_G-3,pca_G-4,pca_G-5,pca_G-6,pca_G-7,...,pca_C-5,pca_C-6,pca_C-7,pca_C-8,pca_C-9,pca_C-10,pca_C-11,pca_C-12,pca_C-13,pca_C-14
0,0,0,-5.778899,6.154613,8.561315,-7.442511,4.386002,1.258147,3.520685,1.828526,...,1.084173,0.499893,0.361410,-0.060848,0.345115,0.430001,0.294952,0.457666,-1.104604,0.746927
1,2,0,-5.035246,1.003536,-12.642795,4.682019,0.934481,0.017921,0.817860,-1.085419,...,-0.644772,-0.072663,0.691390,-0.915782,0.139468,1.039007,0.163256,-0.388631,-1.131308,-0.578330
2,1,0,0.849837,-8.534120,-2.961085,0.234691,0.712903,3.226471,-1.540530,3.543483,...,1.014754,-0.962508,1.009455,-0.254046,-0.406054,0.674100,0.071775,0.290638,0.701243,-0.010055
3,1,0,11.053726,-10.088315,-0.812731,-4.941979,-7.323094,-2.490876,-2.273711,6.357738,...,0.871548,0.699965,0.872746,-0.628334,0.962443,2.251942,1.342743,-0.423853,-0.559250,0.182839
4,2,1,-6.813030,-5.481174,-9.282727,-4.827295,-7.899419,-8.227711,-3.362621,-3.581453,...,-0.245337,-0.583627,0.459293,0.373390,-0.289508,0.667770,-0.944482,0.211168,-0.347195,0.160099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,0,0,3.034423,-2.604996,0.378983,1.080481,4.262611,2.492815,3.584576,-0.012864,...,1.145251,-0.052962,0.161124,0.193701,0.129756,0.576738,0.291642,-0.598477,-0.108096,1.183038
25568,0,0,-7.989301,-0.778425,-4.860383,0.376690,-1.113370,-2.287973,-5.796794,1.580867,...,-1.488059,0.039376,1.778791,-0.260386,0.152112,1.573422,0.499272,-1.666410,-0.593174,-1.713381
25569,2,0,-6.872688,6.782780,1.654480,-7.876308,1.163434,2.100182,4.330693,-0.996572,...,0.887606,1.007116,0.347269,0.376352,-1.302986,0.254448,0.505971,0.665237,-0.601036,-0.241065
25570,1,1,-1.134083,-9.890526,11.790893,7.032540,2.695275,-2.669482,2.486436,-0.267855,...,0.258797,0.568508,-0.835311,-0.344547,0.878997,0.730981,-0.063122,-0.039643,0.344824,0.199732


## Feature Selection using Variance Threshold

分散がしきい値未満の特徴量を捨てます。

In [30]:
# Keep only features whose variance over train + test is at least this value.
var_threshold = 0.5

# pd.concat replaces DataFrame.append, which is deprecated and removed in pandas 2.0;
# the stacked result is the same.
data = pd.concat([train, test])
# iloc[:, 2:] skips the first two columns (cp_time, cp_dose), which are re-attached below
# without being filtered.
ve_columns = (data.iloc[:, 2:].var() >= var_threshold).values
ve_data = data.iloc[:, 2:].loc[:, ve_columns]

# Split the stacked rows back by position.
ve_train = ve_data[: train.shape[0]]
ve_test = ve_data[-test.shape[0] :]


train = pd.DataFrame(train[["cp_time", "cp_dose"]].values.reshape(-1, 2), columns=["cp_time", "cp_dose"])
train = pd.concat([train, ve_train], axis=1)


test = pd.DataFrame(test[["cp_time", "cp_dose"]].values.reshape(-1, 2), columns=["cp_time", "cp_dose"])
test = pd.concat([test, ve_test], axis=1)

In [31]:
# Preview the training features after the variance filter.
train

Unnamed: 0,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,...,pca_C-5,pca_C-6,pca_C-7,pca_C-8,pca_C-9,pca_C-10,pca_C-11,pca_C-12,pca_C-13,pca_C-14
0,0,0,1.124260,0.896698,-0.436214,-0.965311,-0.287443,-1.016437,-1.360774,-0.045876,...,1.084173,0.499893,0.361410,-0.060848,0.345115,0.430001,0.294952,0.457666,-1.104604,0.746927
1,2,0,0.117451,0.667759,0.260124,0.097531,1.204172,0.692876,0.356691,0.559630,...,-0.644772,-0.072663,0.691390,-0.915782,0.139468,1.039007,0.163256,-0.388631,-1.131308,-0.578330
2,1,0,0.777229,0.935347,1.414044,-0.113563,-0.025489,1.494313,0.277364,0.357917,...,1.014754,-0.962508,1.009455,-0.254046,-0.406054,0.674100,0.071775,0.290638,0.701243,-0.010055
3,1,0,-0.749489,-0.299404,-0.459100,0.774708,2.344556,-0.856449,-2.323390,0.298781,...,0.871548,0.699965,0.872746,-0.628334,0.962443,2.251942,1.342743,-0.423853,-0.559250,0.182839
4,2,1,-0.460555,-0.508226,0.959313,0.984009,1.451890,-0.867329,-0.342599,-0.234770,...,-0.245337,-0.583627,0.459293,0.373390,-0.289508,0.667770,-0.944482,0.211168,-0.347195,0.160099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,0,0,0.599819,-0.723658,2.333805,-0.964385,1.075072,1.786462,0.131113,-0.812649,...,1.145251,-0.052962,0.161124,0.193701,0.129756,0.576738,0.291642,-0.598477,-0.108096,1.183038
25568,0,0,-0.865375,-0.307222,1.988666,0.523322,0.644383,0.437283,-0.989775,-0.015526,...,-1.488059,0.039376,1.778791,-0.260386,0.152112,1.573422,0.499272,-1.666410,-0.593174,-1.713381
25569,2,0,-0.575003,-0.170529,0.221620,-1.053665,0.049099,0.641676,0.016354,1.208632,...,0.887606,1.007116,0.347269,0.376352,-1.302986,0.254448,0.505971,0.665237,-0.601036,-0.241065
25570,1,1,-1.635710,-1.976022,-0.636890,1.330359,-1.719197,-0.259467,-0.455103,-1.308468,...,0.258797,0.568508,-0.835311,-0.344547,0.878997,0.730981,-0.063122,-0.039643,0.344824,0.199732


# Create Model - Multi input ResNet

https://www.kaggle.com/rahulsd91/moa-multi-input-resnet-model

In [32]:
def create_model_resnet(n_features, n_features_2, n_labels, n_hidden_layers, units, activations):
    """Build the two-input, three-head Keras network.

    Data flow:
      * Head1 processes the first input through two weight-normalised dense layers.
      * Head2 receives the second input concatenated with Head1's output and stacks
        `n_hidden_layers` BatchNorm -> Dropout -> dense blocks.
      * The outputs of Head1 and Head2 are averaged (both end in a layer of width
        `units[n_hidden_layers - 1]`) and fed to Head3, which ends in a sigmoid
        layer with `n_labels` outputs.

    Parameters
    ----------
    n_features : int
        Width of the first input ("Input1").
    n_features_2 : int
        Width of the second input ("Input2").
    n_labels : int
        Number of sigmoid outputs.
    n_hidden_layers : int
        Number of blocks in Head2.
    units : sequence of int
        Layer widths. Indices 0..n_hidden_layers-1 are used by Head2, index -3 by
        Head1's first layer, indices -2 and -1 by Head3.
    activations : sequence of str
        Activation names. Indices 0..n_hidden_layers-1 are used by Head2, indices
        -3 and -4 by Head1, indices -2 and -1 by Head3.

    Returns
    -------
    tf.keras.models.Model taking `[input_1, input_2]`.
    """

    def weight_normed_dense(width, activation):
        # Dense layer wrapped in tensorflow-addons weight normalisation.
        return tfa.layers.WeightNormalization(L.Dense(width, activation=activation))

    input_1 = L.Input(shape=(n_features,), name="Input1")
    input_2 = L.Input(shape=(n_features_2,), name="Input2")

    # Head 1: operates on the first input only.
    head_1_layers = [
        L.BatchNormalization(),
        L.Dropout(0.2),
        weight_normed_dense(units[-3], activations[-3]),
        L.BatchNormalization(),
        L.Dropout(0.2),
        weight_normed_dense(units[n_hidden_layers - 1], activations[-4]),
    ]
    head_1 = tf.keras.Sequential(head_1_layers, name="Head1")

    input_3 = head_1(input_1)
    input_3_concat = L.Concatenate()([input_2, input_3])

    # Head 2: second input joined with Head 1's output.
    layers_2 = []
    for idx in range(n_hidden_layers):
        layers_2.extend(
            [
                L.BatchNormalization(),
                L.Dropout(0.2),
                weight_normed_dense(units[idx], activations[idx]),
            ]
        )
    head_2 = tf.keras.Sequential(layers_2, name="Head2")

    input_4 = head_2(input_3_concat)
    # Residual-style merge of the two heads.
    input_4_avg = L.Average()([input_3, input_4])

    # Head 3: final layers ending in the sigmoid output.
    head_3_layers = [
        L.BatchNormalization(),
        weight_normed_dense(units[-2], activations[-2]),
        L.BatchNormalization(),
        L.Dropout(0.2),
        weight_normed_dense(units[-1], activations[-1]),
        L.BatchNormalization(),
        L.Dense(n_labels, activation="sigmoid"),
    ]
    head_3 = tf.keras.Sequential(head_3_layers, name="Head3")

    output = head_3(input_4_avg)

    return tf.keras.models.Model(inputs=[input_1, input_2], outputs=output)

# Learning

In [33]:
# Model names cycled through by the training loop (seed index modulo the list length).
models = ["ResNet"]

In [34]:
# Number of seeded training runs (three per model name) and cross-validation folds per run.
N_STARTS = len(models) * 3
N_SPLITS = 5

In [35]:
class Objective:
    """Optuna objective that tunes the multi-input ResNet architecture.

    Each trial samples the number of Head2 layers together with per-layer widths
    and activations, trains the network with multi-seed cross-validation, and
    returns the log loss of the uniformly blended out-of-fold predictions.
    """

    def __init__(self):
        pass

    def __call__(self, trial):
        """Sample one architecture from `trial`, train it and return its blended OOF log loss.

        Reads the notebook globals `train`, `train_pca`, `target_df`, `N_STARTS`
        and `N_SPLITS`.
        """
        n_hidden_layers = trial.suggest_int("n_hidden_layers", 1, 5)

        # create_model_resnet consumes n_hidden_layers + 3 widths ...
        units = [
            trial.suggest_categorical("units_{}".format(i + 1), [128, 256, 512, 1024])
            for i in range(n_hidden_layers + 3)
        ]
        # ... and n_hidden_layers + 4 activations.
        activations = [
            trial.suggest_categorical("activations_{}".format(i + 1), ["relu", "elu", "selu", "swish"])
            for i in range(n_hidden_layers + 4)
        ]

        oof = self.learning(train, train_pca, target_df, N_STARTS, N_SPLITS, n_hidden_layers, units, activations)

        # Every seeded run contributes equally to the blend.
        uniform_weights = [1.0 / N_STARTS] * N_STARTS
        blend_ = blend(target_df.values.shape, uniform_weights, oof)

        return metric(target_df.values, blend_)

    def learning(self, train_, train_pca_, target, N_STARTS, N_SPLITS, n_hidden_layers, units, activations):
        """Train the network over `N_STARTS` seeds x `N_SPLITS` folds and collect OOF predictions.

        Parameters
        ----------
        train_ : DataFrame
            Features for the first model input.
        train_pca_ : DataFrame
            Features for the second model input.
        target : DataFrame
            Label matrix, row-aligned with the feature frames.
        N_STARTS : int
            Number of seeded runs.
        N_SPLITS : int
            Number of cross-validation folds per run.
        n_hidden_layers, units, activations
            Architecture description forwarded to `create_model_resnet`.

        Returns
        -------
        dict
            Maps "<model name>_<seed>" to a DataFrame of out-of-fold predictions
            with the same index and columns as `target`.
        """
        oof = {}

        # Convert once instead of on every fold.
        x_main = train_.values
        x_pca = train_pca_.values
        y_all = target.astype(float).values

        for seed in range(N_STARTS):
            model_name = models[seed % len(models)]

            # The splitter yields positional row indices, so predictions are gathered in a
            # plain array; label-based `.loc` would only work when the index is 0..n-1.
            seed_pred = np.zeros(y_all.shape)

            kfold_seed = random_seed + seed
            fix_seed(kfold_seed)

            splitter = MultilabelStratifiedKFold(n_splits=N_SPLITS, random_state=kfold_seed, shuffle=True)
            for n, (tr, te) in enumerate(splitter.split(target, target)):
                start_time = time()

                # Build Model
                model = create_model_resnet(
                    len(train_.columns),
                    len(train_pca_.columns),
                    len(target.columns),
                    n_hidden_layers,
                    units,
                    activations,
                )

                # Build Data Sets
                x_tr = [x_main[tr], x_pca[tr]]
                x_val = [x_main[te], x_pca[te]]
                y_tr, y_val = y_all[tr], y_all[te]

                model.compile(
                    optimizer=tfa.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-5, clipvalue=756),
                    loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=0.001),
                    metrics=logloss,
                )

                reduce_lr_loss = ReduceLROnPlateau(
                    monitor="val_loss", factor=0.1, patience=3, verbose=0, min_delta=1e-4, mode="min"
                )
                early_stopping = EarlyStopping(
                    monitor="val_loss",
                    patience=10,
                    mode="min",
                    verbose=0,
                    min_delta=1e-4,
                    restore_best_weights=True,
                )
                model.fit(
                    x_tr,
                    y_tr,
                    validation_data=(x_val, y_val),
                    epochs=200,
                    batch_size=128,
                    callbacks=[reduce_lr_loss, early_stopping],
                    verbose=0,
                )

                val_predict = model.predict(x_val)
                # Each row belongs to exactly one validation fold, so plain assignment suffices.
                seed_pred[te] = val_predict
                fold_score = metric(y_val, val_predict)

                print(
                    f"[{str(datetime.timedelta(seconds = time() - start_time))[2:7]}] {model_name}: Seed {seed}, Fold {n}:",
                    fold_score,
                )

                # Release the Keras graph and model memory before the next fold.
                K.clear_session()
                del model
                gc.collect()

            oof[f"{model_name}_{seed}"] = pd.DataFrame(seed_pred, index=target.index, columns=target.columns)

        return oof

    def callback(self, study, trial):
        """Optuna per-trial callback hook; intentionally a no-op."""
        pass

In [36]:
objective = Objective()
# The objective is a log loss, so the study minimises it. Seeding the sampler makes the
# sequence of suggested hyper-parameters repeatable across runs, in line with the explicit
# seeding done elsewhere in the notebook (TPE is Optuna's default sampler).
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=random_seed),
)
study.optimize(objective, n_trials=20, callbacks=[objective.callback])

[32m[I 2020-10-31 08:55:10,726][0m A new study created in memory with name: no-name-f64878c6-1f78-4e73-8eb1-41d1c89fd2a9[0m


[01:23] ResNet: Seed 0, Fold 0: 0.01730022019173251
[01:09] ResNet: Seed 0, Fold 1: 0.01681460064871317
[01:07] ResNet: Seed 0, Fold 2: 0.016784308497728193
[01:08] ResNet: Seed 0, Fold 3: 0.016724396058600485
[01:06] ResNet: Seed 0, Fold 4: 0.016770496849910603
[01:08] ResNet: Seed 1, Fold 0: 0.016756004941455058
[01:09] ResNet: Seed 1, Fold 1: 0.016832563287779812
[01:01] ResNet: Seed 1, Fold 2: 0.017611661430153854
[01:07] ResNet: Seed 1, Fold 3: 0.0168615114025889
[01:12] ResNet: Seed 1, Fold 4: 0.016966599116337962
[01:04] ResNet: Seed 2, Fold 0: 0.01694784974224378
[01:08] ResNet: Seed 2, Fold 1: 0.016835740054707008
[01:05] ResNet: Seed 2, Fold 2: 0.017302794677504507
[01:06] ResNet: Seed 2, Fold 3: 0.01680655904371106
[01:06] ResNet: Seed 2, Fold 4: 0.016779961425711986


[32m[I 2020-10-31 09:12:32,547][0m Trial 0 finished with value: 0.0165680621268295 and parameters: {'n_hidden_layers': 2, 'units_1': 512, 'units_2': 128, 'units_3': 1024, 'units_4': 128, 'units_5': 512, 'activations_1': 'elu', 'activations_2': 'elu', 'activations_3': 'relu', 'activations_4': 'swish', 'activations_5': 'swish', 'activations_6': 'elu'}. Best is trial 0 with value: 0.0165680621268295.[0m


[01:38] ResNet: Seed 0, Fold 0: 0.017133624115439387
[01:34] ResNet: Seed 0, Fold 1: 0.016609749141983494
[01:29] ResNet: Seed 0, Fold 2: 0.01667143201773444
[01:33] ResNet: Seed 0, Fold 3: 0.01642637025346827
[01:30] ResNet: Seed 0, Fold 4: 0.01663630427625837
[01:32] ResNet: Seed 1, Fold 0: 0.01647397006573892
[01:34] ResNet: Seed 1, Fold 1: 0.01653634142373785
[01:28] ResNet: Seed 1, Fold 2: 0.017237629300248945
[01:30] ResNet: Seed 1, Fold 3: 0.016623838284831518
[01:25] ResNet: Seed 1, Fold 4: 0.016672848160936112
[01:31] ResNet: Seed 2, Fold 0: 0.016472210811287115
[01:28] ResNet: Seed 2, Fold 1: 0.016566723061294836
[01:29] ResNet: Seed 2, Fold 2: 0.017126487405195904
[01:33] ResNet: Seed 2, Fold 3: 0.016683912273766855
[01:34] ResNet: Seed 2, Fold 4: 0.01662873918127983


[32m[I 2020-10-31 09:35:43,085][0m Trial 1 finished with value: 0.016323335282783843 and parameters: {'n_hidden_layers': 3, 'units_1': 128, 'units_2': 256, 'units_3': 1024, 'units_4': 256, 'units_5': 1024, 'units_6': 256, 'activations_1': 'swish', 'activations_2': 'elu', 'activations_3': 'swish', 'activations_4': 'elu', 'activations_5': 'selu', 'activations_6': 'swish', 'activations_7': 'relu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:24] ResNet: Seed 0, Fold 0: 0.017262204346072202
[01:14] ResNet: Seed 0, Fold 1: 0.016660318833465883
[01:10] ResNet: Seed 0, Fold 2: 0.016733480706985826
[01:06] ResNet: Seed 0, Fold 3: 0.01669981207790374
[01:13] ResNet: Seed 0, Fold 4: 0.01672013370875611
[01:09] ResNet: Seed 1, Fold 0: 0.01663912841856917
[01:15] ResNet: Seed 1, Fold 1: 0.016760274121862186
[01:13] ResNet: Seed 1, Fold 2: 0.017302723335616287
[01:13] ResNet: Seed 1, Fold 3: 0.016740569578668745
[01:11] ResNet: Seed 1, Fold 4: 0.016728553871909156
[01:11] ResNet: Seed 2, Fold 0: 0.016632399102371177
[01:10] ResNet: Seed 2, Fold 1: 0.016689677303258776
[01:13] ResNet: Seed 2, Fold 2: 0.017238362921037677
[01:10] ResNet: Seed 2, Fold 3: 0.01675953214299775
[01:08] ResNet: Seed 2, Fold 4: 0.016732467541726536


[32m[I 2020-10-31 09:54:09,787][0m Trial 2 finished with value: 0.016482014492844622 and parameters: {'n_hidden_layers': 2, 'units_1': 256, 'units_2': 128, 'units_3': 128, 'units_4': 1024, 'units_5': 256, 'activations_1': 'elu', 'activations_2': 'swish', 'activations_3': 'selu', 'activations_4': 'swish', 'activations_5': 'elu', 'activations_6': 'elu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:25] ResNet: Seed 0, Fold 0: 0.01718753312794185
[01:18] ResNet: Seed 0, Fold 1: 0.01667955553044776
[01:12] ResNet: Seed 0, Fold 2: 0.016784387698660246
[01:15] ResNet: Seed 0, Fold 3: 0.01655850681895706
[01:14] ResNet: Seed 0, Fold 4: 0.01680578866116879
[01:15] ResNet: Seed 1, Fold 0: 0.01659196876012878
[01:14] ResNet: Seed 1, Fold 1: 0.016688144139891524
[01:07] ResNet: Seed 1, Fold 2: 0.017443433588641317
[01:16] ResNet: Seed 1, Fold 3: 0.01660799109315626
[01:13] ResNet: Seed 1, Fold 4: 0.016756487380942188
[01:15] ResNet: Seed 2, Fold 0: 0.016599397716543146
[01:10] ResNet: Seed 2, Fold 1: 0.016719855363016848
[01:22] ResNet: Seed 2, Fold 2: 0.01720362692774493
[01:10] ResNet: Seed 2, Fold 3: 0.01693645311725194
[01:20] ResNet: Seed 2, Fold 4: 0.016675352774850574


[32m[I 2020-10-31 10:13:23,098][0m Trial 3 finished with value: 0.016535273101180442 and parameters: {'n_hidden_layers': 1, 'units_1': 256, 'units_2': 128, 'units_3': 128, 'units_4': 128, 'activations_1': 'swish', 'activations_2': 'elu', 'activations_3': 'elu', 'activations_4': 'relu', 'activations_5': 'relu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:23] ResNet: Seed 0, Fold 0: 0.01761054348840397
[01:20] ResNet: Seed 0, Fold 1: 0.016779955618811675
[01:14] ResNet: Seed 0, Fold 2: 0.016851338714705733
[01:18] ResNet: Seed 0, Fold 3: 0.0167247008231166
[01:15] ResNet: Seed 0, Fold 4: 0.01679194905376042
[01:15] ResNet: Seed 1, Fold 0: 0.01688795774753394
[01:17] ResNet: Seed 1, Fold 1: 0.016834610391840976
[01:08] ResNet: Seed 1, Fold 2: 0.017589410477117498
[01:18] ResNet: Seed 1, Fold 3: 0.016750641216720413
[01:18] ResNet: Seed 1, Fold 4: 0.016953325122196904
[01:17] ResNet: Seed 2, Fold 0: 0.016738169574485155
[01:17] ResNet: Seed 2, Fold 1: 0.016856567750490818
[01:14] ResNet: Seed 2, Fold 2: 0.017324567977040368
[01:07] ResNet: Seed 2, Fold 3: 0.017128338276017542
[01:17] ResNet: Seed 2, Fold 4: 0.016743601493369516


[32m[I 2020-10-31 10:32:54,058][0m Trial 4 finished with value: 0.016552645478640837 and parameters: {'n_hidden_layers': 3, 'units_1': 256, 'units_2': 128, 'units_3': 128, 'units_4': 1024, 'units_5': 256, 'units_6': 512, 'activations_1': 'elu', 'activations_2': 'selu', 'activations_3': 'swish', 'activations_4': 'swish', 'activations_5': 'swish', 'activations_6': 'relu', 'activations_7': 'elu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:45] ResNet: Seed 0, Fold 0: 0.017348932695919743
[01:43] ResNet: Seed 0, Fold 1: 0.016775234465020303
[01:34] ResNet: Seed 0, Fold 2: 0.01679161832957348
[01:31] ResNet: Seed 0, Fold 3: 0.016652900666759035
[01:38] ResNet: Seed 0, Fold 4: 0.01686273147821573
[01:33] ResNet: Seed 1, Fold 0: 0.016762710194026437
[01:29] ResNet: Seed 1, Fold 1: 0.01676698654384593
[01:25] ResNet: Seed 1, Fold 2: 0.01734706404319093
[01:35] ResNet: Seed 1, Fold 3: 0.01675072601061554
[01:29] ResNet: Seed 1, Fold 4: 0.016974573868847896
[01:24] ResNet: Seed 2, Fold 0: 0.01681888848829135
[01:31] ResNet: Seed 2, Fold 1: 0.016830457219760262
[01:25] ResNet: Seed 2, Fold 2: 0.01730954510739196
[01:30] ResNet: Seed 2, Fold 3: 0.016865646388094544
[01:29] ResNet: Seed 2, Fold 4: 0.01677583523105907


[32m[I 2020-10-31 10:56:30,054][0m Trial 5 finished with value: 0.016564624999915456 and parameters: {'n_hidden_layers': 3, 'units_1': 128, 'units_2': 128, 'units_3': 1024, 'units_4': 256, 'units_5': 512, 'units_6': 128, 'activations_1': 'relu', 'activations_2': 'swish', 'activations_3': 'elu', 'activations_4': 'elu', 'activations_5': 'swish', 'activations_6': 'relu', 'activations_7': 'swish'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:30] ResNet: Seed 0, Fold 0: 0.017101170691003354
[01:18] ResNet: Seed 0, Fold 1: 0.016592512415769416
[01:12] ResNet: Seed 0, Fold 2: 0.016710413343377795
[01:07] ResNet: Seed 0, Fold 3: 0.01657838657061678
[01:17] ResNet: Seed 0, Fold 4: 0.016746487663622064
[01:12] ResNet: Seed 1, Fold 0: 0.016491050821812733
[01:17] ResNet: Seed 1, Fold 1: 0.016624916431167782
[01:09] ResNet: Seed 1, Fold 2: 0.01720785325720468
[01:13] ResNet: Seed 1, Fold 3: 0.01667426160712216
[01:11] ResNet: Seed 1, Fold 4: 0.016752897339232842
[01:09] ResNet: Seed 2, Fold 0: 0.016570346629375544
[01:14] ResNet: Seed 2, Fold 1: 0.016657312878499256
[01:14] ResNet: Seed 2, Fold 2: 0.01714746152445634
[01:12] ResNet: Seed 2, Fold 3: 0.016686265784299954
[01:06] ResNet: Seed 2, Fold 4: 0.016716945006540078


[32m[I 2020-10-31 11:15:28,232][0m Trial 6 finished with value: 0.016378617444859744 and parameters: {'n_hidden_layers': 2, 'units_1': 128, 'units_2': 256, 'units_3': 512, 'units_4': 256, 'units_5': 512, 'activations_1': 'swish', 'activations_2': 'elu', 'activations_3': 'elu', 'activations_4': 'elu', 'activations_5': 'elu', 'activations_6': 'relu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:34] ResNet: Seed 0, Fold 0: 0.01725156786259958
[01:19] ResNet: Seed 0, Fold 1: 0.0166940037702684
[01:19] ResNet: Seed 0, Fold 2: 0.016728000716818826
[01:13] ResNet: Seed 0, Fold 3: 0.016541560585966463
[01:12] ResNet: Seed 0, Fold 4: 0.01674891074662216
[01:14] ResNet: Seed 1, Fold 0: 0.016540973649561617
[01:08] ResNet: Seed 1, Fold 1: 0.016838081419629305
[01:14] ResNet: Seed 1, Fold 2: 0.017229560760112627
[01:12] ResNet: Seed 1, Fold 3: 0.016649639737880337
[01:16] ResNet: Seed 1, Fold 4: 0.016678952283323965
[01:19] ResNet: Seed 2, Fold 0: 0.01666477237904597
[01:10] ResNet: Seed 2, Fold 1: 0.0167312819371661
[01:12] ResNet: Seed 2, Fold 2: 0.01718568781203177
[01:20] ResNet: Seed 2, Fold 3: 0.016698914250358587
[01:14] ResNet: Seed 2, Fold 4: 0.016681446575185048


[32m[I 2020-10-31 11:35:04,490][0m Trial 7 finished with value: 0.016443552702792873 and parameters: {'n_hidden_layers': 2, 'units_1': 256, 'units_2': 128, 'units_3': 256, 'units_4': 256, 'units_5': 256, 'activations_1': 'relu', 'activations_2': 'elu', 'activations_3': 'selu', 'activations_4': 'elu', 'activations_5': 'selu', 'activations_6': 'swish'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:16] ResNet: Seed 0, Fold 0: 0.017631193030835532
[01:07] ResNet: Seed 0, Fold 1: 0.017011390243239347
[00:59] ResNet: Seed 0, Fold 2: 0.017173962913573333
[01:04] ResNet: Seed 0, Fold 3: 0.016903782721656845
[00:56] ResNet: Seed 0, Fold 4: 0.0172118154214145
[00:56] ResNet: Seed 1, Fold 0: 0.017121048063968903
[01:04] ResNet: Seed 1, Fold 1: 0.01708576310826191
[00:56] ResNet: Seed 1, Fold 2: 0.017784869950649873
[01:02] ResNet: Seed 1, Fold 3: 0.017083207165281576
[00:55] ResNet: Seed 1, Fold 4: 0.017265415402257813
[01:04] ResNet: Seed 2, Fold 0: 0.016949202122108865
[00:54] ResNet: Seed 2, Fold 1: 0.017188996976641342
[01:00] ResNet: Seed 2, Fold 2: 0.01755004274393323
[00:55] ResNet: Seed 2, Fold 3: 0.01731805894156901
[00:55] ResNet: Seed 2, Fold 4: 0.01718150979408831


[32m[I 2020-10-31 11:50:49,422][0m Trial 8 finished with value: 0.016816713738984707 and parameters: {'n_hidden_layers': 1, 'units_1': 512, 'units_2': 1024, 'units_3': 256, 'units_4': 128, 'activations_1': 'selu', 'activations_2': 'elu', 'activations_3': 'swish', 'activations_4': 'swish', 'activations_5': 'selu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:55] ResNet: Seed 0, Fold 0: 0.017418283695848982
[01:32] ResNet: Seed 0, Fold 1: 0.016790467320434207
[01:23] ResNet: Seed 0, Fold 2: 0.01690344367726721
[01:29] ResNet: Seed 0, Fold 3: 0.016703028124529125
[01:25] ResNet: Seed 0, Fold 4: 0.016912842952338906
[01:27] ResNet: Seed 1, Fold 0: 0.016752600997785402
[01:31] ResNet: Seed 1, Fold 1: 0.016786333946299095
[01:32] ResNet: Seed 1, Fold 2: 0.01748794678690785
[01:28] ResNet: Seed 1, Fold 3: 0.01682114116727033
[01:35] ResNet: Seed 1, Fold 4: 0.016886656399987328
[01:28] ResNet: Seed 2, Fold 0: 0.016659469246819946
[01:31] ResNet: Seed 2, Fold 1: 0.01678890926136279
[01:26] ResNet: Seed 2, Fold 2: 0.017342038187975695
[01:23] ResNet: Seed 2, Fold 3: 0.016996904799924394
[01:30] ResNet: Seed 2, Fold 4: 0.016852718866756104


[32m[I 2020-10-31 12:14:09,032][0m Trial 9 finished with value: 0.016597489795954674 and parameters: {'n_hidden_layers': 3, 'units_1': 1024, 'units_2': 512, 'units_3': 256, 'units_4': 256, 'units_5': 128, 'units_6': 256, 'activations_1': 'selu', 'activations_2': 'selu', 'activations_3': 'swish', 'activations_4': 'relu', 'activations_5': 'relu', 'activations_6': 'elu', 'activations_7': 'relu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[02:24] ResNet: Seed 0, Fold 0: 0.017190203795715536
[02:03] ResNet: Seed 0, Fold 1: 0.01664621080655165
[02:09] ResNet: Seed 0, Fold 2: 0.0167324969161353
[02:10] ResNet: Seed 0, Fold 3: 0.01658679016119747
[02:05] ResNet: Seed 0, Fold 4: 0.016775657494753942
[02:00] ResNet: Seed 1, Fold 0: 0.016559371818926822
[02:10] ResNet: Seed 1, Fold 1: 0.01666818695765822
[02:04] ResNet: Seed 1, Fold 2: 0.017218274019778674
[02:00] ResNet: Seed 1, Fold 3: 0.016636365312732836
[02:09] ResNet: Seed 1, Fold 4: 0.016809881049648985
[02:10] ResNet: Seed 2, Fold 0: 0.016525923310433205
[02:00] ResNet: Seed 2, Fold 1: 0.016735179948163197
[01:57] ResNet: Seed 2, Fold 2: 0.017245479516848017
[02:11] ResNet: Seed 2, Fold 3: 0.01678110110156144
[02:05] ResNet: Seed 2, Fold 4: 0.016681472432402013


[32m[I 2020-10-31 12:46:37,469][0m Trial 10 finished with value: 0.016394732200978514 and parameters: {'n_hidden_layers': 5, 'units_1': 128, 'units_2': 256, 'units_3': 1024, 'units_4': 512, 'units_5': 1024, 'units_6': 1024, 'units_7': 1024, 'units_8': 128, 'activations_1': 'swish', 'activations_2': 'relu', 'activations_3': 'swish', 'activations_4': 'selu', 'activations_5': 'selu', 'activations_6': 'selu', 'activations_7': 'selu', 'activations_8': 'swish', 'activations_9': 'swish'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:40] ResNet: Seed 0, Fold 0: 0.01756693016359978
[01:29] ResNet: Seed 0, Fold 1: 0.01699347996011508
[01:21] ResNet: Seed 0, Fold 2: 0.0170032947301366
[01:21] ResNet: Seed 0, Fold 3: 0.01691416132471358
[01:25] ResNet: Seed 0, Fold 4: 0.016839599800237564
[01:28] ResNet: Seed 1, Fold 0: 0.01670968444661724
[01:28] ResNet: Seed 1, Fold 1: 0.016817811773931046
[01:17] ResNet: Seed 1, Fold 2: 0.017621927032579667
[01:26] ResNet: Seed 1, Fold 3: 0.016763948694505013
[01:30] ResNet: Seed 1, Fold 4: 0.016943558159020242
[01:19] ResNet: Seed 2, Fold 0: 0.016885204769050043
[01:26] ResNet: Seed 2, Fold 1: 0.016851057157961327
[01:22] ResNet: Seed 2, Fold 2: 0.01749654122186336
[01:28] ResNet: Seed 2, Fold 3: 0.016995001404957077
[01:29] ResNet: Seed 2, Fold 4: 0.01686569926804358


[32m[I 2020-10-31 13:09:00,400][0m Trial 11 finished with value: 0.01662358796805814 and parameters: {'n_hidden_layers': 4, 'units_1': 128, 'units_2': 256, 'units_3': 512, 'units_4': 256, 'units_5': 1024, 'units_6': 256, 'units_7': 256, 'activations_1': 'swish', 'activations_2': 'elu', 'activations_3': 'elu', 'activations_4': 'elu', 'activations_5': 'elu', 'activations_6': 'swish', 'activations_7': 'relu', 'activations_8': 'selu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:52] ResNet: Seed 0, Fold 0: 0.01737310851605147
[01:37] ResNet: Seed 0, Fold 1: 0.016866644373192824
[01:38] ResNet: Seed 0, Fold 2: 0.016852684742988584
[01:38] ResNet: Seed 0, Fold 3: 0.016757373318756767
[01:38] ResNet: Seed 0, Fold 4: 0.016911192810455257
[01:46] ResNet: Seed 1, Fold 0: 0.016739224466639974
[01:35] ResNet: Seed 1, Fold 1: 0.01687585439473281
[01:37] ResNet: Seed 1, Fold 2: 0.017502318382594643
[01:37] ResNet: Seed 1, Fold 3: 0.017002699545630887
[01:29] ResNet: Seed 1, Fold 4: 0.017058149506143584
[01:41] ResNet: Seed 2, Fold 0: 0.016794590780160018
[01:40] ResNet: Seed 2, Fold 1: 0.016809260699037803
[01:31] ResNet: Seed 2, Fold 2: 0.017446389241586056
[01:35] ResNet: Seed 2, Fold 3: 0.017019435407542728
[01:38] ResNet: Seed 2, Fold 4: 0.016817487361075092


[32m[I 2020-10-31 13:34:30,461][0m Trial 12 finished with value: 0.016661853908199686 and parameters: {'n_hidden_layers': 4, 'units_1': 128, 'units_2': 256, 'units_3': 512, 'units_4': 256, 'units_5': 512, 'units_6': 256, 'units_7': 128, 'activations_1': 'swish', 'activations_2': 'relu', 'activations_3': 'relu', 'activations_4': 'elu', 'activations_5': 'elu', 'activations_6': 'swish', 'activations_7': 'relu', 'activations_8': 'relu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:41] ResNet: Seed 0, Fold 0: 0.017418130062380358
[01:26] ResNet: Seed 0, Fold 1: 0.016765355710713116
[01:20] ResNet: Seed 0, Fold 2: 0.017105012473771778
[01:28] ResNet: Seed 0, Fold 3: 0.016749836991038245
[01:24] ResNet: Seed 0, Fold 4: 0.01680772215776534
[01:17] ResNet: Seed 1, Fold 0: 0.016891262323808606
[01:20] ResNet: Seed 1, Fold 1: 0.01678469197003917
[01:17] ResNet: Seed 1, Fold 2: 0.01755049534589903
[01:23] ResNet: Seed 1, Fold 3: 0.016836511184064793
[01:22] ResNet: Seed 1, Fold 4: 0.016877348854865103
[01:18] ResNet: Seed 2, Fold 0: 0.01702477727090411
[01:22] ResNet: Seed 2, Fold 1: 0.016829479186648375
[01:26] ResNet: Seed 2, Fold 2: 0.01727525156824187
[01:23] ResNet: Seed 2, Fold 3: 0.01675105686197688
[01:23] ResNet: Seed 2, Fold 4: 0.016803749704335284


[32m[I 2020-10-31 13:56:21,258][0m Trial 13 finished with value: 0.01655491225816323 and parameters: {'n_hidden_layers': 4, 'units_1': 128, 'units_2': 256, 'units_3': 512, 'units_4': 256, 'units_5': 1024, 'units_6': 256, 'units_7': 512, 'activations_1': 'swish', 'activations_2': 'elu', 'activations_3': 'elu', 'activations_4': 'elu', 'activations_5': 'selu', 'activations_6': 'relu', 'activations_7': 'swish', 'activations_8': 'elu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:22] ResNet: Seed 0, Fold 0: 0.01741980765767599
[01:19] ResNet: Seed 0, Fold 1: 0.016818692432287626
[01:12] ResNet: Seed 0, Fold 2: 0.01680123856886135
[01:14] ResNet: Seed 0, Fold 3: 0.016700614696275005
[01:16] ResNet: Seed 0, Fold 4: 0.016816110595094958
[01:07] ResNet: Seed 1, Fold 0: 0.01684227257043245
[01:11] ResNet: Seed 1, Fold 1: 0.016807667148949035
[01:08] ResNet: Seed 1, Fold 2: 0.017520369656638887
[01:15] ResNet: Seed 1, Fold 3: 0.01683561880400689
[01:10] ResNet: Seed 1, Fold 4: 0.01691570617952686
[01:15] ResNet: Seed 2, Fold 0: 0.016684482404250658
[01:14] ResNet: Seed 2, Fold 1: 0.01675420047074794
[01:05] ResNet: Seed 2, Fold 2: 0.017427878789549132
[01:10] ResNet: Seed 2, Fold 3: 0.016880473904883366
[01:10] ResNet: Seed 2, Fold 4: 0.016880242999148882


[32m[I 2020-10-31 14:15:30,706][0m Trial 14 finished with value: 0.01657719686912629 and parameters: {'n_hidden_layers': 2, 'units_1': 128, 'units_2': 256, 'units_3': 512, 'units_4': 512, 'units_5': 128, 'activations_1': 'swish', 'activations_2': 'elu', 'activations_3': 'swish', 'activations_4': 'elu', 'activations_5': 'elu', 'activations_6': 'selu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:37] ResNet: Seed 0, Fold 0: 0.017355934305548267
[01:21] ResNet: Seed 0, Fold 1: 0.016802520412887777
[01:15] ResNet: Seed 0, Fold 2: 0.016845186084827986
[01:22] ResNet: Seed 0, Fold 3: 0.01679307283060706
[01:14] ResNet: Seed 0, Fold 4: 0.016813992796028663
[01:10] ResNet: Seed 1, Fold 0: 0.016684334149662448
[01:08] ResNet: Seed 1, Fold 1: 0.01678681137422321
[01:11] ResNet: Seed 1, Fold 2: 0.017349919071495443
[01:11] ResNet: Seed 1, Fold 3: 0.016875083364737798
[01:05] ResNet: Seed 1, Fold 4: 0.017083641668037875
[01:14] ResNet: Seed 2, Fold 0: 0.01668875074898097
[01:13] ResNet: Seed 2, Fold 1: 0.016814558951525466
[01:17] ResNet: Seed 2, Fold 2: 0.017383071622587804
[01:11] ResNet: Seed 2, Fold 3: 0.01683728961277668
[01:11] ResNet: Seed 2, Fold 4: 0.016740146143544268


[32m[I 2020-10-31 14:35:14,190][0m Trial 15 finished with value: 0.01649159454862877 and parameters: {'n_hidden_layers': 1, 'units_1': 1024, 'units_2': 1024, 'units_3': 1024, 'units_4': 256, 'activations_1': 'swish', 'activations_2': 'elu', 'activations_3': 'elu', 'activations_4': 'selu', 'activations_5': 'selu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[01:40] ResNet: Seed 0, Fold 0: 0.017289984483507728
[01:30] ResNet: Seed 0, Fold 1: 0.016730842595915167
[01:12] ResNet: Seed 0, Fold 2: 0.016724666742039294
[01:09] ResNet: Seed 0, Fold 3: 0.016785634962429404
[01:17] ResNet: Seed 0, Fold 4: 0.016664716883460367
[01:14] ResNet: Seed 1, Fold 0: 0.016552807292173777
[01:14] ResNet: Seed 1, Fold 1: 0.016733077703398234
[01:14] ResNet: Seed 1, Fold 2: 0.017233284476563793
[01:14] ResNet: Seed 1, Fold 3: 0.01672682436746808
[01:13] ResNet: Seed 1, Fold 4: 0.01677904729555208
[01:09] ResNet: Seed 2, Fold 0: 0.01681176413763404
[01:11] ResNet: Seed 2, Fold 1: 0.016694198395606635
[01:09] ResNet: Seed 2, Fold 2: 0.017351728159056223
[01:14] ResNet: Seed 2, Fold 3: 0.01669276954205934
[01:15] ResNet: Seed 2, Fold 4: 0.01665492724469661


[32m[I 2020-10-31 14:55:15,901][0m Trial 16 finished with value: 0.016411331541488026 and parameters: {'n_hidden_layers': 2, 'units_1': 128, 'units_2': 512, 'units_3': 1024, 'units_4': 256, 'units_5': 512, 'activations_1': 'swish', 'activations_2': 'relu', 'activations_3': 'swish', 'activations_4': 'elu', 'activations_5': 'elu', 'activations_6': 'swish'}. Best is trial 1 with value: 0.016323335282783843.[0m


[02:17] ResNet: Seed 0, Fold 0: 0.017269829635193263
[02:00] ResNet: Seed 0, Fold 1: 0.016723050256999397
[01:53] ResNet: Seed 0, Fold 2: 0.01684003431967979
[01:42] ResNet: Seed 0, Fold 3: 0.01659362536005593
[01:42] ResNet: Seed 0, Fold 4: 0.0167549403329157
[01:47] ResNet: Seed 1, Fold 0: 0.0166023965408007
[01:43] ResNet: Seed 1, Fold 1: 0.01666286281198723
[01:47] ResNet: Seed 1, Fold 2: 0.017356217562969546
[01:48] ResNet: Seed 1, Fold 3: 0.016808275291659253
[01:53] ResNet: Seed 1, Fold 4: 0.01686836337510053
[01:47] ResNet: Seed 2, Fold 0: 0.016674230326715812
[01:43] ResNet: Seed 2, Fold 1: 0.0167584099154441
[01:48] ResNet: Seed 2, Fold 2: 0.01730655604211656
[01:47] ResNet: Seed 2, Fold 3: 0.016896258638873767
[01:39] ResNet: Seed 2, Fold 4: 0.016682790961997596


[32m[I 2020-10-31 15:23:42,168][0m Trial 17 finished with value: 0.016430262214763302 and parameters: {'n_hidden_layers': 5, 'units_1': 128, 'units_2': 256, 'units_3': 512, 'units_4': 1024, 'units_5': 1024, 'units_6': 1024, 'units_7': 256, 'units_8': 512, 'activations_1': 'relu', 'activations_2': 'swish', 'activations_3': 'relu', 'activations_4': 'elu', 'activations_5': 'relu', 'activations_6': 'relu', 'activations_7': 'elu', 'activations_8': 'elu', 'activations_9': 'selu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[02:02] ResNet: Seed 0, Fold 0: 0.017186977437580266
[01:41] ResNet: Seed 0, Fold 1: 0.016631789961030333
[01:37] ResNet: Seed 0, Fold 2: 0.01667108123975644
[01:37] ResNet: Seed 0, Fold 3: 0.016534555673739033
[01:39] ResNet: Seed 0, Fold 4: 0.01669903164796433
[01:35] ResNet: Seed 1, Fold 0: 0.016605231787981914
[01:36] ResNet: Seed 1, Fold 1: 0.016665895863349146
[01:35] ResNet: Seed 1, Fold 2: 0.01721212263797131
[01:25] ResNet: Seed 1, Fold 3: 0.016778233744876307
[01:40] ResNet: Seed 1, Fold 4: 0.01666470083042912
[01:25] ResNet: Seed 2, Fold 0: 0.016629123359995994
[01:33] ResNet: Seed 2, Fold 1: 0.016652267707648345
[01:34] ResNet: Seed 2, Fold 2: 0.017178148039679667
[01:40] ResNet: Seed 2, Fold 3: 0.01675069522272893
[01:31] ResNet: Seed 2, Fold 4: 0.01672427083721813


[32m[I 2020-10-31 15:49:06,665][0m Trial 18 finished with value: 0.016460782661553233 and parameters: {'n_hidden_layers': 3, 'units_1': 128, 'units_2': 256, 'units_3': 512, 'units_4': 256, 'units_5': 512, 'units_6': 128, 'activations_1': 'swish', 'activations_2': 'elu', 'activations_3': 'selu', 'activations_4': 'elu', 'activations_5': 'elu', 'activations_6': 'relu', 'activations_7': 'relu'}. Best is trial 1 with value: 0.016323335282783843.[0m


[02:12] ResNet: Seed 0, Fold 0: 0.017464142556815082
[01:37] ResNet: Seed 0, Fold 1: 0.017307889796177915
[01:31] ResNet: Seed 0, Fold 2: 0.01690613904925685
[01:32] ResNet: Seed 0, Fold 3: 0.01695883294034667
[01:33] ResNet: Seed 0, Fold 4: 0.016995047004946296
[01:31] ResNet: Seed 1, Fold 0: 0.016921384100846003
[01:30] ResNet: Seed 1, Fold 1: 0.01682229507667445
[01:31] ResNet: Seed 1, Fold 2: 0.017556010503807413
[01:30] ResNet: Seed 1, Fold 3: 0.017034544961074257


[33m[W 2020-10-31 16:04:25,014][0m Trial 19 failed because of the following error: InternalError()
Traceback (most recent call last):
  File "/home/ubuntu/work/pytorch/.venv/lib/python3.7/site-packages/optuna/study.py", line 799, in _run_trial
    result = func(trial)
  File "<ipython-input-35-2b3ef63a8e35>", line 19, in __call__
    oof = self.learning(train, train_pca, target_df, N_STARTS, N_SPLITS, n_hidden_layers, units, activations)
  File "<ipython-input-35-2b3ef63a8e35>", line 91, in learning
    verbose=0,
  File "/home/ubuntu/work/pytorch/.venv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 108, in _method_wrapper
    return method(self, *args, **kwargs)
  File "/home/ubuntu/work/pytorch/.venv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 1098, in fit
    tmp_logs = train_function(iterator)
  File "/home/ubuntu/work/pytorch/.venv/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 780, in _

InternalError:  Failed to load in-memory CUBIN: CUDA_ERROR_OUT_OF_MEMORY: out of memory
	 [[{{node cluster_15423_1/xla_run}}]] [Op:__inference_train_function_6299728]

Function call stack:
train_function


# Result

In [None]:
# Summarise the best trial recorded by the Optuna study: its objective
# value followed by one line per hyperparameter.
print("Best trial:")
trial = study.best_trial

print(f"  Value: {trial.value}")
print("  Params: ")
for param_name in trial.params:
    print(f"    {param_name}: {trial.params[param_name]}")

In [None]:
# Estimate how much each hyperparameter contributed to the objective.
# NOTE(review): with the default arguments Optuna only assesses parameters
# present in every completed trial, so conditional ones (e.g. units_5+)
# may be absent from the result - confirm against the installed version.
param_importances = optuna.importance.get_param_importances(study)
param_importances