# Strategy

- Preprocessing
    - RankGauss
    - PCA + Existing Features
    - Variance Encoding
- Model
    - DeepTables
- Learning
    - ~~Pre-train with non-scored label~~
    - Optimizer: AdamW with weight_decay
    - Label smoothing
- Prediction
    - Ensemble above with weight optimization
    - With clipping

# Library

In [1]:
import warnings

warnings.filterwarnings("ignore")

In [2]:
import sys

sys.path.append("../input/iterative-stratification/iterative-stratification-master")
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

sys.path.append("../input/autograd")
import autograd.numpy as np
from autograd import grad

sys.path.append("../../../../github/DeepTables")
from deeptables.models.deepnets import AFM, DCN, FGCNN, PNN, AutoInt, DeepFM, WideDeep, xDeepFM
from deeptables.models.deeptable import DeepTable, ModelConfig
from deeptables.models.preprocessor import DefaultPreprocessor

In [3]:
import datetime
import gc
import os
import random
from collections import defaultdict
from time import time
from typing import Optional

# import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow_addons as tfa
import tensorflow_probability as tfp

# import optuna
from scipy.optimize import fsolve, minimize
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import log_loss
from sklearn.preprocessing import QuantileTransformer, MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

In [4]:
MIXED_PRECISION = False
XLA_ACCELERATE = True

if MIXED_PRECISION:
    from tensorflow.keras.mixed_precision import experimental as mixed_precision

    if tpu:
        policy = tf.keras.mixed_precision.experimental.Policy("mixed_bfloat16")
    else:
        policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16")
    mixed_precision.set_policy(policy)
    print("Mixed precision enabled")

if XLA_ACCELERATE:
    tf.config.optimizer.set_jit(True)
    print("Accelerated Linear Algebra enabled")

Accelerated Linear Algebra enabled


# Functions

In [5]:
def fix_seed(seed=2020):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


random_seed = 22
fix_seed(random_seed)

In [6]:
# https://www.kaggle.com/c/lish-moa/discussion/189857#1043953

# Prediction Clipping Thresholds
p_min = 0.001
p_max = 0.999

# Evaluation Metric with clipping and no label smoothing
def logloss(y_true, y_pred):
    # y_pred = tf.clip_by_value(y_pred, p_min, p_max)
    return -K.mean(y_true * K.log(y_pred) + (1 - y_true) * K.log(1 - y_pred))

In [7]:
# [Fast Numpy Log Loss] https://www.kaggle.com/gogo827jz/optimise-blending-weights-4-5x-faster-log-loss
def metric(y_true, y_pred):
    loss = 0
    y_pred_clip = np.clip(y_pred, 1e-7, 1 - 1e-7)
    for i in range(y_pred.shape[1]):
        loss += -np.mean(y_true[:, i] * np.log(y_pred_clip[:, i]) + (1 - y_true[:, i]) * np.log(1 - y_pred_clip[:, i]))
    return loss / y_pred.shape[1]

In [8]:
def blend(size, weights, oof):
    blend_ = np.zeros(size)
    for i, key in enumerate(oof.keys()):
        blend_ += weights[i] * oof[key].values
    return blend_

# Load Data

In [9]:
train_df = pd.read_csv("../input/lish-moa/train_features.csv")
test_df = pd.read_csv("../input/lish-moa/test_features.csv")
target_df = pd.read_csv("../input/lish-moa/train_targets_scored.csv")
non_target_df = pd.read_csv("../input/lish-moa/train_targets_nonscored.csv")
submit_df = pd.read_csv("../input/lish-moa/sample_submission.csv")

In [10]:
train = train_df.copy()
test = test_df.copy()
ss = submit_df.copy()

# Preprocessing

In [11]:
train.loc[:, "cp_dose"] = train.loc[:, "cp_dose"].map({"D1": 0, "D2": 1})
test.loc[:, "cp_dose"] = test.loc[:, "cp_dose"].map({"D1": 0, "D2": 1})

train.loc[:, "cp_time"] = train.loc[:, "cp_time"].map({24: 0, 48: 1, 72: 2})
test.loc[:, "cp_time"] = test.loc[:, "cp_time"].map({24: 0, 48: 1, 72: 2})

In [12]:
g_cols = [col for col in train_df.columns if col.startswith("g-")]
c_cols = [col for col in train_df.columns if col.startswith("c-")]

## cp_type が ctrl_vehicle なものは MoA を持たない

ので、学習から除外する

In [13]:
target_df = target_df.loc[train["cp_type"] != "ctl_vehicle"].reset_index(drop=True)
non_target_df = non_target_df.loc[train["cp_type"] != "ctl_vehicle"].reset_index(drop=True)
train = train.loc[train["cp_type"] != "ctl_vehicle"].reset_index(drop=True)

In [14]:
train = train.drop("cp_type", axis=1)
test = test.drop("cp_type", axis=1)

In [15]:
del train["sig_id"]
del target_df["sig_id"]
del non_target_df["sig_id"]
del test["sig_id"]
del ss["sig_id"]

In [16]:
train

Unnamed: 0,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,...,c-90,c-91,c-92,c-93,c-94,c-95,c-96,c-97,c-98,c-99
0,0,0,1.0620,0.5577,-0.2479,-0.6208,-0.1944,-1.0120,-1.0220,-0.0326,...,0.2862,0.2584,0.8076,0.5523,-0.1912,0.6584,-0.3981,0.2139,0.3801,0.4176
1,2,0,0.0743,0.4087,0.2991,0.0604,1.0190,0.5207,0.2341,0.3372,...,-0.4265,0.7543,0.4708,0.0230,0.2957,0.4899,0.1522,0.1241,0.6077,0.7371
2,1,0,0.6280,0.5817,1.5540,-0.0764,-0.0323,1.2390,0.1715,0.2155,...,-0.7250,-0.6297,0.6103,0.0223,-1.3240,-0.3174,-0.6417,-0.2187,-1.4080,0.6931
3,1,0,-0.5138,-0.2491,-0.2656,0.5288,4.0620,-0.8095,-1.9590,0.1792,...,-2.0990,-0.6441,-5.6300,-1.3780,-0.8632,-1.2880,-1.6210,-0.8784,-0.3876,-0.8154
4,2,1,-0.3254,-0.4009,0.9700,0.6919,1.4180,-0.8244,-0.2800,-0.1498,...,0.0042,0.0048,0.6670,1.0690,0.5523,-0.3031,0.1094,0.2885,-0.3786,0.7125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21943,2,0,0.1608,-1.0500,0.2551,-0.2239,-0.2431,0.4256,-0.1166,-0.1777,...,0.0789,0.3538,0.0558,0.3377,-0.4753,-0.2504,-0.7415,0.8413,-0.4259,0.2434
21944,0,1,0.1394,-0.0636,-0.1112,-0.5080,-0.4713,0.7201,0.5773,0.3055,...,0.1969,0.0262,-0.8121,0.3434,0.5372,-0.3246,0.0631,0.9171,0.5258,0.4680
21945,0,1,-1.3260,0.3478,-0.3743,0.9905,-0.7178,0.6621,-0.2252,-0.5565,...,0.4286,0.4426,0.0423,-0.3195,-0.8086,-0.9798,-0.2084,-0.1224,-0.2715,0.3689
21946,0,0,0.6660,0.2324,0.4392,0.2044,0.8531,-0.0343,0.0323,0.0463,...,-0.1105,0.4258,-0.2012,0.1506,1.5230,0.7101,0.1732,0.7015,-0.6290,0.0740


## Rank Gauss

https://www.kaggle.com/nayuts/moa-pytorch-nn-pca-rankgauss

連続値を特定の範囲の閉域に押し込めて、分布の偏りを解消する方法です。

In [17]:
for col in g_cols + c_cols:
    transformer = QuantileTransformer(n_quantiles=100, random_state=random_seed, output_distribution="normal")

    vec_len = len(train[col].values)
    vec_len_test = len(test[col].values)

    raw_vec = train[col].values.reshape(vec_len, 1)
    transformer.fit(raw_vec)

    train[col] = transformer.transform(raw_vec).reshape(1, vec_len)[0]
    test[col] = transformer.transform(test[col].values.reshape(vec_len_test, 1)).reshape(1, vec_len_test)[0]

In [18]:
train

Unnamed: 0,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,...,c-90,c-91,c-92,c-93,c-94,c-95,c-96,c-97,c-98,c-99
0,0,0,1.111801,0.903367,-0.433829,-0.971728,-0.286559,-1.011388,-1.357431,-0.041716,...,0.435228,0.388106,1.297345,0.882752,-0.202495,1.052112,-0.472513,0.345458,0.591507,0.692516
1,2,0,0.105667,0.672509,0.257486,0.086759,1.199685,0.691813,0.353695,0.558374,...,-0.491941,1.148246,0.728406,0.097171,0.454821,0.773468,0.233309,0.207813,0.964312,1.223121
2,1,0,0.767036,0.942499,1.408911,-0.126492,-0.028694,1.490985,0.272541,0.359490,...,-0.794302,-0.715229,0.962055,0.096127,-1.176291,-0.361225,-0.727620,-0.248613,-1.076346,1.142699
3,1,0,-0.755626,-0.297077,-0.455058,0.765972,2.343522,-0.852713,-2.316440,0.301512,...,-1.381920,-0.730154,-1.612183,-1.211000,-0.911943,-1.191839,-1.286279,-0.943448,-0.439482,-0.881278
4,2,1,-0.468806,-0.504196,0.956769,0.975864,1.447729,-0.863807,-0.346926,-0.227072,...,0.045906,0.023813,1.057944,1.737007,0.844923,-0.344784,0.176914,0.457388,-0.428668,1.176713
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21943,2,0,0.225332,-1.269833,0.203082,-0.361744,-0.365285,0.574150,-0.118761,-0.271980,...,0.144679,0.531217,0.104134,0.547313,-0.547039,-0.278963,-0.822632,1.339243,-0.480911,0.419996
21944,0,1,0.195938,-0.039550,-0.255311,-0.799750,-0.721089,0.924354,0.779755,0.509043,...,0.303342,0.053359,-0.846121,0.555644,0.821718,-0.369418,0.114555,1.470674,0.826858,0.772835
21945,0,1,-1.946077,0.575730,-0.604782,1.298848,-1.057997,0.856044,-0.274794,-0.731116,...,0.655890,0.665706,0.084499,-0.350560,-0.865833,-1.005158,-0.245946,-0.123235,-0.310856,0.613841
21946,0,0,0.803911,0.405671,0.418646,0.309494,1.068126,-0.020533,0.084007,0.087675,...,-0.105613,0.641177,-0.236051,0.272655,2.254169,1.132558,0.263331,1.109534,-0.658578,0.173880


## PCA features (+ Existing features)

既存のカラムは残したほうがいいのだろうか？？
→ このコンペでは残したほうがいい成績が出ている。

In [19]:
# g-
n_comp = 50

data = pd.concat([pd.DataFrame(train[g_cols]), pd.DataFrame(test[g_cols])])
data2 = PCA(n_components=n_comp, random_state=random_seed).fit_transform(data[g_cols])
train2 = data2[: train.shape[0]]
test2 = data2[-test.shape[0] :]

train2 = pd.DataFrame(train2, columns=[f"pca_G-{i}" for i in range(n_comp)])
test2 = pd.DataFrame(test2, columns=[f"pca_G-{i}" for i in range(n_comp)])

# train.drop(g_cols, axis=1, inplace=True)
# test.drop(g_cols, axis=1, inplace=True)

train = pd.concat((train, train2), axis=1)
test = pd.concat((test, test2), axis=1)

In [20]:
# c-
n_comp = 15

data = pd.concat([pd.DataFrame(train[c_cols]), pd.DataFrame(test[c_cols])])
data2 = PCA(n_components=n_comp, random_state=random_seed).fit_transform(data[c_cols])
train2 = data2[: train.shape[0]]
test2 = data2[-test.shape[0] :]

train2 = pd.DataFrame(train2, columns=[f"pca_C-{i}" for i in range(n_comp)])
test2 = pd.DataFrame(test2, columns=[f"pca_C-{i}" for i in range(n_comp)])

# train.drop(c_cols, axis=1, inplace=True)
# test.drop(c_cols, axis=1, inplace=True)

train = pd.concat((train, train2), axis=1)
test = pd.concat((test, test2), axis=1)

In [21]:
train

Unnamed: 0,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,...,pca_C-5,pca_C-6,pca_C-7,pca_C-8,pca_C-9,pca_C-10,pca_C-11,pca_C-12,pca_C-13,pca_C-14
0,0,0,1.111801,0.903367,-0.433829,-0.971728,-0.286559,-1.011388,-1.357431,-0.041716,...,1.128005,0.425778,-0.343062,-0.195748,0.352685,0.403829,0.277811,0.330617,-0.981467,0.679548
1,2,0,0.105667,0.672509,0.257486,0.086759,1.199685,0.691813,0.353695,0.558374,...,-0.676575,0.021277,-0.388366,-1.150616,0.137138,0.875934,0.165417,-0.475121,-1.175414,-0.739302
2,1,0,0.767036,0.942499,1.408911,-0.126492,-0.028694,1.490985,0.272541,0.359490,...,0.925477,-0.989601,-0.858368,-0.691935,-0.550415,0.581527,0.051508,0.303663,0.545320,-0.118935
3,1,0,-0.755626,-0.297077,-0.455058,0.765972,2.343522,-0.852713,-2.316440,0.301512,...,1.010039,0.635626,-0.677750,-1.010033,0.952495,2.045002,1.416006,-0.397999,-0.616688,0.142138
4,2,1,-0.468806,-0.504196,0.956769,0.975864,1.447729,-0.863807,-0.346926,-0.227072,...,-0.263976,-0.560569,-0.483119,0.088108,-0.381305,0.687056,-0.857168,0.024852,-0.463927,0.182058
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21943,2,0,0.225332,-1.269833,0.203082,-0.361744,-0.365285,0.574150,-0.118761,-0.271980,...,-0.529939,1.351794,-0.019124,-0.352074,-0.098632,0.090319,-0.409949,1.080525,-0.232746,0.303632
21944,0,1,0.195938,-0.039550,-0.255311,-0.799750,-0.721089,0.924354,0.779755,0.509043,...,-0.697863,0.200118,1.212996,-1.002997,-1.273071,0.963687,0.770501,0.796243,-0.197498,-1.433636
21945,0,1,-1.946077,0.575730,-0.604782,1.298848,-1.057997,0.856044,-0.274794,-0.731116,...,-1.749574,-1.653004,0.925076,0.404705,-0.663696,0.914806,0.134995,-1.697094,0.385734,0.509971
21946,0,0,0.803911,0.405671,0.418646,0.309494,1.068126,-0.020533,0.084007,0.087675,...,-0.492358,0.445483,0.295951,0.073333,1.863752,0.101821,-2.197211,0.720895,-0.241714,-0.673445


## feature Selection using Variance Encoding

分散がしきい値以下の特徴量を捨てます。

In [22]:
var_thresh = VarianceThreshold(threshold=0.5)

data = train.append(test)
data_transformed = var_thresh.fit_transform(data.iloc[:, 2:])

train_transformed = data_transformed[: train.shape[0]]
test_transformed = data_transformed[-test.shape[0] :]

train = pd.DataFrame(train[["cp_time", "cp_dose"]].values.reshape(-1, 2), columns=["cp_time", "cp_dose"])
train = pd.concat([train, pd.DataFrame(train_transformed)], axis=1, ignore_index=True)

test = pd.DataFrame(test[["cp_time", "cp_dose"]].values.reshape(-1, 2), columns=["cp_time", "cp_dose"])
test = pd.concat([test, pd.DataFrame(test_transformed)], axis=1, ignore_index=True)

In [23]:
train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,929,930,931,932,933,934,935,936,937,938
0,0,0,1.111801,0.903367,-0.433829,-0.971728,-0.286559,-1.011388,-1.357431,-0.041716,...,1.128005,0.425778,-0.343062,-0.195748,0.352685,0.403829,0.277811,0.330617,-0.981467,0.679548
1,2,0,0.105667,0.672509,0.257486,0.086759,1.199685,0.691813,0.353695,0.558374,...,-0.676575,0.021277,-0.388366,-1.150616,0.137138,0.875934,0.165417,-0.475121,-1.175414,-0.739302
2,1,0,0.767036,0.942499,1.408911,-0.126492,-0.028694,1.490985,0.272541,0.359490,...,0.925477,-0.989601,-0.858368,-0.691935,-0.550415,0.581527,0.051508,0.303663,0.545320,-0.118935
3,1,0,-0.755626,-0.297077,-0.455058,0.765972,2.343522,-0.852713,-2.316440,0.301512,...,1.010039,0.635626,-0.677750,-1.010033,0.952495,2.045002,1.416006,-0.397999,-0.616688,0.142138
4,2,1,-0.468806,-0.504196,0.956769,0.975864,1.447729,-0.863807,-0.346926,-0.227072,...,-0.263976,-0.560569,-0.483119,0.088108,-0.381305,0.687056,-0.857168,0.024852,-0.463927,0.182058
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21943,2,0,0.225332,-1.269833,0.203082,-0.361744,-0.365285,0.574150,-0.118761,-0.271980,...,-0.529939,1.351794,-0.019124,-0.352074,-0.098632,0.090319,-0.409949,1.080525,-0.232746,0.303632
21944,0,1,0.195938,-0.039550,-0.255311,-0.799750,-0.721089,0.924354,0.779755,0.509043,...,-0.697863,0.200118,1.212996,-1.002997,-1.273071,0.963687,0.770501,0.796243,-0.197498,-1.433636
21945,0,1,-1.946077,0.575730,-0.604782,1.298848,-1.057997,0.856044,-0.274794,-0.731116,...,-1.749574,-1.653004,0.925076,0.404705,-0.663696,0.914806,0.134995,-1.697094,0.385734,0.509971
21946,0,0,0.803911,0.405671,0.418646,0.309494,1.068126,-0.020533,0.084007,0.087675,...,-0.492358,0.445483,0.295951,0.073333,1.863752,0.101821,-2.197211,0.720895,-0.241714,-0.673445


## MinMaxScaler

In [24]:
#mm_scaler = MinMaxScaler()
#col_name = train.columns
#
#data = pd.concat([train, test])
#data2 = mm_scaler.fit_transform(data)
#
#train = pd.DataFrame(data2[: train.shape[0]])
#test = pd.DataFrame(data2[-test.shape[0] :])

In [25]:
#train

# Create Model

In [26]:
def create_model_dt(y):

    dt_conf = ModelConfig(
        metrics=[logloss],
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=0.001),
        optimizer=tfa.optimizers.AdamW(lr=1e-3, weight_decay=1e-5, clipvalue=756),
        nets=["dnn_nets"],
        apply_gbm_features=False,
        task="multilabel",
        earlystopping_patience=10,
    )

    dt_preprocessor = DefaultPreprocessor(dt_conf)
    dt_preprocessor.fit_transform_y(y)

    return DeepTable(config=dt_conf, preprocessor=dt_preprocessor)

# Learning

In [27]:
def learning(target, N_STARTS, N_SPLITS, do_predict=False, do_transfer_learning=False):
    oof = {}
    predictions = {}

    for seed in range(N_STARTS):
        seed_result = target.copy()
        seed_result.loc[:, target.columns] = 0
        prediction = ss.copy()
        prediction.loc[:, ss.columns] = 0

        fix_seed(random_seed)

        start_time = time()

        model_name = "DeepTables"
        model = create_model_dt(target)

        if not do_predict:
            continue

        oof_predict, _, test_predict = model.fit_cross_validation(
            train,
            target,
            X_eval=None,
            X_test=test,
            iterators=MultilabelStratifiedKFold(n_splits=N_SPLITS, random_state=random_seed, shuffle=True),
            random_state=random_seed,
            batch_size=128,
            epochs=100,
            verbose=0,
        )

        seed_score = metric(target.values, oof_predict)
        seed_result.loc[:, target.columns] += oof_predict

        if do_predict:
            prediction.loc[:, target.columns] += test_predict / N_SPLITS

        print(
            f"===== Result ===== [{str(datetime.timedelta(seconds = time() - start_time))[2:7]}] {model_name}: Seed {seed}: {seed_score}\n"
        )

        K.clear_session()
        del model
        x = gc.collect()

        oof[f"{model_name}_{seed}"] = seed_result
        predictions[f"{model_name}_{seed}"] = prediction

    return oof, predictions

In [28]:
N_STARTS = 5
N_SPLITS = 7

In [29]:
# Pre train with non-scored labels
_, _ = learning(non_target_df, N_STARTS, N_SPLITS)

In [30]:
oof, predictions = learning(target_df, N_STARTS, N_SPLITS, True, True)

Column index of X has been converted: Index(['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9',
       ...
       'x_929', 'x_930', 'x_931', 'x_932', 'x_933', 'x_934', 'x_935', 'x_936',
       'x_937', 'x_938'],
      dtype='object', length=939)


Start cross validation
Preparing features taken 0.48881030082702637s
Imputation taken 0.8550357818603516s
Categorical encoding taken 4.029273986816406e-05s


Column index of X has been converted: Index(['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9',
       ...
       'x_929', 'x_930', 'x_931', 'x_932', 'x_933', 'x_934', 'x_935', 'x_936',
       'x_937', 'x_938'],
      dtype='object', length=939)


transform X_test
Iterators:MultilabelStratifiedKFold(n_splits=7, random_state=22, shuffle=True)
Injected a callback [EarlyStopping]. monitor:val_logloss, patience:10, mode:min

Fold:1

2 Physical GPUs, 2 Logical GPUs
>>>>>>>>>>>>>>>>>>>>>> Model Desc <<<<<<<<<<<<<<<<<<<<<<< 
---------------------------------------------------------
inputs:
---------------------------------------------------------
['all_categorical_vars: (0)', 'input_continuous_all: (939)']
---------------------------------------------------------
embeddings:
---------------------------------------------------------
input_dims: []
output_dims: []
dropout: 0.3
---------------------------------------------------------
dense: dropout: 0
batch_normalization: False
---------------------------------------------------------
concat_embed_dense: shape: (None, 939)
---------------------------------------------------------
nets: ['dnn_nets']
---------------------------------------------------------
dnn: input_shape (None, 939), ou

Column index of X has been converted: Index(['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9',
       ...
       'x_929', 'x_930', 'x_931', 'x_932', 'x_933', 'x_934', 'x_935', 'x_936',
       'x_937', 'x_938'],
      dtype='object', length=939)


Preparing features taken 0.4971027374267578s
Imputation taken 0.8501465320587158s
Categorical encoding taken 1.8835067749023438e-05s


Column index of X has been converted: Index(['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9',
       ...
       'x_929', 'x_930', 'x_931', 'x_932', 'x_933', 'x_934', 'x_935', 'x_936',
       'x_937', 'x_938'],
      dtype='object', length=939)


transform X_test
Iterators:MultilabelStratifiedKFold(n_splits=7, random_state=22, shuffle=True)
Injected a callback [EarlyStopping]. monitor:val_logloss, patience:10, mode:min

Fold:1

2 Physical GPUs, 2 Logical GPUs
>>>>>>>>>>>>>>>>>>>>>> Model Desc <<<<<<<<<<<<<<<<<<<<<<< 
---------------------------------------------------------
inputs:
---------------------------------------------------------
['all_categorical_vars: (0)', 'input_continuous_all: (939)']
---------------------------------------------------------
embeddings:
---------------------------------------------------------
input_dims: []
output_dims: []
dropout: 0.3
---------------------------------------------------------
dense: dropout: 0
batch_normalization: False
---------------------------------------------------------
concat_embed_dense: shape: (None, 939)
---------------------------------------------------------
nets: ['dnn_nets']
---------------------------------------------------------
dnn: input_shape (None, 939), ou

Column index of X has been converted: Index(['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9',
       ...
       'x_929', 'x_930', 'x_931', 'x_932', 'x_933', 'x_934', 'x_935', 'x_936',
       'x_937', 'x_938'],
      dtype='object', length=939)


Start cross validation
Preparing features taken 0.49294495582580566s
Imputation taken 0.8685486316680908s
Categorical encoding taken 4.6253204345703125e-05s


Column index of X has been converted: Index(['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9',
       ...
       'x_929', 'x_930', 'x_931', 'x_932', 'x_933', 'x_934', 'x_935', 'x_936',
       'x_937', 'x_938'],
      dtype='object', length=939)


transform X_test
Iterators:MultilabelStratifiedKFold(n_splits=7, random_state=22, shuffle=True)
Injected a callback [EarlyStopping]. monitor:val_logloss, patience:10, mode:min

Fold:1

2 Physical GPUs, 2 Logical GPUs
>>>>>>>>>>>>>>>>>>>>>> Model Desc <<<<<<<<<<<<<<<<<<<<<<< 
---------------------------------------------------------
inputs:
---------------------------------------------------------
['all_categorical_vars: (0)', 'input_continuous_all: (939)']
---------------------------------------------------------
embeddings:
---------------------------------------------------------
input_dims: []
output_dims: []
dropout: 0.3
---------------------------------------------------------
dense: dropout: 0
batch_normalization: False
---------------------------------------------------------
concat_embed_dense: shape: (None, 939)
---------------------------------------------------------
nets: ['dnn_nets']
---------------------------------------------------------
dnn: input_shape (None, 939), ou

Column index of X has been converted: Index(['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9',
       ...
       'x_929', 'x_930', 'x_931', 'x_932', 'x_933', 'x_934', 'x_935', 'x_936',
       'x_937', 'x_938'],
      dtype='object', length=939)


Start cross validation
Preparing features taken 0.4926126003265381s
Imputation taken 0.8458337783813477s
Categorical encoding taken 1.9311904907226562e-05s


Column index of X has been converted: Index(['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9',
       ...
       'x_929', 'x_930', 'x_931', 'x_932', 'x_933', 'x_934', 'x_935', 'x_936',
       'x_937', 'x_938'],
      dtype='object', length=939)


transform X_test
Iterators:MultilabelStratifiedKFold(n_splits=7, random_state=22, shuffle=True)
Injected a callback [EarlyStopping]. monitor:val_logloss, patience:10, mode:min

Fold:1

2 Physical GPUs, 2 Logical GPUs
>>>>>>>>>>>>>>>>>>>>>> Model Desc <<<<<<<<<<<<<<<<<<<<<<< 
---------------------------------------------------------
inputs:
---------------------------------------------------------
['all_categorical_vars: (0)', 'input_continuous_all: (939)']
---------------------------------------------------------
embeddings:
---------------------------------------------------------
input_dims: []
output_dims: []
dropout: 0.3
---------------------------------------------------------
dense: dropout: 0
batch_normalization: False
---------------------------------------------------------
concat_embed_dense: shape: (None, 939)
---------------------------------------------------------
nets: ['dnn_nets']
---------------------------------------------------------
dnn: input_shape (None, 939), ou

Column index of X has been converted: Index(['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9',
       ...
       'x_929', 'x_930', 'x_931', 'x_932', 'x_933', 'x_934', 'x_935', 'x_936',
       'x_937', 'x_938'],
      dtype='object', length=939)


Start cross validation
Preparing features taken 0.49587559700012207s
Imputation taken 0.8590977191925049s
Categorical encoding taken 4.363059997558594e-05s


Column index of X has been converted: Index(['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9',
       ...
       'x_929', 'x_930', 'x_931', 'x_932', 'x_933', 'x_934', 'x_935', 'x_936',
       'x_937', 'x_938'],
      dtype='object', length=939)


transform X_test
Iterators:MultilabelStratifiedKFold(n_splits=7, random_state=22, shuffle=True)
Injected a callback [EarlyStopping]. monitor:val_logloss, patience:10, mode:min

Fold:1

2 Physical GPUs, 2 Logical GPUs
>>>>>>>>>>>>>>>>>>>>>> Model Desc <<<<<<<<<<<<<<<<<<<<<<< 
---------------------------------------------------------
inputs:
---------------------------------------------------------
['all_categorical_vars: (0)', 'input_continuous_all: (939)']
---------------------------------------------------------
embeddings:
---------------------------------------------------------
input_dims: []
output_dims: []
dropout: 0.3
---------------------------------------------------------
dense: dropout: 0
batch_normalization: False
---------------------------------------------------------
concat_embed_dense: shape: (None, 939)
---------------------------------------------------------
nets: ['dnn_nets']
---------------------------------------------------------
dnn: input_shape (None, 939), ou

# Cross Validation

In [31]:
initial_weights = [1.0 / N_STARTS for _ in range(N_STARTS)] + [1.0]
print(f"Initial weights: {initial_weights[:-1]}")

# https://www.kaggle.com/gogo827jz/optimise-blending-weights-with-bonus-0#Bonus-(Lagrange-Multiplier)


def lagrange_func(params):
    # weights, _lambda = params
    blend_ = blend(target_df.values.shape, params[:-1], oof)
    return metric(target_df.values, blend_) - params[-1] * (sum(params[:-1]) - 1)


grad_l = grad(lagrange_func)


def lagrange_obj(params):
    # weights, _lambda = params
    d = grad_l(params).tolist()
    return d[:-1] + [sum(params[:-1]) - 1]


blend_ = blend(target_df.values.shape, initial_weights[:-1], oof)
print(f"Initial blend CV: {metric(target_df.values, blend_)}")

optimized_weights = fsolve(lagrange_obj, initial_weights)
blend_ = blend(target_df.values.shape, optimized_weights[:-1], oof)
print(f"Optimized blend CV: {metric(target_df.values, blend_)}")

print(f"Optimized weights: {optimized_weights[:-1]}")
print(f"Check the sum of all weights: {sum(optimized_weights[:-1])}")

Initial weights: [0.2, 0.2, 0.2, 0.2, 0.2]
Initial blend CV: 0.017705981722089145
Optimized blend CV: 0.017705598889274466
Optimized weights: [ 0.07973718  0.48128605  0.14923318  0.41463391 -0.12489032]
Check the sum of all weights: 1.0000000000000002


# Postprocessing

In [32]:
# Weighted blend
submit_df.loc[:, target_df.columns] = blend(ss.shape, optimized_weights[:-1], predictions)

In [33]:
# Clipping
submit_df.loc[:, target_df.columns] = submit_df.loc[:, target_df.columns].clip(1e-7, 1 - 1e-7)

In [34]:
submit_df.loc[test_df["cp_type"] == "ctl_vehicle", target_df.columns] = 0

# Output

In [35]:
submit_df.to_csv("submission.csv", index=False)