# Strategy

- Preprocessing
    - Include ctrl_vehicle
    - RankGauss
    - PCA + Existing Features
    - KMeans
    - Basic stats
- Model
    - Multi head ResNet (tensorflow)
    - TabNet (pytorch)
- Training
    - Pre-train with non-scored target.
    - Train with public test pseudo label
    - Optimizer: Adam/AdamW with weight_decay
    - Loss: BCE with Label smoothing + Logits
- Prediction
    - Ensemble above with average.

# Change Log

- v65
    - Remove clipping.
    - Disable Variance Encoding.
- v66
    - Add AUC.
    - CV only with original training data.
- v67
    - Add `train_drug.csv`.
    - Add Drug and MultiLabel Stratification.
- v68
    - Remove public test pseudo label.
    - Enable pseudo labeling.
    - Disable pre-training with non-scored target.
- v69
    - Disable pseudo labeling.
    - Re-enable pre-training with non-scored target.
    - Re-add public test pseudo label.
    - Add correlation.
    - Update label smoothing parameter.
- v70 - **LB: 0.01840**
    - Amend num of seed.
- v71
    - Update model parameters.
        - ResNet network
        - TabNet dimension
- v72
    - Add KMeans and basic stats.
    - Add NODE model.
- v73
    - Update split condition of group multilabel stratified kfold.
    - Update NODE parameters.
- v74
    - Disable pre-train with non-scored target to reduce execution time.
- v75
    - Fold 5 to 7.
- v76
    - Remove ResNet for execution time reduction.
- v77
    - Use 3 models. ["ResNet", "TabNet", "NODE"]
    - Enable pre-train for ResNet.
- v78 - **LB: 0.01841**
    - Reset folds for each seed.
- v79
    - Add simple NN model again.
    - Fold 7 to 5.
- v80
    - Remove simple NN and NODE model.
    - Increase num of seed x2 to x3.
- v81
    - Use ctrl_vehicle

# Setup

## for Google Colab

In [None]:
import sys

# True when the `google.colab` module is already loaded, i.e. the notebook runs on Google Colab.
IN_COLAB = "google.colab" in sys.modules

In [2]:
# Kaggle competition slug; the (currently disabled) Colab setup cell interpolates it
# into its download commands and directory paths.
COMPETE = "lish-moa"
# Kaggle datasets ("<owner>/<dataset>") that the (currently disabled) Colab setup cell
# downloads and unzips under the competition's input directory.
DATASETS = [
    "imokuri/pytorchtabnet",
    "imokuri/moablendblendblend",
    "imokuri/adabelief010",
    "tolgadincer/autograd",
    "yasufuminakama/iterative-stratification",
    "rahulsd91/moapublictest",
]
# Extra packages the (currently disabled) Colab setup cell would `pip install`; none at present.
PACKAGES = []

In [3]:
"""
if IN_COLAB:
    !pip install -q -U git+https://github.com/IMOKURI/kaggle_on_google_colab.git

    from kaggle_on_google_colab import setup
    kaggle = setup.Setup()
    kaggle.dirs(COMPETE)

    !kaggle competitions download -p /content/zip {COMPETE}
    for line in setup.exec_get_lines(cmd=f"kaggle competitions files --csv {COMPETE} | egrep -v \"Warning: Looks like you're using an outdated API Version|name,size,creationDate\" | cut -d , -f 1"):
        !unzip -q -n /content/zip/{line.decode().strip()}.zip -d /content/{COMPETE}/input/{COMPETE}

    for dataset in DATASETS:
        dataset_name = dataset.split("/")[-1]

        !kaggle datasets download -p /content/zip {dataset}
        !unzip -q -n /content/zip/{dataset_name}.zip -d /content/{COMPETE}/input/{dataset_name}

    for package_ in PACKAGES:
        !pip install {package_}

    !pip install -U tensorflow-addons
    !mv /content/zip/train_drug.csv /content/{COMPETE}/input/{COMPETE}/

    %cd /content/{COMPETE}/output
"""



## Library

In [4]:
import warnings

warnings.filterwarnings("ignore")

In [5]:
import sys

sys.path.append("../input/iterative-stratification/iterative-stratification-master")
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

sys.path.append("../input/autograd")
import autograd.numpy as np
from autograd import grad

sys.path.append("../input/pytorchtabnet")
from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetRegressor

# sys.path.append("../input/adabelief010")
# from AdaBelief import AdaBelief
# from AdaBelief_tf import AdaBeliefOptimizer

In [6]:
import datetime
import gc
import itertools
import os
import random
from collections import defaultdict
from time import time
from typing import Optional

import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow_addons as tfa
import torch
import torch.nn.functional as F
import torch.optim as optim
from scipy.optimize import fsolve, minimize
from scipy.stats import pearsonr
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import QuantileTransformer
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow_probability import distributions as tfp_distributions
from tensorflow_probability import stats as tfp_stats
from torch import nn
from torch.nn.modules.loss import _WeightedLoss
from torch.optim.lr_scheduler import ReduceLROnPlateau as torch_ReduceLROnPlateau

In [7]:
# import numpy as np
import optuna

In [8]:
# TensorFlow feature switches: mixed-precision policy and XLA JIT compilation.
MIXED_PRECISION = False
XLA_ACCELERATE = True

if MIXED_PRECISION:
    from tensorflow.keras.mixed_precision import experimental as mixed_precision

    # NOTE(review): `tpu` is not defined in any cell visible above. If MIXED_PRECISION is
    # switched on, this line raises NameError unless `tpu` is defined elsewhere — confirm.
    if tpu:
        policy = tf.keras.mixed_precision.experimental.Policy("mixed_bfloat16")
    else:
        policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16")
    mixed_precision.set_policy(policy)
    print("Mixed precision enabled")

if XLA_ACCELERATE:
    # Turn on TensorFlow's JIT (XLA) graph optimizer globally.
    tf.config.optimizer.set_jit(True)
    print("Accelerated Linear Algebra enabled")

Accelerated Linear Algebra enabled


In [9]:
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is normally a debugging aid (synchronous kernel
# launches, so errors surface at the failing call) and slows GPU work — confirm it is intended here.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Functions

In [10]:
def fix_seed(seed=2020):
    """Seed every random number generator this notebook relies on.

    Sets ``PYTHONHASHSEED`` and seeds Python's ``random``, NumPy, TensorFlow
    and PyTorch. When CUDA is available, the CUDA generators are seeded as
    well and cuDNN is put in deterministic, non-benchmarking mode.

    Args:
        seed: integer seed shared by all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed, torch.manual_seed):
        seeder(seed)

    # Nothing GPU-specific to configure on CPU-only machines.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# Notebook-wide seed, also passed as `random_state` to the preprocessing estimators below.
random_seed = 22
fix_seed(random_seed)

## Metrics

In [11]:
# Evaluation Metric with sigmoid applied and clipping

## for tensorflow
def logloss(y_true, y_pred):
    """Mean binary cross-entropy for Keras, computed from raw model outputs.

    ``y_pred`` is squashed with a sigmoid first; ``1e-15`` is added inside
    each logarithm so that it never receives exactly zero.
    """
    prob = 1 / (1 + K.exp(-y_pred))
    negative_term = (1 - y_true) * K.log(1 - prob + 1e-15)
    positive_term = y_true * K.log(prob + 1e-15)
    return K.mean(-(negative_term + positive_term))


## for pytorch
class LogitsLogLoss(Metric):
    """TabNet evaluation metric: mean binary log loss on raw (pre-sigmoid) predictions."""

    def __init__(self):
        # Flagged as a metric that is not to be maximised.
        self._maximize = False
        self._name = "logits_ll"

    def __call__(self, y_true, y_pred):
        """Apply a sigmoid to ``y_pred`` and return the mean binary cross-entropy against ``y_true``."""
        prob = 1 / (1 + np.exp(-y_pred))
        negative_term = (1 - y_true) * np.log(1 - prob + 1e-15)
        positive_term = y_true * np.log(prob + 1e-15)
        return np.mean(-(negative_term + positive_term))


## for overall
## [Fast Numpy Log Loss] https://www.kaggle.com/gogo827jz/optimise-blending-weights-4-5x-faster-log-loss
def metric(y_true, y_pred):
    """Return the binary log loss averaged over target columns.

    Both arguments are 2-D arrays of identical shape (rows x targets);
    ``y_pred`` holds probabilities. ``1e-15`` is added inside each logarithm
    so that predictions of exactly 0 or 1 do not produce infinities.
    """
    n_targets = y_pred.shape[1]

    def column_loss(col):
        truth = y_true[:, col]
        pred = y_pred[:, col]
        return -np.mean(truth * np.log(pred + 1e-15) + (1 - truth) * np.log(1 - pred + 1e-15))

    # Accumulate column by column, then average over the number of targets.
    return sum(column_loss(col) for col in range(n_targets)) / n_targets

## Loss functions

In [12]:
# https://www.kaggle.com/felipebihaiek/torch-continued-from-auxiliary-targets-smoothing
class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Args:
        weight: optional rescaling weight forwarded to
            ``F.binary_cross_entropy_with_logits``.
        reduction: ``"mean"`` or ``"sum"``; any other value (e.g. ``"none"``)
            returns the unreduced, element-wise loss.
        smoothing: smoothing factor in ``[0, 1)``; targets are moved towards
            0.5 by this fraction.
    """

    def __init__(self, weight=None, reduction="mean", smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth(targets: torch.Tensor, n_labels: int, smoothing=0.0):
        """Return ``targets * (1 - smoothing) + 0.5 * smoothing``.

        ``n_labels`` is accepted for interface compatibility and is not used.
        """
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE between logits ``inputs`` and ``targets``."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1), self.smoothing)
        # Request the element-wise loss explicitly. The functional defaults to
        # reduction="mean", which used to collapse the loss to a scalar before
        # the branches below ran, so "sum" and "none" silently returned the mean.
        loss = F.binary_cross_entropy_with_logits(inputs, targets, self.weight, reduction="none")

        if self.reduction == "sum":
            return loss.sum()
        if self.reduction == "mean":
            return loss.mean()
        return loss

## Cross Validation

In [13]:
# Blend oof predictions
def blend(size, weights, oof):
    """Return the weighted sum of out-of-fold prediction frames.

    Args:
        size: 2-D shape ``(rows, cols)`` of the result.
        weights: one weight per entry of ``oof``, in the mapping's iteration order.
        oof: mapping of model name to a DataFrame of predictions; each frame is
            cropped to the first ``rows`` rows and ``cols`` columns.
    """
    blend_ = np.zeros(size)
    n_rows = blend_.shape[0]
    n_cols = blend_.shape[1]
    for position, frame in enumerate(oof.values()):
        blend_ += weights[position] * frame.values[:n_rows, :n_cols]
    return blend_

In [14]:
def cross_validation(size, weight, y_true, oof):
    """Score a weighted blend of out-of-fold predictions.

    Only the first ``size[0]`` rows of ``y_true`` are scored. Prints and
    returns ``(CV, AUC)``: the column-averaged log loss from ``metric`` and
    the mean per-target ROC AUC.
    """
    n_rows = size[0]
    truth = y_true[:n_rows]
    blended = blend(truth.shape, weight, oof)

    aucs = [
        roc_auc_score(y_true=truth[:, task_id], y_score=blended[:, task_id])
        for task_id in range(blended.shape[1])
    ]

    CV = metric(truth, blended)
    AUC = np.mean(aucs)
    print(f"Blended CV: {CV}, AUC : {AUC}")

    return CV, AUC

# Load Data

In [15]:
# Competition files: features, scored / non-scored targets, sample submission and
# the per-sample drug table (joined on `sig_id` further down).
train_df = pd.read_csv("../input/lish-moa/train_features.csv")
test_df = pd.read_csv("../input/lish-moa/test_features.csv")
target_df = pd.read_csv("../input/lish-moa/train_targets_scored.csv")
non_target_df = pd.read_csv("../input/lish-moa/train_targets_nonscored.csv")
submit_df = pd.read_csv("../input/lish-moa/sample_submission.csv")
drug_df = pd.read_csv("../input/lish-moa/train_drug.csv")

# Public test features plus an existing blended submission; the submission is
# appended to the targets below as pseudo labels.
pub_test_df = pd.read_csv("../input/moapublictest/test_features.csv")
pub_submit_df = pd.read_csv("../input/moablendblendblend/submission.csv")

In [16]:
# Work on copies so the frames as loaded stay untouched; `train_df` is reused
# later for its original row count and column names.
train = train_df.copy()
test = test_df.copy()
target = target_df.copy()
non_target = non_target_df.copy()
ss = submit_df.copy()
drug = drug_df.copy()

pub_test = pub_test_df.copy()
pub_ss = pub_submit_df.copy()

## Use public test data for training

In [17]:
# Merge public test data (and pseudo label) into train data
# Both frames are appended in the same order and re-indexed, so `train` and `target`
# stay aligned by position.
# NOTE(review): assumes `pub_ss` rows are in the same order as `pub_test` — confirm.
train = pd.concat([train, pub_test]).reset_index(drop=True)
target = pd.concat([target, pub_ss]).reset_index(drop=True)

In [18]:
target

Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,...,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_000644bb2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,id_000779bfc,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,id_000a6266a,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,id_0015fd391,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,id_001626bd3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27791,id_ff7004b87,0.001377,0.001558,0.001461,0.003053,0.006736,0.002350,0.001215,0.003282,0.001440,...,0.001453,0.004201,0.002171,0.118851,0.007058,0.001545,0.005873,0.001801,0.002117,0.002088
27792,id_ff925dd0d,0.003097,0.002258,0.001451,0.007406,0.021323,0.005489,0.005110,0.004447,0.001555,...,0.001218,0.001697,0.002843,0.002327,0.002278,0.001539,0.002104,0.001877,0.001059,0.002098
27793,id_ffb710450,0.001730,0.001327,0.001455,0.009516,0.032527,0.004954,0.002966,0.004133,0.000729,...,0.001056,0.001165,0.002498,0.002348,0.001464,0.001172,0.001288,0.001693,0.001100,0.001546
27794,id_ffbb869f2,0.001697,0.001459,0.001544,0.019809,0.023506,0.004427,0.004441,0.002557,0.001029,...,0.001086,0.000706,0.002753,0.001079,0.001576,0.001093,0.001453,0.001938,0.000678,0.002631


# Preprocessing

In [19]:
# Encode the dose label as an integer: D1 -> 0, D2 -> 1.
train.loc[:, "cp_dose"] = train.loc[:, "cp_dose"].map({"D1": 0, "D2": 1})
test.loc[:, "cp_dose"] = test.loc[:, "cp_dose"].map({"D1": 0, "D2": 1})

In [20]:
# Encode the treatment duration as an ordinal index: 24 -> 0, 48 -> 1, 72 -> 2.
train.loc[:, "cp_time"] = train.loc[:, "cp_time"].map({24: 0, 48: 1, 72: 2})
test.loc[:, "cp_time"] = test.loc[:, "cp_time"].map({24: 0, 48: 1, 72: 2})

## Remove ctrl_vehicle



In [21]:
# True: keep control-vehicle rows and encode `cp_type` as a 0/1 feature.
# False: drop control-vehicle rows from the training frames and remove `cp_type` altogether.
USE_CTRL_VEHICLE = False

if USE_CTRL_VEHICLE:
    train.loc[:, "cp_type"] = train.loc[:, "cp_type"].map({"ctl_vehicle": 0, "trt_cp": 1})
    test.loc[:, "cp_type"] = test.loc[:, "cp_type"].map({"ctl_vehicle": 0, "trt_cp": 1})

else:
    # Filter the targets first, while `train` still has all rows and the `cp_type` column.
    target = target.loc[train["cp_type"] != "ctl_vehicle"].reset_index(drop=True)
    # `non_target` only covers the original training rows (no pseudo-labelled public test
    # rows were appended to it), hence the slice to `train_df.shape[0]`.
    non_target = non_target.loc[train[: train_df.shape[0]]["cp_type"] != "ctl_vehicle"].reset_index(drop=True)

    train = train.loc[train["cp_type"] != "ctl_vehicle"].reset_index(drop=True)

    # Rows of `test` are NOT filtered here; only the column is dropped.
    train = train.drop("cp_type", axis=1)
    test = test.drop("cp_type", axis=1)

## Merge drug_id into training data

https://www.kaggle.com/c/lish-moa/discussion/195195

In [22]:
# Look up `drug_id` for every remaining sample. The left join keeps all `sig_id`s, in order;
# ids without an entry in `drug` get a missing `drug_id`.
target_drug = pd.DataFrame(target.loc[:, "sig_id"]).merge(drug, on="sig_id", how="left")
non_target_drug = pd.DataFrame(non_target.loc[:, "sig_id"]).merge(drug, on="sig_id", how="left")

In [23]:
# Give samples without a known drug a shared placeholder id.
target_drug = target_drug.fillna("xxxxxxxxx")
non_target_drug = non_target_drug.fillna("xxxxxxxxx")

In [24]:
target_drug

Unnamed: 0,sig_id,drug_id
0,id_000644bb2,b68db1d53
1,id_000779bfc,df89a8e5a
2,id_000a6266a,18bb41b2c
3,id_0015fd391,8c7f86626
4,id_001626bd3,7cbed3131
...,...,...
25567,id_ff7004b87,xxxxxxxxx
25568,id_ff925dd0d,xxxxxxxxx
25569,id_ffb710450,xxxxxxxxx
25570,id_ffbb869f2,xxxxxxxxx


## Remove sig_id

In [25]:
# Drop the identifier column from every frame so only features / labels remain.
del train["sig_id"]
del target["sig_id"]
del non_target["sig_id"]
del test["sig_id"]
del ss["sig_id"]

In [26]:
train

Unnamed: 0,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,...,c-90,c-91,c-92,c-93,c-94,c-95,c-96,c-97,c-98,c-99
0,0,0,1.0620,0.5577,-0.2479,-0.6208,-0.1944,-1.0120,-1.0220,-0.0326,...,0.2862,0.2584,0.8076,0.5523,-0.1912,0.6584,-0.3981,0.2139,0.3801,0.4176
1,2,0,0.0743,0.4087,0.2991,0.0604,1.0190,0.5207,0.2341,0.3372,...,-0.4265,0.7543,0.4708,0.0230,0.2957,0.4899,0.1522,0.1241,0.6077,0.7371
2,1,0,0.6280,0.5817,1.5540,-0.0764,-0.0323,1.2390,0.1715,0.2155,...,-0.7250,-0.6297,0.6103,0.0223,-1.3240,-0.3174,-0.6417,-0.2187,-1.4080,0.6931
3,1,0,-0.5138,-0.2491,-0.2656,0.5288,4.0620,-0.8095,-1.9590,0.1792,...,-2.0990,-0.6441,-5.6300,-1.3780,-0.8632,-1.2880,-1.6210,-0.8784,-0.3876,-0.8154
4,2,1,-0.3254,-0.4009,0.9700,0.6919,1.4180,-0.8244,-0.2800,-0.1498,...,0.0042,0.0048,0.6670,1.0690,0.5523,-0.3031,0.1094,0.2885,-0.3786,0.7125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,0,0,0.4571,-0.5743,3.3930,-0.6202,0.8557,1.6240,0.0640,-0.6316,...,-1.1790,-0.6422,-0.4367,0.0159,-0.6539,-0.4791,-1.2680,-1.1280,-0.4167,-0.6600
25568,0,0,-0.5885,-0.2548,2.5850,0.3456,0.4401,0.3107,-0.7437,-0.0143,...,0.0210,0.5780,-0.5888,0.8057,0.9312,1.2730,0.2614,-0.2790,-0.0131,-0.0934
25569,2,0,-0.3985,-0.1554,0.2677,-0.6813,0.0152,0.4791,-0.0166,0.7501,...,0.4418,0.9153,-0.1862,0.4049,0.9568,0.4666,0.0461,0.5888,-0.4205,-0.1504
25570,1,1,-1.0960,-1.7750,-0.3977,1.0160,-1.3350,-0.2207,-0.3611,-1.3020,...,0.3079,-0.4473,-0.8192,0.7785,0.3133,0.1286,-0.2618,0.5074,0.7430,-0.0484


In [27]:
print(train.shape)
print(target.shape)
print(non_target.shape)

print(test.shape)
print(ss.shape)

(25572, 874)
(25572, 206)
(21948, 402)
(3982, 874)
(3982, 206)


## Rank Gauss

https://www.kaggle.com/nayuts/moa-pytorch-nn-pca-rankgauss



In [28]:
# Column groups, taken from the original training frame: "g-*" and "c-*" features.
g_cols = [col for col in train_df.columns if col.startswith("g-")]
c_cols = [col for col in train_df.columns if col.startswith("c-")]

# Rank-Gauss: map each feature to a normal distribution with a per-column quantile transform.
for col in g_cols + c_cols:
    transformer = QuantileTransformer(n_quantiles=100, random_state=random_seed, output_distribution="normal")

    vec_len = len(train[col].values)
    vec_len_test = len(test[col].values)

    # The transformer expects a 2-D (n_samples, 1) array; it is fitted on `train` only.
    raw_vec = train[col].values.reshape(vec_len, 1)
    transformer.fit(raw_vec)

    # Apply the fitted transform to both frames and flatten back to 1-D columns.
    train[col] = transformer.transform(raw_vec).reshape(1, vec_len)[0]
    test[col] = transformer.transform(test[col].values.reshape(vec_len_test, 1)).reshape(1, vec_len_test)[0]

In [29]:
train

Unnamed: 0,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,...,c-90,c-91,c-92,c-93,c-94,c-95,c-96,c-97,c-98,c-99
0,0,0,1.124260,0.896698,-0.436214,-0.965311,-0.287443,-1.016437,-1.360774,-0.045876,...,0.428869,0.384250,1.300482,0.879422,-0.206096,1.046155,-0.479268,0.339234,0.583214,0.696712
1,2,0,0.117451,0.667759,0.260124,0.097531,1.204172,0.692876,0.356691,0.559630,...,-0.499745,1.147297,0.728062,0.089253,0.453665,0.770909,0.226300,0.202945,0.955497,1.219730
2,1,0,0.777229,0.935347,1.414044,-0.113563,-0.025489,1.494313,0.277364,0.357917,...,-0.800373,-0.721883,0.960080,0.088259,-1.182700,-0.358059,-0.732238,-0.253014,-1.085791,1.140342
3,1,0,-0.749489,-0.299404,-0.459100,0.774708,2.344556,-0.856449,-2.323390,0.298781,...,-1.391931,-0.736149,-1.612415,-1.219207,-0.912980,-1.194806,-1.288428,-0.950502,-0.445204,-0.884754
4,2,1,-0.460555,-0.508226,0.959313,0.984009,1.451890,-0.867329,-0.342599,-0.234770,...,0.038727,0.021330,1.056779,1.734597,0.843756,-0.341198,0.169668,0.451146,-0.434772,1.174162
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,0,0,0.599819,-0.723658,2.333805,-0.964385,1.075072,1.786462,0.131113,-0.812649,...,-1.126325,-0.734273,-0.505298,0.079622,-0.730794,-0.547424,-1.172338,-1.131847,-0.476173,-0.740089
25568,0,0,-0.865375,-0.307222,1.988666,0.523322,0.644383,0.437283,-0.989775,-0.015526,...,0.062985,0.868007,-0.660578,1.300272,1.418983,2.041320,0.385704,-0.325379,0.004373,-0.059129
25569,2,0,-0.575003,-0.170529,0.221620,-1.053665,0.049099,0.641676,0.016354,1.208632,...,0.670012,1.400519,-0.220545,0.641140,1.461002,0.734826,0.081096,0.924382,-0.479982,-0.136455
25570,1,1,-1.635710,-1.976022,-0.636890,1.330359,-1.719197,-0.259467,-0.455103,-1.308468,...,0.463639,-0.527331,-0.857951,1.256633,0.481844,0.236350,-0.320390,0.796356,1.191530,-0.001347


## PCA features (+ Existing features)



In [30]:
# g-
# Number of principal components kept for the g- features.
n_comp = 50

# PCA is fitted on train and test stacked together (train rows first).
data = pd.concat([pd.DataFrame(train[g_cols]), pd.DataFrame(test[g_cols])])
data2 = PCA(n_components=n_comp, random_state=random_seed).fit_transform(data[g_cols])
# Split the projected rows back: leading rows belong to train, trailing rows to test.
train2 = data2[: train.shape[0]]
test2 = data2[-test.shape[0] :]

train2 = pd.DataFrame(train2, columns=[f"pca_G-{i}" for i in range(n_comp)])
test2 = pd.DataFrame(test2, columns=[f"pca_G-{i}" for i in range(n_comp)])

# Append the components next to the existing features (the raw g- columns are kept).
train = pd.concat((train, train2), axis=1)
test = pd.concat((test, test2), axis=1)

In [31]:
# c-
# Number of principal components kept for the c- features.
n_comp = 15

# PCA is fitted on train and test stacked together (train rows first).
data = pd.concat([pd.DataFrame(train[c_cols]), pd.DataFrame(test[c_cols])])
data2 = PCA(n_components=n_comp, random_state=random_seed).fit_transform(data[c_cols])
# Split the projected rows back: leading rows belong to train, trailing rows to test.
train2 = data2[: train.shape[0]]
test2 = data2[-test.shape[0] :]

train2 = pd.DataFrame(train2, columns=[f"pca_C-{i}" for i in range(n_comp)])
test2 = pd.DataFrame(test2, columns=[f"pca_C-{i}" for i in range(n_comp)])

# Append the components next to the existing features (the raw c- columns are kept).
train = pd.concat((train, train2), axis=1)
test = pd.concat((test, test2), axis=1)

In [32]:
train

Unnamed: 0,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,...,pca_C-5,pca_C-6,pca_C-7,pca_C-8,pca_C-9,pca_C-10,pca_C-11,pca_C-12,pca_C-13,pca_C-14
0,0,0,1.124260,0.896698,-0.436214,-0.965311,-0.287443,-1.016437,-1.360774,-0.045876,...,1.084173,0.499893,0.361410,-0.060848,0.345115,0.430001,0.294952,0.457666,-1.104604,0.746927
1,2,0,0.117451,0.667759,0.260124,0.097531,1.204172,0.692876,0.356691,0.559630,...,-0.644772,-0.072663,0.691390,-0.915782,0.139468,1.039007,0.163256,-0.388631,-1.131308,-0.578330
2,1,0,0.777229,0.935347,1.414044,-0.113563,-0.025489,1.494313,0.277364,0.357917,...,1.014754,-0.962508,1.009455,-0.254046,-0.406054,0.674100,0.071775,0.290638,0.701243,-0.010055
3,1,0,-0.749489,-0.299404,-0.459100,0.774708,2.344556,-0.856449,-2.323390,0.298781,...,0.871548,0.699965,0.872746,-0.628334,0.962443,2.251942,1.342743,-0.423853,-0.559250,0.182839
4,2,1,-0.460555,-0.508226,0.959313,0.984009,1.451890,-0.867329,-0.342599,-0.234770,...,-0.245337,-0.583627,0.459293,0.373390,-0.289508,0.667770,-0.944482,0.211168,-0.347195,0.160099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,0,0,0.599819,-0.723658,2.333805,-0.964385,1.075072,1.786462,0.131113,-0.812649,...,1.145251,-0.052962,0.161124,0.193701,0.129756,0.576738,0.291642,-0.598477,-0.108096,1.183038
25568,0,0,-0.865375,-0.307222,1.988666,0.523322,0.644383,0.437283,-0.989775,-0.015526,...,-1.488059,0.039376,1.778791,-0.260386,0.152112,1.573422,0.499272,-1.666410,-0.593174,-1.713381
25569,2,0,-0.575003,-0.170529,0.221620,-1.053665,0.049099,0.641676,0.016354,1.208632,...,0.887606,1.007116,0.347269,0.376352,-1.302986,0.254448,0.505971,0.665237,-0.601036,-0.241065
25570,1,1,-1.635710,-1.976022,-0.636890,1.330359,-1.719197,-0.259467,-0.455103,-1.308468,...,0.258797,0.568508,-0.835311,-0.344547,0.878997,0.730981,-0.063122,-0.039643,0.344824,0.199732


In [33]:
# PCA-only views: copies of the current frames with the raw g- and c- columns removed,
# leaving cp_time, cp_dose and the PCA components.
train_pca = train.copy()
test_pca = test.copy()

train_pca.drop(g_cols, axis=1, inplace=True)
test_pca.drop(g_cols, axis=1, inplace=True)

train_pca.drop(c_cols, axis=1, inplace=True)
test_pca.drop(c_cols, axis=1, inplace=True)

In [34]:
train_pca

Unnamed: 0,cp_time,cp_dose,pca_G-0,pca_G-1,pca_G-2,pca_G-3,pca_G-4,pca_G-5,pca_G-6,pca_G-7,...,pca_C-5,pca_C-6,pca_C-7,pca_C-8,pca_C-9,pca_C-10,pca_C-11,pca_C-12,pca_C-13,pca_C-14
0,0,0,-5.778899,6.154613,8.561315,-7.442511,4.386002,1.258147,3.520685,1.828526,...,1.084173,0.499893,0.361410,-0.060848,0.345115,0.430001,0.294952,0.457666,-1.104604,0.746927
1,2,0,-5.035246,1.003536,-12.642795,4.682019,0.934481,0.017921,0.817860,-1.085419,...,-0.644772,-0.072663,0.691390,-0.915782,0.139468,1.039007,0.163256,-0.388631,-1.131308,-0.578330
2,1,0,0.849837,-8.534120,-2.961085,0.234691,0.712903,3.226471,-1.540530,3.543483,...,1.014754,-0.962508,1.009455,-0.254046,-0.406054,0.674100,0.071775,0.290638,0.701243,-0.010055
3,1,0,11.053726,-10.088315,-0.812731,-4.941979,-7.323094,-2.490876,-2.273711,6.357738,...,0.871548,0.699965,0.872746,-0.628334,0.962443,2.251942,1.342743,-0.423853,-0.559250,0.182839
4,2,1,-6.813030,-5.481174,-9.282727,-4.827295,-7.899419,-8.227711,-3.362621,-3.581453,...,-0.245337,-0.583627,0.459293,0.373390,-0.289508,0.667770,-0.944482,0.211168,-0.347195,0.160099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,0,0,3.034423,-2.604996,0.378983,1.080481,4.262611,2.492815,3.584576,-0.012864,...,1.145251,-0.052962,0.161124,0.193701,0.129756,0.576738,0.291642,-0.598477,-0.108096,1.183038
25568,0,0,-7.989301,-0.778425,-4.860383,0.376690,-1.113370,-2.287973,-5.796794,1.580867,...,-1.488059,0.039376,1.778791,-0.260386,0.152112,1.573422,0.499272,-1.666410,-0.593174,-1.713381
25569,2,0,-6.872688,6.782780,1.654480,-7.876308,1.163434,2.100182,4.330693,-0.996572,...,0.887606,1.007116,0.347269,0.376352,-1.302986,0.254448,0.505971,0.665237,-0.601036,-0.241065
25570,1,1,-1.134083,-9.890526,11.790893,7.032540,2.695275,-2.669482,2.486436,-0.267855,...,0.258797,0.568508,-0.835311,-0.344547,0.878997,0.730981,-0.063122,-0.039643,0.344824,0.199732


## Feature Selection using Variance Encoding



In [35]:
# https://www.kaggle.com/c/lish-moa/discussion/194973#1067941
# Disabled (see change log v65). When enabled, keeps only the columns after
# cp_time / cp_dose whose variance over train + test reaches `var_threshold`.
if False:

    var_threshold = 0.5

    # `DataFrame.append` was removed in pandas 2.0; `pd.concat` stacks the frames the same way.
    data = pd.concat([train, test])
    # The first two columns (cp_time, cp_dose) are excluded from the variance filter.
    ve_columns = (data.iloc[:, 2:].var() >= var_threshold).values
    ve_data = data.iloc[:, 2:].loc[:, ve_columns]

    ve_train = ve_data[: train.shape[0]]
    ve_test = ve_data[-test.shape[0] :]

    # Rebuild each frame as cp_time / cp_dose followed by the surviving columns.
    train = pd.DataFrame(train[["cp_time", "cp_dose"]].values.reshape(-1, 2), columns=["cp_time", "cp_dose"])
    train = pd.concat([train, ve_train], axis=1)

    test = pd.DataFrame(test[["cp_time", "cp_dose"]].values.reshape(-1, 2), columns=["cp_time", "cp_dose"])
    test = pd.concat([test, ve_test], axis=1)

In [36]:
# train

## KMeans

In [37]:
%%time

# Raw g- / c- feature columns currently present in `train` (the "pca_G-*" / "pca_C-*"
# columns do not match these lowercase prefixes).
features_g = [col for col in train.columns if col.startswith("g-")]
features_c = [col for col in train.columns if col.startswith("c-")]


def fe_cluster(train_, test_, n_clusters_g=35, n_clusters_c=5):
    """Append one-hot KMeans cluster memberships for the g- and c- features.

    For each feature group, KMeans is fitted on train and test stacked
    together and every row's cluster label is one-hot encoded into columns
    ``clusters_<kind>_<k>`` appended to the right of the frame.

    Both frames always receive the full set of ``n_clusters`` indicator
    columns, even when a cluster has no member in one of them, so train and
    test keep identical feature columns. The input frames are not modified.

    Args:
        train_: training features containing the ``features_g`` / ``features_c`` columns.
        test_: test features with the same columns.
        n_clusters_g: number of clusters for the g- features.
        n_clusters_c: number of clusters for the c- features.

    Returns:
        ``(train, test)`` as new DataFrames with the indicator columns appended.
    """

    def create_cluster(tr, te, features, kind="g", n_clusters=n_clusters_g):
        data = pd.concat([tr[features], te[features]], axis=0)
        kmeans = KMeans(n_clusters=n_clusters, random_state=random_seed).fit(data)

        n_train = tr.shape[0]
        prefix = f"clusters_{kind}"
        categories = list(range(n_clusters))

        def one_hot(labels, index):
            # A categorical with a fixed category list makes get_dummies emit a
            # column for every cluster, including ones absent from `labels`.
            as_category = pd.Series(pd.Categorical(labels, categories=categories), index=index)
            return pd.get_dummies(as_category, prefix=prefix)

        # Labels are ordered like `data`: train rows first, then test rows.
        # Slicing from `n_train` (not from the end) also handles an empty test frame.
        tr = pd.concat([tr, one_hot(kmeans.labels_[:n_train], tr.index)], axis=1)
        te = pd.concat([te, one_hot(kmeans.labels_[n_train:], te.index)], axis=1)
        return tr, te

    train_, test_ = create_cluster(train_, test_, features_g, kind="g", n_clusters=n_clusters_g)
    train_, test_ = create_cluster(train_, test_, features_c, kind="c", n_clusters=n_clusters_c)
    return train_, test_


train, test = fe_cluster(train, test)

CPU times: user 3min 38s, sys: 1min 3s, total: 4min 42s
Wall time: 23.6 s


In [38]:
train

Unnamed: 0,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,...,clusters_g_30,clusters_g_31,clusters_g_32,clusters_g_33,clusters_g_34,clusters_c_0,clusters_c_1,clusters_c_2,clusters_c_3,clusters_c_4
0,0,0,1.124260,0.896698,-0.436214,-0.965311,-0.287443,-1.016437,-1.360774,-0.045876,...,0,0,0,0,0,1,0,0,0,0
1,2,0,0.117451,0.667759,0.260124,0.097531,1.204172,0.692876,0.356691,0.559630,...,0,0,0,1,0,1,0,0,0,0
2,1,0,0.777229,0.935347,1.414044,-0.113563,-0.025489,1.494313,0.277364,0.357917,...,0,0,0,0,0,0,0,0,1,0
3,1,0,-0.749489,-0.299404,-0.459100,0.774708,2.344556,-0.856449,-2.323390,0.298781,...,0,1,0,0,0,0,0,1,0,0
4,2,1,-0.460555,-0.508226,0.959313,0.984009,1.451890,-0.867329,-0.342599,-0.234770,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,0,0,0.599819,-0.723658,2.333805,-0.964385,1.075072,1.786462,0.131113,-0.812649,...,0,0,0,0,0,0,0,0,1,0
25568,0,0,-0.865375,-0.307222,1.988666,0.523322,0.644383,0.437283,-0.989775,-0.015526,...,0,0,0,0,0,1,0,0,0,0
25569,2,0,-0.575003,-0.170529,0.221620,-1.053665,0.049099,0.641676,0.016354,1.208632,...,0,0,0,0,0,1,0,0,0,0
25570,1,1,-1.635710,-1.976022,-0.636890,1.330359,-1.719197,-0.259467,-0.455103,-1.308468,...,0,0,0,0,0,0,1,0,0,0


## Basic stats

In [39]:
# Row-wise summary statistics over the g- columns, the c- columns and both together,
# added to `train` first and then to `test` for every statistic.
for stats in ["sum", "mean", "std", "kurt", "skew"]:
    for _frame in (train, test):
        for _prefix, _columns in (
            ("g_", features_g),
            ("c_", features_c),
            ("gc_", features_g + features_c),
        ):
            # `stats` names the DataFrame method to call, e.g. DataFrame.kurt(axis=1).
            _frame[_prefix + stats] = getattr(_frame[_columns], stats)(axis=1)

In [40]:
train

Unnamed: 0,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,...,gc_mean,g_std,c_std,gc_std,g_kurt,c_kurt,gc_kurt,g_skew,c_skew,gc_skew
0,0,0,1.124260,0.896698,-0.436214,-0.965311,-0.287443,-1.016437,-1.360774,-0.045876,...,0.050884,0.868307,0.731294,0.869209,-0.270006,-0.321285,-0.270608,0.019115,0.073814,-0.015508
1,2,0,0.117451,0.667759,0.260124,0.097531,1.204172,0.692876,0.356691,0.559630,...,0.062270,0.850889,0.608372,0.842821,-0.217545,0.088938,-0.233240,0.045890,-0.163448,-0.041249
2,1,0,0.777229,0.935347,1.414044,-0.113563,-0.025489,1.494313,0.277364,0.357917,...,-0.038900,0.941310,0.665178,0.914129,-0.356922,-0.182024,-0.286903,-0.044156,0.385872,-0.008376
3,1,0,-0.749489,-0.299404,-0.459100,0.774708,2.344556,-0.856449,-2.323390,0.298781,...,-0.136704,1.080671,0.576449,1.088267,-0.918764,3.952398,-0.959980,0.086528,1.953350,0.245358
4,2,1,-0.460555,-0.508226,0.959313,0.984009,1.451890,-0.867329,-0.342599,-0.234770,...,0.020415,1.103348,0.677183,1.070526,-0.214614,-0.723722,-0.102022,-0.187344,0.076016,-0.251234
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,0,0,0.599819,-0.723658,2.333805,-0.964385,1.075072,1.786462,0.131113,-0.812649,...,-0.105162,0.778264,0.472869,0.783517,-0.299977,0.946507,-0.386378,0.085385,0.903288,0.210787
25568,0,0,-0.865375,-0.307222,1.988666,0.523322,0.644383,0.437283,-0.989775,-0.015526,...,0.084645,0.737451,0.794342,0.776206,-0.242835,-0.665963,-0.291044,0.075617,-0.237591,0.122949
25569,2,0,-0.575003,-0.170529,0.221620,-1.053665,0.049099,0.641676,0.016354,1.208632,...,-0.007073,0.817300,0.636256,0.822376,-0.345356,-0.251386,-0.375475,0.012237,-0.120099,-0.052690
25570,1,1,-1.635710,-1.976022,-0.636890,1.330359,-1.719197,-0.259467,-0.455103,-1.308468,...,0.139976,0.988002,0.631845,0.953971,-0.607105,-0.670234,-0.498840,-0.313186,0.111856,-0.319974


# Model - Simple NN

In [41]:
def create_model_simple_nn(num_col, output_dim):
    """Build a plain feed-forward Keras network that outputs logits.

    The input is batch-normalised, then passes through three blocks of
    Dropout -> weight-normalised Dense -> BatchNormalization, followed by a
    final Dense layer without activation.

    Args:
        num_col: passed to ``L.Input`` as the input shape.
        output_dim: number of output units.
    """
    # (dropout rate applied before the layer, units, activation) for each hidden block.
    hidden_spec = [(0.4, 256, "elu"), (0.3, 256, "swish"), (0.3, 128, "selu")]

    stack = [L.Input(num_col), L.BatchNormalization()]
    for rate, units, activation in hidden_spec:
        stack.append(L.Dropout(rate))
        stack.append(tfa.layers.WeightNormalization(L.Dense(units, activation=activation)))
        stack.append(L.BatchNormalization())
    stack.append(L.Dense(output_dim))

    return tf.keras.Sequential(stack)

# Model - Multi input ResNet

https://www.kaggle.com/rahulsd91/moa-multi-input-resnet-model

In [42]:
def create_model_resnet(n_features, n_features_2, n_labels):
    """Build the two-input, three-head Keras model that outputs logits.

    Args:
        n_features: width of the first input ("Input1"), fed to Head1.
        n_features_2: width of the second input ("Input2"), concatenated with
            Head1's output before Head2.
        n_labels: number of output units of the final Dense layer.

    Returns:
        A ``tf.keras.models.Model`` taking ``[input_1, input_2]``. The last
        layer has no activation, so the outputs are logits.
    """
    input_1 = L.Input(shape=(n_features,), name="Input1")
    input_2 = L.Input(shape=(n_features_2,), name="Input2")

    # Head1: encodes the first input into a 1024-wide representation.
    head_1 = tf.keras.Sequential(
        [
            L.BatchNormalization(),
            L.Dropout(0.2),
            tfa.layers.WeightNormalization(L.Dense(256, activation="selu")),
            L.BatchNormalization(),
            L.Dropout(0.2),
            tfa.layers.WeightNormalization(L.Dense(1024, activation="swish")),
        ],
        name="Head1",
    )

    input_3 = head_1(input_1)
    # Head2 sees the second input together with Head1's output.
    input_3_concat = L.Concatenate()([input_2, input_3])

    head_2 = tf.keras.Sequential(
        [
            L.BatchNormalization(),
            L.Dropout(0.2),
            tfa.layers.WeightNormalization(L.Dense(256, activation="swish")),
            L.BatchNormalization(),
            L.Dropout(0.2),
            tfa.layers.WeightNormalization(L.Dense(512, activation="relu")),
            L.BatchNormalization(),
            L.Dropout(0.2),
            tfa.layers.WeightNormalization(L.Dense(1024, activation="relu")),
        ],
        name="Head2",
    )

    input_4 = head_2(input_3_concat)
    # Skip connection: average Head1's and Head2's outputs (both 1024 wide).
    input_4_avg = L.Average()([input_3, input_4])

    # Head3: maps the averaged representation to one logit per label.
    head_3 = tf.keras.Sequential(
        [
            L.BatchNormalization(),
            tfa.layers.WeightNormalization(L.Dense(128, activation="relu")),
            L.BatchNormalization(),
            L.Dropout(0.2),
            tfa.layers.WeightNormalization(L.Dense(128, activation="swish")),
            L.BatchNormalization(),
            # L.Dense(n_labels, activation="sigmoid"),
            L.Dense(n_labels),  # from_logits=True
        ],
        name="Head3",
    )

    output = head_3(input_4_avg)

    model = tf.keras.models.Model(inputs=[input_1, input_2], outputs=output)

    return model

# Model - TabNet

In [43]:
def create_model_tabnet(seed, params=None):
    """Create an unfitted ``TabNetRegressor`` with the notebook's fixed settings.

    Args:
        seed: Random seed forwarded to TabNet.
        params: Optional dict of tuned hyper-parameters.  When given, its
            ``"momentum"`` entry is forwarded to TabNet; when ``None`` the
            ``momentum`` argument is not passed at all.

    Returns:
        The configured ``TabNetRegressor`` instance.
    """
    tabnet_params = {
        "n_d": 32,
        "n_a": 32,
        "n_steps": 1,
        "n_independent": 1,
        "n_shared": 1,
        "gamma": 1.3,
        "lambda_sparse": 0,
        # AdaBelief (lr=2e-2, weight_decay=1e-5, weight_decouple=False) was the
        # alternative optimizer left commented out in the original cell.
        "optimizer_fn": optim.Adam,
        "optimizer_params": {"lr": 2e-2, "weight_decay": 1e-5},
        "mask_type": "entmax",
        "scheduler_params": {"mode": "min", "patience": 5, "min_lr": 1e-5, "threshold": 1e-5, "factor": 0.1},
        "scheduler_fn": torch_ReduceLROnPlateau,
        "seed": seed,
        "verbose": 0,
    }

    if params is not None:
        # Only momentum is taken from the tuned parameters; clip_value and
        # lambda_sparse overrides are currently disabled.
        tabnet_params["momentum"] = params["momentum"]

    return TabNetRegressor(**tabnet_params)

# Model - NODE

Neural Oblivious Decision Ensembles

https://www.kaggle.com/gogo827jz/moa-neural-oblivious-decision-ensembles-tf-keras

In [44]:
@tf.function
def sparsemoid(inputs: tf.Tensor):
    """Return ``0.5 * inputs + 0.5`` clipped element-wise to the range [0, 1]."""
    shifted = 0.5 * inputs + 0.5
    return tf.clip_by_value(shifted, clip_value_min=0.0, clip_value_max=1.0)


@tf.function
def identity(x: tf.Tensor):
    """Return ``x`` unchanged (used as the pass-through feature layer in NODE)."""
    return x

In [45]:
class ODST(L.Layer):
    """Oblivious differentiable decision-tree layer.

    Holds ``n_trees`` trees of depth ``depth``.  Every tree level selects a
    (soft) input feature, compares it with a learned threshold and the
    resulting soft left/right decisions are combined into weights over the
    ``2 ** depth`` leaves.  Each leaf stores a ``units``-sized response; the
    layer output is the sum of the weighted responses over all trees, with
    shape ``[batch_size, units]``.

    Thresholds and temperatures are overwritten from the data of the first
    batch passed to ``call`` (see ``initialize``).
    """

    def __init__(self, n_trees: int = 3, depth: int = 4, units: int = 1, threshold_init_beta: float = 1.0):
        """Store the tree hyper-parameters.

        Args:
            n_trees: Number of trees in the layer.
            depth: Number of split levels per tree (``2 ** depth`` leaves).
            units: Size of the response vector stored in each leaf.
            threshold_init_beta: Both parameters of the Beta distribution used
                to sample the percentiles for the threshold initialisation.
        """
        super(ODST, self).__init__()
        # Python-side flag: set to True after the first call has run initialize().
        self.initialized = False
        self.n_trees = n_trees
        self.depth = depth
        self.units = units
        self.threshold_init_beta = threshold_init_beta

    def build(self, input_shape: tf.TensorShape):
        """Create the layer variables once the input feature count is known."""
        # Feature-selection logits, one per (input feature, tree, level); start at zero.
        feature_selection_logits_init = tf.zeros_initializer()
        self.feature_selection_logits = tf.Variable(
            initial_value=feature_selection_logits_init(
                shape=(input_shape[-1], self.n_trees, self.depth), dtype="float32"
            ),
            trainable=True,
            name="feature_selection_logits",
        )

        # Split thresholds per (tree, level); replaced by data percentiles in initialize().
        feature_thresholds_init = tf.zeros_initializer()
        self.feature_thresholds = tf.Variable(
            initial_value=feature_thresholds_init(shape=(self.n_trees, self.depth), dtype="float32"),
            trainable=True,
            name="feature_thresholds",
        )

        # Log-temperatures per (tree, level); also replaced in initialize().
        log_temperatures_init = tf.ones_initializer()
        self.log_temperatures = tf.Variable(
            initial_value=log_temperatures_init(shape=(self.n_trees, self.depth), dtype="float32"),
            trainable=True,
            name="log_temperatures",
        )

        # Binary code of every leaf index: bin_codes[d, c] is bit d of leaf c.
        indices = K.arange(0, 2 ** self.depth, 1)
        offsets = 2 ** K.arange(0, self.depth, 1)
        bin_codes = tf.reshape(indices, (1, -1)) // tf.reshape(offsets, (-1, 1)) % 2
        # ^--[depth, 2 ** depth]
        bin_codes_1hot = tf.stack([bin_codes, 1 - bin_codes], axis=-1)
        # ^--[depth, 2 ** depth, 2]; constant lookup table, hence not trainable
        self.bin_codes_1hot = tf.Variable(
            initial_value=tf.cast(bin_codes_1hot, "float32"), trainable=False, name="bin_codes_1hot"
        )

        # Leaf responses per (tree, unit, leaf); initialised to ones.
        response_init = tf.ones_initializer()
        self.response = tf.Variable(
            initial_value=response_init(shape=(self.n_trees, self.units, 2 ** self.depth), dtype="float32"),
            trainable=True,
            name="response",
        )

    def initialize(self, inputs):
        """Data-aware initialisation of thresholds and temperatures from ``inputs``."""
        feature_values = self.feature_values(inputs)

        # initialize feature_thresholds
        # One random percentile (Beta-distributed, scaled to 0..100) per (tree, level).
        percentiles_q = 100 * tfp_distributions.Beta(self.threshold_init_beta, self.threshold_init_beta).sample(
            [self.n_trees * self.depth]
        )
        flattened_feature_values = tf.map_fn(K.flatten, feature_values)
        # ^--[batch_size, n_trees * depth]
        # percentile() yields every percentile for every column; the diagonal
        # pairs column k with its own percentile k.
        init_feature_thresholds = tf.linalg.diag_part(
            tfp_stats.percentile(flattened_feature_values, percentiles_q, axis=0)
        )

        self.feature_thresholds.assign(tf.reshape(init_feature_thresholds, self.feature_thresholds.shape))

        # initialize log_temperatures
        # Median absolute distance between the selected feature values and the thresholds.
        # NOTE(review): the value is assigned as-is (no logarithm) although call()
        # applies exp(-log_temperatures) — confirm this is intended.
        self.log_temperatures.assign(
            tfp_stats.percentile(tf.math.abs(feature_values - self.feature_thresholds), 50, axis=0)
        )

    def feature_values(self, inputs: tf.Tensor, training: bool = None):
        """Project ``inputs`` onto the softly selected feature of every tree level.

        ``training`` is accepted but not used.
        """
        # NOTE(review): sparsemax is called without an explicit axis, so the
        # library default applies — confirm it normalises over the intended
        # dimension (the input-feature axis is axis 0 of this tensor).
        feature_selectors = tfa.activations.sparsemax(self.feature_selection_logits)
        # ^--[in_features, n_trees, depth]

        feature_values = tf.einsum("bi,ind->bnd", inputs, feature_selectors)
        # ^--[batch_size, n_trees, depth]

        return feature_values

    def call(self, inputs: tf.Tensor, training: bool = None):
        """Evaluate all trees on ``inputs`` and return ``[batch_size, units]``.

        ``training`` is accepted but not used.
        """
        # The first batch seen is used for the data-aware initialisation.
        if not self.initialized:
            self.initialize(inputs)
            self.initialized = True

        feature_values = self.feature_values(inputs)

        # Signed, temperature-scaled distance from each split threshold.
        threshold_logits_a = (feature_values - self.feature_thresholds) * tf.math.exp(-self.log_temperatures)

        threshold_logits_b = tf.stack([-threshold_logits_a, threshold_logits_a], axis=-1)
        # ^--[batch_size, n_trees, depth, 2]

        bins = sparsemoid(threshold_logits_b)
        # ^--[batch_size, n_trees, depth, 2], approximately binary

        # For every leaf, pick at each level the branch weight matching the leaf's bit.
        bin_matches = tf.einsum("btds,dcs->btdc", bins, self.bin_codes_1hot)
        # ^--[batch_size, n_trees, depth, 2 ** depth]

        # A leaf's weight is the product of its branch weights over all levels.
        response_weights = tf.math.reduce_prod(bin_matches, axis=-2)
        # ^-- [batch_size, n_trees, 2 ** depth]

        response = tf.einsum("bnd,ncd->bnc", response_weights, self.response)
        # ^-- [batch_size, n_trees, units]

        # Sum the contributions of all trees.
        return tf.reduce_sum(response, axis=1)

In [46]:
class NODE(tf.keras.Model):
    """Stack of ODST layers with dense (concatenating) skip connections.

    The input is passed through an optional feature layer, batch-normalised
    and dropped out.  Each ODST layer then receives the running tensor, its
    output is concatenated to that tensor, and the result is batch-normalised
    and dropped out again.  A final ``Dense(output_dim)`` followed by ``link``
    produces the model output.
    """

    def __init__(
        self,
        units: int = 1,
        n_layers: int = 1,
        output_dim=1,
        dropout_rate=0.1,
        link: tf.function = tf.identity,
        n_trees: int = 3,
        depth: int = 4,
        threshold_init_beta: float = 1.0,
        feature_column: Optional[L.DenseFeatures] = None,
    ):
        """Create the sub-layers.

        Args:
            units: Output width of every ODST layer.
            n_layers: Number of stacked ODST layers (0 is allowed and yields
                just normalisation, dropout and the final dense layer).
            output_dim: Width of the final dense layer.
            dropout_rate: Rate shared by all dropout layers.
            link: Function applied to the final dense output.
            n_trees: Trees per ODST layer.
            depth: Depth of every tree.
            threshold_init_beta: Forwarded to every ODST layer.
            feature_column: Optional layer applied to the raw inputs; when
                ``None`` the inputs are passed through unchanged.
        """
        super(NODE, self).__init__()
        self.units = units
        self.n_layers = n_layers
        self.n_trees = n_trees
        self.depth = depth
        self.threshold_init_beta = threshold_init_beta
        self.feature_column = feature_column
        self.dropout_rate = dropout_rate
        self.output_dim = output_dim

        if feature_column is None:
            self.feature = L.Lambda(identity)
        else:
            self.feature = feature_column

        # One normalisation/dropout pair for the input plus one per ODST layer.
        self.bn = [L.BatchNormalization() for _ in range(n_layers + 1)]
        self.dropout = [L.Dropout(self.dropout_rate) for _ in range(n_layers + 1)]
        self.ensemble = [
            ODST(n_trees=n_trees, depth=depth, units=units, threshold_init_beta=threshold_init_beta)
            for _ in range(n_layers)
        ]

        self.last_layer = L.Dense(self.output_dim)

        self.link = link

    def call(self, inputs, training=None):
        """Run the forward pass and return ``link(last_layer(features))``."""
        hidden = self.feature(inputs)
        hidden = self.bn[0](hidden, training=training)
        hidden = self.dropout[0](hidden, training=training)

        # A single running tensor replaces the former dict-of-dicts bookkeeping,
        # which relied on the loop variable after the loop and therefore failed
        # when there were no ODST layers.
        for i, tree in enumerate(self.ensemble):
            hidden = tf.concat([hidden, tree(hidden)], axis=1)
            hidden = self.bn[i + 1](hidden, training=training)
            hidden = self.dropout[i + 1](hidden, training=training)

        return self.link(self.last_layer(hidden))

In [47]:
def create_model_node(output_dim):
    """Build the NODE-based Keras model.

    A two-layer ``NODE`` block (128 units, 2 trees of depth 3, 128-wide output)
    is followed by two weight-normalised dense layers and a final
    ``Dense(output_dim)`` without activation, i.e. the model emits logits.
    """
    node_block = NODE(
        n_layers=2,
        units=128,
        output_dim=128,
        dropout_rate=0.2,
        depth=3,
        n_trees=2,
    )

    head_layers = [
        L.BatchNormalization(),
        L.Dropout(0.2),
        tfa.layers.WeightNormalization(L.Dense(128, activation="elu")),
        L.BatchNormalization(),
        L.Dropout(0.2),
        tfa.layers.WeightNormalization(L.Dense(128, activation="swish")),
        L.BatchNormalization(),
        # Logits output (the loss uses from_logits=True).
        L.Dense(output_dim),
    ]

    return tf.keras.Sequential([node_block, *head_layers])

# Training

In [48]:
# `models` lists the model types to train; `learning` cycles through it, so
# N_STARTS = len(models) * k gives every model k seeds.
if IN_COLAB:
    # Colab: all four models, one seed each.
    models = ["SimpleNN", "ResNet", "TabNet", "NODE"]
    N_STARTS = len(models) * 1
else:
    # Elsewhere: TabNet only, two seeds.
    models = ["TabNet"]
    N_STARTS = len(models) * 2

# Number of cross-validation folds (same in both environments).
N_SPLITS = 5

In [49]:
# Model types that take part in pre-training: `learning` trains only these when
# do_predict is False, and loads their checkpoints when transfer_learning_base is given.
pre_train_models = ["ResNet", "SimpleNN"]

In [50]:
# When True, `learning` writes a checkpoint for every (seed, fold).
# NOTE(review): the transfer-learning branch of `learning` loads these checkpoint
# files, so they must already exist when this stays False — confirm.
SAVE_MODEL = False


def learning(
    train_,
    train_pca_,
    target_,
    drug_,
    N_STARTS=6,
    N_SPLITS=5,
    do_predict=False,
    transfer_learning_base=None,
    pseudo_labeling=False,
    params=None,
):
    """Train one model per seed using drug-aware multilabel-stratified folds.

    For every ``seed`` in ``range(N_STARTS)`` the model type is
    ``models[seed % len(models)]``.  Folds are rebuilt per seed, the model is
    trained on each fold, and sigmoid-activated validation predictions are
    collected as out-of-fold (OOF) results.

    Besides its arguments the function reads the notebook globals ``models``,
    ``pre_train_models``, ``SAVE_MODEL``, ``random_seed``, ``ss``, ``test`` and
    ``test_pca``.

    Args:
        train_: Feature DataFrame (first input of every model).
        train_pca_: Second feature DataFrame, used only by the ResNet model.
        target_: Label DataFrame; its column count sets the model output width.
        drug_: DataFrame with ``sig_id`` and ``drug_id`` columns.  It is
            modified in place: a ``fold`` column is dropped and re-created for
            every seed.  Its index is used to index the feature/target arrays
            positionally (presumably a default RangeIndex aligned with
            ``train_`` — verify against the caller).
        N_STARTS: Number of seed iterations.
        N_SPLITS: Number of folds.
        do_predict: When False, only models listed in ``pre_train_models`` are
            trained and no test prediction is made.  When True, every model is
            trained and the global test set is predicted with each fold model.
        transfer_learning_base: Optional DataFrame whose column count is the
            output width of the pre-trained base model.  When given, models in
            ``pre_train_models`` start from the weights stored in the
            checkpoint file of the same seed/fold.
        pseudo_labeling: Only influences which seed is used for fold creation
            and model initialisation.
        params: Tuned hyper-parameters forwarded to ``create_model_tabnet``.

    Returns:
        ``(oof, predictions)``: two dicts keyed by ``"{model_name}_{seed}"``.
        ``oof`` holds a DataFrame shaped like ``target_`` with the validation
        predictions; ``predictions`` holds a DataFrame shaped like ``ss`` with
        the fold-averaged test predictions (all zeros when ``do_predict`` is
        False).

    Raises:
        ValueError: If ``models`` contains an unknown model name.
    """
    oof = {}
    predictions = {}

    for seed in range(N_STARTS):
        model_name = models[seed % len(models)]

        # The pre-training pass only concerns the pre-trainable model types.
        if not do_predict and model_name not in pre_train_models:
            continue

        seed_result = pd.DataFrame(np.zeros(target_.shape))
        prediction = pd.DataFrame(np.zeros(ss.shape))

        # The targets are not modified below, so convert them once per seed
        # instead of twice per fold.
        target_values = target_.astype(float).values

        # The seed for folds and model initialisation depends on the phase.
        if pseudo_labeling:
            kfold_seed = random_seed * 10 + seed
        elif do_predict:
            kfold_seed = random_seed + seed
        else:
            kfold_seed = seed

        fix_seed(kfold_seed)

        # Remove the fold assignment left over from a previous seed/call.
        if "fold" in drug_.columns:
            drug_.drop(["fold"], axis=1, inplace=True)

        # Split the drugs into those seen exactly 6, 12 or 18 times (vc1) and
        # all others (vc2).
        vc = drug_.drug_id.value_counts()
        vc1 = vc.loc[(vc == 6) | (vc == 12) | (vc == 18)].index.sort_values()
        vc2 = vc.loc[(vc != 6) & (vc != 12) & (vc != 18)].index.sort_values()

        dct1 = {}
        dct2 = {}

        # vc1 drugs: stratify on the per-drug mean target so that every drug
        # lands in exactly one fold (drug_id -> fold).
        skf = MultilabelStratifiedKFold(n_splits=N_SPLITS, random_state=kfold_seed, shuffle=True)
        tmp = pd.concat([drug_, target_], axis=1).groupby("drug_id").mean().loc[vc1]
        for fold, (idxT, idxV) in enumerate(skf.split(tmp, tmp)):
            dd = {k: fold for k in tmp.index[idxV].values}
            dct1.update(dd)

        # vc2 drugs: assign folds per sample (sig_id -> fold).
        skf = MultilabelStratifiedKFold(n_splits=N_SPLITS, random_state=kfold_seed, shuffle=True)
        tmp = drug_.loc[drug_.drug_id.isin(vc2)].reset_index(drop=True)
        for fold, (idxT, idxV) in enumerate(skf.split(tmp, tmp)):
            dd = {k: fold for k in tmp.sig_id[idxV].values}
            dct2.update(dd)

        # Assign folds: by drug first, then by sample for the remaining rows.
        drug_["fold"] = drug_.drug_id.map(dct1)
        drug_.loc[drug_.fold.isna(), "fold"] = drug_.loc[drug_.fold.isna(), "sig_id"].map(dct2)
        drug_.fold = drug_.fold.astype("int8")

        for n in range(N_SPLITS):
            tr = drug_[drug_["fold"] != n].index
            te = drug_[drug_["fold"] == n].index

            start_time = time()

            # Build Model (and, for transfer learning, a base model with the
            # pre-training output width to load the checkpoint into).
            if model_name == "ResNet":
                model = create_model_resnet(len(train_.columns), len(train_pca_.columns), len(target_.columns))

                if transfer_learning_base is not None:
                    model_base = create_model_resnet(
                        len(train_.columns), len(train_pca_.columns), len(transfer_learning_base.columns)
                    )

            elif model_name == "SimpleNN":
                model = create_model_simple_nn(len(train_.columns), len(target_.columns))

                if transfer_learning_base is not None:
                    model_base = create_model_simple_nn(len(train_.columns), len(transfer_learning_base.columns))

            elif model_name == "TabNet":
                model = create_model_tabnet(kfold_seed, params)

            elif model_name == "NODE":
                model = create_model_node(len(target_.columns))

                if transfer_learning_base is not None:
                    model_base = create_model_node(len(transfer_learning_base.columns))

            else:
                # Raising a plain string is itself a TypeError in Python 3.
                raise ValueError(f"Model name is invalid: {model_name!r}")

            # Build Data Sets (ResNet takes two inputs, the others one).
            if model_name == "ResNet":
                x_tr = [
                    train_.values[tr],
                    train_pca_.values[tr],
                ]
                x_val = [
                    train_.values[te],
                    train_pca_.values[te],
                ]
                x_tt = [test.values, test_pca.values]

            else:
                x_tr, x_val = train_.values[tr], train_.values[te]
                x_tt = test.values

            y_tr, y_val = target_values[tr], target_values[te]

            if model_name == "TabNet":
                checkpoint_path = f"{model_name}_repeat:{seed}_fold:{n}"

                if transfer_learning_base is not None and model_name in pre_train_models:
                    model.load_model(checkpoint_path + ".zip")

                model.fit(
                    X_train=x_tr,
                    y_train=y_tr,
                    eval_set=[(x_val, y_val)],
                    eval_name=["val"],
                    eval_metric=["logits_ll"],
                    max_epochs=200,
                    patience=10,
                    batch_size=1024,
                    virtual_batch_size=32,
                    num_workers=1,
                    drop_last=False,
                    # loss_fn=F.binary_cross_entropy_with_logits,
                    loss_fn=SmoothBCEwLogits(smoothing=1e-6),
                )

                if SAVE_MODEL:
                    # The archive is read back as "<checkpoint_path>.zip" above,
                    # so that is the stale file to clear before saving.
                    try:
                        os.remove(checkpoint_path + ".zip")
                    except OSError:
                        pass
                    model.save_model(checkpoint_path)

            else:
                model.compile(
                    optimizer=tfa.optimizers.AdamW(lr=1e-3, weight_decay=1e-5),
                    # optimizer=AdaBeliefOptimizer(lr=1e-3, weight_decay=1e-5),
                    # loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True, label_smoothing=1e-6),
                    metrics=logloss,
                )

                checkpoint_path = f"{model_name}_repeat:{seed}_fold:{n}.hdf5"

                if transfer_learning_base is not None and model_name in pre_train_models:
                    # Copy every layer except the last one (whose width differs
                    # between the pre-training and the final targets).
                    model_base.load_weights(checkpoint_path)
                    for layer, base_layer in enumerate(model_base.layers[:-1]):
                        model.layers[layer].set_weights(base_layer.get_weights())

                callbacks = []
                if SAVE_MODEL:
                    callbacks.append(
                        ModelCheckpoint(
                            checkpoint_path,
                            monitor="val_loss",
                            verbose=0,
                            save_best_only=True,
                            save_weights_only=True,
                            mode="min",
                        )
                    )
                callbacks.append(
                    ReduceLROnPlateau(
                        monitor="val_loss", factor=0.1, patience=5, verbose=0, min_delta=1e-5, min_lr=1e-5, mode="min"
                    )
                )
                callbacks.append(
                    EarlyStopping(
                        monitor="val_loss",
                        patience=10,
                        mode="min",
                        verbose=0,
                        min_delta=1e-5,
                        restore_best_weights=True,
                    )
                )

                model.fit(
                    x_tr,
                    y_tr,
                    validation_data=(x_val, y_val),
                    epochs=200,
                    batch_size=128,
                    callbacks=callbacks,
                    verbose=0,
                )

            # All models output logits; apply the sigmoid here.
            val_predict = model.predict(x_val)
            val_predict = 1 / (1 + np.exp(-val_predict))
            seed_result.loc[te, :] += val_predict

            if do_predict:
                test_predict = model.predict(x_tt)
                test_predict = 1 / (1 + np.exp(-test_predict))
                prediction += test_predict / N_SPLITS

            if model_name == "TabNet":
                fold_score = np.min(model.history["val_logits_ll"])
            else:
                fold_score = metric(target_.loc[te].values, val_predict)

            print(
                f"[{str(datetime.timedelta(seconds = time() - start_time))[2:7]}] {model_name}: Seed {seed}, Fold {n}:",
                fold_score,
            )

            # Release the model and backend state before the next fold.
            K.clear_session()
            del model
            gc.collect()

        oof[f"{model_name}_{seed}"] = seed_result
        predictions[f"{model_name}_{seed}"] = prediction

    return oof, predictions

## Hyperparameter tuning

In [51]:
class Objective:
    """Optuna objective that tunes TabNet hyper-parameters on the blended CV score.

    ``cv`` / ``auc`` hold the scores of the most recently evaluated trial;
    ``best_cv`` / ``best_auc`` hold the scores of the study's best trial and
    are maintained by ``callback``.
    """

    def __init__(self):
        self.cv = self.auc = None
        self.best_cv = self.best_auc = None

    def __call__(self, trial):
        """Train with the trial's parameters and return the blended CV score."""
        # Only momentum is searched; the gamma, clip_value and lambda_sparse
        # searches are currently disabled.
        params = {"momentum": trial.suggest_float("momentum", 0.01, 0.4, log=True)}

        oof, _ = learning(
            train,
            train_pca,
            target,
            target_drug,
            N_STARTS,
            N_SPLITS,
            do_predict=True,
            transfer_learning_base=None,
            pseudo_labeling=False,
            params=params,
        )

        # Equal blending weight for every seed run.
        equal_weights = [1.0 / N_STARTS] * N_STARTS
        y_true = target.values[: non_target.shape[0]]

        scores = cross_validation(y_true.shape, equal_weights, y_true, oof)
        self.cv, self.auc = scores

        return self.cv

    def callback(self, study, trial):
        """Remember the scores of ``trial`` when it is the study's best trial."""
        is_best = study.best_trial == trial
        if is_best:
            self.best_cv, self.best_auc = self.cv, self.auc

In [52]:
# Switch for the Optuna hyper-parameter search below.
TUNING = True

if TUNING:
    objective = Objective()
    # The objective returns the blended CV loss, so the study minimises it
    # ("minimize" is also Optuna's default direction).
    study = optuna.create_study(direction="minimize")
    # The callback records the CV/AUC of the best trial on `objective`.
    study.optimize(objective, n_trials=10, callbacks=[objective.callback])

[32m[I 2020-11-10 21:42:55,489][0m A new study created in memory with name: no-name-85924a79-f2ef-423d-8c4f-2f4a9153d558[0m


Device used : cuda

Early stopping occured at epoch 64 with best_epoch = 54 and best_val_logits_ll = 0.01742
Best weights from best epoch are automatically used!
[02:25] TabNet: Seed 0, Fold 0: 0.017423659846211904
Device used : cuda

Early stopping occured at epoch 55 with best_epoch = 45 and best_val_logits_ll = 0.0181
Best weights from best epoch are automatically used!
[02:03] TabNet: Seed 0, Fold 1: 0.018100558005893743
Device used : cuda

Early stopping occured at epoch 44 with best_epoch = 34 and best_val_logits_ll = 0.01784
Best weights from best epoch are automatically used!
[01:41] TabNet: Seed 0, Fold 2: 0.017835877647901604
Device used : cuda

Early stopping occured at epoch 51 with best_epoch = 41 and best_val_logits_ll = 0.01749
Best weights from best epoch are automatically used!
[01:55] TabNet: Seed 0, Fold 3: 0.017491292761177592
Device used : cuda

Early stopping occured at epoch 43 with best_epoch = 33 and best_val_logits_ll = 0.0179
Best weights from best epoch are 

[32m[I 2020-11-10 22:01:29,401][0m Trial 0 finished with value: 0.016984424275656267 and parameters: {'momentum': 0.29268395624501736}. Best is trial 0 with value: 0.016984424275656267.[0m


Blended CV: 0.016984424275656267, AUC : 0.6569430066325342
Device used : cuda

Early stopping occured at epoch 64 with best_epoch = 54 and best_val_logits_ll = 0.01742
Best weights from best epoch are automatically used!
[02:23] TabNet: Seed 0, Fold 0: 0.01741712532002623
Device used : cuda

Early stopping occured at epoch 64 with best_epoch = 54 and best_val_logits_ll = 0.018
Best weights from best epoch are automatically used!
[02:23] TabNet: Seed 0, Fold 1: 0.0179968249720276
Device used : cuda

Early stopping occured at epoch 60 with best_epoch = 50 and best_val_logits_ll = 0.01779
Best weights from best epoch are automatically used!
[02:16] TabNet: Seed 0, Fold 2: 0.01779412864336453
Device used : cuda

Early stopping occured at epoch 51 with best_epoch = 41 and best_val_logits_ll = 0.01748
Best weights from best epoch are automatically used!
[01:55] TabNet: Seed 0, Fold 3: 0.017481137356170344
Device used : cuda

Early stopping occured at epoch 43 with best_epoch = 33 and best_va

[32m[I 2020-11-10 22:21:38,479][0m Trial 1 finished with value: 0.01694232257639946 and parameters: {'momentum': 0.15378276342882152}. Best is trial 1 with value: 0.01694232257639946.[0m


Blended CV: 0.01694232257639946, AUC : 0.6591064630666648
Device used : cuda

Early stopping occured at epoch 64 with best_epoch = 54 and best_val_logits_ll = 0.01742
Best weights from best epoch are automatically used!
[02:23] TabNet: Seed 0, Fold 0: 0.01741780710359035
Device used : cuda

Early stopping occured at epoch 64 with best_epoch = 54 and best_val_logits_ll = 0.018
Best weights from best epoch are automatically used!
[02:23] TabNet: Seed 0, Fold 1: 0.018003376501921756
Device used : cuda

Early stopping occured at epoch 44 with best_epoch = 34 and best_val_logits_ll = 0.01784
Best weights from best epoch are automatically used!
[01:41] TabNet: Seed 0, Fold 2: 0.017838837168047936
Device used : cuda

Early stopping occured at epoch 51 with best_epoch = 41 and best_val_logits_ll = 0.01748
Best weights from best epoch are automatically used!
[01:55] TabNet: Seed 0, Fold 3: 0.017482528861484346
Device used : cuda

Early stopping occured at epoch 43 with best_epoch = 33 and best_

[32m[I 2020-11-10 22:41:09,033][0m Trial 2 finished with value: 0.016948085385603377 and parameters: {'momentum': 0.1890401298637486}. Best is trial 1 with value: 0.01694232257639946.[0m


Blended CV: 0.016948085385603377, AUC : 0.6592981999424258
Device used : cuda

Early stopping occured at epoch 53 with best_epoch = 43 and best_val_logits_ll = 0.01744
Best weights from best epoch are automatically used!
[01:59] TabNet: Seed 0, Fold 0: 0.01744072140636863
Device used : cuda

Early stopping occured at epoch 48 with best_epoch = 38 and best_val_logits_ll = 0.01808
Best weights from best epoch are automatically used!
[01:49] TabNet: Seed 0, Fold 1: 0.018081774804802244
Device used : cuda

Early stopping occured at epoch 59 with best_epoch = 49 and best_val_logits_ll = 0.01777
Best weights from best epoch are automatically used!
[02:14] TabNet: Seed 0, Fold 2: 0.017765096029210687
Device used : cuda

Early stopping occured at epoch 51 with best_epoch = 41 and best_val_logits_ll = 0.01748
Best weights from best epoch are automatically used!
[01:55] TabNet: Seed 0, Fold 3: 0.017477945588201576
Device used : cuda

Early stopping occured at epoch 43 with best_epoch = 33 and be

[32m[I 2020-11-10 23:00:43,503][0m Trial 3 finished with value: 0.01694508309090747 and parameters: {'momentum': 0.031070908552115167}. Best is trial 1 with value: 0.01694232257639946.[0m


Blended CV: 0.01694508309090747, AUC : 0.6576024339261282
Device used : cuda

Early stopping occured at epoch 53 with best_epoch = 43 and best_val_logits_ll = 0.01744
Best weights from best epoch are automatically used!
[01:59] TabNet: Seed 0, Fold 0: 0.01743521205393178
Device used : cuda

Early stopping occured at epoch 48 with best_epoch = 38 and best_val_logits_ll = 0.01806
Best weights from best epoch are automatically used!
[01:48] TabNet: Seed 0, Fold 1: 0.01806376934817615
Device used : cuda

Early stopping occured at epoch 42 with best_epoch = 32 and best_val_logits_ll = 0.0181
Best weights from best epoch are automatically used!
[01:36] TabNet: Seed 0, Fold 2: 0.018101956397521973
Device used : cuda

Early stopping occured at epoch 51 with best_epoch = 41 and best_val_logits_ll = 0.01748
Best weights from best epoch are automatically used!
[01:55] TabNet: Seed 0, Fold 3: 0.01747822855824039
Device used : cuda

Early stopping occured at epoch 41 with best_epoch = 31 and best_v

[32m[I 2020-11-10 23:19:42,356][0m Trial 4 finished with value: 0.016982368184015096 and parameters: {'momentum': 0.010235651499991142}. Best is trial 1 with value: 0.01694232257639946.[0m


Blended CV: 0.016982368184015096, AUC : 0.6558332730249985
Device used : cuda

Early stopping occured at epoch 53 with best_epoch = 43 and best_val_logits_ll = 0.01744
Best weights from best epoch are automatically used!
[01:59] TabNet: Seed 0, Fold 0: 0.017444907224682154
Device used : cuda

Early stopping occured at epoch 48 with best_epoch = 38 and best_val_logits_ll = 0.01809
Best weights from best epoch are automatically used!
[01:48] TabNet: Seed 0, Fold 1: 0.0180936974025046
Device used : cuda

Early stopping occured at epoch 61 with best_epoch = 51 and best_val_logits_ll = 0.01776
Best weights from best epoch are automatically used!
[02:19] TabNet: Seed 0, Fold 2: 0.017762933393391238
Device used : cuda

Early stopping occured at epoch 51 with best_epoch = 41 and best_val_logits_ll = 0.01748
Best weights from best epoch are automatically used!
[01:55] TabNet: Seed 0, Fold 3: 0.017477497928334917
Device used : cuda

Early stopping occured at epoch 43 with best_epoch = 33 and bes

[32m[I 2020-11-10 23:39:44,426][0m Trial 5 finished with value: 0.01694198887055702 and parameters: {'momentum': 0.05431806922788371}. Best is trial 5 with value: 0.01694198887055702.[0m


Blended CV: 0.01694198887055702, AUC : 0.6581405806997269
Device used : cuda

Early stopping occured at epoch 53 with best_epoch = 43 and best_val_logits_ll = 0.01745
Best weights from best epoch are automatically used!
[02:00] TabNet: Seed 0, Fold 0: 0.017447682586906384
Device used : cuda

Early stopping occured at epoch 48 with best_epoch = 38 and best_val_logits_ll = 0.0181
Best weights from best epoch are automatically used!
[01:49] TabNet: Seed 0, Fold 1: 0.018103830190167976
Device used : cuda

Early stopping occured at epoch 61 with best_epoch = 51 and best_val_logits_ll = 0.01776
Best weights from best epoch are automatically used!
[02:18] TabNet: Seed 0, Fold 2: 0.01776130960657588
Device used : cuda

Early stopping occured at epoch 51 with best_epoch = 41 and best_val_logits_ll = 0.01748
Best weights from best epoch are automatically used!
[01:56] TabNet: Seed 0, Fold 3: 0.017478061522146517
Device used : cuda

Early stopping occured at epoch 43 with best_epoch = 33 and best

[32m[I 2020-11-10 23:59:49,239][0m Trial 6 finished with value: 0.01694350292524751 and parameters: {'momentum': 0.07188160708793553}. Best is trial 5 with value: 0.01694198887055702.[0m


Blended CV: 0.01694350292524751, AUC : 0.6583012141121448
Device used : cuda

Early stopping occured at epoch 53 with best_epoch = 43 and best_val_logits_ll = 0.01744
Best weights from best epoch are automatically used!
[01:59] TabNet: Seed 0, Fold 0: 0.01744349306682438
Device used : cuda

Early stopping occured at epoch 48 with best_epoch = 38 and best_val_logits_ll = 0.01809
Best weights from best epoch are automatically used!
[01:49] TabNet: Seed 0, Fold 1: 0.018089347817923616
Device used : cuda

Early stopping occured at epoch 61 with best_epoch = 51 and best_val_logits_ll = 0.01776
Best weights from best epoch are automatically used!
[02:18] TabNet: Seed 0, Fold 2: 0.01776437455228944
Device used : cuda

Early stopping occured at epoch 51 with best_epoch = 41 and best_val_logits_ll = 0.01748
Best weights from best epoch are automatically used!
[01:55] TabNet: Seed 0, Fold 3: 0.017477431327740017
Device used : cuda

Early stopping occured at epoch 43 with best_epoch = 33 and best

[32m[I 2020-11-11 00:19:51,929][0m Trial 7 finished with value: 0.01694150818808516 and parameters: {'momentum': 0.04595396317643044}. Best is trial 7 with value: 0.01694150818808516.[0m


Blended CV: 0.01694150818808516, AUC : 0.6580535528542667
Device used : cuda

Early stopping occured at epoch 64 with best_epoch = 54 and best_val_logits_ll = 0.01742
Best weights from best epoch are automatically used!
[02:23] TabNet: Seed 0, Fold 0: 0.01741930470831168
Device used : cuda

Early stopping occured at epoch 64 with best_epoch = 54 and best_val_logits_ll = 0.01802
Best weights from best epoch are automatically used!
[02:23] TabNet: Seed 0, Fold 1: 0.018015048568762584
Device used : cuda

Early stopping occured at epoch 44 with best_epoch = 34 and best_val_logits_ll = 0.01784
Best weights from best epoch are automatically used!
[01:40] TabNet: Seed 0, Fold 2: 0.017837226464877686
Device used : cuda

Early stopping occured at epoch 51 with best_epoch = 41 and best_val_logits_ll = 0.01748
Best weights from best epoch are automatically used!
[01:55] TabNet: Seed 0, Fold 3: 0.01748497125693295
Device used : cuda

Early stopping occured at epoch 43 with best_epoch = 33 and best

[32m[I 2020-11-11 00:39:59,827][0m Trial 8 finished with value: 0.016938784931317234 and parameters: {'momentum': 0.23020347062693788}. Best is trial 8 with value: 0.016938784931317234.[0m


Blended CV: 0.016938784931317234, AUC : 0.6598071730462464
Device used : cuda

Early stopping occured at epoch 64 with best_epoch = 54 and best_val_logits_ll = 0.01742
Best weights from best epoch are automatically used!
[02:24] TabNet: Seed 0, Fold 0: 0.017418370602983946
Device used : cuda

Early stopping occured at epoch 64 with best_epoch = 54 and best_val_logits_ll = 0.01801
Best weights from best epoch are automatically used!
[02:25] TabNet: Seed 0, Fold 1: 0.018007989584697164
Device used : cuda

Early stopping occured at epoch 44 with best_epoch = 34 and best_val_logits_ll = 0.01784
Best weights from best epoch are automatically used!
[01:40] TabNet: Seed 0, Fold 2: 0.017838060388685864
Device used : cuda

Early stopping occured at epoch 51 with best_epoch = 41 and best_val_logits_ll = 0.01748
Best weights from best epoch are automatically used!
[01:55] TabNet: Seed 0, Fold 3: 0.017483465215465746
Device used : cuda

Early stopping occured at epoch 43 with best_epoch = 33 and b

[32m[I 2020-11-11 01:00:09,655][0m Trial 9 finished with value: 0.01693749287842033 and parameters: {'momentum': 0.20723721333453135}. Best is trial 9 with value: 0.01693749287842033.[0m


Blended CV: 0.01693749287842033, AUC : 0.6597277341538852


In [53]:
if TUNING:
    # Summarise the best trial of the study and the parameter importances.
    print("Best trial:")
    trial = study.best_trial

    print(f"  CV:  {trial.value}")
    print(f"  AUC: {objective.best_auc}")
    print("  Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    print(optuna.importance.get_param_importances(study))

Best trial:
  CV:  0.01693749287842033
  AUC: 0.6597277341538852
  Params: 
    momentum: 0.20723721333453135
OrderedDict([('momentum', 1.0)])


In [54]:
# NOTE(review): a bare `raise` with no active exception fails with RuntimeError.
# This looks like a deliberate stop so that running all cells halts after the
# tuning section — confirm before removing.
raise

RuntimeError: No active exception to reraise

In [None]:
%%time

# Switch for the pre-training pass on the non-scored targets.
PRE_TRAIN = True

if PRE_TRAIN:
    # do_predict keeps its default (False), so only the model types listed in
    # pre_train_models are trained and no test predictions are produced; the
    # returned OOF/prediction dicts are discarded.
    # Features are truncated to the number of non-scored target rows.
    _, _ = learning(
        train[: non_target.shape[0]],
        train_pca[: non_target.shape[0]],
        non_target,
        non_target_drug,
        N_STARTS,
        N_SPLITS,
    )

In [None]:
%%time

# Main training run on the scored targets with test prediction enabled.
# Passing `non_target` as transfer_learning_base makes the model types in
# pre_train_models start from their per-seed/fold checkpoint weights.
oof, predictions = learning(
    train,
    train_pca,
    target,
    target_drug,
    N_STARTS,
    N_SPLITS,
    do_predict=True,
    transfer_learning_base=non_target,
    pseudo_labeling=False,
)

## Cross Validation

In [None]:
# One equal weight per seed run, followed by 1.0 as the starting value of the
# Lagrange multiplier used by the optional weight optimisation below.
initial_weights = [1.0 / N_STARTS] * N_STARTS + [1.0]
# Evaluate only on the first `non_target.shape[0]` rows of the targets.
y_true = target.values[: non_target.shape[0]]

print("===== OOF CV =====")
for key, val in oof.items():
    print(f"OOF Key: {key}, CV: {metric(y_true, val.values[:y_true.shape[0]])}")

# Group the OOF results by model type (keys are "{model_name}_{seed}").
oof_by_model = {
    name: {run: frame for run, frame in oof.items() if run.startswith(name)}
    for name in models
}
# NOTE(review): every per-model subset is blended with the full list of
# N_STARTS weights — confirm that `blend` copes with more weights than runs.
for model, oof_ in oof_by_model.items():
    print(f"\n===== Model {model} CV =====")
    cross_validation(y_true.shape, initial_weights[:-1], y_true, oof_)

print("\n===== Overall CV =====")
cross_validation(y_true.shape, initial_weights[:-1], y_true, oof)

optimize = False

if not optimize:
    optimized_weights = initial_weights

else:
    # https://www.kaggle.com/gogo827jz/optimise-blending-weights-with-bonus-0#Bonus-(Lagrange-Multiplier)

    def lagrange_func(params):
        # params = blending weights followed by the Lagrange multiplier
        weights, multiplier = params[:-1], params[-1]
        blended = blend(y_true.shape, weights, oof)
        return metric(y_true, blended) - multiplier * (sum(weights) - 1)

    grad_l = grad(lagrange_func)

    def lagrange_obj(params):
        # Gradient w.r.t. the weights plus the "weights sum to one" constraint.
        gradient = grad_l(params).tolist()
        return gradient[:-1] + [sum(params[:-1]) - 1]

    optimized_weights = fsolve(lagrange_obj, initial_weights)
    cross_validation(y_true.shape, optimized_weights[:-1], y_true, oof)

    print(f"Optimized weights: {optimized_weights[:-1]}")
    print(f"Check the sum of all weights: {sum(optimized_weights[:-1])}")

In [None]:
# Group the predictions per model: an entry belongs to a model when its key
# starts with that model's name.
predictions_by_model = {model: {k: v for k, v in predictions.items() if k.startswith(model)} for model in models}

# One equally-weighted blend per model, wrapped in a DataFrame so the blends can
# be correlated row by row below.
blend_by_model = {
    model: pd.DataFrame(blend(ss.shape, initial_weights[:-1], predictions_by_model[model])) for model in models
}

if IN_COLAB:
    # Add the external public-test predictions as one more entry to compare against.
    pub_test_pseudo_label = pub_ss.drop("sig_id", axis=1)
    # Relabel columns with positional integers so `corrwith` can align them with
    # the blended frames (assumes `blend` returns a plain array, giving those
    # frames default 0..n-1 column labels). The count comes from the frame itself
    # rather than a hard-coded 206, so a different target count does not break it.
    pub_test_pseudo_label.columns = range(pub_test_pseudo_label.shape[1])
    blend_by_model["pub_test"] = pub_test_pseudo_label

# Mean row-wise correlation for every unordered pair of prediction sources.
for a, b in itertools.combinations(blend_by_model, 2):
    corr = blend_by_model[a].corrwith(blend_by_model[b], axis=1)
    print(f"Prediction correlation between {a} and {b}: {corr.mean()}")

# Pseudo Label

## Preparation

In [None]:
# Switch for the pseudo-labeling stage. The name is spelled this way in the
# later pseudo-label cells too, so it is kept unchanged here.
PESEUDO_LABELING = False

if PESEUDO_LABELING:
    # Blend Predictions
    pseudo_label_df = submit_df.copy()
    pseudo_label_df.loc[:, target.columns] = blend(ss.shape, optimized_weights[:-1], predictions)

    # Preprocess Pseudo Label: keep only rows whose cp_type is not "ctl_vehicle".
    pseudo_label_df = pseudo_label_df.loc[test_df["cp_type"] != "ctl_vehicle"].reset_index(drop=True)

    # Attach the `drug` table to each remaining sig_id; values missing after the
    # left join are filled with a placeholder string.
    pseudo_label_drug = pd.DataFrame(pseudo_label_df.loc[:, "sig_id"]).merge(drug, on="sig_id", how="left")
    pseudo_label_drug = pseudo_label_drug.fillna("yyyyyyyyy")

    # Drop the "fold" column. errors="ignore" keeps this cell re-runnable:
    # without it a second execution raises KeyError because the column is
    # already gone.
    target_drug = target_drug.drop(columns=["fold"], errors="ignore")

    del pseudo_label_df["sig_id"]

    # Shape check: each train-side table next to its test / pseudo-label counterpart.
    print(train.shape)
    print(test.shape)

    print(train_pca.shape)
    print(test_pca.shape)

    print(target.shape)
    print(pseudo_label_df.shape)

    print(target_drug.shape)
    print(pseudo_label_drug.shape)

    # A bare expression inside an `if` body is never displayed by the notebook,
    # so show a preview explicitly.
    print(pseudo_label_df.head())

## Training

In [None]:
if PESEUDO_LABELING:
    # Each train-side table is stacked on top of its test / pseudo-label
    # counterpart with a fresh 0..n-1 index. The four stacked tables are passed
    # positionally, in the order listed, ahead of the remaining arguments.
    oof, predictions = learning(
        *(
            pd.concat(pair, ignore_index=True)
            for pair in (
                [train, test],
                [train_pca, test_pca],
                [target, pseudo_label_df],
                [target_drug, pseudo_label_drug],
            )
        ),
        N_STARTS,
        N_SPLITS,
        do_predict=True,
        transfer_learning_base=target,
        pseudo_labeling=True,
    )

## Cross Validation

In [None]:
if PESEUDO_LABELING:
    # Score each OOF entry on its leading rows only, as many as `y_true` has.
    for key, val in oof.items():
        score = metric(y_true, val.values[: len(y_true)])
        print(f"OOF Key: {key}, CV: {score}")

    # Then the equally-weighted blend of all entries.
    cross_validation(y_true.shape, initial_weights[:-1], y_true, oof)

# Postprocessing

In [None]:
# Weighted blend: combine all predictions using the weights (the trailing
# multiplier slot is dropped) and write the result into the target columns.
blended_predictions = blend(ss.shape, optimized_weights[:-1], predictions)
submit_df.loc[:, target.columns] = blended_predictions

In [None]:
# Clipping
# submit_df.loc[:, target.columns] = submit_df.loc[:, target.columns].clip(1e-7, 1 - 1e-7)

In [None]:
# Rows whose cp_type is "ctl_vehicle" get 0 in every target column.
is_control = test_df["cp_type"] == "ctl_vehicle"
submit_df.loc[is_control, target.columns] = 0

# Output

In [None]:
# Write the final frame to submission.csv in the working directory, without
# the DataFrame index column.
submit_df.to_csv("submission.csv", index=False)