In [19]:
"""
[V6]
Blend 6 Models:
* kibuna-nn-hs-1024-last-train (aka. 2stage-NN, LB: 0.01822)
* script_simpleNN_oldcv (LB: 0.01836)
* fork-of-2heads-looper-super-puper-markpeng-infer (LB: 0.01836)
* deepinsight-efficientnet-v7-b3-infer (LB: 0.01850)
* 503-203-tabnet-with-nonscored-features (LB: 0.01836)
* deepinsight_resnest_lightning_v2_infer (LB: 0.01854)

The LB values above mirror the scores embedded in the submission filenames
listed in `model_list`.
"""

# True when running inside a Kaggle kernel; selects Kaggle input paths and
# bare script names instead of the local workspace / repo paths.
kernel_mode = False

import os
import numpy as np
import pandas as pd
import time
import random
import math
import datetime
import glob

from numba import njit
from scipy.optimize import minimize, fsolve

import optuna

import warnings
warnings.filterwarnings('ignore')

import gc
gc.enable()

# Seed handed to the Optuna TPE sampler.
rand_seed = 1120

# search_mode=True  -> search/evaluate blend weights on OOF predictions.
# search_mode=False -> run the inference scripts with the weights stored in model_list.
search_mode = True
# When True, the inference scripts are also run after a weight search and
# submission.csv is written at the end of the notebook.
run_submit_script = False

# Weight-search strategy; exactly one `method` assignment should be active.
#   "CV"               -> score the fixed weights from model_list on OOF
#   "scipy_per_target" -> SLSQP, one weight vector per target column
#   "scipy"            -> SLSQP, one global weight vector
#   "optuna"           -> TPE search, one global weight vector
# method = "CV"
method = "scipy_per_target"
# method = "scipy"
# method = "optuna"
# Optuna study name; also used as the SQLite file name for study storage.
study_name = "moa_blend_team_v6"
# Number of Optuna trials to run.
# n_trials = 500
# n_trials = 3000
n_trials = 5000

In [20]:
# !mkdir -p /root/.cache/torch/hub/checkpoints/
# !cp ../input/gen-efficientnet-pretrained/tf_efficientnet_*.pth /root/.cache/torch/hub/checkpoints/
# !cp ../input/deepinsight-resnest-v1-resnest50/*.pth /root/.cache/torch/hub/checkpoints/
# !cp ../input/deepinsight-resnest-v2-resnest50-output/*.pth /root/.cache/torch/hub/checkpoints/
# !ls -la /root/.cache/torch/hub/checkpoints/

In [21]:
# !cp ../input/kaggle-moa-team/scripts/* .
# !ls -la

In [22]:
# !pip install --no-index --find-links /kaggle/input/pytorchtabnet/pytorch_tabnet-2.0.0-py3-none-any.whl pytorch-tabnet

In [23]:
# Competition data: Kaggle input mount inside a kernel, local workspace otherwise.
dataset_folder = "../input/lish-moa" if kernel_mode else "/workspace/Kaggle/MoA/"

# One row per blended model:
#   (script name in kernel mode, script path outside kernel mode,
#    oof_filename, output_filename, weight)
# The weights equal the [V6] Optuna best-trial parameters recorded in the
# commented results cell further down this notebook.
_model_specs = [
    ("2stageNN_with_ns_oldcv.py",
     "../../Github/kaggle_moa_team/scripts/2stageNN_with_ns_oldcv.py",
     "../../Github/kaggle_moa_team/oof/oof_2stageNN_ns_oldcv.npy",
     "submission_2stageNN_with_ns_oldcv_0.01822.csv",
     0.30923325055652684),

    ("script_simpleNN_oldcv.py",
     "../../Github/kaggle_moa_team/scripts/script_simpleNN_oldcv.py",
     "../../Github/kaggle_moa_team/oof/oof_script_simpleNN_oldcv.npy",
     "submission_script_simpleNN_oldcv_0.01836.csv",
     0.09831493504786226),

    ("fork-of-2heads-looper-super-puper-markpeng-infer.py",
     "../../Github/kaggle_moa_team/scripts/fork-of-2heads-looper-super-puper-markpeng-infer.py",
     "../../Github/kaggle_moa_team/oof/oof_fork-of-2heads-looper-super-puper-markpeng.npy",
     "submission_2heads-looper-super-puper_0.01836.csv",
     0.018966959973949222),

    ("deepinsight_efficientnet_lightning_v7_b3_infer.py",
     "../../Github/kaggle_moa_team/scripts/deepinsight_efficientnet_lightning_v7_b3_infer.py",
     "../../Github/kaggle_moa_team/oof/oof_deepinsight_efficientnet_lightning_v7_b3_0.01850.npy",
     "submission_effnet_v7_b3_0.01850.csv",
     0.19863369862866234),

    ("script_tabnet_ns_oldcv.py",
     "../../Github/kaggle_moa_team/scripts/script_tabnet_ns_oldcv.py",
     "../../Github/kaggle_moa_team/oof/oof_tabnet_ns_oldcv.npy",
     "submission_tabnet_ns_oldcv_0.01836.csv",
     0.0013224625996093413),

    ("deepinsight_resnest_lightning_v2_infer.py",
     "../../Github/kaggle_moa_team/scripts/deepinsight_resnest_lightning_v2_infer.py",
     "../../Github/kaggle_moa_team/oof/oof_deepinsight_ResNeSt_v2_resnest50_0.01854.npy",
     "submission_resnest_v2_0.01854.csv",
     0.3728865483320761),
]

# Add your model inference script here (as a new row in _model_specs)
# Tuple Format: (script, oof_filename, output_filename, weight)
model_list = [
    (kernel_script if kernel_mode else local_script,
     oof_file, output_file, blend_weight)
    for kernel_script, local_script, oof_file, output_file, blend_weight
    in _model_specs
]

# Directory prefix used when invoking the inference scripts.
model_path = "." if kernel_mode else dataset_folder

In [24]:
# Training inputs and scored targets.
train_features = pd.read_csv(f"{dataset_folder}/train_features.csv", engine='c')
train_labels = pd.read_csv(f'{dataset_folder}/train_targets_scored.csv', engine='c')

# Every label column other than the row identifier is a prediction target.
train_classes = [col for col in train_labels.columns if col != "sig_id"]

# Boolean mask of rows whose cp_type is "trt_cp"; the label frame is
# restricted to those rows and re-indexed from 0.
non_control_group_rows = train_features["cp_type"] == "trt_cp"
_treated_labels = train_labels.loc[non_control_group_rows, :].copy()
non_control_group_train_labels = _treated_labels.reset_index(drop=True)

# Start the blended submission from all zeros (first column is left untouched).
submission = pd.read_csv(f'{dataset_folder}/sample_submission.csv')
submission.iloc[:, 1:] = 0

In [25]:
def mean_logloss(y_pred, y_true):
    """Return the NaN-ignoring mean binary log loss over all elements.

    A constant 1e-15 is added inside each logarithm (no clipping is done),
    and NaN entries in the element-wise loss are skipped by the mean.

    Args:
        y_pred: array of predicted probabilities.
        y_true: array of 0/1 labels, broadcastable against ``y_pred``.

    Returns:
        Scalar mean of the element-wise negative log-likelihood.
    """
    negative_part = (1 - y_true) * np.log(1 - y_pred + 1e-15)
    positive_part = y_true * np.log(y_pred + 1e-15)
    return np.nanmean(-(negative_part + positive_part))

In [26]:
# Reference: https://www.kaggle.com/gogo827jz/optimise-blending-weights-with-bonus-0/notebook
# CPMP's logloss from https://www.kaggle.com/c/lish-moa/discussion/183010
def log_loss_numpy(y_pred, y_true):
    """Return the mean binary log loss over the flattened inputs.

    Predictions are clipped to [1e-15, 1 - 1e-15] before the logarithm.
    An element counts as positive only where its label equals 1.

    Args:
        y_pred: array-like of predicted probabilities.
        y_true: array-like of labels with the same number of elements.

    Returns:
        Scalar mean of the element-wise loss.
    """
    labels = np.asarray(y_true).ravel()
    probs = np.clip(np.asarray(y_pred).ravel(), 1e-15, 1 - 1e-15)
    element_loss = np.where(labels == 1, -np.log(probs), -np.log(1 - probs))
    return element_loss.mean()

def func_numpy_metric(weights):
    """Objective for the global weight search: log loss of the weighted OOF blend.

    Reads the notebook globals ``all_oof`` (stacked per-model OOF predictions,
    contracted along its first axis with ``weights``) and ``y_true``.
    """
    blended = np.tensordot(weights, all_oof, axes=(0, 0))
    return log_loss_numpy(blended, y_true)

@njit
def grad_func_jit(weights):
    """Analytic gradient of the blended log loss with respect to each weight.

    For model ``i`` with clipped predictions ``b``, the blend is
    ``p = weights[i] * b + c`` where ``c`` is the weighted sum of all other
    models. The expression below is ``-mean(b * (y - p) / (p * (1 - p)))``
    with the products expanded.

    Reads the notebook globals ``all_oof`` (indexed as model, row, target)
    and ``y_true``.
    NOTE(review): Numba is documented to treat globals as compile-time
    constants, so this presumably needs to be (re)defined after ``all_oof``
    and ``y_true`` are assigned - confirm before enabling ``jac=``.

    Args:
        weights: 1-D array with one blend weight per model.

    Returns:
        1-D array of partial derivatives, one per model.
    """
    oof_clip = np.minimum(1 - 1e-15, np.maximum(all_oof, 1e-15))
    gradients = np.zeros(all_oof.shape[0])
    for i in range(all_oof.shape[0]):
        a, b, c = y_true, oof_clip[i], np.zeros(
            (all_oof.shape[1], all_oof.shape[2]))
        # Iterate over MODELS (first axis of all_oof). The previous code used
        # `oof.shape[0]`, i.e. the row count of a single model's OOF array,
        # which indexes weights/oof_clip far out of bounds.
        for j in range(all_oof.shape[0]):
            if j != i:
                c += weights[j] * oof_clip[j]
        gradients[i] = -np.mean(
            (-a * b + (b**2) * weights[i] + b * c) /
            ((b**2) *
             (weights[i]**2) + 2 * b * c * weights[i] - b * weights[i] +
             (c**2) - c))
    return gradients

## Bayesian Optimization and Sequential Least Squares Programming (SLSQP)
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.samplers.TPESampler.html#optuna.samplers.TPESampler

https://docs.scipy.org/doc/scipy/reference/optimize.minimize-slsqp.html

In [27]:
def run_inference_scripts(submission, weights=None):
    for i, (script, oof_filename, output_filename, weight) in enumerate(model_list):
        print(f"Generating submission file from {script} ......")
        infer_start = time.time()
        !python {model_path}/{script}
        infer_elapsed = time.time() - infer_start
        print(f"Time spent on inference: {infer_elapsed/60:.2f} minutes.")

        model_submit = pd.read_csv(output_filename, engine='c')
        print(model_submit.head(5))
        print(model_submit.shape)
        if weights is None:
            print(f"Blending {script} with weight: {weight} ......")
            submission.iloc[:, 1:] += weight * model_submit.iloc[:, 1:]
        else:
            print(f"Blending {script} with weight: {weights[i]} ......")
            submission.iloc[:, 1:] += weights[i] * model_submit.iloc[:, 1:]

    return submission

In [30]:
# Wall-clock start for the whole search / inference run (reported in a later cell).
total_start = time.time()

# Dispatch on the configuration flags:
#   not search_mode       -> run inference with the weights stored in model_list
#   "CV"                  -> score the stored weights on the OOF predictions
#   "optuna"              -> TPE search for one global weight vector
#   "scipy"               -> SLSQP search for one global weight vector
#   "scipy_per_target"    -> SLSQP search repeated for every target column
if not search_mode:
    submission = run_inference_scripts(submission)
elif search_mode and method == "CV":
    y_true = non_control_group_train_labels[train_classes].values

    # all_oof is indexed as (model, row, target); blend_oof is the weighted sum.
    all_oof = np.zeros(
        (len(model_list), non_control_group_train_labels.shape[0], 206))
    blend_oof = np.zeros((non_control_group_train_labels.shape[0], 206))
    print(all_oof.shape)
    for i, (script, oof_filename, output_filename,
            weight) in enumerate(model_list):
        print(f"Loading OOF from {oof_filename} ......")
        oof = np.load(f"{dataset_folder}/{oof_filename}")

        # OOF files with 23814 rows are filtered down to the trt_cp rows
        # (presumably 23814 is the full training row count - confirm).
        if oof.shape[0] == 23814:
            oof = oof[non_control_group_rows, :]

        all_oof[i, :, :] = oof
        blend_oof += oof * weight

        oof_loss = mean_logloss(oof, y_true)
        print(f"OOF Validation Loss of {script}: {oof_loss:.6f}\n")

    blend_oof_loss = mean_logloss(blend_oof, y_true)
    print(f"Blend OOF Validation Loss: {blend_oof_loss:.6f}\n")

elif search_mode and method == "optuna":
    print("[Optuna]")
    ## Search Best Blend Weights by Optuna ##
    model_oofs = []

    for i, (script, oof_filename, output_filename,
            weight) in enumerate(model_list):
        print(f"Loading OOF from {oof_filename} ......")
        oof = np.load(f"{dataset_folder}/{oof_filename}")

        # Same trt_cp row filter as in the other branches.
        if oof.shape[0] == 23814:
            oof = oof[non_control_group_rows, :]

        oof_loss = mean_logloss(
            oof, non_control_group_train_labels[train_classes].values)
        print(f"OOF Validation Loss of {script}: {oof_loss:.6f}\n")
        model_oofs.append(oof)

    def objective(trial):
        # Each weight is sampled independently in [0, 1]; nothing forces the
        # weights to sum to 1, so the blend is clipped to [0, 1] before scoring.
        weights = []
        for i in range(len(model_list)):
            weights.append(trial.suggest_float(f"w{i}", 0, 1.0))

        blend = np.zeros(model_oofs[0].shape)
        for i in range(len(model_list)):
            blend += weights[i] * model_oofs[i]
        blend = np.clip(blend, 0, 1.0)

        loss = mean_logloss(
            blend, non_control_group_train_labels[train_classes].values)
        return loss

    # NOTE(review): `objective` never reports intermediate values, so this
    # pruner presumably has no effect - confirm against the Optuna docs.
    pruner = optuna.pruners.MedianPruner(
        n_startup_trials=5,
        n_warmup_steps=0,
        interval_steps=1,
    )
    sampler = optuna.samplers.TPESampler(seed=rand_seed)
    # The study is stored in a local SQLite file named after study_name and is
    # reused when it already exists (load_if_exists=True).
    study = optuna.create_study(direction="minimize",
                                pruner=pruner,
                                sampler=sampler,
                                study_name=study_name,
                                storage=f'sqlite:///{study_name}.db',
                                load_if_exists=True)

    study.optimize(objective,
                   n_trials=n_trials,
                   timeout=None,
                   gc_after_trial=True,
                   n_jobs=-1)

    trial = study.best_trial

    if run_submit_script:
        # Collect the best trial's weights in model_list order.
        optimal_weights = []
        for i, (script, oof_filename, output_filename,
                _) in enumerate(model_list):
            optimal_weights.append(trial.params[f"w{i}"])
        submission = run_inference_scripts(submission, weights=optimal_weights)

    print("\n[Optuna]")
    print("Number of finished trials: {}".format(len(study.trials)))
    print("Best trial:")
    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

elif search_mode and method == "scipy":
    print("[Scipy SLSQP]")
    # Optimise Blending Weights with Bonus
    # https://www.kaggle.com/gogo827jz/optimise-blending-weights-with-bonus-0/notebook
    model_oofs = []
    y_true = non_control_group_train_labels[train_classes].values

    # all_oof and y_true are read as globals by func_numpy_metric.
    all_oof = np.zeros(
        (len(model_list), non_control_group_train_labels.shape[0], 206))
    print(all_oof.shape)
    for i, (script, oof_filename, output_filename,
            weight) in enumerate(model_list):
        print(f"Loading OOF from {oof_filename} ......")
        oof = np.load(f"{dataset_folder}/{oof_filename}")

        # Same trt_cp row filter as in the other branches.
        if oof.shape[0] == 23814:
            oof = oof[non_control_group_rows, :]

        all_oof[i, :, :] = oof

        oof_loss = mean_logloss(oof, y_true)
        print(f"OOF Validation Loss of {script}: {oof_loss:.6f}\n")
        model_oofs.append(oof)

    # Start from equal weights; each weight is bounded to [0, 1] and the
    # equality constraint forces the weights to sum to 1.
    tol = 1e-10
    init_guess = [1 / all_oof.shape[0]] * all_oof.shape[0]
    bnds = [(0, 1) for _ in range(all_oof.shape[0])]
    cons = {
        'type': 'eq',
        'fun': lambda x: np.sum(x) - 1,
        'jac': lambda x: [1] * len(x)
    }

    print('Inital Blend OOF:', func_numpy_metric(init_guess))

    start_time = time.time()
    # The analytic gradient is commented out, so no `jac` is passed to minimize.
    res_scipy = minimize(
        fun=func_numpy_metric,
        x0=init_guess,
        method='SLSQP',
        # jac=grad_func_jit,  # grad_func
        bounds=bnds,
        constraints=cons,
        tol=tol)
    print("\n[Scipy SLSQP]")
    # The [2:7] slice keeps the MM:SS part of the timedelta string.
    print(
        f'[{str(datetime.timedelta(seconds = time.time() - start_time))[2:7]}] Optimised Blend OOF:',
        res_scipy.fun)
    print(f'Optimised Weights: {res_scipy.x}\n')

    if run_submit_script:
        submission = run_inference_scripts(submission, weights=res_scipy.x)

# Target-wise Weight Optimization #

elif search_mode and method == "scipy_per_target":
    print("[Scipy SLSQP]")
    # Optimise Blending Weights with Bonus
    # https://www.kaggle.com/gogo827jz/optimise-blending-weights-with-bonus-0/notebook
    model_oofs = []
    y_true = non_control_group_train_labels[train_classes].values

    all_oof = np.zeros(
        (len(model_list), non_control_group_train_labels.shape[0], 206))
    print(all_oof.shape)
    for i, (script, oof_filename, output_filename,
            weight) in enumerate(model_list):
        print(f"Loading OOF from {oof_filename} ......")
        oof = np.load(f"{dataset_folder}/{oof_filename}")

        # Same trt_cp row filter as in the other branches.
        if oof.shape[0] == 23814:
            oof = oof[non_control_group_rows, :]

        all_oof[i, :, :] = oof

        oof_loss = mean_logloss(oof, y_true)
        print(f"OOF Validation Loss of {script}: {oof_loss:.6f}\n")
        model_oofs.append(oof)

    print("\n[Scipy SLSQP Per Target]")
    # One optimised weight vector (one weight per model) is appended per
    # target, in train_classes order.
    optimized_target_weights = []
    for i, target in enumerate(train_classes):
        tol = 1e-10
        init_guess = [1 / all_oof.shape[0]] * all_oof.shape[0]
        bnds = [(0, 1) for _ in range(all_oof.shape[0])]
        cons = {
            'type': 'eq',
            'fun': lambda x: np.sum(x) - 1,
            'jac': lambda x: [1] * len(x)
        }

        # Objective restricted to target column i. It reads the loop variable
        # `i` at call time, so it must only be used inside this iteration.
        def func_numpy_metric_targes(weights):
            oof_blend = np.tensordot(weights,
                                     all_oof[:, :, i],
                                     axes=((0), (0)))
            return log_loss_numpy(oof_blend, y_true[:, i])

        start_time = time.time()
        res_scipy = minimize(
            fun=func_numpy_metric_targes,
            x0=init_guess,
            method='SLSQP',
            # jac=grad_func_jit,  # grad_func
            bounds=bnds,
            constraints=cons,
            tol=tol)

        print(
            f'[{str(datetime.timedelta(seconds = time.time() - start_time))[2:7]}] ' + \
            f'Optimised Blend OOF for {target}:', res_scipy.fun)
        print(f'Optimised Weights for {target}: {res_scipy.x}\n')
        optimized_target_weights.append(res_scipy.x)

    if run_submit_script:
        # TODO: for 206 target weight
        # NOTE(review): this call needs run_inference_scripts to accept a
        # `target_weights` keyword - verify the signature before enabling
        # run_submit_script with this method.
        submission = run_inference_scripts(
            submission, target_weights=optimized_target_weights)

[Scipy SLSQP]
(6, 21948, 206)
Loading OOF from ../../Github/kaggle_moa_team/oof/oof_2stageNN_ns_oldcv.npy ......
OOF Validation Loss of ../../Github/kaggle_moa_team/scripts/2stageNN_with_ns_oldcv.py: 0.015606

Loading OOF from ../../Github/kaggle_moa_team/oof/oof_script_simpleNN_oldcv.npy ......
OOF Validation Loss of ../../Github/kaggle_moa_team/scripts/script_simpleNN_oldcv.py: 0.015846

Loading OOF from ../../Github/kaggle_moa_team/oof/oof_fork-of-2heads-looper-super-puper-markpeng.npy ......
OOF Validation Loss of ../../Github/kaggle_moa_team/scripts/fork-of-2heads-looper-super-puper-markpeng-infer.py: 0.015887

Loading OOF from ../../Github/kaggle_moa_team/oof/oof_deepinsight_efficientnet_lightning_v7_b3_0.01850.npy ......
OOF Validation Loss of ../../Github/kaggle_moa_team/scripts/deepinsight_efficientnet_lightning_v7_b3_infer.py: 0.016016

Loading OOF from ../../Github/kaggle_moa_team/oof/oof_tabnet_ns_oldcv.npy ......
OOF Validation Loss of ../../Github/kaggle_moa_team/scripts/

[00:00] Optimised Blend OOF for atm_kinase_inhibitor: 0.0017682635044442502
Optimised Weights for atm_kinase_inhibitor: [0.00000000e+00 1.05918655e-17 4.86773969e-17 2.74791238e-02
 6.60195758e-18 9.72520876e-01]

[00:00] Optimised Blend OOF for atp-sensitive_potassium_channel_antagonist: 0.0008503033082407636
Optimised Weights for atp-sensitive_potassium_channel_antagonist: [0.00000000e+00 0.00000000e+00 6.63128995e-16 0.00000000e+00
 4.97342172e-16 1.00000000e+00]

[00:00] Optimised Blend OOF for atp_synthase_inhibitor: 0.0010510450652743153
Optimised Weights for atp_synthase_inhibitor: [1.76518325e-16 0.00000000e+00 0.00000000e+00 6.41572413e-01
 0.00000000e+00 3.58427587e-01]

[00:00] Optimised Blend OOF for atpase_inhibitor: 0.019703165381132016
Optimised Weights for atpase_inhibitor: [5.81854759e-01 1.50053581e-16 0.00000000e+00 1.81340459e-01
 0.00000000e+00 2.36804782e-01]

[00:00] Optimised Blend OOF for atr_kinase_inhibitor: 0.0026704853053802803
Optimised Weights for atr_kin

[00:00] Optimised Blend OOF for cyclooxygenase_inhibitor: 0.08759610917786487
Optimised Weights for cyclooxygenase_inhibitor: [1.28109342e-01 1.63490346e-01 2.08464754e-01 3.46944695e-18
 3.04311683e-01 1.95623875e-01]

[00:00] Optimised Blend OOF for cytochrome_p450_inhibitor: 0.027580934006906664
Optimised Weights for cytochrome_p450_inhibitor: [2.31911982e-01 1.99493200e-17 2.44008535e-01 1.99238739e-01
 0.00000000e+00 3.24840745e-01]

[00:00] Optimised Blend OOF for dihydrofolate_reductase_inhibitor: 0.007696847200001887
Optimised Weights for dihydrofolate_reductase_inhibitor: [7.80625564e-17 1.00000000e+00 1.23599048e-16 1.22298005e-16
 0.00000000e+00 0.00000000e+00]

[00:00] Optimised Blend OOF for dipeptidyl_peptidase_inhibitor: 0.008225976276457696
Optimised Weights for dipeptidyl_peptidase_inhibitor: [1.57598148e-01 1.08420217e-16 3.06829215e-17 5.73443513e-01
 0.00000000e+00 2.68958339e-01]

[00:00] Optimised Blend OOF for diuretic: 0.0025441975164058675
Optimised Weights for

[00:00] Optimised Blend OOF for hsp_inhibitor: 0.0068539561188335546
Optimised Weights for hsp_inhibitor: [0.38310841 0.         0.08253734 0.12396833 0.         0.41038592]

[00:00] Optimised Blend OOF for igf-1_inhibitor: 0.007319985313307443
Optimised Weights for igf-1_inhibitor: [4.40573909e-01 1.09146626e-01 5.14996032e-19 0.00000000e+00
 0.00000000e+00 4.50279464e-01]

[00:00] Optimised Blend OOF for ikk_inhibitor: 0.005933882368818719
Optimised Weights for ikk_inhibitor: [0.16139846 0.00624026 0.         0.15747458 0.         0.6748867 ]

[00:00] Optimised Blend OOF for imidazoline_receptor_agonist: 0.010216521618950418
Optimised Weights for imidazoline_receptor_agonist: [0.         0.         0.70930764 0.22474144 0.         0.06595092]

[00:00] Optimised Blend OOF for immunosuppressant: 0.016878689523396627
Optimised Weights for immunosuppressant: [2.42312926e-01 0.00000000e+00 9.51387406e-18 1.24673041e-01
 4.09286320e-18 6.33014032e-01]

[00:00] Optimised Blend OOF for insul

[00:00] Optimised Blend OOF for pdgfr_inhibitor: 0.01394240029462277
Optimised Weights for pdgfr_inhibitor: [2.13750520e-01 1.22819777e-17 0.00000000e+00 2.36297196e-01
 4.92541257e-02 5.00698158e-01]

[00:00] Optimised Blend OOF for pdk_inhibitor: 0.0056418685807188475
Optimised Weights for pdk_inhibitor: [1.53147794e-17 1.80362925e-01 0.00000000e+00 1.66445750e-01
 0.00000000e+00 6.53191326e-01]

[00:00] Optimised Blend OOF for phosphodiesterase_inhibitor: 0.05871007092995867
Optimised Weights for phosphodiesterase_inhibitor: [3.11086550e-01 7.89604113e-18 4.63176783e-01 1.39932660e-01
 1.06488982e-17 8.58040059e-02]

[00:00] Optimised Blend OOF for phospholipase_inhibitor: 0.00828775120289737
Optimised Weights for phospholipase_inhibitor: [0.00000000e+00 1.68536146e-17 6.78962295e-17 4.89231517e-01
 4.92386377e-18 5.10768483e-01]

[00:00] Optimised Blend OOF for pi3k_inhibitor: 0.02146783357007677
Optimised Weights for pi3k_inhibitor: [3.65623600e-01 2.35020979e-01 4.75219365e-17 1.

[00:00] Optimised Blend OOF for tachykinin_antagonist: 0.01753260171749754
Optimised Weights for tachykinin_antagonist: [8.18853814e-18 0.00000000e+00 4.01199891e-01 3.80535563e-02
 0.00000000e+00 5.60746553e-01]

[00:00] Optimised Blend OOF for tgf-beta_receptor_inhibitor: 0.0020715550127265426
Optimised Weights for tgf-beta_receptor_inhibitor: [0.00000000e+00 0.00000000e+00 0.00000000e+00 4.33794588e-01
 2.58989482e-17 5.66205412e-01]

[00:00] Optimised Blend OOF for thrombin_inhibitor: 0.006142918090426249
Optimised Weights for thrombin_inhibitor: [0.00000000e+00 0.00000000e+00 9.69926713e-18 6.91505605e-04
 3.62991203e-18 9.99308494e-01]

[00:00] Optimised Blend OOF for thymidylate_synthase_inhibitor: 0.009663447718876355
Optimised Weights for thymidylate_synthase_inhibitor: [0.00000000e+00 6.58119490e-01 6.43920643e-17 9.50356205e-02
 4.17618982e-17 2.46844889e-01]

[00:00] Optimised Blend OOF for tlr_agonist: 0.010139038630606443
Optimised Weights for tlr_agonist: [2.52126482e-17

In [31]:
# Report the wall-clock time since `total_start` was set at the top of the
# search cell. This cell must run right after that cell for the figure to be meaningful.
total_elapsed = time.time() - total_start
print(f"Total time spent: {total_elapsed/60:.2f} minutes.")

Total time spent: 1.76 minutes.


In [34]:
# Sanity check: the per-target search appends one weight vector per entry of
# train_classes, so this should equal len(train_classes).
# NOTE(review): `optimized_target_weights` only exists after the
# "scipy_per_target" branch has run; the saved output below this cell shows
# an array rather than an integer, so it looks stale - re-run to refresh.
len(optimized_target_weights)

array([7.32288257e-02, 9.43255890e-17, 3.81639165e-17, 9.26771174e-01,
       0.00000000e+00, 3.93565389e-17])

In [35]:
# Rebuild the blended OOF matrix, weighting every target column of every
# model by that target's own optimised weight for the model.
_n_oof_rows = non_control_group_train_labels.shape[0]
blend_targets_oof = np.zeros((_n_oof_rows, 206))
for model_idx, (script, oof_filename, _output_filename,
                _stored_weight) in enumerate(model_list):
    print(f"Loading OOF from {oof_filename} ......")
    oof = np.load(f"{dataset_folder}/{oof_filename}")

    # OOF files with 23814 rows are filtered down to the trt_cp rows.
    if oof.shape[0] == 23814:
        oof = oof[non_control_group_rows, :]

    # optimized_target_weights is indexed [target][model].
    for target_idx in range(206):
        target_model_weight = optimized_target_weights[target_idx][model_idx]
        blend_targets_oof[:, target_idx] += oof[:, target_idx] * target_model_weight

    oof_loss = mean_logloss(oof, y_true)
    print(f"OOF Validation Loss of {script}: {oof_loss:.6f}\n")

blend_targets_oof_loss = mean_logloss(blend_targets_oof, y_true)
print(f"Blend Target-Wise OOF Validation Loss: {blend_targets_oof_loss:.6f}\n")

Loading OOF from ../../Github/kaggle_moa_team/oof/oof_2stageNN_ns_oldcv.npy ......
OOF Validation Loss of ../../Github/kaggle_moa_team/scripts/2stageNN_with_ns_oldcv.py: 0.015606

Loading OOF from ../../Github/kaggle_moa_team/oof/oof_script_simpleNN_oldcv.npy ......
OOF Validation Loss of ../../Github/kaggle_moa_team/scripts/script_simpleNN_oldcv.py: 0.015846

Loading OOF from ../../Github/kaggle_moa_team/oof/oof_fork-of-2heads-looper-super-puper-markpeng.npy ......
OOF Validation Loss of ../../Github/kaggle_moa_team/scripts/fork-of-2heads-looper-super-puper-markpeng-infer.py: 0.015887

Loading OOF from ../../Github/kaggle_moa_team/oof/oof_deepinsight_efficientnet_lightning_v7_b3_0.01850.npy ......
OOF Validation Loss of ../../Github/kaggle_moa_team/scripts/deepinsight_efficientnet_lightning_v7_b3_infer.py: 0.016016

Loading OOF from ../../Github/kaggle_moa_team/oof/oof_tabnet_ns_oldcv.npy ......
OOF Validation Loss of ../../Github/kaggle_moa_team/scripts/script_tabnet_ns_oldcv.py: 0.0

In [36]:
# OOF scores per target
# OOF log loss of the target-wise blend, one value per target column
# (collected in train_classes order).
target_oof_losses = []
for target_idx, target in enumerate(train_classes):
    print(target)

    blended_column = blend_targets_oof[:, target_idx]
    true_column = y_true[:, target_idx]
    oof_loss = mean_logloss(blended_column, true_column)
    target_oof_losses.append(oof_loss)
    print(f"Blend OOF Validation Loss of {target}: {oof_loss:.6f}\n")

5-alpha_reductase_inhibitor
Blend OOF Validation Loss of 5-alpha_reductase_inhibitor: 0.005349

11-beta-hsd1_inhibitor
Blend OOF Validation Loss of 11-beta-hsd1_inhibitor: 0.006129

acat_inhibitor
Blend OOF Validation Loss of acat_inhibitor: 0.007892

acetylcholine_receptor_agonist
Blend OOF Validation Loss of acetylcholine_receptor_agonist: 0.045990

acetylcholine_receptor_antagonist
Blend OOF Validation Loss of acetylcholine_receptor_antagonist: 0.067551

acetylcholinesterase_inhibitor
Blend OOF Validation Loss of acetylcholinesterase_inhibitor: 0.021333

adenosine_receptor_agonist
Blend OOF Validation Loss of adenosine_receptor_agonist: 0.015513

adenosine_receptor_antagonist
Blend OOF Validation Loss of adenosine_receptor_antagonist: 0.025119

adenylyl_cyclase_activator
Blend OOF Validation Loss of adenylyl_cyclase_activator: 0.001430

adrenergic_receptor_agonist
Blend OOF Validation Loss of adrenergic_receptor_agonist: 0.051788

adrenergic_receptor_antagonist
Blend OOF Validation 

Blend OOF Validation Loss of ubiquitin_specific_protease_inhibitor: 0.002578

vegfr_inhibitor
Blend OOF Validation Loss of vegfr_inhibitor: 0.020177

vitamin_b
Blend OOF Validation Loss of vitamin_b: 0.008823

vitamin_d_receptor_agonist
Blend OOF Validation Loss of vitamin_d_receptor_agonist: 0.004246

wnt_inhibitor
Blend OOF Validation Loss of wnt_inhibitor: 0.009635



In [37]:
# Rank the targets from highest to lowest blended OOF log loss.
_unsorted_target_losses = pd.DataFrame(
    data={"target": train_classes, "oof_logloss": target_oof_losses},
    columns=["target", "oof_logloss"],
)
_ranked_target_losses = _unsorted_target_losses.sort_values(
    by="oof_logloss", ascending=False)
target_loss_df = _ranked_target_losses.reset_index(drop=True)
target_loss_df

Unnamed: 0,target,oof_logloss
0,cyclooxygenase_inhibitor,0.087596
1,dopamine_receptor_antagonist,0.083898
2,serotonin_receptor_antagonist,0.082275
3,glutamate_receptor_antagonist,0.080252
4,adrenergic_receptor_antagonist,0.078435
...,...,...
201,proteasome_inhibitor,0.001129
202,focal_adhesion_kinase_inhibitor,0.001095
203,atp_synthase_inhibitor,0.001051
204,erbb2_inhibitor,0.000858


In [12]:
# [V6]
# [Optuna]
# Number of finished trials: 5000
# Best trial:
#   Value: 0.015173437622007157
#   Params: 
#     w0: 0.30923325055652684
#     w1: 0.09831493504786226
#     w2: 0.018966959973949222
#     w3: 0.19863369862866234
#     w4: 0.0013224625996093413
#     w5: 0.3728865483320761

# [Scipy SLSQP]
# [00:36] Optimised Blend OOF: 0.015172005464591968
# Optimised Weights: [3.20472642e-01 9.01191588e-02 1.78893358e-18 2.20448482e-01
#  3.27971157e-18 3.68959717e-01]

In [13]:
# [V5]
# Number of finished trials: 3000
# Best trial:
#   Value: 0.015344701181290615
#   Params: 
#     w0: 0.5141433844379889
#     w1: 0.11747776562133813
#     w2: 0.3668324643717302

# [00:14] Optimised Blend OOF: 0.015344695215068541
# Optimised Weights: [0.51922623 0.11292509 0.36784869]

In [14]:
# [V4]
# [Optuna]
# Number of finished trials: 3000
# Best trial:
#   Value: 0.015331901615194453
#   Params: 
#     w0: 0.4505928450756189
#     w1: 0.13010257032841785
#     w2: 0.06308933354044946
#     w3: 0.35639153615958885
#
# [Scipy]
# [00:23] Optimised Blend OOF: 0.015331777381591449
# Optimised Weights: [0.44090106 0.14508641 0.05945655 0.35455598]

In [15]:
# [V3]
# improving-mark-s-2-heads-model-infer
# Number of finished trials: 3000
# Best trial:
#   Value: 0.01515466145873492
#   Params: 
#     w0: 0.0002980690037490555
#     w1: 0.29771381784976886
#     w2: 0.1569191862042946
#     w3: 0.18156875605872544
#     w4: 0.36371774630338105

In [16]:
# [V3]
# fork-of-2heads-looper-super-puper-markpeng-infer
# Number of finished trials: 3000
# Best trial:
#   Value: 0.015170138066049686
#   Params: 
#     w0: 0.00019903389488299251
#     w1: 0.3853752127955825
#     w2: 0.015968332256452233
#     w3: 0.22945916769823432
#     w4: 0.3711290150522236

In [17]:
# Write the blended predictions only when the inference scripts were actually
# run; otherwise `submission` still holds the zero-filled template.
if run_submit_script:
    print(submission.shape)
    print(submission)
    submission.to_csv('submission.csv', index=False)

## EOF

In [18]:
# In kernel mode, delete every .py file from the working directory
# and list what remains.
if kernel_mode:
    !rm ./*.py
    !ls -la