In [1]:
import sys
sys.path.append('../script/')
from os.path import exists
import gc
from functools import partial

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import optuna

import utils
import models
import train as trainer

DEVICE = "cuda"
EPOCHS = 2



## Add folds No. for CV

In [2]:
path_fold = "../input/folds/train_folds.csv"
if not exists(path_fold):
    from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
    df = pd.read_csv("../input/lish-moa/train_targets_scored.csv")
    df.loc[:, "kfold"] = -1
    df = df.sample(frac=1).reset_index(drop=True)
    targets = df.drop("sig_id", axis=1).values

    mskf = MultilabelStratifiedKFold(n_splits=5)
    for fold_, (tr_, va_) in enumerate(mskf.split(X=df, y=targets)):
        df.loc[va_, "kfold"] = fold_
    df.to_csv(path_fold, index=False)

## Params for training function `run_training`

In [3]:
fold = 0
# optuna 使わないので適当
params = {
    "num_layers": 3,
    "hidden_size": 16,
    "dropout": 0.3,
    "learning_rate": 1e-3,
}
save_model=True

---
## Prototyping training process from HERE

In [4]:
df = pd.read_csv("../input/folds/train.csv")
with open("../input/folds/targets", "r") as f:
    targets = f.read().split("\n")
with open("../input/folds/features", "r") as f:
    features = f.read().split("\n")

In [5]:
print(f'[Fold No.{fold:>3}]\n')
train_df = df[df.kfold != fold].reset_index(drop=True)
valid_df = df[df.kfold == fold].reset_index(drop=True)

[Fold No.  0]



In [6]:
x_tr = train_df[features].to_numpy()
x_va = valid_df[features].to_numpy()

In [7]:
y_tr = train_df[targets].to_numpy()
y_va = valid_df[targets].to_numpy()

In [8]:
dataset_tr = utils.MoaDataset(x_tr, y_tr)
loader_tr = torch.utils.data.DataLoader(dataset_tr, batch_size=1024, num_workers=2)
dataset_va = utils.MoaDataset(x_va, y_va)
loader_va = torch.utils.data.DataLoader(dataset_va, batch_size=1024, num_workers=2)

In [9]:
model = models.BaseLine(
    num_features=x_tr.shape[1],
    num_targets=y_tr.shape[1],
    params=params
)
model.to(DEVICE)

BaseLine(
  (model): Sequential(
    (0): Linear(in_features=879, out_features=16, bias=False)
    (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=16, out_features=16, bias=False)
    (4): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): Dropout(p=0.3, inplace=False)
    (6): Linear(in_features=16, out_features=16, bias=False)
    (7): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Dropout(p=0.3, inplace=False)
    (9): Linear(in_features=16, out_features=207, bias=True)
  )
)

In [10]:
optimizer = torch.optim.Adam(model.parameters(), lr=params["learning_rate"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, patience=3, threshold=0.00001, mode="min", verbose=True
)
eng = utils.Engine(model, optimizer, device=DEVICE)

In [11]:
def run_training(fold, params, save_model=False):

    df = pd.read_csv("../input/folds/train.csv")
    with open("../input/folds/targets", "r") as f:
        targets = f.read().split("\n")
    with open("../input/folds/features", "r") as f:
        features = f.read().split("\n")

    print(f"\n[Fold No.{fold:>2}]\n")
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    x_tr = train_df[features].to_numpy()
    x_va = valid_df[features].to_numpy()

    y_tr = train_df[targets].to_numpy()
    y_va = valid_df[targets].to_numpy()

    # TODO: [BEGIN] NN以外の学習を記述
    dataset_tr = utils.MoaDataset(x_tr, y_tr)
    loader_tr = torch.utils.data.DataLoader(dataset_tr, batch_size=1024, num_workers=2)
    dataset_va = utils.MoaDataset(x_va, y_va)
    loader_va = torch.utils.data.DataLoader(dataset_va, batch_size=1024, num_workers=2)
    
    model = models.BaseLine(
        num_features=x_tr.shape[1],
        num_targets=y_tr.shape[1],
        params=params
    )
    model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=params["learning_rate"])
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=0.00001, mode="min", verbose=True
    )
    eng = utils.Engine(model, optimizer, device=DEVICE)

    # Free RAM space as much as possible before training
    del df, train_df, valid_df, x_tr, x_va, y_tr, y_va
    gc.collect()
    
    loss_best = np.inf
    patience = 10
    patience_cnt = 0
    for ep in range(EPOCHS):
        loss_tr = eng.train(loader_tr)
        loss_va = eng.validate(loader_va)
        scheduler.step(loss_va)
        print(f"epoch:{ep:>3}, train:{loss_tr:>.5}, valid:{loss_va:>.5}")
        
        if loss_va < loss_best:
            loss_best = loss_va
            if save_model:
                pass
        else:
            patience_cnt += 1
        if patience_cnt > patience:
            break
    
    print(f"[Fold No.{fold:>2}]")
    print(f"epoch:{ep:>3}, train:{loss_tr:>.5}, valid:{loss_va:>.5}")

    # TODO: [END] NN以外の学習を記述

    # if save_model:
    #     now = datetime.now()
    #     now = str(now)[5:17].replace(" ", "_").replace(":", "")
    #     filename = f"weight/model{now}.pt"
    #     torch.save(model.model.state_dict(), filename)
    #     print("model saved at:", filename)

    return loss_best

In [12]:
def objective(trial):
    params = {
        "num_layers": trial.suggest_int("num_layers", 1, 7),
        "hidden_size": trial.suggest_int("hidden_size", 16, 2048),
        "dropout": trial.suggest_uniform("dropout", 0.1, 0.8),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-6, 1e-3),
    }
    loss_all = []
    for fold_ in range(5):
        loss_tmp = run_training(fold_, params, save_model=False)
        loss_all.append(loss_tmp)
    return np.mean(loss_all)

In [13]:
is_pruning = True  # TODO: Impl as Param in future

partial_obj = partial(objective)
pruner = optuna.pruners.MedianPruner() if is_pruning else optuna.pruners.NopPruner()
study = optuna.create_study(direction="minimize", pruner=pruner)
study.optimize(partial_obj, n_trials=10) # i want to use "timeout" in practice

print("\n---- ---- ---- ---- ----\nBest trial:")
trial_best = study.best_trial

print(f"Value: {trial_best.value}")
print("Params: ")
best_params = trial_best.params
print(best_params)


[Fold No. 0]

epoch:  0, train:0.74813, valid:0.71099
epoch:  1, train:0.7466, valid:0.71391
[Fold No. 0]
epoch:  1, train:0.7466, valid:0.71391

[Fold No. 1]

epoch:  0, train:0.74549, valid:0.70845
epoch:  1, train:0.74416, valid:0.71114
[Fold No. 1]
epoch:  1, train:0.74416, valid:0.71114

[Fold No. 2]

epoch:  0, train:0.74679, valid:0.71024
epoch:  1, train:0.74554, valid:0.71293
[Fold No. 2]
epoch:  1, train:0.74554, valid:0.71293

[Fold No. 3]

epoch:  0, train:0.74808, valid:0.71028
epoch:  1, train:0.74632, valid:0.71396
[Fold No. 3]
epoch:  1, train:0.74632, valid:0.71396

[Fold No. 4]

epoch:  0, train:0.74822, valid:0.71147
epoch:  1, train:0.74685, valid:0.71453
[Fold No. 4]
epoch:  1, train:0.74685, valid:0.71453


[I 2020-10-04 15:04:21,227] Trial 0 finished with value: 0.7102886772155762 and parameters: {'num_layers': 2, 'hidden_size': 1529, 'dropout': 0.34161654376166556, 'learning_rate': 1.2743274573140588e-06}. Best is trial 0 with value: 0.7102886772155762.



[Fold No. 0]

epoch:  0, train:0.71985, valid:0.70478
epoch:  1, train:0.70747, valid:0.69886
[Fold No. 0]
epoch:  1, train:0.70747, valid:0.69886

[Fold No. 1]

epoch:  0, train:0.72046, valid:0.70449
epoch:  1, train:0.70781, valid:0.69807
[Fold No. 1]
epoch:  1, train:0.70781, valid:0.69807

[Fold No. 2]

epoch:  0, train:0.7201, valid:0.70511
epoch:  1, train:0.70745, valid:0.69858
[Fold No. 2]
epoch:  1, train:0.70745, valid:0.69858

[Fold No. 3]

epoch:  0, train:0.71958, valid:0.70408
epoch:  1, train:0.70692, valid:0.698
[Fold No. 3]
epoch:  1, train:0.70692, valid:0.698

[Fold No. 4]

epoch:  0, train:0.72049, valid:0.70556
epoch:  1, train:0.7079, valid:0.69966
[Fold No. 4]
epoch:  1, train:0.7079, valid:0.69966


[I 2020-10-04 15:05:31,640] Trial 1 finished with value: 0.6986337685585022 and parameters: {'num_layers': 1, 'hidden_size': 1521, 'dropout': 0.1489272533869646, 'learning_rate': 5.2721571424316446e-05}. Best is trial 1 with value: 0.6986337685585022.



[Fold No. 0]

epoch:  0, train:0.77117, valid:0.69256
epoch:  1, train:0.74859, valid:0.68825
[Fold No. 0]
epoch:  1, train:0.74859, valid:0.68825

[Fold No. 1]

epoch:  0, train:0.77073, valid:0.69122
epoch:  1, train:0.74772, valid:0.68778
[Fold No. 1]
epoch:  1, train:0.74772, valid:0.68778

[Fold No. 2]

epoch:  0, train:0.77033, valid:0.69174
epoch:  1, train:0.74635, valid:0.68841
[Fold No. 2]
epoch:  1, train:0.74635, valid:0.68841

[Fold No. 3]

epoch:  0, train:0.77099, valid:0.6919
epoch:  1, train:0.74797, valid:0.68884
[Fold No. 3]
epoch:  1, train:0.74797, valid:0.68884

[Fold No. 4]

epoch:  0, train:0.7703, valid:0.69265
epoch:  1, train:0.74733, valid:0.68812
[Fold No. 4]
epoch:  1, train:0.74733, valid:0.68812


[I 2020-10-04 15:06:41,913] Trial 2 finished with value: 0.688279824256897 and parameters: {'num_layers': 4, 'hidden_size': 1253, 'dropout': 0.6263121146674873, 'learning_rate': 0.00012429219505052152}. Best is trial 2 with value: 0.688279824256897.



[Fold No. 0]

epoch:  0, train:0.77938, valid:0.69243
epoch:  1, train:0.77581, valid:0.69193
[Fold No. 0]
epoch:  1, train:0.77581, valid:0.69193

[Fold No. 1]

epoch:  0, train:0.77969, valid:0.69275
epoch:  1, train:0.77576, valid:0.69206
[Fold No. 1]
epoch:  1, train:0.77576, valid:0.69206

[Fold No. 2]

epoch:  0, train:0.77993, valid:0.693
epoch:  1, train:0.77577, valid:0.69249
[Fold No. 2]
epoch:  1, train:0.77577, valid:0.69249

[Fold No. 3]

epoch:  0, train:0.78108, valid:0.69406
epoch:  1, train:0.77672, valid:0.69343
[Fold No. 3]
epoch:  1, train:0.77672, valid:0.69343

[Fold No. 4]

epoch:  0, train:0.77869, valid:0.69246
epoch:  1, train:0.7751, valid:0.69179
[Fold No. 4]
epoch:  1, train:0.7751, valid:0.69179


[I 2020-10-04 15:07:53,780] Trial 3 finished with value: 0.6923413634300232 and parameters: {'num_layers': 6, 'hidden_size': 1487, 'dropout': 0.6137683996460142, 'learning_rate': 2.012239872677871e-05}. Best is trial 2 with value: 0.688279824256897.



[Fold No. 0]

epoch:  0, train:0.75159, valid:0.69304
epoch:  1, train:0.74212, valid:0.69195
[Fold No. 0]
epoch:  1, train:0.74212, valid:0.69195

[Fold No. 1]

epoch:  0, train:0.75312, valid:0.69412
epoch:  1, train:0.74369, valid:0.69308
[Fold No. 1]
epoch:  1, train:0.74369, valid:0.69308

[Fold No. 2]

epoch:  0, train:0.75255, valid:0.69445
epoch:  1, train:0.74299, valid:0.69376
[Fold No. 2]
epoch:  1, train:0.74299, valid:0.69376

[Fold No. 3]

epoch:  0, train:0.752, valid:0.69373
epoch:  1, train:0.74277, valid:0.69257
[Fold No. 3]
epoch:  1, train:0.74277, valid:0.69257

[Fold No. 4]

epoch:  0, train:0.75099, valid:0.69321
epoch:  1, train:0.74188, valid:0.69246
[Fold No. 4]
epoch:  1, train:0.74188, valid:0.69246


[I 2020-10-04 15:09:05,825] Trial 4 finished with value: 0.6927657365798949 and parameters: {'num_layers': 5, 'hidden_size': 1797, 'dropout': 0.43163043830325076, 'learning_rate': 2.7207315760446225e-05}. Best is trial 2 with value: 0.688279824256897.



[Fold No. 0]

epoch:  0, train:0.73022, valid:0.7145
epoch:  1, train:0.72118, valid:0.71118
[Fold No. 0]
epoch:  1, train:0.72118, valid:0.71118

[Fold No. 1]

epoch:  0, train:0.73014, valid:0.71221
epoch:  1, train:0.72077, valid:0.70989
[Fold No. 1]
epoch:  1, train:0.72077, valid:0.70989

[Fold No. 2]

epoch:  0, train:0.73109, valid:0.7143
epoch:  1, train:0.72192, valid:0.71135
[Fold No. 2]
epoch:  1, train:0.72192, valid:0.71135

[Fold No. 3]

epoch:  0, train:0.73183, valid:0.71535
epoch:  1, train:0.72254, valid:0.71223
[Fold No. 3]
epoch:  1, train:0.72254, valid:0.71223

[Fold No. 4]

epoch:  0, train:0.72961, valid:0.71402
epoch:  1, train:0.72082, valid:0.71084
[Fold No. 4]
epoch:  1, train:0.72082, valid:0.71084


[I 2020-10-04 15:10:16,050] Trial 5 finished with value: 0.7110964179039001 and parameters: {'num_layers': 1, 'hidden_size': 841, 'dropout': 0.17562153870582936, 'learning_rate': 1.9077043494166626e-05}. Best is trial 2 with value: 0.688279824256897.



[Fold No. 0]

epoch:  0, train:0.72733, valid:0.69675
epoch:  1, train:0.71426, valid:0.69511
[Fold No. 0]
epoch:  1, train:0.71426, valid:0.69511

[Fold No. 1]

epoch:  0, train:0.72666, valid:0.69546
epoch:  1, train:0.71352, valid:0.69414
[Fold No. 1]
epoch:  1, train:0.71352, valid:0.69414

[Fold No. 2]

epoch:  0, train:0.72864, valid:0.69786
epoch:  1, train:0.71543, valid:0.69682
[Fold No. 2]
epoch:  1, train:0.71543, valid:0.69682

[Fold No. 3]

epoch:  0, train:0.72767, valid:0.69625
epoch:  1, train:0.71409, valid:0.69505
[Fold No. 3]
epoch:  1, train:0.71409, valid:0.69505

[Fold No. 4]

epoch:  0, train:0.7283, valid:0.69737
epoch:  1, train:0.7155, valid:0.69605
[Fold No. 4]
epoch:  1, train:0.7155, valid:0.69605


[I 2020-10-04 15:11:27,980] Trial 6 finished with value: 0.6954344272613525 and parameters: {'num_layers': 4, 'hidden_size': 1040, 'dropout': 0.1637981163071046, 'learning_rate': 2.4158962418410857e-05}. Best is trial 2 with value: 0.688279824256897.



[Fold No. 0]

epoch:  0, train:0.72825, valid:0.69475
epoch:  1, train:0.71814, valid:0.69598
[Fold No. 0]
epoch:  1, train:0.71814, valid:0.69598

[Fold No. 1]

epoch:  0, train:0.72887, valid:0.69492
epoch:  1, train:0.71851, valid:0.69614
[Fold No. 1]
epoch:  1, train:0.71851, valid:0.69614

[Fold No. 2]

epoch:  0, train:0.72779, valid:0.69425
epoch:  1, train:0.71734, valid:0.69568
[Fold No. 2]
epoch:  1, train:0.71734, valid:0.69568

[Fold No. 3]

epoch:  0, train:0.72832, valid:0.6946
epoch:  1, train:0.71815, valid:0.6962
[Fold No. 3]
epoch:  1, train:0.71815, valid:0.6962

[Fold No. 4]

epoch:  0, train:0.72801, valid:0.6943
epoch:  1, train:0.71776, valid:0.69571
[Fold No. 4]
epoch:  1, train:0.71776, valid:0.69571


[I 2020-10-04 15:12:43,960] Trial 7 finished with value: 0.69456463098526 and parameters: {'num_layers': 7, 'hidden_size': 1742, 'dropout': 0.10869602554920929, 'learning_rate': 7.117593740794063e-06}. Best is trial 2 with value: 0.688279824256897.



[Fold No. 0]

epoch:  0, train:0.81941, valid:0.69416
epoch:  1, train:0.8123, valid:0.69372
[Fold No. 0]
epoch:  1, train:0.8123, valid:0.69372

[Fold No. 1]

epoch:  0, train:0.81889, valid:0.69376
epoch:  1, train:0.81058, valid:0.69301
[Fold No. 1]
epoch:  1, train:0.81058, valid:0.69301

[Fold No. 2]

epoch:  0, train:0.81807, valid:0.69342
epoch:  1, train:0.81115, valid:0.69301
[Fold No. 2]
epoch:  1, train:0.81115, valid:0.69301

[Fold No. 3]

epoch:  0, train:0.81823, valid:0.69396
epoch:  1, train:0.81041, valid:0.69351
[Fold No. 3]
epoch:  1, train:0.81041, valid:0.69351

[Fold No. 4]

epoch:  0, train:0.81679, valid:0.69183
epoch:  1, train:0.80941, valid:0.69143
[Fold No. 4]
epoch:  1, train:0.80941, valid:0.69143


[I 2020-10-04 15:13:56,154] Trial 8 finished with value: 0.6929359364509583 and parameters: {'num_layers': 4, 'hidden_size': 719, 'dropout': 0.7508156278904834, 'learning_rate': 2.9549937724242617e-05}. Best is trial 2 with value: 0.688279824256897.



[Fold No. 0]

epoch:  0, train:0.72218, valid:0.69422
epoch:  1, train:0.70649, valid:0.69164
[Fold No. 0]
epoch:  1, train:0.70649, valid:0.69164

[Fold No. 1]

epoch:  0, train:0.72237, valid:0.69415
epoch:  1, train:0.70677, valid:0.69154
[Fold No. 1]
epoch:  1, train:0.70677, valid:0.69154

[Fold No. 2]

epoch:  0, train:0.72256, valid:0.69515
epoch:  1, train:0.70696, valid:0.69208
[Fold No. 2]
epoch:  1, train:0.70696, valid:0.69208

[Fold No. 3]

epoch:  0, train:0.72187, valid:0.69431
epoch:  1, train:0.70582, valid:0.69136
[Fold No. 3]
epoch:  1, train:0.70582, valid:0.69136

[Fold No. 4]

epoch:  0, train:0.72246, valid:0.69475
epoch:  1, train:0.70685, valid:0.69192
[Fold No. 4]
epoch:  1, train:0.70685, valid:0.69192


[I 2020-10-04 15:15:09,259] Trial 9 finished with value: 0.6917083716392517 and parameters: {'num_layers': 4, 'hidden_size': 1721, 'dropout': 0.17363401146073038, 'learning_rate': 3.446642745757357e-05}. Best is trial 2 with value: 0.688279824256897.



---- ---- ---- ---- ----
Best trial:
Value: 0.688279824256897
Params: 
{'num_layers': 4, 'hidden_size': 1253, 'dropout': 0.6263121146674873, 'learning_rate': 0.00012429219505052152}


---

df_te = pd.read_csv("../input/lish-moa/test_features.csv")
df_te = utils.process_data(df_te)
x_te = df_te[features].to_numpy()
dataset_te = utils.TestMoaDataset(dataset=x_te)
loader_te = torch.utils.data.DataLoader(
    dataset_te, batch_size=1024, num_workers=4, shuffle=False,
)

predictions = np.zeros((x_te.shape[0], 206))
inference_model = model.model

inference_model.eval()
for ind, batch in enumerate(loader_te):
    p = torch.sigmoid(inference_model(batch["x"])).detach().cpu().numpy()
    predictions[ind * 1024 : (ind + 1) * 1024] = p[:,:-1]

test_features1 = pd.read_csv("../input/lish-moa/test_features.csv")
s = pd.DataFrame({"sig_id": test_features1["sig_id"].values})

for col in folds.columns[1:-2].tolist():
    s[col] = 0
s.loc[:, folds.columns[1:-2]] = predictions

s.loc[
    s["sig_id"].isin(test_features1.loc[test_features1["cp_type"] == "ctl_vehicle", "sig_id"]),
    folds.columns[1:-2],
] = 0

s.to_csv("../submission/submission.csv", index=False)
torch.save(model.model.state_dict(), "../weight/model.pt")