In [1]:
import gc
import sys
import glob
import logging

import optuna
import numpy as np
import pandas as pd
from pathlib import Path
import torch

sys.path.append('../src')
import const
import factory
from utils import DataHandler, seed_everything, reduce_mem_usage
from trainer import NNTrainer

import warnings
warnings.filterwarnings('ignore')

In [2]:
dh = DataHandler()

In [3]:
cfg = dh.load('../configs/exp/mlp_012.yml')
device = "cuda" if torch.cuda.is_available() else "cpu"
run_name = 'optuna_tabnet'

In [4]:
features_params = dh.load(f'../configs/feature/{cfg.data.features.name}.yml')
features = features_params.features

In [5]:
logger_path = Path(f'../logs/{run_name}')

seed_everything(cfg.common.seed)

logger_path.mkdir(exist_ok=True)
logging.basicConfig(filename=logger_path / 'train.log', level=logging.DEBUG)

dh.save(logger_path / 'config.yml', cfg)

In [6]:
train_x = dh.load('../data/team/X_tra_wo_lec_20M.feather').iloc[:2_000_000]
val_x = dh.load('../data/team/X_val_wo_lec.feather').iloc[:250_000]

train_x['is_val'] = 0
val_x['is_val'] = 1

train_x = pd.concat([train_x, val_x], axis=0, sort=False, ignore_index=True)
train_y = train_x[const.TARGET_COLS[0]]

use_row_id = train_x['row_id'].values
val_idx = train_x[train_x['is_val'] == 1].index
drop_cols = set(train_x.columns) - set(features + const.TARGET_COLS)
train_x = train_x.drop(drop_cols, axis=1)

In [7]:
add_df = pd.DataFrame(index=train_x.index)

additional_cols = set(features) - set(train_x.columns)
for col in additional_cols:
    feat_df = pd.read_feather(f'../features/{col}_train.feather')
    add_df[col] = feat_df.loc[use_row_id, col].values

add_df = reduce_mem_usage(add_df)
train_x = pd.concat([train_x, add_df], axis=1)

del add_df; gc.collect()

11

In [8]:
for col in train_x.columns:
    if col not in cfg.data.features.embedding_cols:
        if col != const.TARGET_COLS[0]:
            inf_idx = train_x[train_x[col] == np.inf].index.values

            if len(inf_idx) > 0:
                train_x.loc[inf_idx, col] = np.nan
            null_count = train_x[col].isnull().sum()

            if null_count > 0:
                mean_ = train_x[col].mean()
                train_x[col] = train_x[col].fillna(mean_)

            train_x[col] = (train_x[col] - train_x[col].mean()) / train_x[col].std()

In [9]:
fold_df = pd.DataFrame(index=range(len(train_x)))
fold_df['fold_0'] = 0
fold_df.loc[val_idx, 'fold_0'] += 1

In [10]:
cfg.model.epochs = 10
# cfg.data.train.loader.batch_size = 2048

In [11]:
def objective(trial):
    hidden1 = int(trial.suggest_loguniform('hidden1', 128, 1024))
    hidden2 = int(trial.suggest_loguniform('hidden2', 128, 1024))
    e_dim = int(trial.suggest_loguniform('e_dim', 32, 128))
    e_fc_dim = int(trial.suggest_loguniform('e_fc_dim', 2, 128))

#     dropout1 = trial.suggest_loguniform('dropout1', 1e-3, 0.2)
#     dropout2 = trial.suggest_loguniform('dropout2', 1e-3, 0.2)
#     dropout3 = trial.suggest_loguniform('dropout3', 1e-3, 0.2)
    
#     cfg.model.head.drop1.params.p = dropout1
#     cfg.model.head.drop2.params.p = dropout2
#     cfg.model.head.drop2.params.p = dropout3

    cfg.model.head.linear1.params.out_features = hidden1
#     cfg.model.head.bn2.params.num_features = hidden_size
    cfg.model.head.linear2.params.in_features = hidden1
    cfg.model.head.linear2.params.out_features = hidden2
#     cfg.model.head.bn3.params.num_features = hidden_size
    cfg.model.head.linear3.params.in_features = hidden2

    cfg.model.params.e_dim = e_dim
    cfg.model.params.e_fc_dim = e_fc_dim
    cfg.model.head.linear1.params.in_features = 111 + e_fc_dim
    
    trainer = NNTrainer(run_name, fold_df, cfg)
    cv = trainer.train(train_df=train_x, target_df=train_y)

    return cv

In [12]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, timeout=1_800)   # n_trials, timeout

[32m[I 2020-12-22 23:34:24,338][0m A new study created in memory with name: no-name-ad2da215-8b3b-49b9-b7af-fc1bad568bba[0m




Epoch 1 - avg_train_loss: 0.545585  avg_val_loss: 0.543013 val_score: 0.768608 time: 18s
Epoch 2 - avg_train_loss: 0.540045  avg_val_loss: 0.541885 val_score: 0.770060 time: 17s
Epoch 3 - avg_train_loss: 0.537534  avg_val_loss: 0.541049 val_score: 0.771012 time: 17s
Epoch 4 - avg_train_loss: 0.534830  avg_val_loss: 0.540298 val_score: 0.771827 time: 17s
Epoch 5 - avg_train_loss: 0.531652  avg_val_loss: 0.539511 val_score: 0.772692 time: 17s
Epoch 6 - avg_train_loss: 0.528412  avg_val_loss: 0.538890 val_score: 0.773583 time: 17s
Epoch 7 - avg_train_loss: 0.525154  avg_val_loss: 0.540412 val_score: 0.773253 time: 17s
Epoch 8 - avg_train_loss: 0.521785  avg_val_loss: 0.539918 val_score: 0.772268 time: 17s
Epoch 9 - avg_train_loss: 0.518337  avg_val_loss: 0.542510 val_score: 0.771808 time: 17s
█

[32m[I 2020-12-22 23:37:15,787][0m Trial 0 finished with value: 0.773582559187455 and parameters: {'hidden1': 520.6595714467619, 'hidden2': 255.09688496749757, 'e_dim': 76.47605176562722, 'e_fc_dim': 83.90462322933557}. Best is trial 0 with value: 0.773582559187455.[0m


Epoch 10 - avg_train_loss: 0.514882  avg_val_loss: 0.542873 val_score: 0.770880 time: 17s

Epoch 6 - val_score: 0.773583



CV: 0.773583





Epoch 1 - avg_train_loss: 0.545602  avg_val_loss: 0.542957 val_score: 0.768408 time: 17s
Epoch 2 - avg_train_loss: 0.539980  avg_val_loss: 0.541025 val_score: 0.770840 time: 17s
Epoch 3 - avg_train_loss: 0.537583  avg_val_loss: 0.539498 val_score: 0.772206 time: 17s
Epoch 4 - avg_train_loss: 0.534936  avg_val_loss: 0.538765 val_score: 0.773863 time: 17s
Epoch 5 - avg_train_loss: 0.531985  avg_val_loss: 0.537557 val_score: 0.774378 time: 17s
Epoch 6 - avg_train_loss: 0.529301  avg_val_loss: 0.536514 val_score: 0.775485 time: 17s
Epoch 7 - avg_train_loss: 0.526800  avg_val_loss: 0.537057 val_score: 0.775446 time: 17s
Epoch 8 - avg_train_loss: 0.524353  avg_val_loss: 0.536960 val_score: 0.775596 time: 17s
Epoch 9 - avg_train_loss: 0.522213  avg_val_loss: 0.537413 val_score: 0.775400 time: 17s
█

[32m[I 2020-12-22 23:40:02,937][0m Trial 1 finished with value: 0.7755956676394911 and parameters: {'hidden1': 241.29958368552286, 'hidden2': 309.76298069120577, 'e_dim': 54.15434705663396, 'e_fc_dim': 15.082246752842032}. Best is trial 1 with value: 0.7755956676394911.[0m


Epoch 10 - avg_train_loss: 0.519920  avg_val_loss: 0.537495 val_score: 0.774615 time: 17s

Epoch 8 - val_score: 0.775596



CV: 0.775596





Epoch 1 - avg_train_loss: 0.546296  avg_val_loss: 0.543081 val_score: 0.768197 time: 17s
Epoch 2 - avg_train_loss: 0.540070  avg_val_loss: 0.541421 val_score: 0.770468 time: 17s
Epoch 3 - avg_train_loss: 0.538017  avg_val_loss: 0.539906 val_score: 0.771765 time: 17s
Epoch 4 - avg_train_loss: 0.535630  avg_val_loss: 0.538538 val_score: 0.773231 time: 17s
Epoch 5 - avg_train_loss: 0.533302  avg_val_loss: 0.537932 val_score: 0.774087 time: 17s
Epoch 6 - avg_train_loss: 0.530900  avg_val_loss: 0.537487 val_score: 0.774344 time: 17s
Epoch 7 - avg_train_loss: 0.528449  avg_val_loss: 0.537688 val_score: 0.775696 time: 17s
Epoch 8 - avg_train_loss: 0.526217  avg_val_loss: 0.536052 val_score: 0.776103 time: 17s
Epoch 9 - avg_train_loss: 0.523816  avg_val_loss: 0.536401 val_score: 0.775860 time: 16s
█

[32m[I 2020-12-22 23:42:49,396][0m Trial 2 finished with value: 0.7761031244055123 and parameters: {'hidden1': 150.49908968960582, 'hidden2': 487.612444362821, 'e_dim': 36.35776373327214, 'e_fc_dim': 13.312454497461124}. Best is trial 2 with value: 0.7761031244055123.[0m


Epoch 10 - avg_train_loss: 0.521918  avg_val_loss: 0.536976 val_score: 0.775358 time: 17s

Epoch 8 - val_score: 0.776103



CV: 0.776103





Epoch 1 - avg_train_loss: 0.545089  avg_val_loss: 0.543683 val_score: 0.768215 time: 17s
Epoch 2 - avg_train_loss: 0.539711  avg_val_loss: 0.540427 val_score: 0.771213 time: 17s
Epoch 3 - avg_train_loss: 0.536932  avg_val_loss: 0.540689 val_score: 0.770905 time: 17s
Epoch 4 - avg_train_loss: 0.533581  avg_val_loss: 0.539469 val_score: 0.772618 time: 16s
Epoch 5 - avg_train_loss: 0.530071  avg_val_loss: 0.538527 val_score: 0.773682 time: 17s
Epoch 6 - avg_train_loss: 0.526480  avg_val_loss: 0.537868 val_score: 0.774364 time: 17s
Epoch 7 - avg_train_loss: 0.523460  avg_val_loss: 0.539329 val_score: 0.773566 time: 17s
Epoch 8 - avg_train_loss: 0.520645  avg_val_loss: 0.538850 val_score: 0.774090 time: 17s
Epoch 9 - avg_train_loss: 0.517592  avg_val_loss: 0.540306 val_score: 0.772822 time: 17s
█

[32m[I 2020-12-22 23:45:37,731][0m Trial 3 finished with value: 0.7743643784555907 and parameters: {'hidden1': 780.2685355203258, 'hidden2': 201.51540695300133, 'e_dim': 108.95846743477304, 'e_fc_dim': 21.08875332132785}. Best is trial 2 with value: 0.7761031244055123.[0m


Epoch 10 - avg_train_loss: 0.514827  avg_val_loss: 0.543008 val_score: 0.771764 time: 17s

Epoch 6 - val_score: 0.774364



CV: 0.774364





█

KeyboardInterrupt: 

In [None]:
print(f'BEST SCORE: {study.best_value}')
study.best_params