In [1]:
import sys
sys.path.append('../script/')
import os
from os.path import exists
from datetime import datetime
import json
import gc
from functools import partial
from collections import OrderedDict

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
# from adabelief_pytorch import AdaBelief

import utils
import models
import train as trainer
# Device every model is moved to; switch the two lines below to train on GPU.
# DEVICE = "cuda"
DEVICE = "cpu"
# Upper bound on epochs per fold; run_training stops earlier via its patience counter.
EPOCHS = 3000
# Name of the output directory that receives checkpoints, params and tensorboard logs.
MODELNAME = "Baseline1122"
# exist_ok=True also covers the case where MODELNAME already exists but its
# tensorboard sub-directory does not (the old `if not exists(MODELNAME)` guard skipped it).
os.makedirs(f"{MODELNAME}/tensorboard", exist_ok=True)
# Run tag of the form "MM-DD_HHMM", used in checkpoint/params file names and tensorboard tags.
now = datetime.now().strftime("%m-%d_%H%M")
writer = SummaryWriter(log_dir=f"{MODELNAME}/tensorboard")

In [2]:
df = pd.read_csv("../input/folds/train.csv")


def _read_names(path):
    """Return the non-empty lines of the text file at ``path``, one name per line."""
    with open(path, "r") as f:
        # splitlines() + the filter drop a trailing blank entry whether or not the
        # file ends with a newline, so no unconditional `[:-1]` slice is needed.
        return [line for line in f.read().splitlines() if line]


# Column names of the prediction targets and of the model inputs in `df`.
targets = _read_names("../input/folds/targets")
features = _read_names("../input/folds/features")

In [3]:
# TODO: order the features by cluster and feed them through a Conv1d
class BaseLine(nn.Module):
    """Fully connected network: ``num_layers`` hidden blocks plus a linear output layer.

    Each hidden block is ``Linear -> [BatchNorm1d] -> Dropout -> activation``.
    The layers live in ``self.model`` (an ``nn.Sequential``); their order fixes the
    keys of ``self.model.state_dict()``, so it must not change between saving and
    loading a checkpoint.

    Args:
        num_features: Width of the input vectors.
        num_targets: Width of the output (one logit per target).
        num_layers: Number of hidden blocks.
        dropout: Dropout probability used in every hidden block.
        hidden_size: Width of every hidden block.
        activation: ``"relu"`` or ``"prelu"``.
        batchnorm: If true, add ``BatchNorm1d`` after each hidden Linear and drop
            that Linear's bias.
        weight_norm: If true, wrap each hidden Linear in ``nn.utils.weight_norm``.

    Raises:
        RuntimeError: If ``activation`` is not one of the supported names.
    """

    def __init__(self, num_features, num_targets, num_layers=3, dropout=.2, hidden_size=256, activation="relu", batchnorm=True, weight_norm=True):
        super().__init__()
        layers = []
        # Width feeding the next Linear; stays at num_features when there are no hidden blocks.
        in_features = num_features
        for _ in range(num_layers):
            linear = nn.Linear(in_features, hidden_size, bias=(not batchnorm))
            # Honour the flag instead of applying weight normalisation unconditionally.
            if weight_norm:
                linear = nn.utils.weight_norm(linear)
            layers.append(linear)
            if batchnorm:
                layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.Dropout(dropout))
            if activation == "relu":
                layers.append(nn.ReLU())
            elif activation == "prelu":
                layers.append(nn.PReLU())
            else:
                raise RuntimeError(f'{activation} is not implemented')
            in_features = hidden_size
        # The output layer is a plain Linear (no weight normalisation).
        layers.append(nn.Linear(in_features, num_targets))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw outputs of the layer stack for the batch ``x``."""
        return self.model(x)

In [4]:
def set_output_bias(model, df, targets):
    """Initialise the last layer's bias of ``model.model`` to ``log(pos / neg)`` per target.

    Args:
        model: Object whose ``model[-1]`` layer has a ``bias`` tensor with one
            entry per target.
        df: Frame containing one column of non-negative integer labels per target.
        targets: Names of the target columns, in output order.

    Returns:
        The same ``model`` object, with the output bias replaced.
    """
    # Stand-in count for a class with no examples, so the log-ratio stays finite.
    eps = 0.01
    init_bias = []
    for target in targets:
        try:
            neg, pos = np.bincount(df[target])
        except ValueError:
            # bincount did not yield exactly two counts (e.g. the column has no
            # positive example): treat every row as negative.
            neg, pos = df.shape[0], eps
        # A column that is entirely positive gives neg == 0; avoid dividing by zero,
        # which would otherwise produce an infinite bias.
        neg = max(neg, eps)
        init_bias.append(np.log(pos / neg))
    model.model[-1].bias.data = torch.tensor(init_bias, dtype=torch.float32)
    return model

In [5]:
class LabelSmoothingCrossEntropy(nn.Module):
    """Cross-entropy over class logits with uniform label smoothing.

    NOTE(review): the `from utils import LabelSmoothingCrossEntropy` that follows
    this definition in the same cell rebinds the name, so this local class is
    shadowed as soon as the cell finishes running.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x, target, smoothing=0.2):
        """Return the batch-mean smoothed loss.

        Args:
            x: Logits; the last dimension indexes the classes.
            target: Integer class indices, one per row of ``x``.
            smoothing: Weight given to the uniform distribution over classes.
        """
        confidence = 1. - smoothing
        # `F` was never imported in this notebook; go through the `nn` alias instead.
        logprobs = nn.functional.log_softmax(x, dim=-1)
        # Negative log-probability of the labelled class for each row.
        nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)).squeeze(1)
        # Negative mean log-probability over all classes (the uniform-target term).
        smooth_loss = -logprobs.mean(dim=-1)
        loss = confidence * nll_loss + smoothing * smooth_loss
        return loss.mean()
from utils import LabelSmoothingCrossEntropy

# criterion = LabelSmoothingCrossEntropy()
# loss = criterion(outputs, targets)
# loss.backward()
# optimizer.step()

In [6]:
def _build_optimizer(model, params):
    """Create the optimizer named by ``params["optimizer"]`` for ``model``'s parameters."""
    name = params["optimizer"]
    if name == "SGD":
        return torch.optim.SGD(model.parameters(), **params["optim_params"])
    if name == "Adam":
        return torch.optim.Adam(model.parameters(), **params["optim_params"])
    if name == "AdamW":
        return torch.optim.AdamW(model.parameters(), **params["optim_params"])
    if name == "AdaBelief":
        # NOTE(review): the `adabelief_pytorch` import in the first cell is commented
        # out; re-enable it before selecting this optimizer or this raises NameError.
        return AdaBelief(model.parameters(), **params["optim_params"])
    raise RuntimeError(f'{name} is not implemented')


def _build_scheduler(optimizer, params):
    """Create the LR scheduler named by ``params["scheduler"]`` for ``optimizer``."""
    name = params["scheduler"]
    if name == "ReduceLROnPlateau":
        return torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", **params["scdl_params"])
    if name == "CosineAnnealingLR":
        return torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, **params["scdl_params"])
    if name == "none":
        print("No scheduling will be applied")
        # Constant multiplier of 1 keeps the learning rate unchanged.
        return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda ep: 1.0)
    raise RuntimeError(f'{name} is not implemented')


def run_training(df, fold, params, hp_tune=False):
    """Train a ``BaseLine`` model on every fold except ``fold`` and validate on ``fold``.

    Uses the notebook-level ``features``, ``targets``, ``DEVICE``, ``EPOCHS``,
    ``MODELNAME``, ``now`` and ``writer``. The best checkpoint (lowest validation
    loss) is written to ``{MODELNAME}/{now}_fold{fold}.pt``.

    Args:
        df: Frame with a ``kfold`` column plus the feature and target columns.
        fold: Value of ``df.kfold`` held out for validation.
        params: Dict with keys ``nn_params``, ``optimizer``, ``optim_params``,
            ``scheduler`` and, unless the scheduler is ``"none"``, ``scdl_params``.
        hp_tune: When true, no checkpoint is written.

    Returns:
        The lowest validation loss observed (``np.inf`` if no epoch improved on it).

    Raises:
        RuntimeError: If the optimizer or scheduler name is not supported.
    """
    save_model = not hp_tune
    print(f'\n[Fold No.{fold:>3}]')
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    x_tr = train_df[features].to_numpy()
    x_va = valid_df[features].to_numpy()

    y_tr = train_df[targets].to_numpy()
    y_va = valid_df[targets].to_numpy()

    # NOTE(review): the training loader does not shuffle; confirm this is intended.
    dataset_tr = utils.MoaDataset(x_tr, y_tr)
    loader_tr = torch.utils.data.DataLoader(dataset_tr, batch_size=512, num_workers=2, pin_memory=True)
    dataset_va = utils.MoaDataset(x_va, y_va)
    loader_va = torch.utils.data.DataLoader(dataset_va, batch_size=512, num_workers=2, pin_memory=True)

    model = BaseLine(num_features=x_tr.shape[1], num_targets=y_tr.shape[1], **params['nn_params'])
    model = set_output_bias(model, train_df, targets)
    model.to(DEVICE)

    optimizer = _build_optimizer(model, params)
    scheduler = _build_scheduler(optimizer, params)
    # Only ReduceLROnPlateau takes the monitored metric in step(); the other
    # schedulers interpret a positional argument as an epoch number.
    metric_driven = isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau)

    eng = utils.Engine(model, optimizer, device=DEVICE)

    # Drop the local references to the frames/arrays before the long training loop.
    del df, train_df, valid_df, x_tr, x_va, y_tr, y_va
    gc.collect()

    torch.backends.cudnn.benchmark = True

    print(f'Training state is shown in {MODELNAME}/tensorboard')
    filename = f"{MODELNAME}/{now}_fold{fold}.pt"

    loss_best = np.inf
    patience = 25
    patience_cnt = 0
    checkpoint_written = False
    for ep in range(EPOCHS):
        loss_tr = eng.train(loader_tr)
        # Training-set loss measured through the validation path, for comparison
        # with the loss reported by the training pass.
        loss_tr_nodrop = eng.validate(loader_tr)
        loss_va = eng.validate(loader_va)
        if metric_driven:
            scheduler.step(loss_va)
        else:
            scheduler.step()
        print(f'Ep.{ep:>3}/{EPOCHS:>3}, patience:{patience_cnt:>2}/{patience:>2}, train:{loss_tr:.6}, tr_nodrop:{loss_tr_nodrop:.6}, valid:{loss_va:.6}', end='\r')
        writer.add_scalars(f'{now}/fold{fold}', {'train':loss_tr, 'tr_nodrop':loss_tr_nodrop, 'valid':loss_va}, ep)
        if loss_va < loss_best:
            patience_cnt = 0
            loss_best = loss_va
            if save_model:
                # Only the inner nn.Sequential is saved, so keys lack the "model." prefix.
                torch.save(model.model.state_dict(), filename)
                checkpoint_written = True
        else:
            patience_cnt += 1
        # Early stopping once the validation loss has not improved for more than `patience` epochs.
        if patience_cnt > patience:
            break

    if checkpoint_written:
        print("\nmodel saved at:", filename)
    else:
        # Still terminate the '\r' progress line, but do not claim a file was written.
        print()
    return loss_best

In [7]:
# Hyper-parameters for one run. The sub-dicts are splatted into the BaseLine
# constructor, the optimizer constructor and the scheduler constructor respectively.
params = dict(
    nn_params=dict(
        dropout=0.5,
        num_layers=4,
        hidden_size=512,
        activation="relu",
        batchnorm=True,
        weight_norm=True,
    ),
    optimizer="Adam",
    # Alternative optim_params for SGD:
    #   {"lr":1e-4, "momentum": 0.3, "weight_decay": 0.2, "dampening": 0, "nesterov": False}
    # Alternative optim_params for AdaBelief:
    #   {"lr": 1e-2, "eps":1e-16, "betas": (0.9,0.999), "weight_decay": 1.2e-6,
    #    "weight_decouple": False, "rectify": True, "fixed_decay": False, "amsgrad": False}
    optim_params=dict(
        lr=1e-2,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=1.2e-6,
        amsgrad=False,
    ),
    scheduler="ReduceLROnPlateau",
    # Alternative scdl_params for CosineAnnealingLR:
    #   {"T_max":8, "eta_min":0, "last_epoch":-1}
    scdl_params=dict(threshold=1e-5, patience=3),
)
# 0.02355, 0.03 on momentum:0

In [8]:
# Store the hyper-parameters of this run as JSON in the model directory,
# under the same run tag as the checkpoints.
params_json = json.dumps(params, indent=4)
with open(f'{MODELNAME}/{now}_params.json', 'w') as f:
    f.write(params_json)

In [9]:
%%time
# Train one model per validation fold (fold ids 0-4), all with the same `params`.
for fold in range(5):
    run_training(df, fold, params)


[Fold No.  0]
Training state is shown in Baseline1122/tensorboard
Ep. 56/3000, patience:25/25, train:0.0145952, tr_nodrop:0.0137794, valid:0.0164189
model saved at: Baseline1122/11-24_2029_fold0.pt

[Fold No.  1]
Training state is shown in Baseline1122/tensorboard
Ep. 28/3000, patience: 8/25, train:0.014782, tr_nodrop:0.0140136, valid:0.01600257

KeyboardInterrupt: 

In [None]:
# Close the tensorboard SummaryWriter created in the setup cell.
writer.close()

In [None]:
# Force a garbage-collection pass before the evaluation cells below.
gc.collect()

---
## Get CV Score

In [None]:
# Out-of-fold predictions: every row is scored by the checkpoint of the fold
# in which that row was held out for validation.
predictions = np.zeros((df.shape[0], len(targets)))
fold_ids = df.kfold.to_numpy()
for fold in range(5):
    filename = f"{MODELNAME}/{now}_fold{fold}.pt"
    print(f'[Fold No.{fold:>3}] Predicting...', end='\r')
    # Positional row numbers of this fold. Using positions (not index labels) keeps
    # the write into `predictions` correct even if `df` does not have a default index.
    va_idx = np.flatnonzero(fold_ids == fold)
    valid_df = df.iloc[va_idx].reset_index(drop=True)

    x_va = valid_df[features].to_numpy()
    y_va = valid_df[targets].to_numpy()

    # Only the validation rows are needed here; no training loader is built.
    dataset_va = utils.MoaDataset(x_va, y_va)
    loader_va = torch.utils.data.DataLoader(dataset_va, batch_size=512, num_workers=2, pin_memory=True)

    model = BaseLine(num_features=x_va.shape[1], num_targets=y_va.shape[1], **params['nn_params'])

    weight = torch.load(filename, map_location=torch.device(DEVICE))
    # Checkpoints hold the inner nn.Sequential's state_dict; re-add the "model." prefix.
    weight = OrderedDict([(f'model.{k}', v) for k, v in weight.items()])
    model.load_state_dict(weight)
    model.to(DEVICE)

    model.eval()
    ps = []
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for batch in loader_va:
            ps.append(torch.sigmoid(model(batch["x"].to(DEVICE))).cpu().numpy())
    ps = np.vstack(ps)
    predictions[va_idx] += ps
print()

In [None]:
def log_loss_metric(y_true, y_pred):
    """Binary cross-entropy averaged over columns, then over rows.

    Args:
        y_true: 2-D array of 0/1 labels.
        y_pred: 2-D array of predicted probabilities, same shape as ``y_true``.

    Returns:
        The mean log loss; predictions are clipped to ``[1e-15, 1 - 1e-15]`` first
        so the logarithms stay finite.
    """
    eps = 1e-15
    prob = np.clip(y_pred, eps, 1 - eps)
    positive_term = y_true * np.log(prob)
    negative_term = (1 - y_true) * np.log(1 - prob)
    row_means = np.mean(positive_term + negative_term, axis=1)
    return -np.mean(row_means)
# Score the raw out-of-fold predictions against the true targets.
print(f'CV score               : {log_loss_metric(df[targets].values, predictions):.6}')

In [None]:
# Post-processing: clip the probabilities into [0.0005, 0.999], then set every
# row flagged as a control vehicle to 0 (np.clip returns a new array, so
# `predictions` itself is left untouched).
is_control = (df["cp_type_ctl_vehicle"] == 1).to_numpy()
predictions_ = np.clip(predictions, 0.0005, 0.999)
predictions_[is_control] = 0
cv_postprocessed = log_loss_metric(df[targets].values, predictions_)
print(f'CV score w/ postprocess: {cv_postprocessed:.6}')

In [None]:

# Re-prints the post-processed CV score (same expression as in the previous cell).
print(f'CV score w/ postprocess: {log_loss_metric(df[targets].values, predictions_):.6}')