In [12]:
import sys
sys.path.append('../script/')
import os
from os.path import exists
from datetime import datetime
import json
import gc
from functools import partial

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import optuna

import utils
import models
import train as trainer
# --- Run configuration ---------------------------------------------------
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs end to end on a machine without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS = 10
MODELNAME = "Baseline1013"
# exist_ok=True creates the tensorboard sub-directory even when the model
# directory itself is already present, and makes re-running this cell safe.
os.makedirs(f"{MODELNAME}/tensorboard", exist_ok=True)
save_model = True
# Run identifier in the form "MM-DD_HHMM"; used in checkpoint / params file
# names and as the TensorBoard tag prefix.
now = datetime.now().strftime("%m-%d_%H%M")
# Single TensorBoard writer shared by the whole notebook; it logs into the
# tensorboard sub-directory of the model folder.
tensorboard_dir = f"{MODELNAME}/tensorboard"
writer = SummaryWriter(log_dir=tensorboard_dir)

In [2]:
# Training table. run_training expects it to carry a `kfold` column plus the
# columns listed in `features` and `targets`.
df = pd.read_csv("../input/folds/train.csv")

# Column-name lists, one name per line. splitlines() (unlike split("\n")) does
# not produce a trailing "" when the file ends with a newline, and blank lines
# are dropped so that df[targets] / df[features] never look up an empty name.
with open("../input/folds/targets", "r") as f:
    targets = [name for name in f.read().splitlines() if name.strip()]
with open("../input/folds/features", "r") as f:
    features = [name for name in f.read().splitlines() if name.strip()]

In [3]:
class BaseLine(nn.Module):
    """Fully connected baseline network.

    Every hidden layer is ``Linear -> [BatchNorm1d] -> Dropout -> activation``
    and a final ``Linear`` maps to ``num_targets`` outputs. No activation is
    applied to the output, so ``forward`` returns the raw values of that layer.

    Args:
        num_features: width of the input vectors.
        num_targets: width of the output vectors.
        num_layers: number of hidden layers; ``0`` gives a single linear map
            from ``num_features`` to ``num_targets``.
        dropout: drop probability passed to every ``nn.Dropout``.
        hidden_size: width of every hidden layer.
        activation: ``"relu"`` or ``"prelu"``.
        batchnorm: when true, a ``BatchNorm1d`` follows each hidden ``Linear``
            and that ``Linear`` is created without a bias term.

    Raises:
        RuntimeError: if ``activation`` is not one of the supported names.
    """

    def __init__(self, num_features, num_targets, num_layers=3, dropout=.2, hidden_size=256, activation="relu", batchnorm=True):
        super().__init__()
        # Validate once, up front, instead of inside the layer loop.
        activations = {"relu": nn.ReLU, "prelu": nn.PReLU}
        if activation not in activations:
            raise RuntimeError(f'{activation} is not implemented')
        make_activation = activations[activation]

        layers = []
        # Width of whatever feeds the next Linear. Tracking it explicitly keeps
        # the output layer correct when num_layers == 0 (it must then take
        # num_features inputs, not hidden_size).
        in_features = num_features
        for _ in range(num_layers):
            layers.append(nn.Linear(in_features, hidden_size, bias=(not batchnorm)))
            if batchnorm:
                layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.Dropout(dropout))
            # A fresh module per layer, so PReLU slopes are not shared.
            layers.append(make_activation())
            in_features = hidden_size
        layers.append(nn.Linear(in_features, num_targets))
        # Module order is unchanged, so nn.Sequential's index-based state_dict
        # keys stay compatible with checkpoints saved from self.model.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the output of the last Linear."""
        return self.model(x)

In [4]:
def run_training(df, fold, params):
    """Train and validate one BaseLine model on a single cross-validation fold.

    Rows with ``df.kfold == fold`` form the validation split and all remaining
    rows form the training split. Per-epoch train/validation losses are logged
    to the notebook-level TensorBoard ``writer`` under fold-specific tags, and
    when ``save_model`` is true the weights of the epoch with the lowest
    validation loss are written to ``{MODELNAME}/{now}_fold{fold}.pt``.

    Uses the notebook globals ``features``, ``targets``, ``DEVICE``,
    ``EPOCHS``, ``MODELNAME``, ``now``, ``writer`` and ``save_model``.

    Args:
        df: DataFrame with a ``kfold`` column and the columns named in
            ``features`` and ``targets``.
        fold: value of ``kfold`` to hold out for validation.
        params: dict with the keys ``"batch_size"``, ``"nn_params"`` (keyword
            arguments for ``BaseLine``), ``"optimizer"`` (``"SGD"`` or
            ``"Adam"``) and ``"optim_params"``; optionally ``"scheduler"``
            (only ``"ReduceLROnPlateau"`` is supported) and ``"scdl_params"``.

    Raises:
        RuntimeError: if ``params["optimizer"]`` is neither ``"SGD"`` nor ``"Adam"``.
    """
    print(f'\n[Fold No.{fold:>3}]')
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    x_tr = train_df[features].to_numpy()
    x_va = valid_df[features].to_numpy()

    y_tr = train_df[targets].to_numpy()
    y_va = valid_df[targets].to_numpy()

    dataset_tr = utils.MoaDataset(x_tr, y_tr)
    # Reshuffle the training rows every epoch; without it every epoch sees the
    # exact same mini-batches in the same order.
    # NOTE(review): assumes utils.MoaDataset is a map-style dataset - confirm.
    loader_tr = torch.utils.data.DataLoader(dataset_tr, batch_size=params['batch_size'], shuffle=True, num_workers=2)
    dataset_va = utils.MoaDataset(x_va, y_va)
    loader_va = torch.utils.data.DataLoader(dataset_va, batch_size=params['batch_size'], num_workers=2)

    model = BaseLine(num_features=x_tr.shape[1], num_targets=y_tr.shape[1], **params['nn_params'])
    model.to(DEVICE)

    if params["optimizer"] == "SGD":
        optimizer = torch.optim.SGD(model.parameters(), **params["optim_params"])
    elif params["optimizer"] == "Adam":
        optimizer = torch.optim.Adam(model.parameters(), **params["optim_params"])
    else:
        raise RuntimeError(f'{params["optimizer"]} is not implemented')

    if params.get("scheduler") == "ReduceLROnPlateau":
        # patience/mode are defaults that scdl_params may override; passing
        # them as separate keywords would raise TypeError on a duplicate key.
        scdl_kwargs = {"patience": 3, "mode": "min", **params.get("scdl_params", {})}
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, **scdl_kwargs)
    else:
        print("Not Implemented: No scheduling will be applied")
        # No scheduler object at all: only ReduceLROnPlateau.step() takes a
        # metric, so stepping a stand-in scheduler with the loss would be wrong.
        scheduler = None

    eng = utils.Engine(model, optimizer, device=DEVICE)

    # Drop the local references to the frames and arrays that are no longer
    # needed by name (the caller's df is unaffected).
    del df, train_df, valid_df, x_tr, x_va, y_tr, y_va
    gc.collect()

    print(f'Training state is shown in {MODELNAME}/tensorboard')
    filename = f"{MODELNAME}/{now}_fold{fold}.pt"

    loss_best = np.inf
    patience = 10
    patience_cnt = 0
    checkpoint_written = False
    for ep in range(EPOCHS):
        print(f'Ep.{ep:>3}/{EPOCHS:>3}, patience:{patience_cnt:>2}/{patience:>2}', end='\r')
        loss_tr = eng.train(loader_tr)
        loss_va = eng.validate(loader_va)
        if scheduler is not None:
            scheduler.step(loss_va)
        # Fold-specific tags, so the folds do not overwrite each other's
        # curves at the same step in TensorBoard.
        writer.add_scalar(f'{now}/fold{fold}/train', loss_tr, ep)
        writer.add_scalar(f'{now}/fold{fold}/valid', loss_va, ep)
        if loss_va < loss_best:
            patience_cnt = 0
            loss_best = loss_va
            if save_model:
                # Saves the inner nn.Sequential's state_dict (keys "0.weight",
                # ...), so it must be loaded back into `model.model`.
                torch.save(model.model.state_dict(), filename)
                checkpoint_written = True
        else:
            patience_cnt += 1
        # Stops once more than `patience` consecutive epochs failed to improve.
        if patience_cnt > patience:
            break
    if checkpoint_written:
        print("\nmodel saved at:", filename)
    else:
        print("\nno checkpoint was written for this fold")

In [5]:
# Hyper-parameters for one training run: passed to run_training for every fold
# and later dumped to JSON next to the checkpoints.
params = dict(
    nn_params=dict(
        dropout=0.2,
        num_layers=3,
        hidden_size=256,
        activation="relu",
        batchnorm=True,
    ),
    optimizer="SGD",
    optim_params=dict(lr=1e-2, momentum=0.0),
    scheduler="ReduceLROnPlateau",
    scdl_params=dict(threshold=0.00001),
    batch_size=256,
)

In [6]:
# Train one model per cross-validation fold; each call holds out the rows whose
# `kfold` value equals the fold number.
NUM_FOLDS = 5
for fold in range(NUM_FOLDS):
    run_training(df, fold, params)


[Fold No.  0]
Training state is shown in Baseline1013/tensorboard
Ep.  9/ 10, patience: 0/10
model saved at: Baseline1013/10-15_0214_fold0.pt

[Fold No.  1]
Training state is shown in Baseline1013/tensorboard
Ep.  9/ 10, patience: 0/10
model saved at: Baseline1013/10-15_0214_fold1.pt

[Fold No.  2]
Training state is shown in Baseline1013/tensorboard
Ep.  9/ 10, patience: 0/10
model saved at: Baseline1013/10-15_0214_fold2.pt

[Fold No.  3]
Training state is shown in Baseline1013/tensorboard
Ep.  9/ 10, patience: 0/10
model saved at: Baseline1013/10-15_0214_fold3.pt

[Fold No.  4]
Training state is shown in Baseline1013/tensorboard
Ep.  9/ 10, patience: 0/10
model saved at: Baseline1013/10-15_0214_fold4.pt


In [14]:
# Store the hyper-parameters used for this run in the model directory, under
# the same run id as the fold checkpoints.
params_path = f'{MODELNAME}/{now}_params.json'
with open(params_path, 'w') as params_file:
    json.dump(params, params_file, indent=4)

In [7]:
# Close the TensorBoard SummaryWriter created in the setup cell now that
# training has finished.
writer.close()