# train

In [1]:
import yaml
import os

# Load the experiment configuration that every later cell reads from.
# safe_load is used instead of yaml.load(f): calling load() without an explicit
# Loader raises TypeError on PyYAML >= 6 and, on older versions, falls back to a
# loader that can construct arbitrary Python objects.
with open("./config.yaml", 'rb') as f:
    config = yaml.safe_load(f)

  """


In [2]:
# Resolve the output root from the loaded config and create it up front
# (exist_ok=True makes re-running this cell harmless). The log file opened in a
# later cell is written directly under this directory.
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
os.makedirs(OUTPUT_ROOT, exist_ok=True)

## load library

In [3]:
# python default library
import os
import shutil
import datetime
import sys
import pickle

# general analysis tool-kit
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# pytorch
import torch
from torch import optim, nn
from torch.utils.tensorboard import SummaryWriter

from torchsummary import summary

# etc
import yaml
yaml.warnings({'YAMLLoadWarning': False})
import mlflow
from collections import defaultdict
from scipy.stats import zscore

# original library

import common as com
import pytorch_modeler as modeler
from pytorch_model import DAGMM as Model
from pytorch_utils import filtered_load_model
import models

import librosa
import IPython
import librosa.display

## load config and set logger

In [4]:
#with open("./config.yaml", 'rb') as f:
#    config = yaml.load(f)

# One log file per calendar day, stored directly under the output root.
# NOTE: despite its name, `log_folder` holds the path of the log *file*.
log_folder = '{0}/{1}.log'.format(config['IO_OPTION']['OUTPUT_ROOT'],
                                  datetime.date.today())
logger = com.setup_logger(log_folder, '00_train.py')

  and should_run_async(code)


## Setting

In [5]:
# Setting seed
# Fix the seed through the project helper before any data loading or training.
# NOTE(review): which RNGs set_seed() covers (random / numpy / torch / CUDA) is
# not visible from this notebook — confirm in pytorch_modeler.
modeler.set_seed(42)

In [6]:
############################################################################
# Setting I/O path
############################################################################
# input dirs
INPUT_ROOT = config['IO_OPTION']['INPUT_ROOT']
dev_path = INPUT_ROOT + "/dev_data"
add_dev_path = INPUT_ROOT + "/add_dev_data"
# machine type
MACHINE_TYPE = config['IO_OPTION']['MACHINE_TYPE']
# os.listdir() returns entries in arbitrary order and also returns plain files
# and hidden directories (e.g. .DS_Store, .ipynb_checkpoints). Keep only the
# visible sub-directories and sort them case-insensitively (exact name as a
# tie-breaker) so the training order is reproducible from run to run.
machine_types = sorted(
    (name for name in os.listdir(dev_path)
     if not name.startswith('.') and os.path.isdir(os.path.join(dev_path, name))),
    key=lambda name: (name.lower(), name),
)
# output dirs (all derived from the single OUTPUT_ROOT value)
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
MODEL_DIR = OUTPUT_ROOT + '/models'
TB_DIR = OUTPUT_ROOT + '/tb'
PKL_DIR = OUTPUT_ROOT + '/pkl'
#os.makedirs(OUTPUT_ROOT, exist_ok=True)
for out_dir in (MODEL_DIR, TB_DIR, PKL_DIR):
    os.makedirs(out_dir, exist_ok=True)
# copy config next to the results so the run can be reproduced later
shutil.copy('./config.yaml', OUTPUT_ROOT)

'/media/hiroki/working/research/dcase2020/result/2D/DAGMM/stable3/v1/config.yaml'

## make path list and train/valid split

In [7]:
############################################################################
# make path set and train/valid split
############################################################################
# Layout of the lookup table that the following cell fills in:
#     train_paths[machine_type]['train' or 'valid'] = path
train_paths = {}
# Scratch names; the following cell rebinds both of them inside its loop.
dev_train_paths, add_train_paths = {}, {}

In [8]:
def _sorted_file_paths(root, machine_type, split):
    """Return the sorted paths of every entry in ``{root}/{machine_type}/{split}``.

    Paths are built with plain '/' joins, exactly as the rest of this notebook
    does, so the resulting strings are unchanged from the hand-written version.
    """
    split_dir = "{}/{}/{}".format(root, machine_type, split)
    return sorted("{}/{}".format(split_dir, name) for name in os.listdir(split_dir))


for machine_type in machine_types:
    # dev train
    dev_train_paths = _sorted_file_paths(dev_path, machine_type, "train")
    # add_dev train
    add_train_paths = _sorted_file_paths(add_dev_path, machine_type, "train")
    # valid: the dev "test" split is what gets stored under the 'valid' key
    dev_valid_paths = _sorted_file_paths(dev_path, machine_type, "test")

    train_paths[machine_type] = {
        'train': dev_train_paths + add_train_paths,
        'valid': dev_valid_paths,
    }

## training

In [9]:
#############################################################################
# run
#############################################################################
def run(machine_type):
    """Train the model for one machine type and save its weights.

    Builds the dataloaders for ``machine_type`` from the notebook-level
    ``train_paths``, trains a fresh ``Model`` with Adam and a OneCycleLR
    schedule through ``modeler.train_net``, and writes the trained model's
    ``state_dict`` to ``MODEL_DIR/<machine_type>_model.pth``. TensorBoard
    events go to ``TB_DIR/<machine_type>``.

    Parameters
    ----------
    machine_type : str
        Key into ``train_paths``; also used to name the output files.

    Returns
    -------
    history
        Whatever ``modeler.train_net`` returns. Only its ``'model'`` entry is
        used here (the trained network whose weights are saved).

    Raises
    ------
    Any exception raised during training or saving propagates to the caller;
    the TensorBoard writer is closed first.
    """
    com.tic()
    logger.info('TARGET MACHINE_TYPE: {0}'.format(machine_type))
    logger.info('MAKE DATA_LOADER')
    # dev_train_paths
    dataloaders_dict = modeler.make_dataloader(train_paths, machine_type)
    # define writer for tensorboard
    tb_log_dir = TB_DIR + '/' + machine_type
    os.makedirs(tb_log_dir, exist_ok=True)
    writer = SummaryWriter(log_dir=tb_log_dir)
    try:
        logger.info('TRAINING')
        # parameter setting
        preprocessing = config['preprocessing']
        fit = config['fit']
        net = Model(sample_rate=preprocessing['sample_rate'],
                    window_size=preprocessing['window_size'],
                    hop_size=preprocessing['hop_size'],
                    mel_bins=preprocessing['mel_bins'],
                    fmin=preprocessing['fmin'],
                    fmax=preprocessing['fmax'],
                    latent_size=fit['latent_size'],
                    mixture_size=fit['mixture_size'])
        #pretrained_dict = torch.load(config['IO_OPTION']['PREMODEL_PATH'])
        #net = filtered_load_model(net, pretrained_dict)
        # The optimizer's lr and the scheduler's peak lr are the same value, so
        # keep them in one place.
        max_lr = 1e-3
        optimizer = optim.Adam(net.parameters(), lr=max_lr)
        num_epochs = fit['num_epochs']
        # OneCycleLR is stepped per batch, hence steps_per_epoch = number of
        # training batches.
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3,
                                                  max_lr=max_lr, epochs=num_epochs,
                                                  steps_per_epoch=len(dataloaders_dict['train']))
        criterion = nn.MSELoss()
        history = modeler.train_net(net, dataloaders_dict, criterion, optimizer, scheduler, num_epochs, writer)
        # output
        model = history['model']
        model_out_path = MODEL_DIR+'/{}_model.pth'.format(machine_type)
        torch.save(model.state_dict(), model_out_path)
        logger.info('\n success:{0} \n'.format(machine_type) + \
                        'model_out_path ==> \n {0}'.format(model_out_path))
    finally:
        # close writer for tensorboard even when training/saving fails, so the
        # event file is flushed and its handle is not leaked in the kernel
        writer.close()
    #modeler.mlflow_log(history, config, machine_type, model_out_path, tb_log_dir)
    com.toc()
    return history

In [10]:
# Display the machine types that the training loop below will iterate over.
machine_types

['fan', 'pump', 'slider', 'ToyCar', 'ToyConveyor', 'valve']

In [None]:
# Train every machine type in turn. `history` is rebound on each iteration, so
# only the last completed run's history remains in the kernel afterwards
# (per-machine pickling is currently disabled below).
# NOTE: the loop variable `machine_type` stays defined after this cell and keeps
# the value of the last (or interrupted) iteration.
for machine_type in machine_types:
    history = run(machine_type)
    #with open('{}/{}_history.pkl'.format(PKL_DIR, machine_type), 'wb') as file:
    #    pickle.dump(history , file)

2020-11-28 15:31:51,327 - 00_train.py - INFO - TARGET MACHINE_TYPE: fan
2020-11-28 15:31:51,328 - 00_train.py - INFO - MAKE DATA_LOADER
2020-11-28 15:31:52,164 - 00_train.py - INFO - TRAINING


use: cuda:0


100%|██████████| 102/102 [01:23<00:00,  1.22it/s]
100%|██████████| 29/29 [00:10<00:00,  2.74it/s]
2020-11-28 15:33:28,318 - pytorch_modeler.py - INFO - Epoch 1/300:train_loss:2869.375830, tr_rec:37.200558, tr_eng:650.826174, tr_covd:553418.531042, val_AUC:0.496764, val_pAUC:0.514358
100%|██████████| 102/102 [00:13<00:00,  7.43it/s]
100%|██████████| 29/29 [00:01<00:00, 21.21it/s]
2020-11-28 15:33:43,423 - pytorch_modeler.py - INFO - Epoch 2/300:train_loss:121.051302, tr_rec:30.512499, tr_eng:776.095913, tr_covd:2585.842251, val_AUC:0.541474, val_pAUC:0.506396
100%|██████████| 102/102 [00:13<00:00,  7.34it/s]
100%|██████████| 29/29 [00:01<00:00, 21.11it/s]
2020-11-28 15:33:58,701 - pytorch_modeler.py - INFO - Epoch 3/300:train_loss:93.384956, tr_rec:8.255368, tr_eng:744.582677, tr_covd:2134.263678, val_AUC:0.514346, val_pAUC:0.503686
100%|██████████| 102/102 [00:14<00:00,  7.16it/s]
100%|██████████| 29/29 [00:01<00:00, 20.40it/s]
2020-11-28 15:34:14,372 - pytorch_modeler.py - INFO - Epoc

100%|██████████| 102/102 [00:24<00:00,  4.24it/s]
100%|██████████| 29/29 [00:02<00:00, 12.00it/s]
2020-11-28 15:42:52,556 - pytorch_modeler.py - INFO - Epoch 30/300:train_loss:-74.216750, tr_rec:0.111514, tr_eng:-822.405791, tr_covd:1582.463227, val_AUC:0.551670, val_pAUC:0.522236
100%|██████████| 102/102 [00:24<00:00,  4.16it/s]
100%|██████████| 29/29 [00:02<00:00, 11.88it/s]
2020-11-28 15:43:19,548 - pytorch_modeler.py - INFO - Epoch 31/300:train_loss:-52.047969, tr_rec:0.104934, tr_eng:-600.515163, tr_covd:1579.722864, val_AUC:0.455699, val_pAUC:0.492128
100%|██████████| 102/102 [00:23<00:00,  4.26it/s]
100%|██████████| 29/29 [00:02<00:00, 11.89it/s]
2020-11-28 15:43:45,913 - pytorch_modeler.py - INFO - Epoch 32/300:train_loss:-64.212353, tr_rec:0.104362, tr_eng:-721.619914, tr_covd:1569.055513, val_AUC:0.499644, val_pAUC:0.499981
100%|██████████| 102/102 [00:24<00:00,  4.16it/s]
100%|██████████| 29/29 [00:02<00:00, 12.14it/s]
2020-11-28 15:44:12,828 - pytorch_modeler.py - INFO - Ep

100%|██████████| 102/102 [00:24<00:00,  4.18it/s]
100%|██████████| 29/29 [00:02<00:00, 12.80it/s]
2020-11-28 15:55:46,322 - pytorch_modeler.py - INFO - Epoch 59/300:train_loss:-83.574860, tr_rec:0.102149, tr_eng:-914.380390, tr_covd:1552.206501, val_AUC:0.506141, val_pAUC:0.498545
100%|██████████| 102/102 [00:24<00:00,  4.20it/s]
100%|██████████| 29/29 [00:02<00:00, 12.39it/s]
2020-11-28 15:56:12,955 - pytorch_modeler.py - INFO - Epoch 60/300:train_loss:-72.415079, tr_rec:0.109825, tr_eng:-803.105773, tr_covd:1557.134780, val_AUC:0.519056, val_pAUC:0.509861
100%|██████████| 102/102 [00:24<00:00,  4.21it/s]
100%|██████████| 29/29 [00:02<00:00, 11.86it/s]
2020-11-28 15:56:39,652 - pytorch_modeler.py - INFO - Epoch 61/300:train_loss:-72.608468, tr_rec:0.099519, tr_eng:-805.270712, tr_covd:1563.817166, val_AUC:0.500000, val_pAUC:0.500000
100%|██████████| 102/102 [00:24<00:00,  4.23it/s]
100%|██████████| 29/29 [00:02<00:00, 12.48it/s]
2020-11-28 15:57:06,088 - pytorch_modeler.py - INFO - Ep

100%|██████████| 102/102 [00:24<00:00,  4.16it/s]
100%|██████████| 29/29 [00:02<00:00, 12.76it/s]
2020-11-28 16:08:39,023 - pytorch_modeler.py - INFO - Epoch 88/300:train_loss:-75.011005, tr_rec:0.034548, tr_eng:-826.030164, tr_covd:1511.492987, val_AUC:0.503924, val_pAUC:0.512187
100%|██████████| 102/102 [00:24<00:00,  4.23it/s]
100%|██████████| 29/29 [00:02<00:00, 12.56it/s]
2020-11-28 16:09:05,452 - pytorch_modeler.py - INFO - Epoch 89/300:train_loss:-82.462831, tr_rec:0.016122, tr_eng:-901.095655, tr_covd:1526.122898, val_AUC:0.513556, val_pAUC:0.507329
100%|██████████| 102/102 [00:24<00:00,  4.21it/s]
100%|██████████| 29/29 [00:02<00:00, 12.52it/s]
2020-11-28 16:09:32,002 - pytorch_modeler.py - INFO - Epoch 90/300:train_loss:-71.701113, tr_rec:0.011494, tr_eng:-793.251598, tr_covd:1522.511048, val_AUC:0.500000, val_pAUC:0.500000
100%|██████████| 102/102 [00:24<00:00,  4.13it/s]
100%|██████████| 29/29 [00:02<00:00, 12.39it/s]
2020-11-28 16:09:59,040 - pytorch_modeler.py - INFO - Ep

100%|██████████| 102/102 [00:24<00:00,  4.21it/s]
100%|██████████| 29/29 [00:02<00:00, 12.61it/s]
2020-11-28 16:21:34,012 - pytorch_modeler.py - INFO - Epoch 117/300:train_loss:-97.632875, tr_rec:0.010799, tr_eng:-1055.167532, tr_covd:1574.616311, val_AUC:0.500000, val_pAUC:0.500000
100%|██████████| 102/102 [00:24<00:00,  4.15it/s]
100%|██████████| 29/29 [00:02<00:00, 12.34it/s]
2020-11-28 16:22:00,930 - pytorch_modeler.py - INFO - Epoch 118/300:train_loss:-92.552071, tr_rec:0.035412, tr_eng:-1003.785224, tr_covd:1558.208388, val_AUC:0.472780, val_pAUC:0.499578
100%|██████████| 102/102 [00:24<00:00,  4.15it/s]
100%|██████████| 29/29 [00:02<00:00, 12.69it/s]
2020-11-28 16:22:27,777 - pytorch_modeler.py - INFO - Epoch 119/300:train_loss:-60.497822, tr_rec:0.033037, tr_eng:-681.096135, tr_covd:1515.751088, val_AUC:0.471875, val_pAUC:0.511259
100%|██████████| 102/102 [00:24<00:00,  4.18it/s]
100%|██████████| 29/29 [00:02<00:00, 12.44it/s]
2020-11-28 16:22:54,547 - pytorch_modeler.py - INFO

100%|██████████| 102/102 [00:14<00:00,  7.26it/s]
100%|██████████| 29/29 [00:01<00:00, 20.50it/s]
2020-11-28 16:32:20,517 - pytorch_modeler.py - INFO - Epoch 146/300:train_loss:-120.126263, tr_rec:0.006527, tr_eng:-1282.053062, tr_covd:1614.503593, val_AUC:0.503113, val_pAUC:0.500156
100%|██████████| 102/102 [00:14<00:00,  7.25it/s]
100%|██████████| 29/29 [00:01<00:00, 17.68it/s]
2020-11-28 16:32:36,232 - pytorch_modeler.py - INFO - Epoch 147/300:train_loss:-129.024769, tr_rec:0.003930, tr_eng:-1370.488804, tr_covd:1604.036665, val_AUC:0.500000, val_pAUC:0.500000
100%|██████████| 102/102 [00:14<00:00,  6.98it/s]
100%|██████████| 29/29 [00:01<00:00, 20.78it/s]
2020-11-28 16:32:52,258 - pytorch_modeler.py - INFO - Epoch 148/300:train_loss:-114.057998, tr_rec:0.003724, tr_eng:-1221.813797, tr_covd:1623.931996, val_AUC:0.534839, val_pAUC:0.501320
100%|██████████| 102/102 [00:13<00:00,  7.29it/s]
100%|██████████| 29/29 [00:01<00:00, 20.06it/s]
2020-11-28 16:33:07,696 - pytorch_modeler.py - 

100%|██████████| 102/102 [00:24<00:00,  4.24it/s]
100%|██████████| 29/29 [00:02<00:00, 12.67it/s]
2020-11-28 16:43:17,580 - pytorch_modeler.py - INFO - Epoch 175/300:train_loss:-93.107918, tr_rec:0.016738, tr_eng:-1010.210843, tr_covd:1579.286071, val_AUC:0.500362, val_pAUC:0.500019
100%|██████████| 102/102 [00:24<00:00,  4.19it/s]
100%|██████████| 29/29 [00:02<00:00, 12.82it/s]
2020-11-28 16:43:44,190 - pytorch_modeler.py - INFO - Epoch 176/300:train_loss:-94.144255, tr_rec:0.013794, tr_eng:-1020.896537, tr_covd:1586.321435, val_AUC:0.516499, val_pAUC:0.509281
100%|██████████| 102/102 [00:24<00:00,  4.20it/s]
100%|██████████| 29/29 [00:02<00:00, 12.33it/s]
2020-11-28 16:44:10,849 - pytorch_modeler.py - INFO - Epoch 177/300:train_loss:-79.111559, tr_rec:0.012640, tr_eng:-869.973023, tr_covd:1574.620971, val_AUC:0.489107, val_pAUC:0.498068
100%|██████████| 102/102 [00:24<00:00,  4.14it/s]
100%|██████████| 29/29 [00:02<00:00, 11.79it/s]
2020-11-28 16:44:37,966 - pytorch_modeler.py - INFO

100%|██████████| 102/102 [00:39<00:00,  2.61it/s]
100%|██████████| 29/29 [00:04<00:00,  7.09it/s]
2020-11-28 17:00:23,953 - pytorch_modeler.py - INFO - Epoch 204/300:train_loss:-108.049740, tr_rec:0.009253, tr_eng:-1158.310829, tr_covd:1554.418383, val_AUC:0.500000, val_pAUC:0.500000
100%|██████████| 102/102 [00:33<00:00,  3.01it/s]
100%|██████████| 29/29 [00:04<00:00,  7.12it/s]
2020-11-28 17:01:01,912 - pytorch_modeler.py - INFO - Epoch 205/300:train_loss:-111.360597, tr_rec:0.005598, tr_eng:-1191.076458, tr_covd:1548.290608, val_AUC:0.502498, val_pAUC:0.500135
100%|██████████| 102/102 [00:33<00:00,  3.01it/s]
100%|██████████| 29/29 [00:03<00:00,  7.52it/s]
2020-11-28 17:01:39,672 - pytorch_modeler.py - INFO - Epoch 206/300:train_loss:-117.445703, tr_rec:0.004800, tr_eng:-1253.177888, tr_covd:1573.457498, val_AUC:0.500000, val_pAUC:0.500000
100%|██████████| 102/102 [00:34<00:00,  2.99it/s]
100%|██████████| 29/29 [00:03<00:00,  7.34it/s]
2020-11-28 17:02:17,760 - pytorch_modeler.py - 

# Train a single machine type and persist its history.
# The file name must use the machine that was actually trained here: the global
# `machine_type` is a leftover loop variable from the training loop and may hold
# a different machine, which would save this history under the wrong name.
target_machine_type = 'ToyCar'
history = run(target_machine_type)
with open('{}/{}_history.pkl'.format(PKL_DIR, target_machine_type), 'wb') as file:
    pickle.dump(history, file)