# train

In [1]:
import yaml
import os

with open("./config.yaml", 'rb') as f:
    config = yaml.load(f)

  """


In [2]:
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
os.makedirs(OUTPUT_ROOT, exist_ok=True)

## load library

In [3]:
# python default library
import os
import shutil
import datetime
import sys
import pickle

# general analysis tool-kit
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# pytorch
import torch
from torch import optim, nn
from torch.utils.tensorboard import SummaryWriter

from torchsummary import summary

# etc
import yaml
yaml.warnings({'YAMLLoadWarning': False})
import mlflow
from collections import defaultdict
from scipy.stats import zscore

# original library

import common as com
import pytorch_modeler as modeler
from pytorch_model import DAGMM as Model
from pytorch_utils import filtered_load_model
import models

import librosa
import IPython
import librosa.display

## load config and set logger

In [4]:
#with open("./config.yaml", 'rb') as f:
#    config = yaml.load(f)

log_folder = config['IO_OPTION']['OUTPUT_ROOT']+'/{0}.log'.format(datetime.date.today())
logger = com.setup_logger(log_folder, '00_train.py')

  and should_run_async(code)


## Setting

In [5]:
# Setting seed
modeler.set_seed(42)

In [6]:
############################################################################
# Setting I/O path
############################################################################
# input dirs
INPUT_ROOT = config['IO_OPTION']['INPUT_ROOT']
dev_path = INPUT_ROOT + "/dev_data"
add_dev_path = INPUT_ROOT + "/add_dev_data"
# machine type
MACHINE_TYPE = config['IO_OPTION']['MACHINE_TYPE']
machine_types = os.listdir(dev_path)
# output dirs
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
MODEL_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/models'
TB_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/tb'
PKL_DIR = OUTPUT_ROOT + '/pkl'
#os.makedirs(OUTPUT_ROOT, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(TB_DIR, exist_ok=True)
os.makedirs(PKL_DIR, exist_ok=True)
# copy config
shutil.copy('./config.yaml', OUTPUT_ROOT)

'/media/hiroki/working/research/dcase2020/result/2D/DAGMM/stable/v1/config.yaml'

## make path list and train/valid split

In [7]:
############################################################################
# make path set and train/valid split
############################################################################
'''
train_paths[machine_type]['train' or 'valid'] = path
'''
dev_train_paths = {}
add_train_paths = {}
train_paths = {}

In [8]:
for machine_type in machine_types:
    # dev train
    dev_train_paths = ["{}/{}/train/".format(dev_path, machine_type) + file for file in os.listdir("{}/{}/train".format(dev_path, machine_type))]
    dev_train_paths = sorted(dev_train_paths)
    # add_dev train
    add_train_paths = ["{}/{}/train/".format(add_dev_path, machine_type) + file for file in os.listdir("{}/{}/train".format(add_dev_path, machine_type))]
    add_train_paths = sorted(add_train_paths)
    # valid
    dev_valid_paths = ["{}/{}/test/".format(dev_path, machine_type) + file for file in os.listdir("{}/{}/test".format(dev_path, machine_type))]
    dev_valid_paths = sorted(dev_valid_paths)
    
    train_paths[machine_type] = {}
    train_paths[machine_type]['train'] = dev_train_paths + add_train_paths
    train_paths[machine_type]['valid'] = dev_valid_paths

## training

In [9]:
#############################################################################
# run
#############################################################################
def run(machine_type):
    com.tic()
    logger.info('TARGET MACHINE_TYPE: {0}'.format(machine_type))
    logger.info('MAKE DATA_LOADER')
    # dev_train_paths
    dataloaders_dict = modeler.make_dataloader(train_paths, machine_type)
    # define writer for tensorbord
    os.makedirs(TB_DIR+'/'+machine_type, exist_ok=True)         # debug
    tb_log_dir = TB_DIR + '/' + machine_type
    writer = SummaryWriter(log_dir = tb_log_dir)
    logger.info('TRAINING')
    # parameter setting
    net = Model(sample_rate=config['preprocessing']['sample_rate'],
                window_size=config['preprocessing']['window_size'],
                hop_size=config['preprocessing']['hop_size'],
                mel_bins=config['preprocessing']['mel_bins'],
                fmin=config['preprocessing']['fmin'],
                fmax=config['preprocessing']['fmax'],
                latent_size=config['fit']['latent_size'],
                mixture_size=config['fit']['mixture_size'])
    #pretrained_dict = torch.load(config['IO_OPTION']['PREMODEL_PATH'])
    #net = filtered_load_model(net, pretrained_dict)
    optimizer = optim.Adam(net.parameters(), lr=1e-4)
    num_epochs = config['fit']['num_epochs']
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, 
                                              max_lr=1e-3, epochs=num_epochs, steps_per_epoch=len(dataloaders_dict['train']))
    criterion = nn.MSELoss()
    try:
        history = modeler.train_net(net, dataloaders_dict, criterion, optimizer, scheduler, num_epochs, writer)
    except:
        return "error"
        
    # output
    model = history['model']
    model_out_path = MODEL_DIR+'/{}_model.pth'.format(machine_type)
    torch.save(model.state_dict(), model_out_path)
    logger.info('\n success:{0} \n'.format(machine_type) + \
                    'model_out_path ==> \n {0}'.format(model_out_path))
    #  close writer for tensorbord
    writer.close()
    #modeler.mlflow_log(history, config, machine_type, model_out_path, tb_log_dir)
    com.toc()
    return history

In [10]:
machine_types

['fan', 'pump', 'slider', 'ToyCar', 'ToyConveyor', 'valve']

In [None]:
for machine_type in machine_types:
    history = run(machine_type)
    #with open('{}/{}_history.pkl'.format(PKL_DIR, machine_type), 'wb') as file:
    #    pickle.dump(history , file)

2020-11-27 16:50:59,429 - 00_train.py - INFO - TARGET MACHINE_TYPE: fan
2020-11-27 16:50:59,430 - 00_train.py - INFO - MAKE DATA_LOADER
2020-11-27 16:51:00,398 - 00_train.py - INFO - TRAINING


use: cuda:0


100%|██████████| 51/51 [00:13<00:00,  3.67it/s]
100%|██████████| 15/15 [00:02<00:00,  5.93it/s]
2020-11-27 16:51:18,836 - pytorch_modeler.py - INFO - Epoch 1/200:train_loss:111.473075, tr_rec:18.855685, tr_eng:137.567606, tr_covd:15772.126129, val_AUC:0.478345, val_pAUC:0.502063
100%|██████████| 51/51 [00:19<00:00,  2.58it/s]
100%|██████████| 15/15 [00:02<00:00,  5.86it/s]
2020-11-27 16:51:41,165 - pytorch_modeler.py - INFO - Epoch 2/200:train_loss:77.799684, tr_rec:18.789773, tr_eng:151.896920, tr_covd:8764.044067, val_AUC:0.502357, val_pAUC:0.492610
100%|██████████| 51/51 [00:20<00:00,  2.48it/s]
100%|██████████| 15/15 [00:02<00:00,  6.38it/s]
2020-11-27 16:52:04,082 - pytorch_modeler.py - INFO - Epoch 3/200:train_loss:49.485355, tr_rec:18.385191, tr_eng:173.335620, tr_covd:2753.320499, val_AUC:0.465807, val_pAUC:0.490368
100%|██████████| 51/51 [00:20<00:00,  2.55it/s]
100%|██████████| 15/15 [00:02<00:00,  5.96it/s]
2020-11-27 16:52:26,619 - pytorch_modeler.py - INFO - Epoch 4/200:tr

100%|██████████| 51/51 [00:20<00:00,  2.51it/s]
100%|██████████| 15/15 [00:02<00:00,  5.10it/s]
2020-11-27 17:13:49,256 - pytorch_modeler.py - INFO - Epoch 60/200:train_loss:-23.862339, tr_rec:0.822975, tr_eng:-319.332976, tr_covd:1449.596821, val_AUC:0.517436, val_pAUC:0.501227
100%|██████████| 51/51 [00:20<00:00,  2.47it/s]
100%|██████████| 15/15 [00:02<00:00,  5.27it/s]
2020-11-27 17:14:12,739 - pytorch_modeler.py - INFO - Epoch 61/200:train_loss:-34.877773, tr_rec:0.940487, tr_eng:-413.045643, tr_covd:1097.261166, val_AUC:0.484656, val_pAUC:0.499143
100%|██████████| 51/51 [00:19<00:00,  2.57it/s]
100%|██████████| 15/15 [00:02<00:00,  5.18it/s]
2020-11-27 17:14:35,515 - pytorch_modeler.py - INFO - Epoch 62/200:train_loss:-34.665924, tr_rec:1.118340, tr_eng:-412.418283, tr_covd:1091.512869, val_AUC:0.611361, val_pAUC:0.512895
100%|██████████| 51/51 [00:19<00:00,  2.58it/s]
100%|██████████| 15/15 [00:02<00:00,  5.36it/s]
2020-11-27 17:14:58,112 - pytorch_modeler.py - INFO - Epoch 63/2

100%|██████████| 51/51 [00:20<00:00,  2.51it/s]
100%|██████████| 15/15 [00:02<00:00,  5.07it/s]
2020-11-27 17:36:24,752 - pytorch_modeler.py - INFO - Epoch 119/200:train_loss:-42.292326, tr_rec:0.210979, tr_eng:-532.963944, tr_covd:2158.618200, val_AUC:0.364384, val_pAUC:0.495656
100%|██████████| 51/51 [00:19<00:00,  2.57it/s]
100%|██████████| 15/15 [00:02<00:00,  5.67it/s]
2020-11-27 17:36:47,283 - pytorch_modeler.py - INFO - Epoch 120/200:train_loss:-40.904721, tr_rec:0.217068, tr_eng:-514.436360, tr_covd:2064.369610, val_AUC:0.624751, val_pAUC:0.519742
100%|██████████| 51/51 [00:19<00:00,  2.55it/s]
100%|██████████| 15/15 [00:02<00:00,  5.60it/s]
2020-11-27 17:37:09,950 - pytorch_modeler.py - INFO - Epoch 121/200:train_loss:-39.740090, tr_rec:0.215473, tr_eng:-503.383254, tr_covd:2076.552460, val_AUC:0.351353, val_pAUC:0.494546
100%|██████████| 51/51 [00:19<00:00,  2.56it/s]
100%|██████████| 15/15 [00:02<00:00,  5.28it/s]
2020-11-27 17:37:32,731 - pytorch_modeler.py - INFO - Epoch 1

100%|██████████| 51/51 [00:20<00:00,  2.46it/s]
100%|██████████| 15/15 [00:02<00:00,  5.41it/s]
2020-11-27 17:58:54,733 - pytorch_modeler.py - INFO - Epoch 177/200:train_loss:-47.439139, tr_rec:0.218099, tr_eng:-567.772777, tr_covd:1824.008081, val_AUC:0.441193, val_pAUC:0.492533
100%|██████████| 51/51 [00:20<00:00,  2.49it/s]
100%|██████████| 15/15 [00:02<00:00,  5.29it/s]
2020-11-27 17:59:18,066 - pytorch_modeler.py - INFO - Epoch 178/200:train_loss:-47.898696, tr_rec:0.220853, tr_eng:-571.357450, tr_covd:1803.239584, val_AUC:0.417620, val_pAUC:0.499631
100%|██████████| 51/51 [00:20<00:00,  2.49it/s]
100%|██████████| 15/15 [00:03<00:00,  4.96it/s]
2020-11-27 17:59:41,577 - pytorch_modeler.py - INFO - Epoch 179/200:train_loss:-47.698669, tr_rec:0.216748, tr_eng:-571.799396, tr_covd:1852.904982, val_AUC:0.546012, val_pAUC:0.505549
100%|██████████| 51/51 [00:20<00:00,  2.48it/s]
100%|██████████| 15/15 [00:02<00:00,  5.05it/s]
2020-11-27 18:00:05,153 - pytorch_modeler.py - INFO - Epoch 1

elapsed time: 4607.384708881 [sec]


  0%|          | 0/46 [00:00<?, ?it/s]

use: cuda:0


100%|██████████| 46/46 [00:17<00:00,  2.64it/s]
100%|██████████| 7/7 [00:01<00:00,  4.83it/s]
2020-11-27 18:08:06,379 - pytorch_modeler.py - INFO - Epoch 1/200:train_loss:94.235397, tr_rec:17.055945, tr_eng:119.701033, tr_covd:13041.870010, val_AUC:0.500000, val_pAUC:0.500000
100%|██████████| 46/46 [00:18<00:00,  2.44it/s]
100%|██████████| 7/7 [00:01<00:00,  4.27it/s]
2020-11-27 18:08:26,866 - pytorch_modeler.py - INFO - Epoch 2/200:train_loss:69.199248, tr_rec:17.009913, tr_eng:133.372438, tr_covd:7770.418480, val_AUC:0.494792, val_pAUC:0.499720
100%|██████████| 46/46 [00:18<00:00,  2.43it/s]
100%|██████████| 7/7 [00:01<00:00,  5.06it/s]
2020-11-27 18:08:47,241 - pytorch_modeler.py - INFO - Epoch 3/200:train_loss:44.220895, tr_rec:16.688364, tr_eng:154.753106, tr_covd:2411.443966, val_AUC:0.506858, val_pAUC:0.500368
100%|██████████| 46/46 [00:17<00:00,  2.66it/s]
100%|██████████| 7/7 [00:01<00:00,  4.69it/s]
2020-11-27 18:09:06,056 - pytorch_modeler.py - INFO - Epoch 4/200:train_loss:

100%|██████████| 46/46 [00:17<00:00,  2.59it/s]
100%|██████████| 7/7 [00:01<00:00,  4.96it/s]
2020-11-27 18:17:52,042 - pytorch_modeler.py - INFO - Epoch 31/200:train_loss:-23.007275, tr_rec:1.527794, tr_eng:-298.017673, tr_covd:1053.339821, val_AUC:0.587477, val_pAUC:0.505309
100%|██████████| 46/46 [00:17<00:00,  2.56it/s]
100%|██████████| 7/7 [00:01<00:00,  6.12it/s]
2020-11-27 18:18:11,140 - pytorch_modeler.py - INFO - Epoch 32/200:train_loss:-22.965980, tr_rec:1.462399, tr_eng:-290.992492, tr_covd:934.174129, val_AUC:0.512135, val_pAUC:0.501701
100%|██████████| 46/46 [00:18<00:00,  2.44it/s]
100%|██████████| 7/7 [00:01<00:00,  4.85it/s]
2020-11-27 18:18:31,417 - pytorch_modeler.py - INFO - Epoch 33/200:train_loss:-19.886505, tr_rec:1.404028, tr_eng:-263.198155, tr_covd:1005.856833, val_AUC:0.626331, val_pAUC:0.534860
100%|██████████| 46/46 [00:18<00:00,  2.48it/s]
100%|██████████| 7/7 [00:01<00:00,  4.67it/s]
2020-11-27 18:18:51,492 - pytorch_modeler.py - INFO - Epoch 34/200:train_

100%|██████████| 46/46 [00:17<00:00,  2.65it/s]
100%|██████████| 7/7 [00:01<00:00,  5.03it/s]
2020-11-27 18:27:30,729 - pytorch_modeler.py - INFO - Epoch 61/200:train_loss:-19.682303, tr_rec:1.177721, tr_eng:-261.001522, tr_covd:1048.025793, val_AUC:0.590726, val_pAUC:0.506319
100%|██████████| 46/46 [00:17<00:00,  2.61it/s]
100%|██████████| 7/7 [00:01<00:00,  5.20it/s]
2020-11-27 18:27:49,749 - pytorch_modeler.py - INFO - Epoch 62/200:train_loss:-26.680132, tr_rec:1.068526, tr_eng:-325.913878, tr_covd:968.546055, val_AUC:0.418559, val_pAUC:0.495697
100%|██████████| 46/46 [00:17<00:00,  2.63it/s]
100%|██████████| 7/7 [00:01<00:00,  5.21it/s]
2020-11-27 18:28:08,577 - pytorch_modeler.py - INFO - Epoch 63/200:train_loss:-26.069782, tr_rec:1.004027, tr_eng:-320.927378, tr_covd:1003.785984, val_AUC:0.541383, val_pAUC:0.496690
100%|██████████| 46/46 [00:17<00:00,  2.57it/s]
100%|██████████| 7/7 [00:01<00:00,  5.33it/s]
2020-11-27 18:28:27,779 - pytorch_modeler.py - INFO - Epoch 64/200:train_

100%|██████████| 46/46 [00:17<00:00,  2.59it/s]
100%|██████████| 7/7 [00:01<00:00,  5.22it/s]
2020-11-27 18:37:04,066 - pytorch_modeler.py - INFO - Epoch 91/200:train_loss:-33.153917, tr_rec:0.537409, tr_eng:-439.607803, tr_covd:2053.890938, val_AUC:0.501418, val_pAUC:0.500076
100%|██████████| 46/46 [00:17<00:00,  2.67it/s]
100%|██████████| 7/7 [00:01<00:00,  5.43it/s]
2020-11-27 18:37:22,574 - pytorch_modeler.py - INFO - Epoch 92/200:train_loss:-33.744090, tr_rec:0.522153, tr_eng:-437.409213, tr_covd:1894.935995, val_AUC:0.418218, val_pAUC:0.495645
100%|██████████| 46/46 [00:19<00:00,  2.35it/s]
100%|██████████| 7/7 [00:01<00:00,  5.04it/s]
2020-11-27 18:37:43,543 - pytorch_modeler.py - INFO - Epoch 93/200:train_loss:-35.416676, tr_rec:0.531628, tr_eng:-449.402660, tr_covd:1798.392620, val_AUC:0.500000, val_pAUC:0.500000
100%|██████████| 46/46 [00:18<00:00,  2.54it/s]
100%|██████████| 7/7 [00:01<00:00,  5.25it/s]
2020-11-27 18:38:03,023 - pytorch_modeler.py - INFO - Epoch 94/200:train

history = run('ToyCar')
with open('{}/{}_history.pkl'.format(PKL_DIR, machine_type), 'wb') as file:
    pickle.dump(history , file)