# train

In [1]:
import yaml
import os

# Parse the experiment configuration into a plain dict.
# yaml.safe_load is used instead of yaml.load(f): calling yaml.load without an
# explicit Loader is deprecated since PyYAML 5.1 and raises a TypeError in
# PyYAML >= 6. safe_load also refuses to construct arbitrary Python objects.
with open("./config.yaml", 'rb') as f:
    config = yaml.safe_load(f)

  """


In [2]:
# Resolve the output root from the config and create it right away, so the
# cells below can write their log file and sub-directories into it.
io_option = config['IO_OPTION']
OUTPUT_ROOT = io_option['OUTPUT_ROOT']
os.makedirs(name=OUTPUT_ROOT, exist_ok=True)

## load library

In [3]:
# python default library
import os
import shutil
import datetime
import sys
import pickle

# general analysis tool-kit
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt

# pytorch
import torch
from torch import optim, nn
from torch.utils.tensorboard import SummaryWriter

from torchsummary import summary

# etc
import yaml
yaml.warnings({'YAMLLoadWarning': False})
import mlflow
from collections import defaultdict
from scipy.stats import zscore

# original library

import common as com
import pytorch_modeler as modeler
from pytorch_model import DAGMM as Model
from pytorch_utils import filtered_load_model
import models

import librosa
import IPython
import librosa.display

## load config and set logger

In [4]:
#with open("./config.yaml", 'rb') as f:
#    config = yaml.load(f)

# One log file per calendar day, placed directly under the output root.
# NOTE: despite its name, `log_folder` holds the path of the log *file*.
log_folder = config['IO_OPTION']['OUTPUT_ROOT'] + f'/{datetime.date.today()}.log'
logger = com.setup_logger(log_folder, '00_train.py')

  and should_run_async(code)


## Setting

In [5]:
# Setting seed
# Fix the seed to 42 through the project's modeler helper before any data
# loading or training happens. Presumably this seeds every RNG used during
# training -- TODO confirm against pytorch_modeler.set_seed.
modeler.set_seed(42)

In [6]:
############################################################################
# I/O paths (everything is derived from config['IO_OPTION'])
############################################################################
io_option = config['IO_OPTION']

# inputs: development data and additional development data
INPUT_ROOT = io_option['INPUT_ROOT']
dev_path = INPUT_ROOT + "/dev_data"
add_dev_path = INPUT_ROOT + "/add_dev_data"

# machine types: MACHINE_TYPE is read from the config, while the list that is
# actually iterated over comes from the sub-directories of dev_path
MACHINE_TYPE = io_option['MACHINE_TYPE']
machine_types = os.listdir(dev_path)

# outputs: model weights, tensorboard logs and pickles live under OUTPUT_ROOT
OUTPUT_ROOT = io_option['OUTPUT_ROOT']
MODEL_DIR, TB_DIR, PKL_DIR = (OUTPUT_ROOT + sub for sub in ('/models', '/tb', '/pkl'))
for out_dir in (MODEL_DIR, TB_DIR, PKL_DIR):
    os.makedirs(out_dir, exist_ok=True)

# keep a copy of the config next to the results; this stays the last
# expression of the cell so the destination path is displayed
shutil.copy('./config.yaml', OUTPUT_ROOT)

'/media/hiroki/working/research/dcase2020/result/2D/DAGMM/meta_dense/latent5_mixture3_revised_euclid_no2/config.yaml'

## make path list and train/valid split

In [7]:
############################################################################
# make path set and train/valid split
############################################################################
# Layout of the mapping that is filled in afterwards:
#   train_paths[machine_type]['train' or 'valid'] = list of file paths
dev_train_paths, add_train_paths, train_paths = {}, {}, {}

In [8]:
for machine_type in machine_types:
    # directories scanned for this machine type
    dev_train_dir = "{}/{}/train".format(dev_path, machine_type)
    add_train_dir = "{}/{}/train".format(add_dev_path, machine_type)
    dev_valid_dir = "{}/{}/test".format(dev_path, machine_type)

    # sorted file lists: dev train, additional train, then dev test (used as validation)
    dev_train_paths = sorted(dev_train_dir + "/" + name for name in os.listdir(dev_train_dir))
    add_train_paths = sorted(add_train_dir + "/" + name for name in os.listdir(add_train_dir))
    dev_valid_paths = sorted(dev_valid_dir + "/" + name for name in os.listdir(dev_valid_dir))

    # training uses both train sets; validation uses the dev test files
    train_paths[machine_type] = {
        'train': dev_train_paths + add_train_paths,
        'valid': dev_valid_paths,
    }

## training

In [9]:
#############################################################################
# run
#############################################################################
def run(machine_type):
    """Train the model for one machine type and save its artifacts.

    Builds the data loaders from the module-level ``train_paths``, trains the
    network with ``modeler.train_net`` while logging to a per-machine
    TensorBoard directory under ``TB_DIR``, then writes the model weights
    (``{machine_type}_model.pth``) and the GMM parameters
    (``{machine_type}_gmm_param.pkl``) into ``MODEL_DIR``.

    Args:
        machine_type: Key of ``train_paths`` naming the machine to train on
            (e.g. ``'fan'``).

    Returns:
        The object returned by ``modeler.train_net``; it is indexed here with
        the keys ``'model'`` and ``'gmm_param'``.

    Raises:
        Any exception raised by model construction, training or saving. The
        TensorBoard writer is closed before the exception propagates.
    """
    com.tic()
    logger.info('TARGET MACHINE_TYPE: {0}'.format(machine_type))
    logger.info('MAKE DATA_LOADER')
    dataloaders_dict = modeler.make_dataloader(train_paths, machine_type)

    # define writer for tensorboard (one log directory per machine type)
    tb_log_dir = TB_DIR + '/' + machine_type
    os.makedirs(tb_log_dir, exist_ok=True)
    writer = SummaryWriter(log_dir=tb_log_dir)

    # Everything that can fail or be interrupted while the writer is open runs
    # inside try/finally so the writer is always flushed and closed.
    try:
        logger.info('TRAINING')
        # parameter setting
        preprocessing = config['preprocessing']
        fit = config['fit']
        net = Model(sample_rate=preprocessing['sample_rate'],
                    window_size=preprocessing['window_size'],
                    hop_size=preprocessing['hop_size'],
                    mel_bins=preprocessing['mel_bins'],
                    fmin=preprocessing['fmin'],
                    fmax=preprocessing['fmax'],
                    latent_size=fit['latent_size'],
                    mixture_size=fit['mixture_size'])
        # Optional warm start from a pretrained model (currently disabled):
        #pretrained_dict = torch.load(config['IO_OPTION']['PREMODEL_PATH'])
        #net = filtered_load_model(net, pretrained_dict)
        max_lr = 1e-3
        optimizer = optim.Adam(net.parameters(), lr=max_lr)
        num_epochs = fit['num_epochs']
        # the scheduler is stepped per batch, hence steps_per_epoch = number of train batches
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e2,
                                                  max_lr=max_lr, epochs=num_epochs,
                                                  steps_per_epoch=len(dataloaders_dict['train']))
        criterion = nn.MSELoss()
        history = modeler.train_net(net, dataloaders_dict, criterion, optimizer, scheduler, num_epochs, writer)

        # output: trained weights and fitted GMM parameters
        model = history['model']
        gmm_param = history['gmm_param']
        model_out_path = MODEL_DIR + '/{}_model.pth'.format(machine_type)
        gmm_param_path = MODEL_DIR + '/{}_gmm_param.pkl'.format(machine_type)
        torch.save(model.state_dict(), model_out_path)
        logger.info('\n success:{0} \n'.format(machine_type) +
                    'model_out_path ==> \n {0}'.format(model_out_path))
        pd.to_pickle(gmm_param, gmm_param_path)
    finally:
        # close writer for tensorboard, also on failure or KeyboardInterrupt
        writer.close()
    #modeler.mlflow_log(history, config, machine_type, model_out_path, tb_log_dir)
    com.toc()
    return history

In [10]:
# Display the machine types found under dev_path; each of them is trained below.
machine_types

['fan', 'pump', 'slider', 'ToyCar', 'ToyConveyor', 'valve']

In [None]:
# Train one model per machine type, in the order listed in `machine_types`.
# `history` is overwritten on every iteration, so only the result of the last
# machine type that finished remains in memory afterwards.
for machine_type in machine_types:
    history = run(machine_type)
    # Persisting each history to PKL_DIR is currently disabled:
    #with open('{}/{}_history.pkl'.format(PKL_DIR, machine_type), 'wb') as file:
    #    pickle.dump(history , file)

2020-11-30 22:47:50,240 - 00_train.py - INFO - TARGET MACHINE_TYPE: fan
2020-11-30 22:47:50,241 - 00_train.py - INFO - MAKE DATA_LOADER
2020-11-30 22:47:51,065 - 00_train.py - INFO - TRAINING


use: cuda:0


100%|██████████| 51/51 [00:24<00:00,  2.08it/s]
100%|██████████| 15/15 [00:02<00:00,  6.08it/s]
2020-11-30 22:48:20,493 - pytorch_modeler.py - INFO - Epoch 1/300:train_loss:48331.083984, tr_rec:48285.225403, tr_eng:409.879065, tr_covd:974.148632, val_AUC:0.541989, val_pAUC:0.502851
100%|██████████| 51/51 [00:23<00:00,  2.20it/s]
100%|██████████| 15/15 [00:02<00:00,  6.28it/s]
2020-11-30 22:48:46,044 - pytorch_modeler.py - INFO - Epoch 2/300:train_loss:47927.822693, tr_rec:47885.704590, tr_eng:372.163644, tr_covd:980.369707, val_AUC:0.493803, val_pAUC:0.506124
100%|██████████| 51/51 [00:23<00:00,  2.14it/s]
100%|██████████| 15/15 [00:02<00:00,  5.63it/s]
2020-11-30 22:49:12,584 - pytorch_modeler.py - INFO - Epoch 3/300:train_loss:47568.479492, tr_rec:47530.629211, tr_eng:330.238149, tr_covd:965.280481, val_AUC:0.477022, val_pAUC:0.503939
100%|██████████| 51/51 [00:22<00:00,  2.25it/s]
100%|██████████| 15/15 [00:02<00:00,  6.30it/s]
2020-11-30 22:49:37,649 - pytorch_modeler.py - INFO - E

100%|██████████| 51/51 [00:22<00:00,  2.32it/s]
100%|██████████| 15/15 [00:02<00:00,  5.76it/s]
2020-11-30 23:00:47,229 - pytorch_modeler.py - INFO - Epoch 30/300:train_loss:391.235312, tr_rec:356.625027, tr_eng:328.668223, tr_covd:348.692388, val_AUC:0.513157, val_pAUC:0.518208
100%|██████████| 51/51 [00:23<00:00,  2.17it/s]
100%|██████████| 15/15 [00:02<00:00,  5.80it/s]
2020-11-30 23:01:13,367 - pytorch_modeler.py - INFO - Epoch 31/300:train_loss:371.765096, tr_rec:337.862457, tr_eng:323.686085, tr_covd:306.806148, val_AUC:0.503179, val_pAUC:0.506734
100%|██████████| 51/51 [00:22<00:00,  2.31it/s]
100%|██████████| 15/15 [00:02<00:00,  6.28it/s]
2020-11-30 23:01:37,853 - pytorch_modeler.py - INFO - Epoch 32/300:train_loss:361.839402, tr_rec:327.999791, tr_eng:324.362128, tr_covd:280.680136, val_AUC:0.516844, val_pAUC:0.515226
100%|██████████| 51/51 [00:23<00:00,  2.17it/s]
100%|██████████| 15/15 [00:02<00:00,  6.00it/s]
2020-11-30 23:02:03,822 - pytorch_modeler.py - INFO - Epoch 33/3

100%|██████████| 15/15 [00:02<00:00,  6.06it/s]
2020-11-30 23:13:03,356 - pytorch_modeler.py - INFO - Epoch 59/300:train_loss:305.575663, tr_rec:280.860437, tr_eng:241.458203, tr_covd:113.881279, val_AUC:0.512638, val_pAUC:0.511681
100%|██████████| 51/51 [00:23<00:00,  2.14it/s]
100%|██████████| 15/15 [00:02<00:00,  5.77it/s]
2020-11-30 23:13:29,773 - pytorch_modeler.py - INFO - Epoch 60/300:train_loss:309.130789, tr_rec:281.248998, tr_eng:273.139749, tr_covd:113.563418, val_AUC:0.541363, val_pAUC:0.514293
100%|██████████| 51/51 [00:22<00:00,  2.30it/s]
100%|██████████| 15/15 [00:02<00:00,  5.82it/s]
2020-11-30 23:13:54,556 - pytorch_modeler.py - INFO - Epoch 61/300:train_loss:307.460260, tr_rec:278.131227, tr_eng:287.650434, tr_covd:112.797658, val_AUC:0.540094, val_pAUC:0.519591
100%|██████████| 51/51 [00:23<00:00,  2.15it/s]
100%|██████████| 15/15 [00:02<00:00,  6.02it/s]
2020-11-30 23:14:20,761 - pytorch_modeler.py - INFO - Epoch 62/300:train_loss:304.689825, tr_rec:276.695837, tr_

100%|██████████| 51/51 [00:23<00:00,  2.15it/s]
100%|██████████| 15/15 [00:02<00:00,  6.07it/s]
2020-11-30 23:25:48,211 - pytorch_modeler.py - INFO - Epoch 89/300:train_loss:279.051301, tr_rec:271.242241, tr_eng:73.830609, tr_covd:85.199879, val_AUC:0.549608, val_pAUC:0.501848
100%|██████████| 51/51 [00:22<00:00,  2.24it/s]
100%|██████████| 15/15 [00:02<00:00,  6.46it/s]
2020-11-30 23:26:13,260 - pytorch_modeler.py - INFO - Epoch 90/300:train_loss:267.234130, tr_rec:271.263478, tr_eng:-44.452740, tr_covd:83.185099, val_AUC:0.494726, val_pAUC:0.500136
100%|██████████| 51/51 [00:23<00:00,  2.14it/s]
100%|██████████| 15/15 [00:02<00:00,  6.01it/s]
2020-11-30 23:26:39,557 - pytorch_modeler.py - INFO - Epoch 91/300:train_loss:269.028625, tr_rec:267.061914, tr_eng:15.461268, tr_covd:84.117305, val_AUC:0.522848, val_pAUC:0.506527
100%|██████████| 51/51 [00:22<00:00,  2.28it/s]
100%|██████████| 15/15 [00:02<00:00,  6.11it/s]
2020-11-30 23:27:04,346 - pytorch_modeler.py - INFO - Epoch 92/300:tr

100%|██████████| 15/15 [00:02<00:00,  5.59it/s]
2020-11-30 23:38:28,374 - pytorch_modeler.py - INFO - Epoch 118/300:train_loss:245.749051, tr_rec:264.538545, tr_eng:-192.215759, tr_covd:86.416687, val_AUC:0.483303, val_pAUC:0.499658
100%|██████████| 51/51 [00:23<00:00,  2.15it/s]
100%|██████████| 15/15 [00:01<00:00,  7.74it/s]
2020-11-30 23:38:53,993 - pytorch_modeler.py - INFO - Epoch 119/300:train_loss:173.558890, tr_rec:269.600121, tr_eng:-968.270863, tr_covd:157.170767, val_AUC:0.514800, val_pAUC:0.498481
100%|██████████| 51/51 [00:24<00:00,  2.11it/s]
100%|██████████| 15/15 [00:02<00:00,  5.73it/s]
2020-11-30 23:39:20,774 - pytorch_modeler.py - INFO - Epoch 120/300:train_loss:245.243191, tr_rec:264.603825, tr_eng:-197.987487, tr_covd:87.622646, val_AUC:0.421064, val_pAUC:0.494720
100%|██████████| 51/51 [00:23<00:00,  2.18it/s]
100%|██████████| 15/15 [00:02<00:00,  6.09it/s]
2020-11-30 23:39:46,640 - pytorch_modeler.py - INFO - Epoch 121/300:train_loss:234.658771, tr_rec:262.719180

100%|██████████| 15/15 [00:02<00:00,  5.31it/s]
2020-11-30 23:51:19,043 - pytorch_modeler.py - INFO - Epoch 147/300:train_loss:232.597835, tr_rec:261.803925, tr_eng:-296.286827, tr_covd:84.518717, val_AUC:0.541970, val_pAUC:0.511437
100%|██████████| 51/51 [00:25<00:00,  2.00it/s]
100%|██████████| 15/15 [00:02<00:00,  6.30it/s]
2020-11-30 23:51:46,960 - pytorch_modeler.py - INFO - Epoch 148/300:train_loss:253.488516, tr_rec:261.850718, tr_eng:-88.094663, tr_covd:89.453040, val_AUC:0.512502, val_pAUC:0.509069
100%|██████████| 51/51 [00:25<00:00,  2.04it/s]
100%|██████████| 15/15 [00:02<00:00,  5.20it/s]
2020-11-30 23:52:14,915 - pytorch_modeler.py - INFO - Epoch 149/300:train_loss:257.797645, tr_rec:260.742393, tr_eng:-33.703920, tr_covd:85.128446, val_AUC:0.518746, val_pAUC:0.506602
100%|██████████| 51/51 [00:25<00:00,  1.99it/s]
100%|██████████| 15/15 [00:02<00:00,  6.16it/s]
2020-11-30 23:52:43,045 - pytorch_modeler.py - INFO - Epoch 150/300:train_loss:266.476957, tr_rec:259.424194, t

100%|██████████| 15/15 [00:02<00:00,  5.68it/s]
2020-12-01 00:04:05,875 - pytorch_modeler.py - INFO - Epoch 176/300:train_loss:240.829711, tr_rec:254.580721, tr_eng:-141.590633, tr_covd:81.611190, val_AUC:0.519892, val_pAUC:0.503840
100%|██████████| 51/51 [00:23<00:00,  2.13it/s]
100%|██████████| 15/15 [00:02<00:00,  6.22it/s]
2020-12-01 00:04:32,249 - pytorch_modeler.py - INFO - Epoch 177/300:train_loss:236.933812, tr_rec:254.448402, tr_eng:-179.358906, tr_covd:84.260093, val_AUC:0.501645, val_pAUC:0.496605
100%|██████████| 51/51 [00:23<00:00,  2.15it/s]
100%|██████████| 15/15 [00:02<00:00,  5.47it/s]
2020-12-01 00:04:58,694 - pytorch_modeler.py - INFO - Epoch 178/300:train_loss:230.764255, tr_rec:254.656656, tr_eng:-242.996687, tr_covd:81.453277, val_AUC:0.492877, val_pAUC:0.498162
100%|██████████| 51/51 [00:24<00:00,  2.12it/s]
100%|██████████| 15/15 [00:02<00:00,  7.49it/s]
2020-12-01 00:05:24,742 - pytorch_modeler.py - INFO - Epoch 179/300:train_loss:226.455592, tr_rec:255.193788,

100%|██████████| 51/51 [00:31<00:00,  1.62it/s]
100%|██████████| 15/15 [00:03<00:00,  4.59it/s]
2020-12-01 00:18:57,100 - pytorch_modeler.py - INFO - Epoch 206/300:train_loss:207.468723, tr_rec:250.599124, tr_eng:-435.999442, tr_covd:93.908819, val_AUC:0.517082, val_pAUC:0.497698
100%|██████████| 51/51 [00:30<00:00,  1.66it/s]
100%|██████████| 15/15 [00:03<00:00,  4.22it/s]
2020-12-01 00:19:31,314 - pytorch_modeler.py - INFO - Epoch 207/300:train_loss:212.909106, tr_rec:250.180286, tr_eng:-377.516895, tr_covd:96.101715, val_AUC:0.498097, val_pAUC:0.501313
100%|██████████| 51/51 [00:31<00:00,  1.64it/s]
100%|██████████| 15/15 [00:03<00:00,  4.79it/s]
2020-12-01 00:20:05,488 - pytorch_modeler.py - INFO - Epoch 208/300:train_loss:231.808835, tr_rec:249.757235, tr_eng:-183.605268, tr_covd:82.425475, val_AUC:0.497894, val_pAUC:0.495001
100%|██████████| 51/51 [00:30<00:00,  1.68it/s]
100%|██████████| 15/15 [00:03<00:00,  4.67it/s]
2020-12-01 00:20:39,123 - pytorch_modeler.py - INFO - Epoch 2

 92%|█████████▏| 47/51 [00:16<00:01,  2.59it/s]

# Re-run training for a single machine type and persist its history.
# The pickle file name is built from the same value that is passed to run().
# Using the leftover loop variable `machine_type` here would store the ToyCar
# history under the name of whichever machine type the last loop stopped on.
target_machine_type = 'ToyCar'
history = run(target_machine_type)
with open('{}/{}_history.pkl'.format(PKL_DIR, target_machine_type), 'wb') as file:
    pickle.dump(history, file)