# train

In [1]:
import yaml
import os

with open("./config.yaml", 'rb') as f:
    config = yaml.load(f)

In [2]:
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
os.makedirs(OUTPUT_ROOT, exist_ok=True)

## load library

In [3]:
# python default library
import os
import shutil
import datetime
import sys
import pickle

# general analysis tool-kit
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt

# pytorch
import torch
from torch import optim, nn
from torch.utils.tensorboard import SummaryWriter

from torchsummary import summary

# etc
import yaml
yaml.warnings({'YAMLLoadWarning': False})
import mlflow
from collections import defaultdict
from scipy.stats import zscore

# original library

import common as com
import pytorch_modeler as modeler
from pytorch_model import DAGMM as Model
from pytorch_utils import filtered_load_model
import models

import librosa
import IPython
import librosa.display

## load config and set logger

In [4]:
#with open("./config.yaml", 'rb') as f:
#    config = yaml.load(f)

log_folder = config['IO_OPTION']['OUTPUT_ROOT']+'/{0}.log'.format(datetime.date.today())
logger = com.setup_logger(log_folder, '00_train.py')

  and should_run_async(code)


## Setting

In [5]:
# Setting seed
modeler.set_seed(42)

In [6]:
############################################################################
# Setting I/O path
############################################################################
# input dirs
INPUT_ROOT = config['IO_OPTION']['INPUT_ROOT']
dev_path = INPUT_ROOT + "/dev_data"
add_dev_path = INPUT_ROOT + "/add_dev_data"
# machine type
MACHINE_TYPE = config['IO_OPTION']['MACHINE_TYPE']
machine_types = os.listdir(dev_path)
# output dirs
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
MODEL_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/models'
TB_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/tb'
PKL_DIR = OUTPUT_ROOT + '/pkl'
#os.makedirs(OUTPUT_ROOT, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(TB_DIR, exist_ok=True)
os.makedirs(PKL_DIR, exist_ok=True)
# copy config
shutil.copy('./config.yaml', OUTPUT_ROOT)

'/media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver4/config.yaml'

## make path list and train/valid split

In [7]:
############################################################################
# make path set and train/valid split
############################################################################
'''
train_paths[machine_type]['train' or 'valid'] = path
'''
dev_train_paths = {}
add_train_paths = {}
train_paths = {}

In [8]:
for machine_type in machine_types:
    # dev train
    dev_train_paths = ["{}/{}/train/".format(dev_path, machine_type) + file for file in os.listdir("{}/{}/train".format(dev_path, machine_type))]
    dev_train_paths = sorted(dev_train_paths)
    # add_dev train
    add_train_paths = ["{}/{}/train/".format(add_dev_path, machine_type) + file for file in os.listdir("{}/{}/train".format(add_dev_path, machine_type))]
    add_train_paths = sorted(add_train_paths)
    # valid
    dev_valid_paths = ["{}/{}/test/".format(dev_path, machine_type) + file for file in os.listdir("{}/{}/test".format(dev_path, machine_type))]
    dev_valid_paths = sorted(dev_valid_paths)
    
    train_paths[machine_type] = {}
    train_paths[machine_type]['train'] = dev_train_paths + add_train_paths
    train_paths[machine_type]['valid'] = dev_valid_paths

## training

In [9]:
#############################################################################
# run
#############################################################################
def run(machine_type):
    com.tic()
    logger.info('TARGET MACHINE_TYPE: {0}'.format(machine_type))
    logger.info('MAKE DATA_LOADER')
    # dev_train_paths
    dataloaders_dict = modeler.make_dataloader(train_paths, machine_type)
    # define writer for tensorbord
    os.makedirs(TB_DIR+'/'+machine_type, exist_ok=True)         # debug
    tb_log_dir = TB_DIR + '/' + machine_type
    writer = SummaryWriter(log_dir = tb_log_dir)
    logger.info('TRAINING')
    # parameter setting
    net = Model(sample_rate=config['preprocessing']['sample_rate'],
                window_size=config['preprocessing']['window_size'],
                hop_size=config['preprocessing']['hop_size'],
                mel_bins=config['preprocessing']['mel_bins'],
                fmin=config['preprocessing']['fmin'],
                fmax=config['preprocessing']['fmax'],
                latent_size=config['fit']['latent_size'],
                mixture_size=config['fit']['mixture_size'])
    #pretrained_dict = torch.load(config['IO_OPTION']['PREMODEL_PATH'])
    #net = filtered_load_model(net, pretrained_dict)
    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    num_epochs = config['fit']['num_epochs']
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e2, 
                                              max_lr=1e-3, epochs=num_epochs, steps_per_epoch=len(dataloaders_dict['train']))
    criterion = nn.MSELoss()
    #try:
    history = modeler.train_net(net, dataloaders_dict, criterion, optimizer, scheduler, num_epochs, writer)
    # output
    model = history['model']
    gmm_param = history['gmm_param']
    model_out_path = MODEL_DIR+'/{}_model.pth'.format(machine_type)
    gmm_param_path = MODEL_DIR+'/{}_gmm_param.pkl'.format(machine_type)
    torch.save(model.state_dict(), model_out_path)
    logger.info('\n success:{0} \n'.format(machine_type) + \
                    'model_out_path ==> \n {0}'.format(model_out_path))
    pd.to_pickle(gmm_param, gmm_param_path)
    #  close writer for tensorbord
    writer.close()
    #modeler.mlflow_log(history, config, machine_type, model_out_path, tb_log_dir)
    com.toc()
    #except:
    #    print("error")
    #    history = "error"
    return history

In [10]:
machine_types

['fan', 'pump', 'slider', 'ToyCar', 'ToyConveyor', 'valve']

In [11]:
for machine_type in machine_types:
    history = run(machine_type)
    #with open('{}/{}_history.pkl'.format(PKL_DIR, machine_type), 'wb') as file:
    #    pickle.dump(history , file)

2020-12-01 16:14:13,267 - 00_train.py - INFO - TARGET MACHINE_TYPE: fan
2020-12-01 16:14:13,268 - 00_train.py - INFO - MAKE DATA_LOADER
2020-12-01 16:14:14,167 - 00_train.py - INFO - TRAINING


use: cuda:0


100%|██████████| 51/51 [00:53<00:00,  1.05s/it]
100%|██████████| 15/15 [00:03<00:00,  4.36it/s]
2020-12-01 16:15:12,518 - pytorch_modeler.py - INFO - Epoch 1/300:train_loss:47927.609192, tr_rec:47924.319763, tr_eng:2.958569, tr_covd:29934.387573, val_AUC:0.520980, val_pAUC:0.512351
100%|██████████| 51/51 [00:53<00:00,  1.05s/it]
100%|██████████| 15/15 [00:03<00:00,  4.45it/s]
2020-12-01 16:16:09,324 - pytorch_modeler.py - INFO - Epoch 2/300:train_loss:47592.411865, tr_rec:47591.816589, tr_eng:-18.192228, tr_covd:24146.577850, val_AUC:0.452042, val_pAUC:0.497252
100%|██████████| 51/51 [00:53<00:00,  1.05s/it]
100%|██████████| 15/15 [00:03<00:00,  4.45it/s]
2020-12-01 16:17:06,505 - pytorch_modeler.py - INFO - Epoch 3/300:train_loss:47224.187561, tr_rec:47227.175354, tr_eng:-48.612312, tr_covd:18734.677673, val_AUC:0.453156, val_pAUC:0.500966
100%|██████████| 51/51 [00:52<00:00,  1.03s/it]
100%|██████████| 15/15 [00:03<00:00,  4.48it/s]
2020-12-01 16:18:02,480 - pytorch_modeler.py - INFO

100%|██████████| 51/51 [00:52<00:00,  1.04s/it]
100%|██████████| 15/15 [00:03<00:00,  4.33it/s]
2020-12-01 16:42:37,515 - pytorch_modeler.py - INFO - Epoch 30/300:train_loss:487.117868, tr_rec:493.460747, tr_eng:-77.179567, tr_covd:13750.816360, val_AUC:0.514095, val_pAUC:0.502898
100%|██████████| 51/51 [00:51<00:00,  1.00s/it]
100%|██████████| 15/15 [00:03<00:00,  4.36it/s]
2020-12-01 16:43:32,089 - pytorch_modeler.py - INFO - Epoch 31/300:train_loss:470.709682, tr_rec:476.539901, tr_eng:-71.538818, tr_covd:13236.624863, val_AUC:0.516605, val_pAUC:0.507634
100%|██████████| 51/51 [00:52<00:00,  1.03s/it]
100%|██████████| 15/15 [00:02<00:00,  5.41it/s]
2020-12-01 16:44:27,308 - pytorch_modeler.py - INFO - Epoch 32/300:train_loss:452.932671, tr_rec:459.167435, tr_eng:-75.644197, tr_covd:13296.610580, val_AUC:0.476113, val_pAUC:0.510110
100%|██████████| 51/51 [00:53<00:00,  1.04s/it]
100%|██████████| 15/15 [00:03<00:00,  4.31it/s]
2020-12-01 16:45:23,962 - pytorch_modeler.py - INFO - Epoc

100%|██████████| 51/51 [00:52<00:00,  1.03s/it]
100%|██████████| 15/15 [00:03<00:00,  4.52it/s]
2020-12-01 17:09:30,718 - pytorch_modeler.py - INFO - Epoch 59/300:train_loss:359.306945, tr_rec:366.765992, tr_eng:-90.240778, tr_covd:15650.295410, val_AUC:0.478238, val_pAUC:0.496971
100%|██████████| 51/51 [00:52<00:00,  1.02s/it]
100%|██████████| 15/15 [00:02<00:00,  5.29it/s]
2020-12-01 17:10:25,780 - pytorch_modeler.py - INFO - Epoch 60/300:train_loss:357.020947, tr_rec:364.435560, tr_eng:-89.669683, tr_covd:15523.537415, val_AUC:0.459953, val_pAUC:0.498659
100%|██████████| 51/51 [00:52<00:00,  1.02s/it]
100%|██████████| 15/15 [00:03<00:00,  4.51it/s]
2020-12-01 17:11:21,247 - pytorch_modeler.py - INFO - Epoch 61/300:train_loss:357.374071, tr_rec:365.462419, tr_eng:-96.738479, tr_covd:15855.040070, val_AUC:0.455399, val_pAUC:0.502129
100%|██████████| 51/51 [00:52<00:00,  1.03s/it]
100%|██████████| 15/15 [00:03<00:00,  4.52it/s]
2020-12-01 17:12:16,951 - pytorch_modeler.py - INFO - Epoc

100%|██████████| 51/51 [00:52<00:00,  1.03s/it]
100%|██████████| 15/15 [00:02<00:00,  5.57it/s]
2020-12-01 17:36:19,394 - pytorch_modeler.py - INFO - Epoch 88/300:train_loss:336.155032, tr_rec:346.727362, tr_eng:-122.616876, tr_covd:16893.585938, val_AUC:0.462145, val_pAUC:0.498715
100%|██████████| 51/51 [00:54<00:00,  1.08s/it]
100%|██████████| 15/15 [00:03<00:00,  4.22it/s]
2020-12-01 17:37:17,867 - pytorch_modeler.py - INFO - Epoch 89/300:train_loss:333.814012, tr_rec:344.461517, tr_eng:-123.235670, tr_covd:16760.628693, val_AUC:0.460374, val_pAUC:0.500506
100%|██████████| 51/51 [00:52<00:00,  1.03s/it]
100%|██████████| 15/15 [00:03<00:00,  4.36it/s]
2020-12-01 17:38:13,675 - pytorch_modeler.py - INFO - Epoch 90/300:train_loss:333.141224, tr_rec:343.952915, tr_eng:-125.325462, tr_covd:17208.552094, val_AUC:0.463831, val_pAUC:0.500703
100%|██████████| 51/51 [00:52<00:00,  1.04s/it]
100%|██████████| 15/15 [00:03<00:00,  4.31it/s]
2020-12-01 17:39:10,055 - pytorch_modeler.py - INFO - E

100%|██████████| 51/51 [00:52<00:00,  1.03s/it]
100%|██████████| 15/15 [00:03<00:00,  4.52it/s]
2020-12-01 18:03:28,621 - pytorch_modeler.py - INFO - Epoch 117/300:train_loss:319.801093, tr_rec:333.801530, tr_eng:-158.231490, tr_covd:18227.095337, val_AUC:0.471341, val_pAUC:0.502748
100%|██████████| 51/51 [00:52<00:00,  1.03s/it]
100%|██████████| 15/15 [00:03<00:00,  4.53it/s]
2020-12-01 18:04:24,353 - pytorch_modeler.py - INFO - Epoch 118/300:train_loss:319.458510, tr_rec:334.008713, tr_eng:-163.790368, tr_covd:18288.342896, val_AUC:0.464145, val_pAUC:0.501200
100%|██████████| 51/51 [00:52<00:00,  1.03s/it]
100%|██████████| 15/15 [00:03<00:00,  4.52it/s]
2020-12-01 18:05:20,003 - pytorch_modeler.py - INFO - Epoch 119/300:train_loss:319.743431, tr_rec:334.365889, tr_eng:-164.479469, tr_covd:18254.882385, val_AUC:0.466624, val_pAUC:0.496915
100%|██████████| 51/51 [00:52<00:00,  1.03s/it]
100%|██████████| 15/15 [00:02<00:00,  5.84it/s]
2020-12-01 18:06:14,864 - pytorch_modeler.py - INFO 

100%|██████████| 51/51 [00:52<00:00,  1.03s/it]
100%|██████████| 15/15 [00:03<00:00,  4.52it/s]
2020-12-01 18:30:22,351 - pytorch_modeler.py - INFO - Epoch 146/300:train_loss:306.205706, tr_rec:325.506629, tr_eng:-213.978303, tr_covd:20969.072968, val_AUC:0.478930, val_pAUC:0.501280
100%|██████████| 51/51 [00:52<00:00,  1.02s/it]
100%|██████████| 15/15 [00:03<00:00,  4.40it/s]
2020-12-01 18:31:18,033 - pytorch_modeler.py - INFO - Epoch 147/300:train_loss:305.682620, tr_rec:324.816695, tr_eng:-212.514040, tr_covd:21173.303223, val_AUC:0.464537, val_pAUC:0.494992
100%|██████████| 51/51 [00:52<00:00,  1.02s/it]
100%|██████████| 15/15 [00:04<00:00,  3.25it/s]
2020-12-01 18:32:14,822 - pytorch_modeler.py - INFO - Epoch 148/300:train_loss:305.876886, tr_rec:324.969002, tr_eng:-211.637608, tr_covd:20716.436401, val_AUC:0.465034, val_pAUC:0.493815
100%|██████████| 51/51 [00:53<00:00,  1.04s/it]
100%|██████████| 15/15 [00:03<00:00,  4.39it/s]
2020-12-01 18:33:11,417 - pytorch_modeler.py - INFO 

KeyboardInterrupt: 

history = run('ToyCar')
with open('{}/{}_history.pkl'.format(PKL_DIR, machine_type), 'wb') as file:
    pickle.dump(history , file)