# train

In [1]:
import yaml
import os

with open("./config.yaml", 'rb') as f:
    config = yaml.load(f)

  """


In [2]:
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
os.makedirs(OUTPUT_ROOT, exist_ok=True)

## load library

In [3]:
# python default library
import os
import shutil
import datetime
import sys
import pickle

# general analysis tool-kit
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt

# pytorch
import torch
from torch import optim, nn
from torch.utils.tensorboard import SummaryWriter

from torchsummary import summary

# etc
import yaml
yaml.warnings({'YAMLLoadWarning': False})
import mlflow
from collections import defaultdict
from scipy.stats import zscore

# original library

import common as com
import pytorch_modeler as modeler
from pytorch_model import DAGMM as Model
from pytorch_utils import filtered_load_model
import models

import librosa
import IPython
import librosa.display

## load config and set logger

In [4]:
#with open("./config.yaml", 'rb') as f:
#    config = yaml.load(f)

log_folder = config['IO_OPTION']['OUTPUT_ROOT']+'/{0}.log'.format(datetime.date.today())
logger = com.setup_logger(log_folder, '00_train.py')

  and should_run_async(code)


## Setting

In [5]:
# Setting seed
modeler.set_seed(42)

In [6]:
############################################################################
# Setting I/O path
############################################################################
# input dirs
INPUT_ROOT = config['IO_OPTION']['INPUT_ROOT']
dev_path = INPUT_ROOT + "/dev_data"
add_dev_path = INPUT_ROOT + "/add_dev_data"
# machine type
MACHINE_TYPE = config['IO_OPTION']['MACHINE_TYPE']
machine_types = os.listdir(dev_path)
# output dirs
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
MODEL_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/models'
TB_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/tb'
PKL_DIR = OUTPUT_ROOT + '/pkl'
#os.makedirs(OUTPUT_ROOT, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(TB_DIR, exist_ok=True)
os.makedirs(PKL_DIR, exist_ok=True)
# copy config
shutil.copy('./config.yaml', OUTPUT_ROOT)

'/media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent16_mixture3_batch_norm/config.yaml'

## make path list and train/valid split

In [7]:
############################################################################
# make path set and train/valid split
############################################################################
'''
train_paths[machine_type]['train' or 'valid'] = path
'''
dev_train_paths = {}
add_train_paths = {}
train_paths = {}

In [8]:
for machine_type in machine_types:
    # dev train
    dev_train_paths = ["{}/{}/train/".format(dev_path, machine_type) + file for file in os.listdir("{}/{}/train".format(dev_path, machine_type))]
    dev_train_paths = sorted(dev_train_paths)
    # add_dev train
    add_train_paths = ["{}/{}/train/".format(add_dev_path, machine_type) + file for file in os.listdir("{}/{}/train".format(add_dev_path, machine_type))]
    add_train_paths = sorted(add_train_paths)
    # valid
    dev_valid_paths = ["{}/{}/test/".format(dev_path, machine_type) + file for file in os.listdir("{}/{}/test".format(dev_path, machine_type))]
    dev_valid_paths = sorted(dev_valid_paths)
    
    train_paths[machine_type] = {}
    train_paths[machine_type]['train'] = dev_train_paths + add_train_paths
    train_paths[machine_type]['valid'] = dev_valid_paths

## training

In [9]:
#############################################################################
# run
#############################################################################
def run(machine_type):
    com.tic()
    logger.info('TARGET MACHINE_TYPE: {0}'.format(machine_type))
    logger.info('MAKE DATA_LOADER')
    # dev_train_paths
    dataloaders_dict = modeler.make_dataloader(train_paths, machine_type)
    # define writer for tensorbord
    os.makedirs(TB_DIR+'/'+machine_type, exist_ok=True)         # debug
    tb_log_dir = TB_DIR + '/' + machine_type
    writer = SummaryWriter(log_dir = tb_log_dir)
    logger.info('TRAINING')
    # parameter setting
    net = Model(sample_rate=config['preprocessing']['sample_rate'],
                window_size=config['preprocessing']['window_size'],
                hop_size=config['preprocessing']['hop_size'],
                mel_bins=config['preprocessing']['mel_bins'],
                fmin=config['preprocessing']['fmin'],
                fmax=config['preprocessing']['fmax'],
                latent_size=config['fit']['latent_size'],
                mixture_size=config['fit']['mixture_size'])
    #pretrained_dict = torch.load(config['IO_OPTION']['PREMODEL_PATH'])
    #net = filtered_load_model(net, pretrained_dict)
    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    num_epochs = config['fit']['num_epochs']
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e2, 
                                              max_lr=1e-3, epochs=num_epochs, steps_per_epoch=len(dataloaders_dict['train']))
    criterion = nn.MSELoss()
    try:
        history = modeler.train_net(net, dataloaders_dict, criterion, optimizer, scheduler, num_epochs, writer)
        # output
        model = history['model']
        gmm_param = history['gmm_param']
        model_out_path = MODEL_DIR+'/{}_model.pth'.format(machine_type)
        gmm_param_path = MODEL_DIR+'/{}_gmm_param.pkl'.format(machine_type)
        torch.save(model.state_dict(), model_out_path)
        logger.info('\n success:{0} \n'.format(machine_type) + \
                        'model_out_path ==> \n {0}'.format(model_out_path))
        pd.to_pickle(gmm_param, gmm_param_path)
        #  close writer for tensorbord
        writer.close()
        #modeler.mlflow_log(history, config, machine_type, model_out_path, tb_log_dir)
        com.toc()
    except:
        print("error")
        history = "error"
    return history

In [10]:
machine_types

['fan', 'pump', 'slider', 'ToyCar', 'ToyConveyor', 'valve']

In [11]:
for machine_type in machine_types:
    history = run(machine_type)
    #with open('{}/{}_history.pkl'.format(PKL_DIR, machine_type), 'wb') as file:
    #    pickle.dump(history , file)

2020-11-30 22:44:18,964 - 00_train.py - INFO - TARGET MACHINE_TYPE: fan
2020-11-30 22:44:18,965 - 00_train.py - INFO - MAKE DATA_LOADER
2020-11-30 22:44:19,788 - 00_train.py - INFO - TRAINING
  0%|          | 0/51 [00:00<?, ?it/s]

use: cuda:0


100%|██████████| 51/51 [00:29<00:00,  1.75it/s]
100%|██████████| 15/15 [00:01<00:00,  7.86it/s]
2020-11-30 22:44:51,719 - pytorch_modeler.py - INFO - Epoch 1/300:train_loss:48348.507996, tr_rec:48249.741394, tr_eng:792.180138, tr_covd:3909.672585, val_AUC:0.502790, val_pAUC:0.498988
100%|██████████| 51/51 [00:28<00:00,  1.79it/s]
100%|██████████| 15/15 [00:01<00:00,  8.02it/s]
2020-11-30 22:45:22,161 - pytorch_modeler.py - INFO - Epoch 2/300:train_loss:47972.970398, tr_rec:47906.884583, tr_eng:468.113750, tr_covd:3854.878761, val_AUC:0.487741, val_pAUC:0.504844
100%|██████████| 51/51 [00:28<00:00,  1.79it/s]
100%|██████████| 15/15 [00:01<00:00,  8.32it/s]
2020-11-30 22:45:52,463 - pytorch_modeler.py - INFO - Epoch 3/300:train_loss:47498.253662, tr_rec:47448.653992, tr_eng:300.516018, tr_covd:3909.549820, val_AUC:0.476325, val_pAUC:0.503465
100%|██████████| 51/51 [00:28<00:00,  1.81it/s]
100%|██████████| 15/15 [00:01<00:00,  8.24it/s]
2020-11-30 22:46:22,523 - pytorch_modeler.py - INFO 

100%|██████████| 51/51 [00:46<00:00,  1.10it/s]
100%|██████████| 15/15 [00:02<00:00,  5.02it/s]
2020-11-30 23:07:09,779 - pytorch_modeler.py - INFO - Epoch 30/300:train_loss:2763.500822, tr_rec:889.000427, tr_eng:-3886.432796, tr_covd:452628.747597, val_AUC:0.506736, val_pAUC:0.496399
100%|██████████| 51/51 [00:46<00:00,  1.09it/s]
100%|██████████| 15/15 [00:02<00:00,  5.06it/s]
2020-11-30 23:07:59,637 - pytorch_modeler.py - INFO - Epoch 31/300:train_loss:1010.907263, tr_rec:1110.073347, tr_eng:-1224.199682, tr_covd:4650.776619, val_AUC:0.500000, val_pAUC:0.500000
100%|██████████| 51/51 [00:46<00:00,  1.09it/s]
100%|██████████| 15/15 [00:02<00:00,  5.63it/s]
2020-11-30 23:08:49,157 - pytorch_modeler.py - INFO - Epoch 32/300:train_loss:732.529280, tr_rec:826.417213, tr_eng:-1104.803148, tr_covd:3318.477993, val_AUC:0.473866, val_pAUC:0.505614
100%|██████████| 51/51 [00:47<00:00,  1.08it/s]
100%|██████████| 15/15 [00:02<00:00,  5.07it/s]
2020-11-30 23:09:39,211 - pytorch_modeler.py - INF

100%|██████████| 51/51 [00:47<00:00,  1.07it/s]
100%|██████████| 15/15 [00:03<00:00,  4.34it/s]
2020-11-30 23:31:14,701 - pytorch_modeler.py - INFO - Epoch 59/300:train_loss:651.082371, tr_rec:878.380129, tr_eng:-2419.911507, tr_covd:2938.679535, val_AUC:0.543517, val_pAUC:0.510701
100%|██████████| 51/51 [00:47<00:00,  1.07it/s]
100%|██████████| 15/15 [00:03<00:00,  4.31it/s]
2020-11-30 23:32:05,759 - pytorch_modeler.py - INFO - Epoch 60/300:train_loss:727.876824, tr_rec:888.972192, tr_eng:-1886.283274, tr_covd:5506.592438, val_AUC:0.503449, val_pAUC:0.504425
100%|██████████| 51/51 [00:47<00:00,  1.07it/s]
100%|██████████| 15/15 [00:03<00:00,  4.26it/s]
2020-11-30 23:32:56,819 - pytorch_modeler.py - INFO - Epoch 61/300:train_loss:788.478131, tr_rec:879.993805, tr_eng:-1076.336901, tr_covd:3223.603008, val_AUC:0.498937, val_pAUC:0.499934
100%|██████████| 51/51 [00:47<00:00,  1.07it/s]
100%|██████████| 15/15 [00:03<00:00,  4.45it/s]
2020-11-30 23:33:47,989 - pytorch_modeler.py - INFO - E

100%|██████████| 51/51 [00:47<00:00,  1.07it/s]
100%|██████████| 15/15 [00:03<00:00,  4.37it/s]
2020-11-30 23:56:09,951 - pytorch_modeler.py - INFO - Epoch 88/300:train_loss:832.699683, tr_rec:964.657061, tr_eng:-1469.557629, tr_covd:2999.678833, val_AUC:0.482887, val_pAUC:0.507292
100%|██████████| 51/51 [00:47<00:00,  1.08it/s]
100%|██████████| 15/15 [00:03<00:00,  4.36it/s]
2020-11-30 23:57:00,561 - pytorch_modeler.py - INFO - Epoch 89/300:train_loss:885.500829, tr_rec:1030.038422, tr_eng:-1600.442877, tr_covd:3101.338493, val_AUC:0.448319, val_pAUC:0.495545
100%|██████████| 51/51 [00:47<00:00,  1.07it/s]
100%|██████████| 15/15 [00:03<00:00,  4.31it/s]
2020-11-30 23:57:51,831 - pytorch_modeler.py - INFO - Epoch 90/300:train_loss:660.714517, tr_rec:879.627318, tr_eng:-2335.042990, tr_covd:2918.301605, val_AUC:0.536184, val_pAUC:0.506626
100%|██████████| 51/51 [00:46<00:00,  1.10it/s]
100%|██████████| 15/15 [00:03<00:00,  4.45it/s]
2020-11-30 23:58:41,728 - pytorch_modeler.py - INFO - 

100%|██████████| 51/51 [01:01<00:00,  1.21s/it]
100%|██████████| 15/15 [00:04<00:00,  3.40it/s]
2020-12-01 00:23:26,915 - pytorch_modeler.py - INFO - Epoch 117/300:train_loss:694.527670, tr_rec:799.609602, tr_eng:-1291.155733, tr_covd:4806.729126, val_AUC:0.502509, val_pAUC:0.500134
100%|██████████| 51/51 [01:01<00:00,  1.21s/it]
100%|██████████| 15/15 [00:04<00:00,  3.18it/s]
2020-12-01 00:24:33,524 - pytorch_modeler.py - INFO - Epoch 118/300:train_loss:702.803679, tr_rec:824.136360, tr_eng:-1454.069984, tr_covd:4814.863716, val_AUC:0.464232, val_pAUC:0.496605
100%|██████████| 51/51 [01:01<00:00,  1.20s/it]
100%|██████████| 15/15 [00:04<00:00,  3.21it/s]
2020-12-01 00:25:39,496 - pytorch_modeler.py - INFO - Epoch 119/300:train_loss:578.365638, tr_rec:846.103114, tr_eng:-3195.204344, tr_covd:10356.591736, val_AUC:0.458248, val_pAUC:0.503306
100%|██████████| 51/51 [01:01<00:00,  1.20s/it]
100%|██████████| 15/15 [00:04<00:00,  3.27it/s]
2020-12-01 00:26:45,115 - pytorch_modeler.py - INFO

error
use: cuda:0


  0%|          | 0/46 [00:01<?, ?it/s]
2020-12-01 00:27:03,656 - 00_train.py - INFO - TARGET MACHINE_TYPE: slider
2020-12-01 00:27:03,656 - 00_train.py - INFO - MAKE DATA_LOADER
2020-12-01 00:27:03,659 - 00_train.py - INFO - TRAINING


error


KeyboardInterrupt: 

history = run('ToyCar')
with open('{}/{}_history.pkl'.format(PKL_DIR, machine_type), 'wb') as file:
    pickle.dump(history , file)