# train

## load library

In [1]:
# python default library
import os
import shutil
import datetime
import sys
import pickle

# general analysis tool-kit
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# pytorch
import torch
from torch import optim, nn
from torch.utils.tensorboard import SummaryWriter

from torchsummary import summary

# etc
import yaml
yaml.warnings({'YAMLLoadWarning': False})
import mlflow
from collections import defaultdict
from scipy.stats import zscore

# original library

import common as com
import pytorch_modeler as modeler
from pytorch_model import CNN6PANNsVAE as Model
from pytorch_utils import filtered_load_model
import models

import librosa
import IPython
import librosa.display

## load config and set logger

In [2]:
# Parse the run configuration. safe_load names an explicit (safe) loader:
# calling yaml.load() without a Loader is deprecated since PyYAML 5.1 and
# raises TypeError in PyYAML 6.
with open("./config.yaml", 'rb') as f:
    config = yaml.safe_load(f)

# The logger is created before the cell that builds the output tree, and
# whether com.setup_logger creates missing directories is not visible here,
# so make sure the destination exists first.
os.makedirs(config['IO_OPTION']['OUTPUT_ROOT'], exist_ok=True)

# One log file per calendar day, written directly under the output root.
log_folder = config['IO_OPTION']['OUTPUT_ROOT']+'/{0}.log'.format(datetime.date.today())
logger = com.setup_logger(log_folder, '00_train.py')

## Setting

In [3]:
# Setting seed
# Delegates to the project helper pytorch_modeler.set_seed with a fixed value;
# which random number generators it seeds is defined there, not in this
# notebook (presumably all of those used during training — TODO confirm).
modeler.set_seed(42)

In [4]:
# ---------------------------------------------------------------------------
# I/O locations, all taken from config['IO_OPTION']
# ---------------------------------------------------------------------------
io_option = config['IO_OPTION']

# inputs: development data and additional development data
INPUT_ROOT = io_option['INPUT_ROOT']
dev_path = '/'.join((INPUT_ROOT, 'dev_data'))
add_dev_path = '/'.join((INPUT_ROOT, 'add_dev_data'))

# machine types: the configured one, plus every entry found under dev_path
MACHINE_TYPE = io_option['MACHINE_TYPE']
machine_types = os.listdir(dev_path)

# outputs: saved models, TensorBoard logs and pickled histories
OUTPUT_ROOT = io_option['OUTPUT_ROOT']
MODEL_DIR = '/'.join((OUTPUT_ROOT, 'models'))
TB_DIR = '/'.join((OUTPUT_ROOT, 'tb'))
PKL_DIR = '/'.join((OUTPUT_ROOT, 'pkl'))
for out_dir in (PKL_DIR, MODEL_DIR, TB_DIR):
    os.makedirs(out_dir, exist_ok=True)

# keep a copy of the configuration next to the results it produced
shutil.copy('./config.yaml', OUTPUT_ROOT)

'/media/hiroki/working/research/dcase2020/result/2D/CONV_VAE/config.yaml'

## make path list and train/valid split

In [5]:
# ---------------------------------------------------------------------------
# Containers for the per-machine-type file lists used by the train/valid split
#
# Layout once filled:
#   train_paths[machine_type]['train' or 'valid'] = list of file paths
# dev_train_paths and add_train_paths use the same layout for the two
# input directories separately.
# ---------------------------------------------------------------------------
dev_train_paths, add_train_paths, train_paths = {}, {}, {}

In [6]:
def _split_train_dir(data_root, machine_type):
    """Return {'train': [...], 'valid': [...]} for <data_root>/<machine_type>/train.

    The file paths are sorted and then split in order (shuffle=False); the
    size of the validation part is config['etc']['test_size'].
    """
    train_dir = "{}/{}/train".format(data_root, machine_type)
    all_paths = sorted(train_dir + "/" + name for name in os.listdir(train_dir))
    train_part, valid_part = train_test_split(all_paths,
                                              test_size=config['etc']['test_size'],
                                              shuffle=False)
    return {'train': train_part, 'valid': valid_part}


for machine_type in machine_types:
    # split each data source on its own ...
    dev_train_paths[machine_type] = _split_train_dir(dev_path, machine_type)
    add_train_paths[machine_type] = _split_train_dir(add_dev_path, machine_type)
    # ... then merge them per subset: dev files first, additional files after
    train_paths[machine_type] = {
        subset: dev_train_paths[machine_type][subset] + add_train_paths[machine_type][subset]
        for subset in ('train', 'valid')
    }

## training

In [7]:
#############################################################################
# run
#############################################################################
def run(machine_type):
    """Train the model for one machine type and save its weights.

    Builds the data loaders from the module-level ``train_paths``, creates
    ``Model`` from the ``config['preprocessing']`` settings, loads the
    checkpoint at ``config['IO_OPTION']['PREMODEL_PATH']`` through
    ``filtered_load_model``, trains with Adam and an MSE criterion for
    ``config['fit']['num_epochs']`` epochs, and writes the trained
    ``state_dict`` to ``MODEL_DIR/<machine_type>_model.pth``.

    Parameters
    ----------
    machine_type : str
        Key into ``train_paths``; also names the TensorBoard sub-directory
        and the saved model file.

    Returns
    -------
    The object returned by ``modeler.train_net``; its ``'model'`` entry is
    the network whose weights were saved.
    """
    com.tic()
    logger.info('TARGET MACHINE_TYPE: {0}'.format(machine_type))
    logger.info('MAKE DATA_LOADER')
    dataloaders_dict = modeler.make_dataloader(train_paths, machine_type)

    # one TensorBoard log directory per machine type
    tb_log_dir = TB_DIR + '/' + machine_type
    os.makedirs(tb_log_dir, exist_ok=True)
    writer = SummaryWriter(log_dir = tb_log_dir)
    try:
        logger.info('TRAINING')
        # network built from the preprocessing settings in the config
        net = Model(sample_rate=config['preprocessing']['sample_rate'],
                window_size=config['preprocessing']['window_size'],
                hop_size=config['preprocessing']['hop_size'],
                mel_bins=config['preprocessing']['mel_bins'],
                fmin=config['preprocessing']['fmin'],
                fmax=config['preprocessing']['fmax'])
        # start from the pretrained checkpoint; which entries are kept is
        # decided inside filtered_load_model
        pretrained_dict = torch.load(config['IO_OPTION']['PREMODEL_PATH'])
        net = filtered_load_model(net, pretrained_dict)
        # Adam with its default hyper-parameters
        optimizer = optim.Adam(net.parameters())
        criterion = nn.MSELoss()
        num_epochs = config['fit']['num_epochs']
        history = modeler.train_net(net, dataloaders_dict, criterion, optimizer, num_epochs, writer)
        # output
        model = history['model']
        model_out_path = MODEL_DIR+'/{}_model.pth'.format(machine_type)
        torch.save(model.state_dict(), model_out_path)
        logger.info('\n success:{0} \n'.format(machine_type) + \
                        'model_out_path ==> \n {0}'.format(model_out_path))
    finally:
        # close the TensorBoard writer even when training or saving fails,
        # so its event file is not left open
        writer.close()
    #modeler.mlflow_log(history, config, machine_type, model_out_path, tb_log_dir)
    com.toc()
    return history

In [8]:
# Display the machine types found under dev_path (the order is whatever os.listdir returned).
machine_types

['fan', 'pump', 'slider', 'ToyCar', 'ToyConveyor', 'valve']

In [9]:
# Machine types left out of this training run. They are matched by name
# rather than by position in machine_types, because that list comes from
# os.listdir(), whose ordering is arbitrary (and an index lookup fails when
# fewer entries are present).
SKIPPED_MACHINE_TYPES = ('slider',)

for machine_type in machine_types:
    if machine_type in SKIPPED_MACHINE_TYPES:
        continue
    history = run(machine_type)
    # keep what run() returned on disk, one pickle per machine type
    with open('{}/{}_history.pkl'.format(PKL_DIR, machine_type), 'wb') as file:
        pickle.dump(history , file)

2020-10-18 12:18:53,680 - 00_train.py - INFO - TARGET MACHINE_TYPE: fan
2020-10-18 12:18:53,681 - 00_train.py - INFO - MAKE DATA_LOADER
2020-10-18 12:18:54,509 - 00_train.py - INFO - TRAINING
  0%|          | 0/143 [00:00<?, ?it/s]

use: cuda:0


100%|██████████| 143/143 [02:18<00:00,  1.03it/s]
100%|██████████| 62/62 [00:28<00:00,  2.14it/s]
2020-10-18 12:21:43,342 - pytorch_modeler.py - INFO - Epoch 1/60:train_loss:782.486206, valid_loss:635.995516
100%|██████████| 143/143 [01:34<00:00,  1.51it/s]
100%|██████████| 62/62 [00:21<00:00,  2.93it/s]
2020-10-18 12:23:39,257 - pytorch_modeler.py - INFO - Epoch 2/60:train_loss:399.534624, valid_loss:246.547009
100%|██████████| 143/143 [01:34<00:00,  1.51it/s]
100%|██████████| 62/62 [00:21<00:00,  2.86it/s]
2020-10-18 12:25:35,609 - pytorch_modeler.py - INFO - Epoch 3/60:train_loss:120.300530, valid_loss:63.012304
100%|██████████| 143/143 [01:37<00:00,  1.47it/s]
100%|██████████| 62/62 [00:21<00:00,  2.84it/s]
2020-10-18 12:27:34,864 - pytorch_modeler.py - INFO - Epoch 4/60:train_loss:30.145497, valid_loss:26.979679
100%|██████████| 143/143 [01:37<00:00,  1.46it/s]
100%|██████████| 62/62 [00:21<00:00,  2.83it/s]
2020-10-18 12:29:34,576 - pytorch_modeler.py - INFO - Epoch 5/60:train_lo

100%|██████████| 143/143 [01:33<00:00,  1.53it/s]
100%|██████████| 62/62 [00:20<00:00,  2.95it/s]
2020-10-18 13:38:30,838 - pytorch_modeler.py - INFO - Epoch 41/60:train_loss:17.793820, valid_loss:21.858575
100%|██████████| 143/143 [01:33<00:00,  1.53it/s]
100%|██████████| 62/62 [00:21<00:00,  2.95it/s]
2020-10-18 13:40:25,484 - pytorch_modeler.py - INFO - Epoch 42/60:train_loss:17.810031, valid_loss:22.167877
100%|██████████| 143/143 [01:33<00:00,  1.53it/s]
100%|██████████| 62/62 [00:20<00:00,  2.96it/s]
2020-10-18 13:42:20,096 - pytorch_modeler.py - INFO - Epoch 43/60:train_loss:17.772777, valid_loss:22.041238
100%|██████████| 143/143 [01:33<00:00,  1.53it/s]
100%|██████████| 62/62 [00:20<00:00,  2.95it/s]
2020-10-18 13:44:14,736 - pytorch_modeler.py - INFO - Epoch 44/60:train_loss:17.794134, valid_loss:22.039560
100%|██████████| 143/143 [01:33<00:00,  1.53it/s]
100%|██████████| 62/62 [00:21<00:00,  2.95it/s]
2020-10-18 13:46:09,388 - pytorch_modeler.py - INFO - Epoch 45/60:train_lo

elapsed time: 6956.517341137 [sec]


2020-10-18 14:14:50,785 - 00_train.py - INFO - TARGET MACHINE_TYPE: pump
2020-10-18 14:14:50,785 - 00_train.py - INFO - MAKE DATA_LOADER
2020-10-18 14:14:50,787 - 00_train.py - INFO - TRAINING
  0%|          | 0/127 [00:00<?, ?it/s]

use: cuda:0


100%|██████████| 127/127 [02:00<00:00,  1.05it/s]
100%|██████████| 55/55 [00:25<00:00,  2.14it/s]
2020-10-18 14:17:17,665 - pytorch_modeler.py - INFO - Epoch 1/60:train_loss:811.010598, valid_loss:714.460099
100%|██████████| 127/127 [01:23<00:00,  1.52it/s]
100%|██████████| 55/55 [00:18<00:00,  2.96it/s]
2020-10-18 14:18:59,725 - pytorch_modeler.py - INFO - Epoch 2/60:train_loss:471.521152, valid_loss:339.893936
100%|██████████| 127/127 [01:23<00:00,  1.52it/s]
100%|██████████| 55/55 [00:18<00:00,  2.96it/s]
2020-10-18 14:20:41,986 - pytorch_modeler.py - INFO - Epoch 3/60:train_loss:179.082344, valid_loss:110.237200
100%|██████████| 127/127 [01:23<00:00,  1.52it/s]
100%|██████████| 55/55 [00:18<00:00,  2.96it/s]
2020-10-18 14:22:24,288 - pytorch_modeler.py - INFO - Epoch 4/60:train_loss:49.730378, valid_loss:36.508108
100%|██████████| 127/127 [01:23<00:00,  1.52it/s]
100%|██████████| 55/55 [00:18<00:00,  2.96it/s]
2020-10-18 14:24:06,306 - pytorch_modeler.py - INFO - Epoch 5/60:train_l

100%|██████████| 127/127 [01:22<00:00,  1.53it/s]
100%|██████████| 55/55 [00:18<00:00,  2.96it/s]
2020-10-18 15:25:00,443 - pytorch_modeler.py - INFO - Epoch 41/60:train_loss:19.346520, valid_loss:23.258431
100%|██████████| 127/127 [01:22<00:00,  1.53it/s]
100%|██████████| 55/55 [00:18<00:00,  2.95it/s]
2020-10-18 15:26:41,926 - pytorch_modeler.py - INFO - Epoch 42/60:train_loss:19.273268, valid_loss:23.799083
100%|██████████| 127/127 [01:22<00:00,  1.53it/s]
100%|██████████| 55/55 [00:18<00:00,  2.96it/s]
2020-10-18 15:28:23,426 - pytorch_modeler.py - INFO - Epoch 43/60:train_loss:19.262564, valid_loss:23.369357
100%|██████████| 127/127 [01:22<00:00,  1.53it/s]
100%|██████████| 55/55 [00:18<00:00,  2.96it/s]
2020-10-18 15:30:04,893 - pytorch_modeler.py - INFO - Epoch 44/60:train_loss:19.247548, valid_loss:23.388863
100%|██████████| 127/127 [01:22<00:00,  1.53it/s]
100%|██████████| 55/55 [00:18<00:00,  2.96it/s]
2020-10-18 15:31:46,364 - pytorch_modeler.py - INFO - Epoch 45/60:train_lo

elapsed time: 6138.135067463 [sec]


2020-10-18 15:57:09,408 - 00_train.py - INFO - TARGET MACHINE_TYPE: ToyCar
2020-10-18 15:57:09,408 - 00_train.py - INFO - MAKE DATA_LOADER
2020-10-18 15:57:09,410 - 00_train.py - INFO - TRAINING
  0%|          | 0/154 [00:00<?, ?it/s]

use: cuda:0


100%|██████████| 154/154 [02:32<00:00,  1.01it/s]
100%|██████████| 66/66 [00:34<00:00,  1.91it/s]
2020-10-18 16:00:16,934 - pytorch_modeler.py - INFO - Epoch 1/60:train_loss:792.993119, valid_loss:598.192971
100%|██████████| 154/154 [01:44<00:00,  1.47it/s]
100%|██████████| 66/66 [00:24<00:00,  2.74it/s]
2020-10-18 16:02:25,834 - pytorch_modeler.py - INFO - Epoch 2/60:train_loss:379.436609, valid_loss:223.149640
100%|██████████| 154/154 [01:44<00:00,  1.47it/s]
100%|██████████| 66/66 [00:24<00:00,  2.74it/s]
2020-10-18 16:04:34,847 - pytorch_modeler.py - INFO - Epoch 3/60:train_loss:117.689321, valid_loss:64.280654
100%|██████████| 154/154 [01:44<00:00,  1.47it/s]
100%|██████████| 66/66 [00:24<00:00,  2.74it/s]
2020-10-18 16:06:43,682 - pytorch_modeler.py - INFO - Epoch 4/60:train_loss:35.763768, valid_loss:27.286321
100%|██████████| 154/154 [01:44<00:00,  1.48it/s]
100%|██████████| 66/66 [00:24<00:00,  2.75it/s]
2020-10-18 16:08:52,124 - pytorch_modeler.py - INFO - Epoch 5/60:train_lo

100%|██████████| 154/154 [01:53<00:00,  1.36it/s]
100%|██████████| 66/66 [00:24<00:00,  2.70it/s]
2020-10-18 17:28:31,083 - pytorch_modeler.py - INFO - Epoch 41/60:train_loss:25.057460, valid_loss:25.383755
100%|██████████| 154/154 [01:47<00:00,  1.43it/s]
100%|██████████| 66/66 [00:24<00:00,  2.64it/s]
2020-10-18 17:30:43,480 - pytorch_modeler.py - INFO - Epoch 42/60:train_loss:25.068847, valid_loss:25.372977
100%|██████████| 154/154 [01:49<00:00,  1.41it/s]
100%|██████████| 66/66 [00:25<00:00,  2.61it/s]
2020-10-18 17:32:57,980 - pytorch_modeler.py - INFO - Epoch 43/60:train_loss:25.028430, valid_loss:25.380947
100%|██████████| 154/154 [01:47<00:00,  1.43it/s]
100%|██████████| 66/66 [00:24<00:00,  2.68it/s]
2020-10-18 17:35:10,440 - pytorch_modeler.py - INFO - Epoch 44/60:train_loss:25.139658, valid_loss:25.296120
100%|██████████| 154/154 [01:47<00:00,  1.43it/s]
100%|██████████| 66/66 [00:24<00:00,  2.65it/s]
2020-10-18 17:37:22,816 - pytorch_modeler.py - INFO - Epoch 45/60:train_lo

elapsed time: 7999.865795851 [sec]


2020-10-18 18:10:30,592 - 00_train.py - INFO - TARGET MACHINE_TYPE: ToyConveyor
2020-10-18 18:10:30,593 - 00_train.py - INFO - MAKE DATA_LOADER
2020-10-18 18:10:30,594 - 00_train.py - INFO - TRAINING
  0%|          | 0/132 [00:00<?, ?it/s]

use: cuda:0


100%|██████████| 132/132 [02:09<00:00,  1.02it/s]
100%|██████████| 57/57 [00:28<00:00,  1.99it/s]
2020-10-18 18:13:09,746 - pytorch_modeler.py - INFO - Epoch 1/60:train_loss:417.207208, valid_loss:283.880787
100%|██████████| 132/132 [01:31<00:00,  1.45it/s]
100%|██████████| 57/57 [00:19<00:00,  2.86it/s]
2020-10-18 18:15:00,874 - pytorch_modeler.py - INFO - Epoch 2/60:train_loss:186.241324, valid_loss:91.072876
100%|██████████| 132/132 [01:28<00:00,  1.49it/s]
100%|██████████| 57/57 [00:19<00:00,  2.88it/s]
2020-10-18 18:16:49,236 - pytorch_modeler.py - INFO - Epoch 3/60:train_loss:59.208979, valid_loss:37.529847
100%|██████████| 132/132 [01:29<00:00,  1.47it/s]
100%|██████████| 57/57 [00:20<00:00,  2.73it/s]
2020-10-18 18:18:39,653 - pytorch_modeler.py - INFO - Epoch 4/60:train_loss:31.998485, valid_loss:27.934368
100%|██████████| 132/132 [01:31<00:00,  1.45it/s]
100%|██████████| 57/57 [00:19<00:00,  2.89it/s]
2020-10-18 18:20:30,441 - pytorch_modeler.py - INFO - Epoch 5/60:train_loss

100%|██████████| 132/132 [01:29<00:00,  1.47it/s]
100%|██████████| 57/57 [00:19<00:00,  2.85it/s]
2020-10-18 19:26:09,943 - pytorch_modeler.py - INFO - Epoch 41/60:train_loss:26.544431, valid_loss:26.553331
100%|██████████| 132/132 [01:28<00:00,  1.49it/s]
100%|██████████| 57/57 [00:19<00:00,  2.85it/s]
2020-10-18 19:27:58,269 - pytorch_modeler.py - INFO - Epoch 42/60:train_loss:26.536298, valid_loss:26.431415
100%|██████████| 132/132 [01:28<00:00,  1.49it/s]
100%|██████████| 57/57 [00:20<00:00,  2.79it/s]
2020-10-18 19:29:47,411 - pytorch_modeler.py - INFO - Epoch 43/60:train_loss:26.483932, valid_loss:26.521996
100%|██████████| 132/132 [01:29<00:00,  1.47it/s]
100%|██████████| 57/57 [00:20<00:00,  2.82it/s]
2020-10-18 19:31:37,355 - pytorch_modeler.py - INFO - Epoch 44/60:train_loss:26.516993, valid_loss:26.742838
100%|██████████| 132/132 [01:29<00:00,  1.48it/s]
100%|██████████| 57/57 [00:19<00:00,  2.87it/s]
2020-10-18 19:33:26,313 - pytorch_modeler.py - INFO - Epoch 45/60:train_lo

elapsed time: 6606.907669306 [sec]


2020-10-18 20:00:38,225 - 00_train.py - INFO - TARGET MACHINE_TYPE: valve
2020-10-18 20:00:38,226 - 00_train.py - INFO - MAKE DATA_LOADER
2020-10-18 20:00:38,228 - 00_train.py - INFO - TRAINING
  0%|          | 0/128 [00:00<?, ?it/s]

use: cuda:0


100%|██████████| 128/128 [02:02<00:00,  1.05it/s]
100%|██████████| 55/55 [00:25<00:00,  2.12it/s]
2020-10-18 20:03:06,890 - pytorch_modeler.py - INFO - Epoch 1/60:train_loss:901.434679, valid_loss:736.942100
100%|██████████| 128/128 [01:23<00:00,  1.53it/s]
100%|██████████| 55/55 [00:18<00:00,  2.93it/s]
2020-10-18 20:04:49,562 - pytorch_modeler.py - INFO - Epoch 2/60:train_loss:538.545937, valid_loss:357.363443
100%|██████████| 128/128 [01:24<00:00,  1.52it/s]
100%|██████████| 55/55 [00:18<00:00,  2.93it/s]
2020-10-18 20:06:32,740 - pytorch_modeler.py - INFO - Epoch 3/60:train_loss:215.129834, valid_loss:110.728017
100%|██████████| 128/128 [01:24<00:00,  1.52it/s]
100%|██████████| 55/55 [00:18<00:00,  2.93it/s]
2020-10-18 20:08:15,919 - pytorch_modeler.py - INFO - Epoch 4/60:train_loss:63.966256, valid_loss:39.115811
100%|██████████| 128/128 [01:24<00:00,  1.52it/s]
100%|██████████| 55/55 [00:18<00:00,  2.93it/s]
2020-10-18 20:09:58,708 - pytorch_modeler.py - INFO - Epoch 5/60:train_l

100%|██████████| 128/128 [01:25<00:00,  1.49it/s]
100%|██████████| 55/55 [00:19<00:00,  2.89it/s]
2020-10-18 21:12:37,682 - pytorch_modeler.py - INFO - Epoch 41/60:train_loss:26.110571, valid_loss:25.947197
100%|██████████| 128/128 [01:25<00:00,  1.49it/s]
100%|██████████| 55/55 [00:18<00:00,  2.91it/s]
2020-10-18 21:14:22,532 - pytorch_modeler.py - INFO - Epoch 42/60:train_loss:26.115503, valid_loss:26.052606
100%|██████████| 128/128 [01:25<00:00,  1.50it/s]
100%|██████████| 55/55 [00:19<00:00,  2.84it/s]
2020-10-18 21:16:07,274 - pytorch_modeler.py - INFO - Epoch 43/60:train_loss:26.113497, valid_loss:25.971311
100%|██████████| 128/128 [01:26<00:00,  1.49it/s]
100%|██████████| 55/55 [00:19<00:00,  2.85it/s]
2020-10-18 21:17:52,643 - pytorch_modeler.py - INFO - Epoch 44/60:train_loss:26.152119, valid_loss:25.953379
100%|██████████| 128/128 [01:24<00:00,  1.51it/s]
100%|██████████| 55/55 [00:19<00:00,  2.87it/s]
2020-10-18 21:19:36,740 - pytorch_modeler.py - INFO - Epoch 45/60:train_lo

elapsed time: 6314.409774542 [sec]
