# train

In [1]:
import yaml
import os

# Parse the run configuration. safe_load is used instead of yaml.load(f):
# calling yaml.load without a Loader warns on PyYAML 5.x and raises
# TypeError on PyYAML >= 6, and safe_load only builds plain Python data.
with open("./config.yaml", 'rb') as f:
    config = yaml.safe_load(f)

In [2]:
# Root directory for everything this run writes, as named in config.yaml.
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
# Create it up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(OUTPUT_ROOT, exist_ok=True)

## load library

In [3]:
# python default library
import os
import shutil
import datetime
import sys
import pickle

# general analysis tool-kit
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# pytorch
import torch
from torch import optim, nn
from torch.utils.tensorboard import SummaryWriter

from torchsummary import summary

# etc
import yaml
yaml.warnings({'YAMLLoadWarning': False})
import mlflow
from collections import defaultdict
from scipy.stats import zscore

# original library

import common as com
import pytorch_modeler as modeler
from pytorch_model import DAGMM as Model
from pytorch_utils import filtered_load_model
import models

import librosa
import IPython
import librosa.display

## load config and set logger

In [4]:
# `config` was already loaded in the first cell, so it is not re-read here.
# The log file lives in the output root and is named after today's date.
log_name = '{0}.log'.format(datetime.date.today())
log_folder = config['IO_OPTION']['OUTPUT_ROOT'] + '/' + log_name
logger = com.setup_logger(log_folder, '00_train.py')

  and should_run_async(code)


## Setting

In [5]:
# Set the seed to 42 through the project helper.
# NOTE(review): presumably this seeds every RNG used in training
# (python/numpy/torch) -- confirm in pytorch_modeler.set_seed.
modeler.set_seed(42)

In [6]:
############################################################################
# Setting I/O path
############################################################################
io_option = config['IO_OPTION']

# input dirs: development data and additional development data
INPUT_ROOT = io_option['INPUT_ROOT']
dev_path = '{}/dev_data'.format(INPUT_ROOT)
add_dev_path = '{}/add_dev_data'.format(INPUT_ROOT)

# machine type: the configured one, plus every entry found under dev_path
MACHINE_TYPE = io_option['MACHINE_TYPE']
machine_types = os.listdir(dev_path)

# output dirs: model weights, tensorboard logs and pickled histories
OUTPUT_ROOT = io_option['OUTPUT_ROOT']
MODEL_DIR = '{}/models'.format(OUTPUT_ROOT)
TB_DIR = '{}/tb'.format(OUTPUT_ROOT)
PKL_DIR = '{}/pkl'.format(OUTPUT_ROOT)
for out_dir in (MODEL_DIR, TB_DIR, PKL_DIR):
    os.makedirs(out_dir, exist_ok=True)

# copy config next to the results so the run settings stay with its outputs
shutil.copy('./config.yaml', OUTPUT_ROOT)

'/media/hiroki/working/research/dcase2020/result/2D/DAGMM/stable/v1/config.yaml'

## make path list and train/valid split

In [7]:
############################################################################
# make path set and train/valid split
############################################################################
# Layout filled in by the next cell:
#   train_paths[machine_type]['train' or 'valid'] -> list of file paths
dev_train_paths, add_train_paths, train_paths = {}, {}, {}

In [8]:
for machine_type in machine_types:
    # Directories holding this machine type's files for each split.
    dev_train_dir = "{}/{}/train".format(dev_path, machine_type)
    add_train_dir = "{}/{}/train".format(add_dev_path, machine_type)
    dev_valid_dir = "{}/{}/test".format(dev_path, machine_type)

    # Sorted full paths: dev train, additional train, and dev test (validation).
    dev_train_paths = sorted("{}/{}".format(dev_train_dir, name) for name in os.listdir(dev_train_dir))
    add_train_paths = sorted("{}/{}".format(add_train_dir, name) for name in os.listdir(add_train_dir))
    dev_valid_paths = sorted("{}/{}".format(dev_valid_dir, name) for name in os.listdir(dev_valid_dir))

    # Training uses dev + additional train files; validation uses the dev test files.
    train_paths[machine_type] = {
        'train': dev_train_paths + add_train_paths,
        'valid': dev_valid_paths,
    }

## training

In [9]:
#############################################################################
# run
#############################################################################
def run(machine_type):
    """Train the model for one machine type and save its weights.

    Builds the data loaders from the notebook-level ``train_paths``, trains
    with ``modeler.train_net`` while logging to a per-machine TensorBoard
    directory under ``TB_DIR``, and saves the trained ``state_dict`` to
    ``MODEL_DIR/<machine_type>_model.pth``.

    Args:
        machine_type: Name of the machine type to train on; passed to
            ``modeler.make_dataloader`` together with ``train_paths``.

    Returns:
        The history object returned by ``modeler.train_net``. Its ``'model'``
        entry is the trained network whose weights are saved.
    """
    com.tic()
    logger.info('TARGET MACHINE_TYPE: {0}'.format(machine_type))
    logger.info('MAKE DATA_LOADER')
    # dev_train_paths
    dataloaders_dict = modeler.make_dataloader(train_paths, machine_type)
    # define writer for tensorboard (one log directory per machine type)
    tb_log_dir = TB_DIR + '/' + machine_type
    os.makedirs(tb_log_dir, exist_ok=True)
    writer = SummaryWriter(log_dir=tb_log_dir)
    try:
        logger.info('TRAINING')
        # parameter setting
        net = Model(sample_rate=config['preprocessing']['sample_rate'],
                    window_size=config['preprocessing']['window_size'],
                    hop_size=config['preprocessing']['hop_size'],
                    mel_bins=config['preprocessing']['mel_bins'],
                    fmin=config['preprocessing']['fmin'],
                    fmax=config['preprocessing']['fmax'],
                    latent_size=config['fit']['latent_size'],
                    mixture_size=config['fit']['mixture_size'])
        #pretrained_dict = torch.load(config['IO_OPTION']['PREMODEL_PATH'])
        #net = filtered_load_model(net, pretrained_dict)
        optimizer = optim.Adam(net.parameters(), lr=1e-3)
        num_epochs = config['fit']['num_epochs']
        # The scheduler is sized from the epoch count and the number of
        # training batches per epoch. (A stray positional `1e3` after
        # `pct_start` used to make this call a SyntaxError.)
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer,
                                                  pct_start=0.1,
                                                  max_lr=1e-3,
                                                  epochs=num_epochs,
                                                  steps_per_epoch=len(dataloaders_dict['train']))
        #scheduler = ReduceLROnPlateau(optimizer, 'min')
        criterion = nn.MSELoss()
        history = modeler.train_net(net, dataloaders_dict, criterion, optimizer, scheduler, num_epochs, writer)

        # output
        model = history['model']
        model_out_path = MODEL_DIR+'/{}_model.pth'.format(machine_type)
        torch.save(model.state_dict(), model_out_path)
        logger.info('\n success:{0} \n'.format(machine_type) + \
                        'model_out_path ==> \n {0}'.format(model_out_path))
    finally:
        # close writer for tensorboard even if training fails or is interrupted
        writer.close()
    #modeler.mlflow_log(history, config, machine_type, model_out_path, tb_log_dir)
    com.toc()
    return history

In [10]:
# Display the discovered machine types; they come straight from
# os.listdir(dev_path), so their order is whatever the filesystem returns.
machine_types

['fan', 'pump', 'slider', 'ToyCar', 'ToyConveyor', 'valve']

In [11]:
# Train every machine type except the first two entries of `machine_types`.
# NOTE(review): the [2:] slice depends on the os.listdir ordering of
# `machine_types`, so which machines are skipped can differ between
# environments -- confirm this is intended.
for machine_type in machine_types[2:]:
    history = run(machine_type)
    # Saving the per-machine history is currently disabled:
    #with open('{}/{}_history.pkl'.format(PKL_DIR, machine_type), 'wb') as file:
    #    pickle.dump(history , file)

2020-11-27 23:40:59,986 - 00_train.py - INFO - TARGET MACHINE_TYPE: slider
2020-11-27 23:40:59,987 - 00_train.py - INFO - MAKE DATA_LOADER
2020-11-27 23:41:00,850 - 00_train.py - INFO - TRAINING


use: cuda:0


100%|██████████| 41/41 [00:11<00:00,  3.43it/s]
100%|██████████| 10/10 [00:01<00:00,  7.19it/s]
2020-11-27 23:41:15,972 - pytorch_modeler.py - INFO - Epoch 1/200:train_loss:114.100796, tr_rec:14.991098, tr_eng:86.987486, tr_covd:2424.442528, val_AUC:0.588967, val_pAUC:0.493810
100%|██████████| 41/41 [00:09<00:00,  4.22it/s]
100%|██████████| 10/10 [00:01<00:00,  8.71it/s]
2020-11-27 23:41:26,842 - pytorch_modeler.py - INFO - Epoch 2/200:train_loss:114.154912, tr_rec:14.299691, tr_eng:81.029423, tr_covd:3765.159344, val_AUC:0.540609, val_pAUC:0.495863
100%|██████████| 41/41 [00:09<00:00,  4.18it/s]
100%|██████████| 10/10 [00:01<00:00,  8.60it/s]
2020-11-27 23:41:37,823 - pytorch_modeler.py - INFO - Epoch 3/200:train_loss:97.653340, tr_rec:13.173602, tr_eng:67.526183, tr_covd:3390.711313, val_AUC:0.650621, val_pAUC:0.508492
100%|██████████| 41/41 [00:09<00:00,  4.13it/s]
100%|██████████| 10/10 [00:01<00:00,  8.41it/s]
2020-11-27 23:41:48,938 - pytorch_modeler.py - INFO - Epoch 4/200:train

100%|██████████| 41/41 [00:09<00:00,  4.16it/s]
100%|██████████| 10/10 [00:01<00:00,  8.41it/s]
2020-11-27 23:46:48,437 - pytorch_modeler.py - INFO - Epoch 31/200:train_loss:-4.533051, tr_rec:0.331335, tr_eng:-26.664207, tr_covd:4359.964294, val_AUC:0.540709, val_pAUC:0.514480
100%|██████████| 41/41 [00:09<00:00,  4.17it/s]
100%|██████████| 10/10 [00:01<00:00,  8.69it/s]
2020-11-27 23:46:59,429 - pytorch_modeler.py - INFO - Epoch 32/200:train_loss:-17.873224, tr_rec:0.305893, tr_eng:-36.864735, tr_covd:3737.123583, val_AUC:0.445402, val_pAUC:0.503111
100%|██████████| 41/41 [00:09<00:00,  4.19it/s]
100%|██████████| 10/10 [00:01<00:00,  8.78it/s]
2020-11-27 23:47:10,358 - pytorch_modeler.py - INFO - Epoch 33/200:train_loss:-16.305670, tr_rec:0.359152, tr_eng:-35.656942, tr_covd:3798.423988, val_AUC:0.565785, val_pAUC:0.508126
100%|██████████| 41/41 [00:09<00:00,  4.24it/s]
100%|██████████| 10/10 [00:01<00:00,  8.83it/s]
2020-11-27 23:47:21,180 - pytorch_modeler.py - INFO - Epoch 34/200:t

100%|██████████| 41/41 [00:09<00:00,  4.23it/s]
100%|██████████| 10/10 [00:01<00:00,  8.79it/s]
2020-11-27 23:52:25,330 - pytorch_modeler.py - INFO - Epoch 61/200:train_loss:-18.907484, tr_rec:0.239595, tr_eng:-49.084017, tr_covd:5987.387535, val_AUC:0.547482, val_pAUC:0.498725
100%|██████████| 41/41 [00:09<00:00,  4.19it/s]
100%|██████████| 10/10 [00:01<00:00,  8.72it/s]
2020-11-27 23:52:36,281 - pytorch_modeler.py - INFO - Epoch 62/200:train_loss:-18.997077, tr_rec:0.225491, tr_eng:-46.074976, tr_covd:5370.481747, val_AUC:0.422089, val_pAUC:0.491119
 34%|███▍      | 14/41 [00:03<00:07,  3.70it/s]


Traceback (most recent call last):
  File "/home/hiroki/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-f84e921ef713>", line 2, in <module>
    history = run(machine_type)
  File "<ipython-input-9-f6ad4a5104e9>", line 33, in run
    history = modeler.train_net(net, dataloaders_dict, criterion, optimizer, scheduler, num_epochs, writer)
  File "/media/hiroki/working/research/dcase2020/2D_codes/DPGMM_LSTM_CONV/stable/v1/pytorch_modeler.py", line 157, in train_net
    net, total_loss, sample_energy, recon_error, cov_diag = dagmm_step(net, input, optimizer, scheduler, device)
  File "/media/hiroki/working/research/dcase2020/2D_codes/DPGMM_LSTM_CONV/stable/v1/pytorch_modeler.py", line 258, in dagmm_step
    total_loss.backward()
  File "/home/hiroki/anaconda3/lib/python3.7/site-packages/torch/tensor.py", line 185, in backward
    torch.autograd.backward(self, gradie

TypeError: object of type 'NoneType' has no len()

# Train a single machine type and pickle its history.
# The target name is bound once and used for both the run and the output
# file; the file name previously came from the stale loop variable
# `machine_type`, so the ToyCar history could be saved under another name.
target_machine_type = 'ToyCar'
history = run(target_machine_type)
with open('{}/{}_history.pkl'.format(PKL_DIR, target_machine_type), 'wb') as file:
    pickle.dump(history, file)