# train

In [1]:
import yaml
import os

with open("./config.yaml", 'rb') as f:
    config = yaml.load(f)

  config = yaml.load(f)


In [2]:
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
os.makedirs(OUTPUT_ROOT, exist_ok=True)

## load library

In [3]:
# python default library
import os
import shutil
import datetime
import sys
import pickle

# general analysis tool-kit
import numpy as np
import pandas as pd
#from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

# pytorch
import torch
from torch import optim, nn
from torch.utils.tensorboard import SummaryWriter

#from torchsummary import summary

# etc
import yaml
yaml.warnings({'YAMLLoadWarning': False})

# original library
sys.path.append('/home/hiroki/research/dcase2021_task2/src/functions')
import common as com
import pytorch_modeler as modeler
from pytorch_model import Conditional_VAE as Model
#from pytorch_utils import filtered_load_model
#import models

import librosa
import IPython
import librosa.display

## load config and set logger

In [4]:
log_file = config['IO_OPTION']['OUTPUT_ROOT']+'/train_{0}.log'.format(datetime.date.today())
logger = com.setup_logger(log_file, '00_train.py')

## Setting

In [5]:
# Setting seed
modeler.set_seed(42)

In [6]:
############################################################################
# Setting I/O path
############################################################################
# input dirs
INPUT_ROOT = config['IO_OPTION']['INPUT_ROOT']
#dev_dir = INPUT_ROOT + "/dev_data"
#add_dev_dir = INPUT_ROOT + "/add_dev_data"
# machine type
machine_types = ['ToyCar', 'gearbox', 'valve', 'ToyTrain', 'pump', 'fan', 'slider']
# output dirs
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
MODEL_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/models'
TB_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/tb'
OUT_FEATURE_DIR = OUTPUT_ROOT + '/extraction_features'
OUT_SCORE_DIR = OUTPUT_ROOT + '/score'
OUT_PRED_DIR = OUTPUT_ROOT + '/pred'
#os.makedirs(OUTPUT_ROOT, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(TB_DIR, exist_ok=True)
os.makedirs(OUT_FEATURE_DIR, exist_ok=True)
os.makedirs(OUT_SCORE_DIR, exist_ok=True)
os.makedirs(OUT_PRED_DIR, exist_ok=True)
# copy config
shutil.copy('./config.yaml', OUTPUT_ROOT)

'/media/hiroki/HDD1TB/research/dcase2021_task2/output/ID_Conditional_VAE/baseline/config.yaml'

In [7]:
torch.cuda.is_available()

True

In [8]:
#ext_data = pd.read_pickle(f'{INPUT_ROOT}/{machine_types[0]}_features.pkl')
#dataloaders_dict = modeler.make_dataloader(ext_data)

In [9]:
#for sample in dataloaders_dict['train']:
#    hoge = sample['features']

## training

In [10]:
#############################################################################
# run
#############################################################################
def run(machine_type):
    com.tic()
    
    logger.info('TARGET MACHINE_TYPE: {0}'.format(machine_type))
    logger.info('MAKE DATA_LOADER')
    # dataloader
    ext_data = pd.read_pickle(f'{INPUT_ROOT}/{machine_type}_features.pkl')
    dataloaders_dict = modeler.make_dataloader(ext_data)
    # define writer for tensorbord
    os.makedirs(TB_DIR+'/'+machine_type, exist_ok=True)
    tb_log_dir = TB_DIR + '/' + machine_type
    writer = SummaryWriter(log_dir = tb_log_dir)
    # out path
    model_out_path = MODEL_DIR+'/{}_model.pth'.format(machine_type)
    score_out_path = OUT_SCORE_DIR + '/{}_score.csv'.format(machine_type)
    pred_out_path = OUT_PRED_DIR + '/{}_pred.csv'.format(machine_type)
    logger.info('TRAINING')
    # parameter setting
    in_features = ext_data['train']['features'].shape[1]
    mid_size = config['param']['mid_size']
    latent_size = config['param']['latent_size']
    net = Model(in_features, mid_size, latent_size)
    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    num_epochs = config['param']['num_epochs']
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e-2, 
                                              max_lr=1e-3, epochs=num_epochs, steps_per_epoch=len(dataloaders_dict['train']))
    # training
    output_dicts = modeler.train_net(net, dataloaders_dict, optimizer, scheduler, num_epochs, writer, model_out_path, score_out_path, pred_out_path)
    
    com.toc()

In [11]:
machine_types

['ToyCar', 'gearbox', 'valve', 'ToyTrain', 'pump', 'fan', 'slider']

In [None]:
for machine_type in machine_types:
    run(machine_type)

2021-04-20 21:21:17,639 - 00_train.py - INFO - TARGET MACHINE_TYPE: ToyCar
2021-04-20 21:21:17,640 - 00_train.py - INFO - MAKE DATA_LOADER
2021-04-20 21:21:17,673 - 00_train.py - INFO - TRAINING


use: cuda:0


2021-04-20 21:21:19,323 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [00:03<00:00, 58.58it/s]
2021-04-20 21:21:22,552 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 19/19 [00:00<00:00, 140.96it/s]
2021-04-20 21:21:22,707 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:00<00:00, 158.26it/s]


Unnamed: 0,AUC,pAUC
Source_0,0.5797,0.501053
Source_1,0.4766,0.485263
Source_2,0.544,0.491053
Target_0,0.5301,0.507368
Target_1,0.5221,0.500526
Target_2,0.5272,0.516842
mean,0.52995,0.500351
h_mean,0.528167,0.500139


2021-04-20 21:21:23,476 - pytorch_modeler.py - INFO - epoch:1/100, train_losses:0.010859, val_AUC_hmean:0.528167, val_pAUC_hmean:0.500139, best_flag:True
2021-04-20 21:21:23,477 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [00:02<00:00, 69.42it/s]
2021-04-20 21:21:26,203 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 19/19 [00:00<00:00, 155.75it/s]
2021-04-20 21:21:26,342 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:00<00:00, 164.31it/s]


Unnamed: 0,AUC,pAUC
Source_0,0.5805,0.494737
Source_1,0.4875,0.482105
Source_2,0.5496,0.49
Target_0,0.5453,0.524737
Target_1,0.5389,0.522105
Target_2,0.5259,0.52
mean,0.53795,0.505614
h_mean,0.53646,0.505032


2021-04-20 21:21:27,139 - pytorch_modeler.py - INFO - epoch:2/100, train_losses:0.005465, val_AUC_hmean:0.536460, val_pAUC_hmean:0.505032, best_flag:True
2021-04-20 21:21:27,140 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [00:02<00:00, 70.85it/s]
2021-04-20 21:21:29,809 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 19/19 [00:00<00:00, 157.16it/s]
2021-04-20 21:21:29,948 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:00<00:00, 159.07it/s]
2021-04-20 21:21:30,086 - pytorch_modeler.py - INFO - epoch:3/100, train_losses:0.005239, val_AUC_hmean:0.532274, val_pAUC_hmean:0.503914, best_flag:False
2021-04-20 21:21:30,087 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [00:02<00:00, 67.24it/s]
2021-04-20 21:21:32,900 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 19/19 [00:00<00:00, 153.84it/s]
2021-04-20 21:21:33,039 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:00<00:00, 155.60it/s]
2021-04