# train

In [1]:
import yaml
import os

with open("./config.yaml", 'rb') as f:
    config = yaml.load(f)

  """


In [2]:
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
os.makedirs(OUTPUT_ROOT, exist_ok=True)

## load library

In [3]:
# python default library
import os
import shutil
import datetime
import sys
import pickle

# general analysis tool-kit
import numpy as np
import pandas as pd
#from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

# pytorch
import torch
from torch import optim, nn
from torch.utils.tensorboard import SummaryWriter
import timm

#from torchvision.models import wide_resnet50_2, resnet18
#from torchsummary import summary

# etc
import yaml
yaml.warnings({'YAMLLoadWarning': False})

# original library
import common as com
import pytorch_modeler as modeler
from pytorch_model import EfficientNet_b1 as Model
#from pytorch_utils import filtered_load_model
#import models

import librosa
import IPython
import librosa.display

In [4]:
# effnet1 = timm.create_model('efficientnet_b1', pretrained=True)
# # M7:block[5], M8:block[6], M9:conv_head
# effnet1

## load config and set logger

In [5]:
log_folder = config['IO_OPTION']['OUTPUT_ROOT']+'/{0}.log'.format(datetime.date.today())
logger = com.setup_logger(log_folder, '00_train.py')

## Setting

In [6]:
# Setting seed
modeler.set_seed(42)

In [7]:
############################################################################
# Setting I/O path
############################################################################
# input dirs
INPUT_ROOT = config['IO_OPTION']['INPUT_ROOT']
dev_dir = INPUT_ROOT + "/dev_data"
add_dev_dir = INPUT_ROOT + "/add_dev_data"
# machine type
machine_types = os.listdir(dev_dir)
# output dirs
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
MODEL_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/models'

TB_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/tb'
OUT_FEATURE_DIR = OUTPUT_ROOT + '/extraction_features'
PRED_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/pred'
SCORE_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/score'
#os.makedirs(OUTPUT_ROOT, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(TB_DIR, exist_ok=True)
os.makedirs(OUT_FEATURE_DIR, exist_ok=True)
os.makedirs(PRED_DIR, exist_ok=True)
os.makedirs(SCORE_DIR, exist_ok=True)

# copy config
# shutil.copy('./config.yaml', OUTPUT_ROOT)

In [8]:
torch.cuda.is_available()

True

## make path list

- __train_paths__ ([dict]): Each Machine Type
    - __ToyCar__ ([dict]): Each dataset
        - train ([list]) : paths
        - valid_source ([list]) : paths
        - valid_target ([list]) :paths
    - __ToyTrain__ ([dict]): Each dataset
        - train ([list]) : paths
        - valid_source ([list]) : paths
        - valid_target ([list]) : paths
    - ...

In [9]:
percent = 0.1

In [10]:
dev_paths = {}

for machine_type in machine_types:
    # dev train
    dev_train_paths = [f"{dev_dir}/{machine_type}/train/" + file for file in os.listdir(f"{dev_dir}/{machine_type}/train")]
    dev_train_paths = sorted(dev_train_paths)
    
    print(f'Machine {machine_type}', end=',')
    print(f'dev_train_size : {len(dev_train_paths)}', end=', ')
    dev_train_source_paths = [path for path in dev_train_paths if 'source' in path]
    print(f'source_size : {len(dev_train_source_paths)}', end=', ')
    dev_train_target_paths = [path for path in dev_train_paths if 'target' in path]
    print(f'target_size : {len(dev_train_target_paths)}', end=', ')
    # source mode
    dev_train_paths = dev_train_source_paths + dev_train_target_paths
    
    # add_dev train
    add_dev_paths = [f"{add_dev_dir}/{machine_type}/train/" + file for file in os.listdir(f"{add_dev_dir}/{machine_type}/train")]
    add_dev_paths = sorted(add_dev_paths)
    print(f'adddev_train_size : {len(add_dev_paths)}', end=', ')
    add_dev_source_paths = [path for path in add_dev_paths if 'source' in path]
    print(f'source_size : {len(add_dev_source_paths)}', end=', ')
    add_dev_target_paths = [path for path in add_dev_paths if 'target' in path]
    print(f'target_size : {len(add_dev_target_paths)}')
    # source mode
    add_dev_paths = add_dev_source_paths + add_dev_target_paths
    print('==============================================================')
    
    # dev_source valid
    dev_source_paths = [f"{dev_dir}/{machine_type}/source_test/" + file for file in os.listdir(f"{dev_dir}/{machine_type}/source_test")]
    dev_source_paths = sorted(dev_source_paths)
    
    # dev_target valid
    dev_target_paths = [f"{dev_dir}/{machine_type}/target_test/" + file for file in os.listdir(f"{dev_dir}/{machine_type}/target_test")]
    dev_target_paths = sorted(dev_target_paths)
    
    # bundle
    dev_paths[machine_type] = {}
    dev_paths[machine_type]['train'] = dev_train_paths + add_dev_paths
    dev_paths[machine_type]['valid_source'] = dev_source_paths
    dev_paths[machine_type]['valid_target'] = dev_target_paths

Machine pump,dev_train_size : 3009, source_size : 3000, target_size : 9, adddev_train_size : 3009, source_size : 3000, target_size : 9
Machine gearbox,dev_train_size : 3026, source_size : 3017, target_size : 9, adddev_train_size : 3114, source_size : 3105, target_size : 9
Machine slider,dev_train_size : 3009, source_size : 3000, target_size : 9, adddev_train_size : 3009, source_size : 3000, target_size : 9
Machine ToyCar,dev_train_size : 3009, source_size : 3000, target_size : 9, adddev_train_size : 3009, source_size : 3000, target_size : 9
Machine valve,dev_train_size : 3009, source_size : 3000, target_size : 9, adddev_train_size : 3009, source_size : 3000, target_size : 9
Machine ToyTrain,dev_train_size : 3009, source_size : 3000, target_size : 9, adddev_train_size : 3009, source_size : 3000, target_size : 9
Machine fan,dev_train_size : 3009, source_size : 3000, target_size : 9, adddev_train_size : 3009, source_size : 3000, target_size : 9


In [11]:
machine_types

['pump', 'gearbox', 'slider', 'ToyCar', 'valve', 'ToyTrain', 'fan']

In [12]:
#############################################################################
# run
#############################################################################
def run(machine_type, dev_paths):
    com.tic()
    os.makedirs(TB_DIR+'/'+machine_type, exist_ok=True)
    tb_log_dir = TB_DIR + '/' + machine_type
    writer = SummaryWriter(log_dir = tb_log_dir)
    
    logger.info('TARGET MACHINE_TYPE: {0}'.format(machine_type))
    logger.info('MAKE DATA_LOADER')
    # dev_train_paths
    dataloaders_dict = modeler.make_dataloader(dev_paths, machine_type)
    # parameter setting
    logger.info('TRAINING')
    model = Model(n_out=36, n_centers=6)
    
    optimizer = torch.optim.Adam(model.parameters(),lr=1e-4)
    output_dict, model, pred_df, score_df = modeler.run_training(model, dataloaders_dict, writer, optimizer)
    
    # output
    feature_out_path = f'{OUT_FEATURE_DIR}/{machine_type}_features.pkl'
    model_out_path = f'{MODEL_DIR}/{machine_type}_model.pkl'
    pred_out_path = f'{PRED_DIR}/{machine_type}_pred.csv'
    score_out_path = f'{SCORE_DIR}/{machine_type}_pred.csv'
    # save
    pd.to_pickle(output_dict, feature_out_path)
    torch.save(model.state_dict(), model_out_path)
    pred_df.to_csv(pred_out_path)
    score_df.to_csv(score_out_path)
    logger.info(f'SAVE SUCCESS : {model_out_path}')
    
    com.toc()

In [13]:
#run(machine_types[0], dev_paths)

In [None]:
for machine_type in machine_types:
    run(machine_type, dev_paths)

2021-10-24 12:42:48,057 - 00_train.py - INFO - TARGET MACHINE_TYPE: pump
2021-10-24 12:42:48,058 - 00_train.py - INFO - MAKE DATA_LOADER
2021-10-24 12:42:48,063 - 00_train.py - INFO - TRAINING


use: cuda:0


100%|██████████| 48/48 [00:48<00:00,  1.01s/it]
100%|██████████| 5/5 [00:02<00:00,  2.36it/s]
100%|██████████| 5/5 [00:02<00:00,  2.45it/s]
2021-10-24 12:43:43,506 - pytorch_modeler.py - INFO - epoch:1/100, tr_loss:1.578283, src_loss:1.216158, src_mean_auc:0.454300, tgt_loss:1.182868, tgt_mean_auc:0.489000, mean_auc:0.471650,


Unnamed: 0,AUC,pAUC
Source_0,0.4451,0.496316
Source_1,0.363,0.479474
Source_2,0.5548,0.522632
Target_0,0.5281,0.496316
Target_1,0.4045,0.488947
Target_2,0.5344,0.505789
mean,0.47165,0.498246
h_mean,0.460088,0.497883


100%|██████████| 48/48 [00:48<00:00,  1.00s/it]
100%|██████████| 5/5 [00:02<00:00,  2.47it/s]
100%|██████████| 5/5 [00:02<00:00,  2.46it/s]
2021-10-24 12:44:35,818 - pytorch_modeler.py - INFO - epoch:2/100, tr_loss:0.838177, src_loss:1.101755, src_mean_auc:0.467033, tgt_loss:1.019442, tgt_mean_auc:0.521467, mean_auc:0.494250,


Unnamed: 0,AUC,pAUC
Source_0,0.4833,0.511053
Source_1,0.4232,0.499474
Source_2,0.4946,0.503684
Target_0,0.5956,0.582632
Target_1,0.4875,0.51
Target_2,0.4813,0.506316
mean,0.49425,0.51886
h_mean,0.489322,0.517401


100%|██████████| 48/48 [00:47<00:00,  1.01it/s]
100%|██████████| 5/5 [00:02<00:00,  2.39it/s]
100%|██████████| 5/5 [00:02<00:00,  2.49it/s]
2021-10-24 12:45:27,317 - pytorch_modeler.py - INFO - epoch:3/100, tr_loss:0.595007, src_loss:1.044951, src_mean_auc:0.473650, tgt_loss:0.977383, tgt_mean_auc:0.531167, mean_auc:0.502408,


Unnamed: 0,AUC,pAUC
Source_0,0.4581,0.506316
Source_1,0.4622,0.532105
Source_2,0.50065,0.500526
Target_0,0.5962,0.553158
Target_1,0.5154,0.501579
Target_2,0.4819,0.503158
mean,0.502408,0.51614
h_mean,0.498482,0.515409


100%|██████████| 48/48 [00:47<00:00,  1.02it/s]
100%|██████████| 5/5 [00:02<00:00,  2.29it/s]
100%|██████████| 5/5 [00:02<00:00,  2.47it/s]
2021-10-24 12:46:18,801 - pytorch_modeler.py - INFO - epoch:4/100, tr_loss:0.431540, src_loss:1.010416, src_mean_auc:0.435000, tgt_loss:0.922159, tgt_mean_auc:0.543267, mean_auc:0.489133,


Unnamed: 0,AUC,pAUC
Source_0,0.4053,0.506316
Source_1,0.4427,0.551053
Source_2,0.457,0.484737
Target_0,0.6075,0.567895
Target_1,0.5308,0.492105
Target_2,0.4915,0.496316
mean,0.489133,0.516404
h_mean,0.480808,0.514555


100%|██████████| 48/48 [00:47<00:00,  1.01it/s]
100%|██████████| 5/5 [00:02<00:00,  2.33it/s]
100%|██████████| 5/5 [00:02<00:00,  2.50it/s]
2021-10-24 12:47:10,305 - pytorch_modeler.py - INFO - epoch:5/100, tr_loss:0.344460, src_loss:1.090561, src_mean_auc:0.437933, tgt_loss:0.918843, tgt_mean_auc:0.525250, mean_auc:0.481592,


Unnamed: 0,AUC,pAUC
Source_0,0.3761,0.493158
Source_1,0.4618,0.547368
Source_2,0.4759,0.486316
Target_0,0.5335,0.53
Target_1,0.57075,0.489474
Target_2,0.4715,0.48
mean,0.481592,0.504386
h_mean,0.473448,0.503183


100%|██████████| 48/48 [00:47<00:00,  1.01it/s]
100%|██████████| 5/5 [00:02<00:00,  2.50it/s]
100%|██████████| 5/5 [00:02<00:00,  2.48it/s]
2021-10-24 12:48:01,668 - pytorch_modeler.py - INFO - epoch:6/100, tr_loss:0.291915, src_loss:1.099331, src_mean_auc:0.410000, tgt_loss:1.046444, tgt_mean_auc:0.484900, mean_auc:0.447450,


Unnamed: 0,AUC,pAUC
Source_0,0.4239,0.514211
Source_1,0.3646,0.523684
Source_2,0.4415,0.482105
Target_0,0.5135,0.522105
Target_1,0.4925,0.484737
Target_2,0.4487,0.481579
mean,0.44745,0.501404
h_mean,0.442044,0.500696


 29%|██▉       | 14/48 [00:14<00:34,  1.01s/it]

In [None]:
#run(machine_types[0], dev_paths)

# run