# train

In [1]:
import yaml
import os

with open("./config.yaml", 'rb') as f:
    config = yaml.load(f)

  config = yaml.load(f)


In [2]:
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
os.makedirs(OUTPUT_ROOT, exist_ok=True)

## load library

In [3]:
# python default library
import os
import shutil
import datetime
import sys
import pickle

# general analysis tool-kit
import numpy as np
import pandas as pd
#from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

# pytorch
import torch
from torch import optim, nn
from torch.utils.tensorboard import SummaryWriter

#from torchsummary import summary

# etc
import yaml
yaml.warnings({'YAMLLoadWarning': False})

# original library
sys.path.append('/home/hiroki/research/dcase2021_task2/src/functions')
import common as com
import pytorch_modeler as modeler
from pytorch_model import ResNet38 as Model
#from pytorch_utils import filtered_load_model
#import models

import librosa
import IPython
import librosa.display

## load config and set logger

In [4]:
log_folder = config['IO_OPTION']['OUTPUT_ROOT']+'/{0}.log'.format(datetime.date.today())
logger = com.setup_logger(log_folder, '00_train.py')

## Setting

In [5]:
# Setting seed
modeler.set_seed(42)

In [6]:
############################################################################
# Setting I/O path
############################################################################
# input dirs
INPUT_ROOT = config['IO_OPTION']['INPUT_ROOT']
dev_dir = INPUT_ROOT + "/dev_data"
add_dev_dir = INPUT_ROOT + "/add_dev_data"
# machine type
machine_types = os.listdir(dev_dir)
# output dirs
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
MODEL_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/models'
TB_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/tb'
OUT_FEATURE_DIR = OUTPUT_ROOT + '/extraction_features'
#os.makedirs(OUTPUT_ROOT, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(TB_DIR, exist_ok=True)
os.makedirs(OUT_FEATURE_DIR, exist_ok=True)
# copy config
shutil.copy('./config.yaml', OUTPUT_ROOT)

'/media/hiroki/HDD1TB/research/dcase2021_task2/output/CNN_finetune/config.yaml'

In [7]:
torch.cuda.is_available()

True

## make path list

- __train_paths__ ([dict]): Each Machine Type
    - __ToyCar__ ([dict]): Each dataset
        - train ([list]) : paths
        - valid_source ([list]) : paths
        - valid_target ([list]) :paths
    - __ToyTrain__ ([dict]): Each dataset
        - train ([list]) : paths
        - valid_source ([list]) : paths
        - valid_target ([list]) : paths
    - ...

In [8]:
dev_paths = {}

for machine_type in machine_types:
    # dev train
    dev_train_paths = [f"{dev_dir}/{machine_type}/train/" + file for file in os.listdir(f"{dev_dir}/{machine_type}/train")]
    dev_train_paths = sorted(dev_train_paths)
    
    # add_dev train
    add_dev_paths = [f"{add_dev_dir}/{machine_type}/train/" + file for file in os.listdir(f"{add_dev_dir}/{machine_type}/train")]
    add_dev_paths = sorted(add_dev_paths)
    
    # dev_source valid
    dev_source_paths = [f"{dev_dir}/{machine_type}/source_test/" + file for file in os.listdir(f"{dev_dir}/{machine_type}/source_test")]
    dev_source_paths = sorted(dev_source_paths)
    
    # dev_target valid
    dev_target_paths = [f"{dev_dir}/{machine_type}/target_test/" + file for file in os.listdir(f"{dev_dir}/{machine_type}/target_test")]
    dev_target_paths = sorted(dev_target_paths)
    
    # bundle
    dev_paths[machine_type] = {}
    dev_paths[machine_type]['train'] = dev_train_paths + add_dev_paths
    dev_paths[machine_type]['valid_source'] = dev_source_paths
    dev_paths[machine_type]['valid_target'] = dev_target_paths

## training

In [9]:
#############################################################################
# run
#############################################################################
def run(machine_type, dev_paths):
    com.tic()
    
    logger.info('TARGET MACHINE_TYPE: {0}'.format(machine_type))
    logger.info('MAKE DATA_LOADER')
    # dataloader
    dataloaders_dict = modeler.make_dataloader(dev_paths, machine_type)
    # define writer for tensorbord
    os.makedirs(TB_DIR+'/'+machine_type, exist_ok=True)
    tb_log_dir = TB_DIR + '/' + machine_type
    writer = SummaryWriter(log_dir = tb_log_dir)
    # model out path
    model_out_path = MODEL_DIR+'/{}_model.pth'.format(machine_type)
    logger.info('TRAINING')
    # parameter setting
    net = Model(sample_rate=config['param']['sample_rate'],
                window_size=config['param']['window_size'],
                hop_size=config['param']['hop_size'],
                mel_bins=config['param']['mel_bins'],
                fmin=config['param']['fmin'],
                fmax=config['param']['fmax'],
                classes_num=6
                )
    optimizer = optim.Adam(net.parameters())
    criterion = nn.CrossEntropyLoss()
    num_epochs = config['param']['num_epochs']
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e2, 
                                              max_lr=1e-3, epochs=num_epochs, steps_per_epoch=len(dataloaders_dict['train']))
    # load pre-trained model
    pretrained_dict = torch.load(config['IO_OPTION']['PREMODEL_ROOT'])
    net.load_state_dict(pretrained_dict['model'], strict=False)
    # training
    output_dicts = modeler.train_net(net, dataloaders_dict, optimizer, criterion, scheduler, num_epochs, writer, model_out_path)
    
    com.toc()

In [10]:
machine_types

['fan', 'gearbox', 'pump', 'slider', 'ToyCar', 'ToyTrain', 'valve']

In [11]:
for machine_type in machine_types[4:]:
    run(machine_type, dev_paths)

2021-04-09 09:10:12,399 - 00_train.py - INFO - TARGET MACHINE_TYPE: pump
2021-04-09 09:10:12,401 - 00_train.py - INFO - MAKE DATA_LOADER
2021-04-09 09:10:12,403 - 00_train.py - INFO - TRAINING
2021-04-09 09:10:16,043 - pytorch_modeler.py - INFO - train
  0%|          | 0/189 [00:00<?, ?it/s]

use: cuda:0


100%|██████████| 189/189 [03:43<00:00,  1.18s/it]
2021-04-09 09:13:59,520 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 19/19 [00:08<00:00,  2.16it/s]
2021-04-09 09:14:10,831 - pytorch_modeler.py - INFO - Save best model
2021-04-09 09:14:10,846 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:08<00:00,  2.37it/s]
2021-04-09 09:14:18,851 - pytorch_modeler.py - INFO - epoch:1/40, train_losses:25.133854746818542, val_source_losses:1.437139, val_target_losses:1.322834, best_flag:True
2021-04-09 09:14:18,852 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [03:43<00:00,  1.18s/it]
2021-04-09 09:18:02,081 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 19/19 [00:07<00:00,  2.67it/s]
2021-04-09 09:18:11,741 - pytorch_modeler.py - INFO - Save best model
2021-04-09 09:18:11,758 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:07<00:00,  2.51it/s]
2021-04-09 09:18:19,323 - pytorch_modeler.py - INFO - epoch:2/40, trai

100%|██████████| 19/19 [00:06<00:00,  2.81it/s]
2021-04-09 10:10:19,488 - pytorch_modeler.py - INFO - epoch:16/40, train_losses:7.291762966196984, val_source_losses:0.853144, val_target_losses:0.383777, best_flag:False
2021-04-09 10:10:19,489 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [03:24<00:00,  1.08s/it]
2021-04-09 10:13:43,847 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 19/19 [00:06<00:00,  2.81it/s]
2021-04-09 10:13:50,611 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:06<00:00,  2.81it/s]
2021-04-09 10:13:57,378 - pytorch_modeler.py - INFO - epoch:17/40, train_losses:9.398371011018753, val_source_losses:0.874661, val_target_losses:0.494651, best_flag:False
2021-04-09 10:13:57,379 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [03:24<00:00,  1.08s/it]
2021-04-09 10:17:21,774 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 19/19 [00:06<00:00,  2.81it/s]
2021-04-09 10:17:28,548 - pytorch_modeler.py 

100%|██████████| 19/19 [00:06<00:00,  2.80it/s]
2021-04-09 11:10:05,327 - pytorch_modeler.py - INFO - epoch:32/40, train_losses:6.289684124290943, val_source_losses:0.818312, val_target_losses:0.331036, best_flag:False
2021-04-09 11:10:05,328 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [03:24<00:00,  1.08s/it]
2021-04-09 11:13:29,958 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 19/19 [00:06<00:00,  2.80it/s]
2021-04-09 11:13:36,754 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:06<00:00,  2.80it/s]
2021-04-09 11:13:43,532 - pytorch_modeler.py - INFO - epoch:33/40, train_losses:7.765311596449465, val_source_losses:0.899096, val_target_losses:0.408701, best_flag:False
2021-04-09 11:13:43,533 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [03:24<00:00,  1.08s/it]
2021-04-09 11:17:08,349 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 19/19 [00:06<00:00,  2.79it/s]
2021-04-09 11:17:15,165 - pytorch_modeler.py 

elapsed time: 8939.840732813 [sec]


2021-04-09 11:39:14,501 - pytorch_modeler.py - INFO - train
  0%|          | 0/189 [00:00<?, ?it/s]

use: cuda:0


100%|██████████| 189/189 [03:26<00:00,  1.09s/it]
2021-04-09 11:42:41,319 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 20/20 [00:07<00:00,  2.66it/s]
2021-04-09 11:42:51,347 - pytorch_modeler.py - INFO - Save best model
2021-04-09 11:42:51,348 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:07<00:00,  2.45it/s]
2021-04-09 11:42:59,120 - pytorch_modeler.py - INFO - epoch:1/40, train_losses:37.77060979604721, val_source_losses:1.415523, val_target_losses:1.987927, best_flag:True
2021-04-09 11:42:59,121 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [03:25<00:00,  1.09s/it]
2021-04-09 11:46:24,344 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 20/20 [00:06<00:00,  2.89it/s]
2021-04-09 11:46:33,761 - pytorch_modeler.py - INFO - Save best model
2021-04-09 11:46:33,762 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:06<00:00,  2.79it/s]
2021-04-09 11:46:40,573 - pytorch_modeler.py - INFO - epoch:2/40, train

2021-04-09 12:37:43,330 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [03:24<00:00,  1.08s/it]
2021-04-09 12:41:08,182 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 20/20 [00:06<00:00,  2.89it/s]
2021-04-09 12:41:15,108 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:06<00:00,  2.80it/s]
2021-04-09 12:41:21,908 - pytorch_modeler.py - INFO - epoch:17/40, train_losses:117.18955874443054, val_source_losses:1.435330, val_target_losses:6.167872, best_flag:False
2021-04-09 12:41:21,909 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [03:24<00:00,  1.08s/it]
2021-04-09 12:44:46,840 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 20/20 [00:06<00:00,  2.89it/s]
2021-04-09 12:44:53,760 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:06<00:00,  2.79it/s]
2021-04-09 12:45:00,577 - pytorch_modeler.py - INFO - epoch:18/40, train_losses:125.45873031020164, val_source_losses:1.408120, val_target_losses:6.

100%|██████████| 19/19 [00:06<00:00,  2.79it/s]
2021-04-09 13:36:08,393 - pytorch_modeler.py - INFO - epoch:32/40, train_losses:92.99223117530346, val_source_losses:0.512014, val_target_losses:4.894328, best_flag:False
2021-04-09 13:36:08,394 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [03:24<00:00,  1.08s/it]
2021-04-09 13:39:33,243 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 20/20 [00:06<00:00,  2.89it/s]
2021-04-09 13:39:40,166 - pytorch_modeler.py - INFO - valid_target
100%|██████████| 19/19 [00:06<00:00,  2.79it/s]
2021-04-09 13:39:46,968 - pytorch_modeler.py - INFO - epoch:33/40, train_losses:61.37569776177406, val_source_losses:0.301736, val_target_losses:3.230300, best_flag:False
2021-04-09 13:39:46,969 - pytorch_modeler.py - INFO - train
100%|██████████| 189/189 [03:24<00:00,  1.08s/it]
2021-04-09 13:43:11,852 - pytorch_modeler.py - INFO - valid_source
100%|██████████| 20/20 [00:06<00:00,  2.89it/s]
2021-04-09 13:43:18,778 - pytorch_modeler.py 

elapsed time: 8802.777592421 [sec]


2021-04-09 14:05:57,313 - pytorch_modeler.py - INFO - train
  0%|          | 0/189 [00:00<?, ?it/s]

use: cuda:0


  1%|          | 1/189 [00:05<16:09,  5.16s/it]


RuntimeError: CUDA out of memory. Tried to allocate 32.00 MiB (GPU 0; 11.77 GiB total capacity; 10.12 GiB already allocated; 79.25 MiB free; 10.22 GiB reserved in total by PyTorch)

In [None]:
machine_type = 'pump'
input_path = f'{OUT_FEATURE_DIR}/{machine_type}_features.pkl'
ext_data = pd.read_pickle(input_path)

In [None]:
plt.figure(figsize=(10,10))
plt.imshow(ext_data['train']['features'], cmap='jet')
plt.colorbar()

In [None]:
plt.figure(figsize=(10,10))
plt.imshow(ext_data['valid_source']['features'], cmap='jet')
plt.colorbar()

In [None]:
plt.figure(figsize=(10,10))
plt.imshow(ext_data['train']['features'], cmap='jet')
plt.colorbar()

history = run('ToyCar')
with open('{}/{}_history.pkl'.format(PKL_DIR, machine_type), 'wb') as file:
    pickle.dump(history , file)

In [None]:
print(M_means.shape)
plt.imshow(M_means, aspect='auto', cmap='jet')
plt.title(phase)
plt.colorbar()
plt.show()