# show reconstruct image

In [1]:
import pickle
import yaml
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn.functional as F

In [2]:
########################################################################
# load config
########################################################################
with open("./config.yaml", 'rb') as f:
    config = yaml.load(f)

  """


In [3]:
########################################################################
# Setting I/O path
########################################################################
# input dirs
INPUT_ROOT = config['IO_OPTION']['INPUT_ROOT']
dev_path = INPUT_ROOT + "/dev_data"
add_dev_path = INPUT_ROOT + "/add_dev_data"
eval_path = INPUT_ROOT + "/eval_test"
MODEL_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/models'
# machine type
MACHINE_TYPE = config['IO_OPTION']['MACHINE_TYPE']
machine_types = os.listdir(dev_path)
# output dirs
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
RESULT_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/result'
RECONS_OUTDIR = OUTPUT_ROOT +'/eval_reconstruct_img'
PKL_DIR = OUTPUT_ROOT +'/pkl'
os.makedirs(MODEL_DIR, exist_ok=True)

# eval

## import library

In [4]:
########################################################################
# import default python-library
########################################################################
import os
import glob
import csv
import re
import itertools
import sys
from collections import defaultdict
########################################################################


########################################################################
# import additional python-library
########################################################################
import numpy
from sklearn import metrics
import common as com
import pytorch_modeler as modeler
from pytorch_model import DAGMM as Model
import torch.utils.data
import yaml
yaml.warnings({'YAMLLoadWarning': False})
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
########################################################################
import eval_functions as eval_func
from pytorch_utils import to_var

## Setting

In [5]:
########################################################################
# load config
########################################################################
with open("./config.yaml", 'rb') as f:
    config = yaml.load(f)

  and should_run_async(code)


In [6]:
########################################################################
# Setting seed
########################################################################
modeler.set_seed(42)

In [7]:
########################################################################
# Setting I/O path
########################################################################
# input dirs
INPUT_ROOT = config['IO_OPTION']['INPUT_ROOT']
dev_path = INPUT_ROOT + "/dev_data"
add_dev_path = INPUT_ROOT + "/add_dev_data"
eval_path = INPUT_ROOT + "/eval_test"
MODEL_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/models'
# machine type
MACHINE_TYPE = config['IO_OPTION']['MACHINE_TYPE']
machine_types = os.listdir(dev_path)
# output dirs
OUTPUT_ROOT = config['IO_OPTION']['OUTPUT_ROOT']
RESULT_DIR = config['IO_OPTION']['OUTPUT_ROOT'] + '/result'
RECONS_OUTDIR = OUTPUT_ROOT +'/eval_reconstruct_img'
os.makedirs(MODEL_DIR, exist_ok=True)

In [8]:
########################################################################
# for original function
########################################################################
param = {}
param["dev_directory"] = dev_path
param["eval_directory"] = eval_path
param["model_directory"] = MODEL_DIR
param["result_directory"] = RESULT_DIR
param["result_file"] = 'result.csv'

from sklearn.metrics import mean_squared_error as mse

def calc_time_anomaly(x, y, label, file_name):
    fig = plt.figure(figsize=(10,5)) # width, height
    fig.suptitle('label={}'.format(int(label)))
    time_anomaly = np.zeros((x.shape[0]))
    for frame in range(x.shape[0]):
        time_anomaly[frame] = mse(y[frame,:],x[frame,:])
    plt.plot(time_anomaly)
    plt.title(f'label:{label}')

time_anomaly.mean()

plt.plot(time_anomaly)

plt.figure(figsize=(5,5))
plt.imshow(np.abs(x-y), aspect='auto')

## run eval

In [9]:
def make_reconstruct_img(x, y, label, file_name):
    fig = plt.figure(figsize=(10,5)) # width, height
    fig.suptitle('label={}'.format(int(label)))
    ax1 = fig.add_subplot(121, title='x') # 明示的にAxesを作成する
    sns.heatmap(x.T, ax=ax1) # ax1を参照するようにする
    ax2 = fig.add_subplot(122, title='y')
    sns.heatmap(y.T, ax=ax2)
    fig.savefig('{}.png'.format(file_name))

In [10]:
mode = 'dev'

In [11]:
from torchvision import transforms
import preprocessing as prep

class extract_waveform(object):
    """
    wavデータロード(波形)
    
    Attributes
    ----------
    sound_data : waveform
    """
    def __init__(self, sound_data=None):
        self.sound_data = sound_data
    
    def __call__(self, sample):
        self.sound_data = com.file_load(sample['wav_name'],
                                        sr=config['preprocessing']['sample_rate'],
                                        mono=config['preprocessing']['mono'])
        self.sound_data = self.sound_data[0]
        self.label = np.array(sample['label'])
        self.wav_name = sample['wav_name']
        
        return {'feature': self.sound_data, 'label': self.label, 'wav_name': self.wav_name}

class ToTensor(object):
    """
    Convert ndarrays in sample to Tensors.
    """

    def __call__(self, sample):
        feature, label, wav_name = sample['feature'], sample['label'], sample['wav_name']
        
        return {'feature': torch.from_numpy(feature).float(), 'label': torch.from_numpy(label), 'wav_name': wav_name}

class DCASE_task2_Dataset_test(torch.utils.data.Dataset):
    '''
    Attribute
    ----------
    
    '''
    
    def __init__(self, file_list, transform=None):
        self.transform = transform
        self.file_list = file_list
        
    def __len__(self):
        return len(self.file_list)
    
    def __getitem__(self, idx):
        file_path = self.file_list[idx]
        # ファイル名でlabelを判断
        if "normal" in file_path:
            label = 0
        else:
            label = 1
        
        sample = {'wav_name':file_path, 'label':np.array(label)}
        sample = self.transform(sample)
        
        return sample

def make_dataloader_train(paths):
    transform = transforms.Compose([
        extract_waveform(),
        ToTensor()
    ])
    dataset = DCASE_task2_Dataset_test(paths, transform=transform)
    
    train_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=config['fit']['batch_size'],
        shuffle=True,
        num_workers=2,
        pin_memory=True
        )
    
    return test_loader
    
def make_dataloader_test(paths):
    transform = transforms.Compose([
        extract_waveform(),
        ToTensor()
    ])
    dataset = DCASE_task2_Dataset_test(paths, transform=transform)
    
    test_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=128,
        shuffle=False,
        num_workers=2,
        pin_memory=True
        )
    
    return test_loader

In [12]:
############################################################################
# make path set and train/valid split
############################################################################
'''
train_paths[machine_type]['train' or 'valid'] = path
'''
dev_train_paths = {}
add_train_paths = {}
train_paths = {}

for machine_type in machine_types:
    # dev train
    dev_train_paths = ["{}/{}/train/".format(dev_path, machine_type) + file for file in os.listdir("{}/{}/train".format(dev_path, machine_type))]
    dev_train_paths = sorted(dev_train_paths)
    # add_dev train
    add_train_paths = ["{}/{}/train/".format(add_dev_path, machine_type) + file for file in os.listdir("{}/{}/train".format(add_dev_path, machine_type))]
    add_train_paths = sorted(add_train_paths)
    # valid
    dev_valid_paths = ["{}/{}/test/".format(dev_path, machine_type) + file for file in os.listdir("{}/{}/test".format(dev_path, machine_type))]
    dev_valid_paths = sorted(dev_valid_paths)
    
    train_paths[machine_type] = {}
    train_paths[machine_type]['train'] = dev_train_paths + add_train_paths
    train_paths[machine_type]['valid'] = dev_valid_paths

In [13]:
gmm_base_path = '/media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/models'

In [14]:
#def run_eval(param, mode):
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# make output result directory
os.makedirs(RESULT_DIR, exist_ok=True)

# load base directory
dirs = com.select_dirs(param=param, mode=mode)

# initialize lines in csv for AUC and pAUC
csv_lines = []


# loop of the base directory
for idx, target_dir in enumerate(dirs[2:]):
    com.logger.info("===========================")
    com.logger.info("[{idx}/{total}] {dirname}".format(
        dirname=target_dir, idx=idx+1, total=len(dirs)))

    machine_type = os.path.split(target_dir)[1]

    com.logger.info("============== MODEL LOAD ==============")

    model_file = "{model}/{machine_type}_model.pth".format(
        model=param["model_directory"],
        machine_type=machine_type)

    if not os.path.exists(model_file):
        com.logger.error("{} model not found ".format(machine_type))
        sys.exit(-1)

    # define AE model
    model = Model(sample_rate=config['preprocessing']['sample_rate'],
                  window_size=config['preprocessing']['window_size'],
                  hop_size=config['preprocessing']['hop_size'],
                  mel_bins=config['preprocessing']['mel_bins'],
                  fmin=config['preprocessing']['fmin'],
                  fmax=config['preprocessing']['fmax'],
                  latent_size=config['fit']['latent_size'],
                  mixture_size=config['fit']['mixture_size']).to(device)
    model.eval()
    model.load_state_dict(torch.load(model_file))

    if mode:
        # results by type
        csv_lines.append([machine_type])
        csv_lines.append(["id", "AUC", "pAUC"])
        performance = []

    machine_id_list = eval_func.get_machine_id_list_for_test(target_dir)
    recons_outpath = RECONS_OUTDIR + '/' + machine_type
    os.makedirs(recons_outpath, exist_ok=True)
    
    # calc train GMM param
    com.logger.info(f"============== CALC GMM PARAM : {machine_type} ==============")
    
    #train_loader = make_dataloader_test(train_paths[machine_type]['train'])
    gmm_path = gmm_base_path + f'/{machine_type}_gmm_param.pkl'
    gmm_param = pd.read_pickle(gmm_path)
    # evaluation
    for id_str in machine_id_list:

        # load list of test files
        test_files, y_true = eval_func.test_file_list_generator(target_dir, id_str, mode)

        # setup anomaly score file path
        anomaly_score_csv = \
            "{result}/anomaly_score_{machine_type}_{id_str}.csv"\
            .format(result=param["result_directory"],
                    machine_type=machine_type,
                    id_str=id_str)
        anomaly_score_list = []

        com.logger.info(
            "============== BEGIN TEST FOR A MACHINE ID ==============")

        y_pred = []
        anomaly_count = 0
        normal_count = 0
        
        test_loader = make_dataloader_test(test_files)
        start_idx = 0
        end_idx = 0
        slicing = None

        with torch.no_grad():
            for it, data in enumerate(tqdm(test_loader)):
                try:
                    feature = data['feature']
                    feature = to_var(feature)
                    label = data['label']
                    file_path = data['wav_name']
                    # reconstruction through auto encoder in pytorch
                    with torch.no_grad():
                        nn_out = model(feature)
                        z, _ = nn_out['z'], nn_out['gamma']
                        sample_energy, cov_diag = model.compute_energy(z, phi=gmm_param[0], mu=gmm_param[1], cov=gmm_param[2], size_average=False)
                        preds = sample_energy.data.cpu().numpy()
                        if it == 0:
                            y_pred = preds.copy()
                        else:
                            y_pred = np.concatenate([y_pred, preds], axis=0)

                    for idx in range(len(file_path)):
                        anomaly_score_list.append([os.path.basename(file_path[idx]), preds[idx]])
                except FileNotFoundError:
                    com.logger.error("file broken!!")

        # save anomaly score
        eval_func.save_csv(save_file_path=anomaly_score_csv,
                           save_data=anomaly_score_list)
        com.logger.info(
            "anomaly score result ->  {}".format(anomaly_score_csv))

        if mode:
            # append AUC and pAUC to lists
            auc = metrics.roc_auc_score(y_true, y_pred)
            p_auc = metrics.roc_auc_score(
                y_true, y_pred, max_fpr=config["etc"]["max_fpr"])
            csv_lines.append([id_str.split("_", 1)[1], auc, p_auc])
            performance.append([auc, p_auc])
            com.logger.info("AUC : {}".format(auc))
            com.logger.info("pAUC : {}".format(p_auc))

        com.logger.info(
            "============ END OF TEST FOR A MACHINE ID ============")

    if mode:
        # calculate averages for AUCs and pAUCs
        averaged_performance = numpy.mean(
            numpy.array(performance, dtype=float), axis=0)
        csv_lines.append(["Average"] + list(averaged_performance))
        csv_lines.append([])

if mode:
    # output results
    result_path = "{result}/{file_name}".format(
        result=param["result_directory"],
        file_name=param["result_file"])
    com.logger.info("AUC and pAUC results -> {}".format(result_path))
    eval_func.save_csv(save_file_path=result_path, save_data=csv_lines)

2020-12-01 23:23:49,476 - INFO - load_directory <- development
2020-12-01 23:23:49,479 - INFO - [1/6] /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/fan
2020-12-01 23:23:50,432 - INFO - target_dir : /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/fan_id_00
2020-12-01 23:23:50,438 - INFO - test_file  num : 489
  0%|          | 0/4 [00:00<?, ?it/s]




100%|██████████| 4/4 [00:00<00:00,  4.09it/s]
2020-12-01 23:23:51,420 - INFO - anomaly score result ->  /media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/result/anomaly_score_fan_id_00.csv
2020-12-01 23:23:51,424 - INFO - AUC : 0.4083804627249357
2020-12-01 23:23:51,424 - INFO - pAUC : 0.4987018888347123
2020-12-01 23:23:51,425 - INFO - target_dir : /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/fan_id_02
2020-12-01 23:23:51,432 - INFO - test_file  num : 441
  0%|          | 0/4 [00:00<?, ?it/s]




100%|██████████| 4/4 [00:00<00:00,  6.30it/s]
2020-12-01 23:23:52,072 - INFO - anomaly score result ->  /media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/result/anomaly_score_fan_id_02.csv
2020-12-01 23:23:52,074 - INFO - AUC : 0.8455131964809384
2020-12-01 23:23:52,075 - INFO - pAUC : 0.6541132890878222
2020-12-01 23:23:52,076 - INFO - target_dir : /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/fan_id_04
2020-12-01 23:23:52,082 - INFO - test_file  num : 430
  0%|          | 0/4 [00:00<?, ?it/s]




100%|██████████| 4/4 [00:00<00:00,  5.89it/s]
2020-12-01 23:23:52,764 - INFO - anomaly score result ->  /media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/result/anomaly_score_fan_id_04.csv
2020-12-01 23:23:52,767 - INFO - AUC : 0.46618181818181825
2020-12-01 23:23:52,768 - INFO - pAUC : 0.49783549783549785
2020-12-01 23:23:52,769 - INFO - target_dir : /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/fan_id_06
2020-12-01 23:23:52,775 - INFO - test_file  num : 443
  0%|          | 0/4 [00:00<?, ?it/s]




100%|██████████| 4/4 [00:00<00:00,  5.91it/s]
2020-12-01 23:23:53,455 - INFO - anomaly score result ->  /media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/result/anomaly_score_fan_id_06.csv
2020-12-01 23:23:53,459 - INFO - AUC : 0.7072886297376093
2020-12-01 23:23:53,460 - INFO - pAUC : 0.7075341414761394
2020-12-01 23:23:53,462 - INFO - [2/6] /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/pump
2020-12-01 23:23:54,304 - INFO - target_dir : /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/pump_id_00
2020-12-01 23:23:54,307 - INFO - test_file  num : 237
  0%|          | 0/2 [00:00<?, ?it/s]




100%|██████████| 2/2 [00:00<00:00,  4.63it/s]
2020-12-01 23:23:54,743 - INFO - anomaly score result ->  /media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/result/anomaly_score_pump_id_00.csv
2020-12-01 23:23:54,746 - INFO - AUC : 0.5516788321167884
2020-12-01 23:23:54,747 - INFO - pAUC : 0.5090280445639647
2020-12-01 23:23:54,748 - INFO - target_dir : /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/pump_id_02
2020-12-01 23:23:54,752 - INFO - test_file  num : 205
  0%|          | 0/2 [00:00<?, ?it/s]




100%|██████████| 2/2 [00:00<00:00,  4.55it/s]
2020-12-01 23:23:55,195 - INFO - anomaly score result ->  /media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/result/anomaly_score_pump_id_02.csv
2020-12-01 23:23:55,198 - INFO - AUC : 0.5903809523809525
2020-12-01 23:23:55,199 - INFO - pAUC : 0.5333333333333333
2020-12-01 23:23:55,201 - INFO - target_dir : /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/pump_id_04
2020-12-01 23:23:55,205 - INFO - test_file  num : 194
  0%|          | 0/2 [00:00<?, ?it/s]




100%|██████████| 2/2 [00:00<00:00,  4.81it/s]
2020-12-01 23:23:55,624 - INFO - anomaly score result ->  /media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/result/anomaly_score_pump_id_04.csv
2020-12-01 23:23:55,627 - INFO - AUC : 0.8589361702127659
2020-12-01 23:23:55,628 - INFO - pAUC : 0.8353863381858903
2020-12-01 23:23:55,629 - INFO - target_dir : /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/pump_id_06
2020-12-01 23:23:55,633 - INFO - test_file  num : 196
  0%|          | 0/2 [00:00<?, ?it/s]




100%|██████████| 2/2 [00:00<00:00,  4.77it/s]
2020-12-01 23:23:56,056 - INFO - anomaly score result ->  /media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/result/anomaly_score_pump_id_06.csv
2020-12-01 23:23:56,059 - INFO - AUC : 0.6140625
2020-12-01 23:23:56,060 - INFO - pAUC : 0.5148026315789473
2020-12-01 23:23:56,062 - INFO - [3/6] /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/slider
2020-12-01 23:23:57,911 - INFO - target_dir : /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/slider_id_00
2020-12-01 23:23:57,916 - INFO - test_file  num : 445
  0%|          | 0/4 [00:00<?, ?it/s]




100%|██████████| 4/4 [00:00<00:00,  5.67it/s]
2020-12-01 23:23:58,626 - INFO - anomaly score result ->  /media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/result/anomaly_score_slider_id_00.csv
2020-12-01 23:23:58,629 - INFO - AUC : 0.5123623188405797
2020-12-01 23:23:58,629 - INFO - pAUC : 0.49588100686498854
2020-12-01 23:23:58,630 - INFO - target_dir : /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/slider_id_02
2020-12-01 23:23:58,635 - INFO - test_file  num : 356
  0%|          | 0/3 [00:00<?, ?it/s]




100%|██████████| 3/3 [00:00<00:00,  4.44it/s]
2020-12-01 23:23:59,316 - INFO - anomaly score result ->  /media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/result/anomaly_score_slider_id_02.csv
2020-12-01 23:23:59,320 - INFO - AUC : 0.577578125
2020-12-01 23:23:59,321 - INFO - pAUC : 0.5078125
2020-12-01 23:23:59,322 - INFO - target_dir : /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/slider_id_04
2020-12-01 23:23:59,326 - INFO - test_file  num : 267
  0%|          | 0/3 [00:00<?, ?it/s]




100%|██████████| 3/3 [00:00<00:00,  5.86it/s]
2020-12-01 23:23:59,841 - INFO - anomaly score result ->  /media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/result/anomaly_score_slider_id_04.csv
2020-12-01 23:23:59,843 - INFO - AUC : 0.4381437125748503
2020-12-01 23:23:59,844 - INFO - pAUC : 0.5568862275449101
2020-12-01 23:23:59,845 - INFO - target_dir : /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/slider_id_06
2020-12-01 23:23:59,850 - INFO - test_file  num : 178
  0%|          | 0/2 [00:00<?, ?it/s]




100%|██████████| 2/2 [00:00<00:00,  4.79it/s]
2020-12-01 23:24:00,271 - INFO - anomaly score result ->  /media/hiroki/working/research/dcase2020/result/2D/DAGMM/strict_comp/latent5_mixture3_ver3/result/anomaly_score_slider_id_06.csv
2020-12-01 23:24:00,274 - INFO - AUC : 0.6009615384615384
2020-12-01 23:24:00,275 - INFO - pAUC : 0.5489203778677463
2020-12-01 23:24:00,276 - INFO - [4/6] /media/hiroki/working/research/dcase2020/datasets/DCASE2/dev_data/valve
2020-12-01 23:24:00,278 - ERROR - valve model not found 


SystemExit: -1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
