# WHOLE

In [1]:
import os
import pickle
import numpy as np
import pandas as pd
from scipy.stats import hmean
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, recall_score, precision_score, auc, roc_curve

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the three metadata tables (train / test / eval) in one pass.
# Later cells read the 'domain', 'machine', 'audio_path' and 'label' columns
# of eval_train / eval_test.
eval_train, eval_test, eval_eval = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../processed_data/evaluation_train.csv',
        '../processed_data/evaluation_test.csv',
        '../processed_data/evaluation_eval.csv',
    )
)

In [3]:
# Load the pre-computed embedding matrices, one per split.
# The `with` statement closes each file on exit, so no explicit close() is needed.
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# files produced by a trusted pipeline.
with open('result_emb/train_emb.pickle', 'rb') as f:
    result_train = pickle.load(f)

with open('result_emb/test_emb.pickle', 'rb') as f:
    result_test = pickle.load(f)

with open('result_emb/eval_emb.pickle', 'rb') as f:
    result_eval = pickle.load(f)

In [4]:
# Sanity check: display the shape of each embedding matrix (train, test, eval).
tuple(emb.shape for emb in (result_train, result_test, result_eval))

((16000, 512), (1400, 512), (1800, 512))

In [5]:
def length_norm(mat):
    """Scale every row of ``mat`` to unit Euclidean (L2) length.

    Parameters
    ----------
    mat : array-like
        Vectors stacked along the first axis, e.g. shape (n_rows, n_features).
        A single 1-D vector is also accepted and normalized as a whole.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``mat`` in which each row has L2 norm 1.
        All-zero rows have no direction; they are returned unchanged (all
        zeros) instead of being turned into NaNs by a division by zero.
    """
    mat = np.asarray(mat)
    # One vectorized norm per row instead of a Python-level loop over rows.
    norms = np.linalg.norm(mat, axis=-1, keepdims=True)
    # Guard against 0/0: dividing a zero row by 1 leaves it at zero.
    safe_norms = np.where(norms == 0, 1, norms)
    return mat / safe_norms

In [6]:
# Length-normalize the embeddings of every split with the same helper.
result_train_ln, result_test_ln, result_eval_ln = map(
    length_norm, (result_train, result_test, result_eval)
)

In [7]:
# Index labels of the source- and target-domain training rows. They are used
# below to index rows of result_train_ln, which relies on eval_train keeping
# the default 0..n-1 index that read_csv assigns.
# Both are plain ndarrays so the two variables have the same type.
source_train = np.array(eval_train[eval_train['domain']=='source'].index)
target_train = np.array(eval_train[eval_train['domain']=='target'].index)

In [8]:
# Summarize the source-domain training embeddings with 16 k-means centroids.
kmeans = KMeans(n_clusters=16, random_state=42)
kmeans.fit(result_train_ln[source_train])
centers = kmeans.cluster_centers_

In [9]:
# Anomaly score of each test embedding: the smaller of
#   * 1 - (dot product with the closest source k-means center), and
#   * 1 - (dot product with the closest target-domain training embedding).
# Embedding rows were length-normalized above; the centers are raw k-means
# means and are not re-normalized here.
dist_to_centers = (1 - result_test_ln @ centers.T).min(axis=-1, keepdims=True)
dist_to_targets = (1 - result_test_ln @ result_train_ln[target_train].T).min(axis=-1, keepdims=True)
cos = np.minimum(dist_to_centers, dist_to_targets)

eval_test['anomaly_score'] = cos

In [10]:
def _cosine_anomaly_scores(embeddings, source_centers, target_refs):
    """Return an (n, 1) array of anomaly scores for ``embeddings``.

    The score of a row is the smaller of ``1 - dot`` to its closest source
    k-means center and ``1 - dot`` to its closest target-domain reference
    embedding.
    """
    dist_to_centers = np.min(1 - np.dot(embeddings, source_centers.transpose()), axis=-1, keepdims=True)
    dist_to_targets = np.min(1 - np.dot(embeddings, target_refs.transpose()), axis=-1, keepdims=True)
    return np.minimum(dist_to_centers, dist_to_targets)


def _auc_pair(labels, scores, max_fpr=0.1):
    """Return ``(AUC, partial AUC)`` where the partial AUC is limited to ``max_fpr``."""
    return roc_auc_score(labels, scores), roc_auc_score(labels, scores, max_fpr=max_fpr)


# Index labels of the source / target training rows (used to index rows of
# result_train_ln, relying on the default 0..n-1 index from read_csv).
source_train = np.array(eval_train[eval_train['domain']=='source'].index)
target_train = np.array(eval_train[eval_train['domain']=='target'].index)

# 16 k-means centers summarize the source domain; the target-domain training
# embeddings are used directly as references.
kmeans = KMeans(n_clusters=16, random_state=42).fit(result_train_ln[source_train])
centers = kmeans.cluster_centers_
target_refs = result_train_ln[target_train]

# Score every split with the same helper. ravel() hands pandas a plain 1-D
# vector for the new column; the cos_* arrays keep their (n, 1) shape.
cos_train = _cosine_anomaly_scores(result_train_ln, centers, target_refs)
eval_train['anomaly_score'] = cos_train.ravel()

cos_test = _cosine_anomaly_scores(result_test_ln, centers, target_refs)
eval_test['anomaly_score'] = cos_test.ravel()

cos_eval = _cosine_anomaly_scores(result_eval_ln, centers, target_refs)
eval_eval['anomaly_score'] = cos_eval.ravel()

# Per-machine results, in the order of machine_list.
aucs = []            # AUC over both domains
p_aucs = []          # partial AUC (max_fpr=0.1) over both domains
aucs_source = []
p_aucs_source = []
aucs_target = []
p_aucs_target = []
ths = []             # threshold maximizing Youden's J (tpr - fpr)
decision_metrics = {}  # machine -> accuracy / f1 / recall / precision at that threshold

machine_list = eval_test['machine'].unique()
for machine in machine_list:
    # Build an independent frame for this machine. Chaining avoids mutating a
    # slice of eval_test in place (the SettingWithCopy pitfall).
    temp = (
        eval_test[eval_test['machine']==machine]
        .drop(columns='machine')
        .assign(audio_path=lambda df: df['audio_path'].apply(lambda x: x.split('/')[-1]))
        .sort_values(by='audio_path')
        .reset_index(drop=True)
    )
    true = temp['label'].values
    scores = temp['anomaly_score'].values

    # NOTE(review): the threshold is selected using the test labels themselves,
    # so the decision metrics below are optimistic.
    fpr, tpr, thresholds = roc_curve(true, scores)
    optimal_threshold = thresholds[np.argmax(tpr - fpr)]
    ths.append(optimal_threshold)

    # roc_curve defines each operating point as `score >= threshold`, so use
    # >= here to reproduce the selected point exactly.
    decisions = (scores >= optimal_threshold).astype(int)
    decision_metrics[machine] = {
        # accuracy_score divides by the actual number of samples instead of a
        # hard-coded count.
        'accuracy': accuracy_score(true, decisions),
        'f1': f1_score(true, decisions),
        'recall': recall_score(true, decisions),
        'precision': precision_score(true, decisions),
    }

    # Distinct local names keep the imported sklearn `auc` function unshadowed.
    auc_all, p_auc_all = _auc_pair(true, scores)
    aucs.append(auc_all)
    p_aucs.append(p_auc_all)

    temp_source = temp[temp['domain']=='source']
    auc_src, p_auc_src = _auc_pair(temp_source['label'].values, temp_source['anomaly_score'].values)
    aucs_source.append(auc_src)
    p_aucs_source.append(p_auc_src)

    temp_target = temp[temp['domain']=='target']
    auc_tgt, p_auc_tgt = _auc_pair(temp_target['label'].values, temp_target['anomaly_score'].values)
    aucs_target.append(auc_tgt)
    p_aucs_target.append(p_auc_tgt)

    # NOTE(review): this per-machine figure averages the per-domain pAUCs,
    # whereas the final score below uses the mixed-domain pAUC (p_aucs).
    # Kept as-is so the reported numbers do not change; confirm which is intended.
    print(f'Machine: {machine}, hmean: {hmean([auc_src, auc_tgt, p_auc_src, p_auc_tgt])*100:.2f}')

# Harmonic means across machines for each individual metric.
mean_auc = hmean(aucs)
mean_p_auc = hmean(p_aucs)
mean_auc_source = hmean(aucs_source)
mean_p_auc_source = hmean(p_aucs_source)
mean_auc_target = hmean(aucs_target)
mean_p_auc_target = hmean(p_aucs_target)

# Overall score: harmonic mean over source AUC, target AUC and mixed-domain
# pAUC of every machine.
score = hmean(aucs_source + aucs_target + p_aucs)
print(f'final score : {score*100:.2f}')

Machine: gearbox, hmean: 58.85
Machine: ToyTrain, hmean: 50.98
Machine: ToyCar, hmean: 47.32
Machine: valve, hmean: 55.77
Machine: slider, hmean: 68.70
Machine: fan, hmean: 55.64
Machine: bearing, hmean: 54.22
final score : 55.25


In [11]:
# Two reference score lists of 21 values each, for comparison with the final
# score above.
# NOTE(review): presumably published baseline results (7 machines x 3 metrics);
# the source is not recorded in this notebook - confirm and cite it.
baseline1 = [66.98, 33.75, 48.77, 76.63, 46.92, 47.95, 62.01, 61.4, 57.58, 67.71, 55.24, 57.53, 70.4, 69.34, 55.65, 66.51, 56.01, 61.77, 51.07, 46.25, 52.42]
baseline2 = [63.01, 37.35, 51.04, 61.99, 39.99, 48.21, 54.43, 51.58, 58.82, 79.37, 42.7, 53.44, 81.82, 74.35, 55.74, 75.35, 68.11, 49.05, 55.69, 53.61, 51.26]

# Harmonic mean of each list, printed in order.
for reference_scores in (baseline1, baseline2):
    print(hmean(reference_scores))

55.81113469542821
55.019889950673054
