# Compare AUC 

The following code aims to compare the performances on different methods 

In [9]:
import sklearn
from sklearn import metrics
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model
import tensorflow_probability as tfp
from tensorflow.keras import backend as K

## import functions
import sys
sys.path.append("..")
from src import GetParquet
from src import Train_test_sets_maker
from src import MinMaxNormalisation
from src import CoxboxTransform
from src import Visualize
from src import MIMII_AE
from src import AE_variant1

In [10]:
def lossCalcMSE(model,data):
    reconstructions = model(data)
    return tf.keras.losses.mse(reconstructions,data)

def lossCalcMAE(model,data):
    reconstructions = model(data)
    return tf.keras.losses.mae(reconstructions,data)

def AUC(test_labels,test_set,model,metric = "mse"):
    
    if metric == "mse":
        lossValues = lossCalcMSE(model,test_set)
    if metric == "mae":
        lossValues = lossCalcMAE(model,test_set)
        
    return 1 - metrics.roc_auc_score(test_labels, lossValues)

In [11]:
def GetNormalizeData(method,ID): 
       # import data
    df_normal, df_abnormal = GetParquet.fun(1,ID,method)
    # create datasets
    train_set, test_set, test_labels = Train_test_sets_maker.fun(df_normal,df_abnormal)
    # normalize [0,1]
    train_set = MinMaxNormalisation.fun(train_set)
    test_set = MinMaxNormalisation.fun(test_set)
    
    return train_set,test_set,test_labels

In [12]:
def GetTF_casted_datasets(train_set,test_set,CB_train_set,CB_test_set):
    
        # save an instance as tensorflow for training
        CB_train_set = tf.cast(CB_train_set, tf.float32)
        CB_test_set = tf.cast(CB_test_set, tf.float32)
        train_set = tf.cast(train_set, tf.float32)
        test_set = tf.cast(test_set, tf.float32)
        
        return train_set, test_set, CB_train_set, CB_test_set

In [15]:
def MIMII_AUC_calc(AUC_list,InputSize,train_set,test_set,test_labels):
    
    """ uses the MSE as loss fun"""

    
    autoencoder = MIMII_AE.fun(InputSize)
        # Train algorithm
    history = autoencoder.fit(train_set, train_set, 
                                  epochs=50, 
                                  batch_size=512,
                                  validation_data=(test_set,test_set),
                                  verbose = 0,
                                  shuffle=True)
                        
    AUC_list.append(AUC(test_labels, test_set, autoencoder))
    
    """ uses the MAE as loss fun"""

    
    autoencoder = MIMII_AE.fun(InputSize,'mae')
        # Train algorithm
    history = autoencoder.fit(train_set, train_set, 
                                  epochs=50, 
                                  batch_size=512,
                                  validation_data=(test_set,test_set),
                                  verbose = 0,
                                  shuffle=True)
    AUC_list.append(AUC(test_labels, test_set, autoencoder, 'mae'))
    print("AUC computed")
    return AUC_list

In [16]:
def Variant_AUC_calc(AUC_list,InputSize,train_set,test_set,test_labels):
    
    """ uses the MSE as loss fun"""
    
    autoencoder = AE_variant1.fun(InputSize,128,8)
        # Train algorithm
    history = autoencoder.fit(train_set, train_set, 
                                  epochs=50, 
                                  batch_size=512,
                                  validation_data=(test_set,test_set),
                                  verbose = 0,
                                  shuffle=True)
                        
    AUC_list.append(AUC(test_labels, test_set, autoencoder))
    
    """ uses the MAE as loss fun"""

    
    autoencoder = AE_variant1.fun(InputSize,128,8,'mae')
        # Train algorithm
    history = autoencoder.fit(train_set, train_set, 
                                  epochs=50, 
                                  batch_size=512,
                                  validation_data=(test_set,test_set),
                                  verbose = 0,
                                  shuffle=True)
    AUC_list.append(AUC(test_labels, test_set, autoencoder, 'mae'))
    print("AUC computed !")
    return AUC_list

In [17]:
method_to_test = [0,1] 
ID_to_test = [1,2,3,4]

AUCs = []
for method in method_to_test:
    for ID in ID_to_test:
        AUC_list = []
        # Get datasets
        train_set, test_set, test_labels = GetNormalizeData(method,ID)
            
        # All datapoints are shifter by 1, as the coxbox supports only STRICTLY positive numbers
        CB_train_set = CoxboxTransform.fun(train_set+1)
        CB_test_set = CoxboxTransform.fun(test_set+1)
        
        # Cast all functions as Tensorflow
        train_set, test_set, CB_train_set, CB_test_set = GetTF_casted_datasets(train_set, test_set, CB_train_set, CB_test_set)
        
        InputSize = train_set.shape[1]
        # Get MIMII_AE for normal datasets
        AUC_list = MIMII_AUC_calc(AUC_list,InputSize,train_set,test_set,test_labels)
        # Get MIMII_AE for gaussian distributed datasets
        AUC_list = MIMII_AUC_calc(AUC_list,InputSize,CB_train_set,CB_test_set,test_labels)
        # Get Variant AE for normal datasets
        AUC_list = Variant_AUC_calc(AUC_list,InputSize,train_set,test_set,test_labels)
        # Get Variant AE for gaussian distributed datasets
        AUC_list = Variant_AUC_calc(AUC_list,InputSize,CB_train_set,CB_test_set,test_labels)
        print("ID" + str(ID) + "completed with method" + str(method) + "!")
        AUCs.append(AUC_list)



Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID1completed with method0!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID2completed with method0!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID3completed with method0!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID4completed with method0!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID1completed with method1!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID2completed with method1!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID3completed with method1!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID4complete

In [19]:
import pandas as pd
columns = ['MSE_MIMII', 'MAE_MIMII', 'MSE_Variant','MAE_Variant','CB_MSE_MIMII', 'CB_MAE_MIMII', 'CB_MSE_Variant','CB_MAE_Variant']
df = pd.DataFrame(AUCs, columns = columns)
df

Unnamed: 0,MSE_MIMII,MAE_MIMII,MSE_Variant,MAE_Variant,CB_MSE_MIMII,CB_MAE_MIMII,CB_MSE_Variant,CB_MAE_Variant
0,0.658376,0.658794,0.697815,0.709815,0.654572,0.633375,0.718884,0.70538
1,0.656652,0.76644,0.76771,0.77625,0.652631,0.72356,0.770589,0.799243
2,0.664462,0.75513,0.78606,0.791674,0.647458,0.687185,0.784089,0.775587
3,0.616241,0.628409,0.850743,0.850128,0.594517,0.612972,0.856252,0.858874
4,0.626283,0.629068,0.647632,0.660456,0.625155,0.619015,0.654845,0.672208
5,0.667222,0.724,0.711917,0.734226,0.667555,0.725838,0.737936,0.713585
6,0.617155,0.648621,0.721041,0.72882,0.618214,0.652368,0.731168,0.76388
7,0.61872,0.605303,0.780351,0.805656,0.593731,0.627727,0.782617,0.818941


In [21]:
df['CB_MAE_Variant'].iloc[4:8].mean()

0.7421535831469654

## Changer the microphone to mic 4 

In [22]:
method_to_test = [1] 
ID_to_test = [1,2,3,4]

AUCs = []
for method in method_to_test:
    for ID in ID_to_test:
        AUC_list = []
        # Get datasets
        train_set, test_set, test_labels = GetNormalizeData(method,ID)
            
        # All datapoints are shifter by 1, as the coxbox supports only STRICTLY positive numbers
        CB_train_set = CoxboxTransform.fun(train_set+1)
        CB_test_set = CoxboxTransform.fun(test_set+1)
        
        # Cast all functions as Tensorflow
        train_set, test_set, CB_train_set, CB_test_set = GetTF_casted_datasets(train_set, test_set, CB_train_set, CB_test_set)
        
        InputSize = train_set.shape[1]
        # Get MIMII_AE for normal datasets
        AUC_list = MIMII_AUC_calc(AUC_list,InputSize,train_set,test_set,test_labels)
        # Get MIMII_AE for gaussian distributed datasets
        AUC_list = MIMII_AUC_calc(AUC_list,InputSize,CB_train_set,CB_test_set,test_labels)
        # Get Variant AE for normal datasets
        AUC_list = Variant_AUC_calc(AUC_list,InputSize,train_set,test_set,test_labels)
        # Get Variant AE for gaussian distributed datasets
        AUC_list = Variant_AUC_calc(AUC_list,InputSize,CB_train_set,CB_test_set,test_labels)
        print("ID" + str(ID) + "completed with method" + str(method) + "!")
        AUCs.append(AUC_list)

Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID1completed with method1!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID2completed with method1!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID3completed with method1!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID4completed with method1!


In [23]:
import pandas as pd
columns = ['MSE_MIMII', 'MAE_MIMII', 'MSE_Variant','MAE_Variant','CB_MSE_MIMII', 'CB_MAE_MIMII', 'CB_MSE_Variant','CB_MAE_Variant']
df = pd.DataFrame(AUCs, columns = columns)
df

Unnamed: 0,MSE_MIMII,MAE_MIMII,MSE_Variant,MAE_Variant,CB_MSE_MIMII,CB_MAE_MIMII,CB_MSE_Variant,CB_MAE_Variant
0,0.531656,0.494194,0.745881,0.744024,0.539409,0.495875,0.734724,0.747488
1,0.802469,0.816643,0.780297,0.784318,0.615531,0.782164,0.810141,0.789025
2,0.614326,0.81723,0.776543,0.799687,0.62115,0.691028,0.785975,0.801703
3,0.532356,0.613134,0.85802,0.893967,0.524989,0.617816,0.883189,0.894154


In [24]:
method_to_test = [0] 
ID_to_test = [1,2,3,4]

AUCs = []
for method in method_to_test:
    for ID in ID_to_test:
        AUC_list = []
        # Get datasets
        train_set, test_set, test_labels = GetNormalizeData(method,ID)
            
        # All datapoints are shifter by 1, as the coxbox supports only STRICTLY positive numbers
        CB_train_set = CoxboxTransform.fun(train_set+1)
        CB_test_set = CoxboxTransform.fun(test_set+1)
        
        # Cast all functions as Tensorflow
        train_set, test_set, CB_train_set, CB_test_set = GetTF_casted_datasets(train_set, test_set, CB_train_set, CB_test_set)
        
        InputSize = train_set.shape[1]
        # Get MIMII_AE for normal datasets
        AUC_list = MIMII_AUC_calc(AUC_list,InputSize,train_set,test_set,test_labels)
        # Get MIMII_AE for gaussian distributed datasets
        AUC_list = MIMII_AUC_calc(AUC_list,InputSize,CB_train_set,CB_test_set,test_labels)
        # Get Variant AE for normal datasets
        AUC_list = Variant_AUC_calc(AUC_list,InputSize,train_set,test_set,test_labels)
        # Get Variant AE for gaussian distributed datasets
        AUC_list = Variant_AUC_calc(AUC_list,InputSize,CB_train_set,CB_test_set,test_labels)
        print("ID" + str(ID) + "completed with method" + str(method) + "!")
        AUCs.append(AUC_list)
        
import pandas as pd
columns = ['MSE_MIMII', 'MAE_MIMII', 'MSE_Variant','MAE_Variant','CB_MSE_MIMII', 'CB_MAE_MIMII', 'CB_MSE_Variant','CB_MAE_Variant']
df = pd.DataFrame(AUCs, columns = columns)
df

Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID1completed with method0!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID2completed with method0!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID3completed with method0!
Data acquired !
Train & test sets created !
AUC computed
AUC computed
AUC computed !
AUC computed !
ID4completed with method0!


Unnamed: 0,MSE_MIMII,MAE_MIMII,MSE_Variant,MAE_Variant,CB_MSE_MIMII,CB_MAE_MIMII,CB_MSE_Variant,CB_MAE_Variant
0,0.670527,0.66056,0.710136,0.711362,0.666632,0.669101,0.696668,0.720765
1,0.664711,0.745462,0.765957,0.782114,0.662544,0.761098,0.767605,0.764801
2,0.622368,0.75859,0.806469,0.78624,0.621579,0.663932,0.795818,0.785096
3,0.598351,0.638657,0.848534,0.861617,0.586029,0.596058,0.864501,0.868289
