In [1]:
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score, multilabel_confusion_matrix 
from sklearn.model_selection import train_test_split
from ModelHandler import ModelHandler
import pickle
import h5py
from sklearn.metrics import f1_score, precision_score, recall_score     
from tqdm import tqdm

configs = {
    "N_GRIDS": 5, 
    "SIGNAL_BASE_LENGTH": 12800, 
    "N_CLASS": 26, 
    "USE_NO_LOAD": False, 
    "AUGMENTATION_RATIO": 5, 
    "MARGIN_RATIO": 0.15, 
    "DATASET_PATH": "Synthetic_Full_iHall.hdf5",
    "TRAIN_SIZE": 0.8,
    "FOLDER_PATH": "tmp/aug2_newloss_kfold/", 
    "FOLDER_DATA_PATH": "tmp/aug2_newloss_kfold/", 
    "N_EPOCHS_TRAINING": 250,
    "INITIAL_EPOCH": 0,
    "TOTAL_MAX_EPOCHS": 250,
    "SNRdb": None # Nível de ruído em db
}


folderPath = "tmp/aug2_newloss_kfold/"
folderDataPath = "tmp/aug2_newloss_kfold/"
signalBaseLength = 12800
trainSize = 0.8
ngrids = 5

FOLD = True

dict_data = pickle.load(open(folderDataPath + "sorted_aug_data_" + str(ngrids) + "_" + str(signalBaseLength) + ".p", "rb")) # Load data
x_train = dict_data["x_train"]
x_test = dict_data["x_test"]
y_train = dict_data["y_train"]
y_test = dict_data["y_test"]

datasetPath = "Synthetic_Full_iHall.hdf5"

all_labels = []

arq = h5py.File(datasetPath, "r")
loads_list = ["1", "2", "3", "8"]
for load_qtd in loads_list:
    labels = arq[load_qtd]["labels"]  
    events = arq[load_qtd]["events"]    
    for waveform_labels, event in zip(labels, events):
        event_index = np.argwhere(event != 0)
        # events = event[event_index]
        for label, ev in zip(waveform_labels, event_index):
            all_labels.append(event[ev][0] * (label + 1))

copia_all_labels = all_labels.copy()
for i in range(int((len(y_test["classification"]) + len(y_train["classification"]))/len(copia_all_labels)) - 1):
    for label in copia_all_labels:
        all_labels.append(label)

labels_train, labels_test = train_test_split(all_labels, train_size=int(trainSize * len(all_labels)), random_state = 42)

In [3]:
from PostProcessing import PostProcessing
from DataHandler import DataHandler

postProcessing = PostProcessing(configs=configs)
dataHandler = DataHandler(configs=configs)
bestModel = ModelHandler.loadModel(folderPath + "best_model.h5", type_weights=None) # Load model

group_distribution = {
    "1": 4139,
    "2": 6916,
    "3": 7128,
    "8": 2629
}

#_, general_qtd = dataHandler.generateAcquisitionType(trainSize, distribution=group_distribution)
general_qtd = np.load("general_test_qtd.npy")

pcMetric_fold, dMetric_fold = postProcessing.checkModel(bestModel, x_test, y_test, general_qtd=general_qtd, print_error=False)

Total time: 166.09219209699586, Average Time: 0.0398972356706692
LIT-SYN-1 PCmetric: (1.0, 0.9875, 0.9935064935064936)
LIT-SYN-1 Dmetric: (0.7837837837837838, 0.5949367088607594, 0.6862745098039216)
LIT-SYN-2 PCmetric: (0.9766536964980544, 0.981203007518797, 0.9789674952198852)
LIT-SYN-2 Dmetric: (0.7729083665338645, 0.5747126436781609, 0.671875)
LIT-SYN-3 PCmetric: (0.9339622641509434, 0.9768211920529801, 0.9548387096774194)
LIT-SYN-3 Dmetric: (0.8316498316498316, 0.7423728813559322, 0.7871621621621622)
LIT-SYN-8 PCmetric: (0.8729281767955801, 0.9551282051282052, 0.9109792284866469)
LIT-SYN-8 Dmetric: (1.0506329113924051, 0.9664429530201343, 1.009771986970684)
LIT-SYN-All PCmetric: (0.9397590361445783, 0.9751243781094527, 0.9571603427172583)
LIT-SYN-All Dmetric: (0.8525641025641025, 0.7142857142857143, 0.7832480818414322)


In [15]:
if FOLD == False:
    threshold = 0.5
    final_prediction = []
    final_prediction_with_detection = []
    final_groundTruth = []

    bestModel = ModelHandler.loadModel(folderPath + "best_model.h5", type_weights=None) # Load model
    for xi, yclass, ytype in tqdm(zip(x_test, y_test["classification"], y_test["type"])):
        pred = bestModel.predict(np.expand_dims(xi, axis=0))
        prediction = np.max(pred[2][0],axis=0) # > threshold
        groundTruth = np.max(yclass,axis=0) # > threshold

        det = np.array([np.argmax(i) for i in ytype])
        prediction_with_detection = np.max(pred[2][0],axis=0) > 2 # Gambiarra
        if (det != 2).any():
            prediction_with_detection = np.max(pred[2][0],axis=0) # > threshold

        final_prediction.append(prediction)
        final_groundTruth.append(groundTruth)
        final_prediction_with_detection.append(prediction_with_detection)
    
    final_groundTruth = np.array(final_groundTruth)
    final_prediction = np.array(final_prediction)
    final_prediction_with_detection = np.array(final_prediction_with_detection)

4163it [03:32, 19.59it/s]


In [16]:
from sklearn.metrics import f1_score, precision_score, recall_score   

threshold = 0.5
f1_score(final_groundTruth > threshold, final_prediction > threshold, average='macro')

# train: 98,53% (0.9852785999750767 - F1 Macro)

0.9753754774236904

In [17]:
correct = np.zeros((26, 1))
total = np.zeros((26, 1))

threshold = 0.5
for ytrue, ypred in zip(final_groundTruth, final_prediction):
    correct[np.bitwise_and((ytrue > threshold), (ypred > threshold))] += 1
    total[ytrue > threshold] += 1

np.average(correct / total)

0.9801652438196159

In [17]:
if FOLD == False:
    correct_on = np.zeros((26,1))
    total_on = np.zeros((26,1))
    correct_off = np.zeros((26,1))
    total_off = np.zeros((26,1))

    for ytrue, ypred in zip(labels_test, final_prediction):
        if ytrue > 0:
            ytrue = ytrue - 1
            total_on[ytrue] += 1
            if ypred[ytrue] == 1:
                correct_on[ytrue] += 1
        elif ytrue < 0:
            ytrue = -1 * ytrue - 1 # Multiplica por -1 para voltar a ser positivo e subtrai 1 para voltar a começar do índice 0
            total_off[ytrue] += 1
            if ypred[ytrue] == 1:
                correct_off[ytrue] += 1

    acc_on = np.average(correct_on/total_on) % 100
    acc_off = np.average(correct_off/total_off) % 100
    acc = np.average((correct_on + correct_off)/(total_on + total_off)) % 100
    print(f"Acc total: {acc}, Acc on: {acc_on}, Acc off: {acc_off}")

Acc total: 0.03395798747846221, Acc on: 0.027627572503155458, Acc off: 0.04152930059182529


In [2]:
from tqdm import tqdm

if FOLD == True:
    X_all = np.vstack((x_train, x_test))
    ydet_all = np.vstack((y_train["detection"], y_test["detection"]))
    ytype_all = np.vstack((y_train["type"], y_test["type"]))
    yclass_all = np.vstack((y_train["classification"], y_test["classification"]))
    all_labels = np.vstack((labels_train, labels_test))

    final_acc_on, final_acc_off, final_acc = [], [], []
    y = {}
    for fold in tqdm(range(1, 11)):
        foldFolderPath = folderPath + str(fold) + "/"
        
        train_index = np.load(foldFolderPath + "train_index.npy")
        test_index = np.load(foldFolderPath + "test_index.npy")

        bestModel = ModelHandler.loadModel(foldFolderPath + "best_model.h5", type_weights=None) # Load model

        x_train = X_all[train_index]
        x_test = X_all[test_index]
        y_train["detection"] = ydet_all[train_index]
        y_test["detection"] = ydet_all[test_index]
        y_train["type"] = ytype_all[train_index]
        y_test["type"] = ytype_all[test_index]
        y_train["classification"] = yclass_all[train_index]
        y_test["classification"] = yclass_all[test_index]

        final_prediction = []
        final_prediction_with_detection = []
        final_groundTruth = []
        for xi, yclass, ytype in zip(x_test, y_test["classification"], y_test["type"]):
            pred = bestModel.predict(np.expand_dims(xi, axis=0))
            prediction = np.max(pred[2][0],axis=0)
            groundTruth = np.max(yclass,axis=0)

            det = np.array([np.argmax(i) for i in ytype])
            prediction_with_detection = np.max(pred[2][0],axis=0) > 2 # Gambiarra
            if (det != 2).any():
                prediction_with_detection = np.max(pred[2][0],axis=0)

            final_prediction.append(prediction)
            final_groundTruth.append(groundTruth) 
            final_prediction_with_detection.append(prediction_with_detection)

            del xi, yclass, ytype

        y[fold] = {}
        y[fold]["true"] = final_groundTruth.copy()
        y[fold]["pred"] = final_prediction.copy()
        y[fold]["pred_with_detection"] = final_prediction_with_detection.copy()

        # print(f"Predicted fold {fold}")

100%|██████████| 10/10 [12:34<00:00, 75.43s/it]


In [3]:
if FOLD == True:
    from sklearn.metrics import f1_score, precision_score, recall_score   
    from PostProcessing import PostProcessing

    postProcessing = PostProcessing(configs=configs)

    threshold = 0.5

    f1_macro, f1_micro = [], []
    # f1_macro_det, f1_micro_det = [], []

    for fold in range(1, 11):
        # fold_f1_macro_with_detection, fold_f1_micro_with_detection = postProcessing.f1_with_detection(bestModel, x_test, y_test, print_error=False)
        # f1_macro_det.append(fold_f1_macro_with_detection)
        # f1_micro_det.append(fold_f1_micro_with_detection)

        f1_macro.append(f1_score(np.array(y[fold]["true"]) > threshold, np.array(y[fold]["pred"]) > threshold, average='macro'))
        f1_micro.append(f1_score(np.array(y[fold]["true"]) > threshold, np.array(y[fold]["pred"]) > threshold, average='micro'))

        # print(f"Fold {fold}: F1 Macro: {f1_macro[-1] * 100:.2f}, F1 Micro: {f1_micro[-1] * 100:.2f}, F1 Macro det: {f1_macro_det[-1] * 100:.2f}, F1 Micro det: {f1_micro_det[-1] * 100:.2f}")
        print(f"Fold {fold}: F1 Macro: {f1_macro[-1] * 100:.1f}, F1 Micro: {f1_micro[-1] * 100:.1f}")

    print(f"Average: F1 Macro: {np.average(f1_macro) * 100:.1f}, F1 Micro: {np.average(f1_micro) * 100:.1f}")

Fold 1: F1 Macro: 96.9, F1 Micro: 97.3
Fold 2: F1 Macro: 96.6, F1 Micro: 96.3
Fold 3: F1 Macro: 96.5, F1 Micro: 96.5
Fold 4: F1 Macro: 97.1, F1 Micro: 97.1
Fold 5: F1 Macro: 97.8, F1 Micro: 97.4
Fold 6: F1 Macro: 96.8, F1 Micro: 97.2
Fold 7: F1 Macro: 96.8, F1 Micro: 96.6
Fold 8: F1 Macro: 96.5, F1 Micro: 96.5
Fold 9: F1 Macro: 97.6, F1 Micro: 97.1
Fold 10: F1 Macro: 95.5, F1 Micro: 96.1
Average: F1 Macro: 96.8, F1 Micro: 96.8


In [2]:
if FOLD == True:
    from PostProcessing import PostProcessing
    from DataHandler import DataHandler

    postProcessing = PostProcessing(configs=configs)
    dataHandler = DataHandler(configs=configs)

    group_distribution = {
        "1": 4139,
        "2": 6916,
        "3": 7128,
        "8": 2629
    }

    general_qtd_train, general_qtd_test = dataHandler.generateAcquisitionType(trainSize, distribution=group_distribution)
    X_all = np.vstack((x_train, x_test))
    ydet_all = np.vstack((y_train["detection"], y_test["detection"]))
    ytype_all = np.vstack((y_train["type"], y_test["type"]))
    yclass_all = np.vstack((y_train["classification"], y_test["classification"]))
    all_labels = np.vstack((labels_train, labels_test))

    general_qtd = np.vstack((np.expand_dims(general_qtd_train, axis=1), np.expand_dims(general_qtd_test, axis=1)))

    pcMetric, dMetric = [], []
    for fold in range(1, 11):
        foldFolderPath = folderPath + str(fold) + "/"
        
        train_index = np.load(foldFolderPath + "train_index.npy")
        test_index = np.load(foldFolderPath + "test_index.npy")

        bestModel = ModelHandler.loadModel(foldFolderPath + "best_model.h5", type_weights=None) # Load model

        x_train = X_all[train_index]
        x_test = X_all[test_index]
        y_train["detection"] = ydet_all[train_index]
        y_test["detection"] = ydet_all[test_index]
        y_train["type"] = ytype_all[train_index]
        y_test["type"] = ytype_all[test_index]
        y_train["classification"] = yclass_all[train_index]
        y_test["classification"] = yclass_all[test_index]

        general_qtd_test = general_qtd[test_index]

        print(f"-------------- FOLD {fold} ---------------")
        pcMetric_fold, dMetric_fold = postProcessing.checkModel(bestModel, x_test, y_test, general_qtd=general_qtd_test, print_error=False)
        pcMetric.append(pcMetric_fold)
        dMetric.append(dMetric_fold)

    print("------------ AVERAGE --------------")
    print(f"Average, PCMetric: {np.average(pcMetric)}, dMetric: {np.average(dMetric)}")

-------------- FOLD 1 ---------------
Total time: 125.0754116000644, Average Time: 0.059959449472705845
LIT-SYN-1 PCmetric: (1.0, 1.0, 1.0)
LIT-SYN-1 Dmetric: (0.7837837837837838, 0.71875, 0.7536231884057971)
LIT-SYN-2 PCmetric: (0.991304347826087, 0.9791666666666666, 0.9845559845559846)
LIT-SYN-2 Dmetric: (0.7456140350877193, 0.6808510638297872, 0.7098039215686275)
LIT-SYN-3 PCmetric: (0.9870967741935484, 0.9840425531914894, 0.9854227405247813)
LIT-SYN-3 Dmetric: (0.8431372549019608, 0.8324324324324325, 0.8372781065088757)
LIT-SYN-8 PCmetric: (0.9142857142857143, 0.9324324324324325, 0.9236111111111112)
LIT-SYN-8 Dmetric: (1.25, 1.1014492753623188, 1.1729323308270676)
LIT-SYN-All PCmetric: (0.9761273209549072, 0.9748858447488584, 0.9754601226993865)
LIT-SYN-All Dmetric: (0.8777173913043478, 0.8173302107728337, 0.8452830188679246)
-------------- FOLD 2 ---------------
Total time: 129.35549321994768, Average Time: 0.06183340976096925
LIT-SYN-1 PCmetric: (0.96875, 1.0, 0.9855072463768116)

In [8]:
print("------------ AVERAGE --------------")
avgPCMetric = np.average(pcMetric, axis=0) * 100
avgDMetric = np.average(dMetric, axis=0)
for i, subset in enumerate(["1", "2", "3", "8", "All"]):
    print(f"Average, LIT-SYN-{subset}, PCMetric - On: {avgPCMetric[i][0]:.1f}, Off: {avgPCMetric[i][1]:.1f}, Total: {avgPCMetric[i][2]:.1f}")
    print(f"Average, LIT-SYN-{subset}, DMetric - On: {avgDMetric[i][0]:.1f}, Off: {avgDMetric[i][1]:.1f}, Total: {avgDMetric[i][2]:.1f}")

------------ AVERAGE --------------
Average, LIT-SYN-1, PCMetric - On: 98.8, Off: 99.6, Total: 99.2
Average, LIT-SYN-1, DMetric - On: 0.8, Off: 0.6, Total: 0.7
Average, LIT-SYN-2, PCMetric - On: 97.9, Off: 98.7, Total: 98.3
Average, LIT-SYN-2, DMetric - On: 0.7, Off: 0.6, Total: 0.7
Average, LIT-SYN-3, PCMetric - On: 94.2, Off: 98.3, Total: 96.3
Average, LIT-SYN-3, DMetric - On: 0.8, Off: 0.8, Total: 0.8
Average, LIT-SYN-8, PCMetric - On: 89.0, Off: 90.2, Total: 89.6
Average, LIT-SYN-8, DMetric - On: 1.0, Off: 1.0, Total: 1.0
Average, LIT-SYN-All, PCMetric - On: 94.8, Off: 96.9, Total: 95.8
Average, LIT-SYN-All, DMetric - On: 0.8, Off: 0.8, Total: 0.8


### Cálculo da acurácia 

Acurácia a definição e considerando cada saída da rede de classificação como um classificador binário

$$
\begin{gather*}
Acc_i = \frac{TP}{TP + FP} \\ \\
Acc = \frac{1}{N} \sum_{i = 1}^{N} Acc_i
\end{gather*}
$$

- Acc_i: Acurácia para a carga i

In [13]:
threshold = 0.5

ytype_all = np.vstack((y_train["type"], y_test["type"]))

acc_on, acc_off, acc_no_event, acc_total = [], [], [], []
for fold in range(1, 11):
    correct_on = np.zeros((26,1))
    total_on = np.zeros((26,1))
    correct_off = np.zeros((26,1))
    total_off = np.zeros((26,1))
    correct_no_event = np.zeros((26,1))
    total_no_event = np.zeros((26,1))

    train_index = np.load(folderPath + str(fold) + "/train_index.npy")
    test_index = np.load(folderPath + str(fold) + "/test_index.npy")

    ytype_train = ytype_all[train_index]
    ytype_test = ytype_all[test_index]

    for ytype, ytrue, ypred in zip(ytype_test, y[fold]["true"], y[fold]["pred"]):
        event_type = np.min(np.argmax(ytype, axis=1))
        if event_type == 0:
            correct_on[np.bitwise_and(ytrue > threshold, ypred > threshold)] += 1
            total_on[ytrue > threshold] += 1
        elif event_type == 1:
            correct_off[np.bitwise_and(ytrue > threshold, ypred > threshold)] += 1
            total_off[ytrue > threshold] += 1
        else:
            correct_no_event[np.bitwise_and(ytrue > threshold, ypred > threshold)] += 1
            total_no_event[ytrue > threshold] += 1
    
    acc_on.append(100 * np.average(np.nan_to_num(correct_on/total_on)))
    acc_off.append(100 * np.average(np.nan_to_num(correct_off/total_off)))
    acc_no_event.append(100 * np.average(np.nan_to_num(correct_no_event/total_no_event)))
    acc_total.append(100 * np.average(np.nan_to_num((correct_on + correct_off + correct_no_event)/(total_on + total_off + total_no_event))))

    print(f"Fold {fold}, Acc on: {acc_on[-1]:.1f}, Acc off: {acc_off[-1]:.1f}, Acc no event: {acc_no_event[-1]:.1f} Acc total: {acc_total[-1]:.1f}")

print(f"Total, Acc on: {np.average(acc_on):.1f}, Acc off: {np.average(acc_off):.1f}, Acc no event: {np.average(acc_no_event):.1f}, Acc total: {np.average(acc_total):.1f}")

Fold 1, Acc on: 99.2, Acc off: 97.6, Acc no event: 95.4 Acc total: 96.7
Fold 2, Acc on: 95.7, Acc off: 93.8, Acc no event: 97.4 Acc total: 97.0
Fold 3, Acc on: 95.6, Acc off: 97.6, Acc no event: 96.9 Acc total: 96.9
Fold 4, Acc on: 97.3, Acc off: 96.8, Acc no event: 97.1 Acc total: 97.1
Fold 5, Acc on: 99.0, Acc off: 97.9, Acc no event: 98.5 Acc total: 98.5
Fold 6, Acc on: 97.3, Acc off: 97.5, Acc no event: 96.7 Acc total: 97.0
Fold 7, Acc on: 97.8, Acc off: 97.6, Acc no event: 96.8 Acc total: 97.1
Fold 8, Acc on: 95.8, Acc off: 94.7, Acc no event: 97.7 Acc total: 96.7
Fold 9, Acc on: 96.9, Acc off: 94.9, Acc no event: 97.6 Acc total: 97.7
Fold 10, Acc on: 95.6, Acc off: 92.4, Acc no event: 96.3 Acc total: 95.8
Total, Acc on: 97.0, Acc off: 96.1, Acc no event: 97.0, Acc total: 97.0


In [3]:
threshold = 0.5

all_labels = np.vstack((labels_train, labels_test))

acc_on, acc_off, acc_total = [], [], []
for fold in range(1, 11):
    correct_on = np.zeros((26,1))
    total_on = np.zeros((26,1))
    correct_off = np.zeros((26,1))
    total_off = np.zeros((26,1))

    train_index = np.load(folderPath + str(fold) + "/train_index.npy")
    test_index = np.load(folderPath + str(fold) + "/test_index.npy")

    labels_train = all_labels[train_index]
    labels_test = all_labels[test_index]

    for label, ytrue, ypred in zip(labels_test, y[fold]["true"], y[fold]["pred"]):
        if label > 0:
            correct_on[(ytrue > threshold) == (ypred > threshold)] += 1
            total_on[ytrue > threshold] += 1
        elif label < 0:
            correct_off[np.bitwise_and(ytrue > threshold, ypred > threshold)] += 1
            total_off[ytrue > threshold] += 1

    # total_on = np.where(total_on == 0, correct_on, total_on)
    # total_off = np.where(total_off == 0, correct_off, total_off)    
    
    acc_on.append(np.average(np.nan_to_num(correct_on/total_on)))
    acc_off.append(np.average(np.nan_to_num(correct_off/total_off)))
    acc_total.append(np.average(np.nan_to_num((correct_on + correct_off)/(total_on + total_off))))

    print(f"Fold {fold}, Acc on: {acc_on[-1]}, Acc off: {acc_off[-1]}, Acc total: {acc_total[-1]}")

print(f"Total, Acc on: {np.average(acc_on)}, Acc off: {np.average(acc_off)}, Acc total: {np.average(acc_total)}")

Fold 1, Acc on: 0.9431281226120443, Acc off: 0.9870077802926042, Acc total: 0.9758275127936348
Fold 2, Acc on: 0.9543069157601172, Acc off: 0.9930085296482326, Acc total: 0.9684629239937494
Fold 3, Acc on: 0.9526084057536229, Acc off: 0.9862392079219131, Acc total: 0.9690908704370735
Fold 4, Acc on: 0.9502807985347455, Acc off: 0.9889651665328318, Acc total: 0.9809545914084766
Fold 5, Acc on: 0.9816589508098907, Acc off: 0.9825139185044682, Acc total: 0.9824282046641915
Fold 6, Acc on: 0.9743425143263184, Acc off: 0.9890447656456492, Acc total: 0.981763816654446
Fold 7, Acc on: 0.9613606732632907, Acc off: 0.9840428812431075, Acc total: 0.9723644291135973
Fold 8, Acc on: 0.9533552515979224, Acc off: 0.985447421355359, Acc total: 0.9623539480934623
Fold 9, Acc on: 0.9844494129346484, Acc off: 0.9892635081876955, Acc total: 0.9864418885754864
Fold 10, Acc on: 0.9313551116986496, Acc off: 0.9921141804938952, Acc total: 0.9696432031934634
Total, Acc on: 0.958684615729125, Acc off: 0.987764