# Metrics

Evaluates a trained model according to the metrics specified in the paper.

In [None]:
# Mount Google Drive under /content/drive; force_remount=True re-mounts even if
# a previous mount is still present.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
!pip install tensorflow==2.4.0
!pip install keras==2.4.0
!pip install kymatio
!pip install tqdm
!pip install iterative-stratification

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow==2.4.0
  Downloading tensorflow-2.4.0-cp37-cp37m-manylinux2010_x86_64.whl (394.7 MB)
[K     |████████████████████████████████| 394.7 MB 17 kB/s 
[?25hCollecting wrapt~=1.12.1
  Downloading wrapt-1.12.1.tar.gz (27 kB)
Collecting gast==0.3.3
  Downloading gast-0.3.3-py2.py3-none-any.whl (9.7 kB)
Collecting numpy~=1.19.2
  Downloading numpy-1.19.5-cp37-cp37m-manylinux2010_x86_64.whl (14.8 MB)
[K     |████████████████████████████████| 14.8 MB 52.0 MB/s 
Collecting typing-extensions~=3.7.4
  Downloading typing_extensions-3.7.4.3-py3-none-any.whl (22 kB)
Collecting grpcio~=1.32.0
  Downloading grpcio-1.32.0-cp37-cp37m-manylinux2014_x86_64.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 44.5 MB/s 
Collecting absl-py~=0.10
  Downloading absl_py-0.15.0-py3-none-any.whl (132 kB)
[K     |████████████████████████████████| 132 kB 63.5 MB/s 
Collecting tensorflo

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras==2.4.0
  Downloading Keras-2.4.0-py2.py3-none-any.whl (170 kB)
[K     |████████████████████████████████| 170 kB 8.3 MB/s 
Installing collected packages: keras
  Attempting uninstall: keras
    Found existing installation: keras 2.8.0
    Uninstalling keras-2.8.0:
      Successfully uninstalled keras-2.8.0
Successfully installed keras-2.4.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting kymatio
  Downloading kymatio-0.2.1-py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 1.5 MB/s 
Collecting configparser
  Downloading configparser-5.2.0-py3-none-any.whl (19 kB)
Installing collected packages: configparser, kymatio
Successfully installed configparser-5.2.0 kymatio-0.2.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: h

In [None]:
import sys
sys.path.append("drive/MyDrive/Scattering_Novo/src")

import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score, multilabel_confusion_matrix 
from sklearn.model_selection import train_test_split
from ModelHandler import ModelHandler
import pickle
import h5py
from sklearn.metrics import f1_score, precision_score, recall_score     
from tqdm import tqdm

# Experiment configuration. Besides the values unpacked right below, the whole
# dict is handed to the project's PostProcessing and DataHandler helpers later on.
configs = {
    "N_GRIDS": 5,
    "SIGNAL_BASE_LENGTH": 12800,
    "N_CLASS": 26,
    "USE_NO_LOAD": False,
    "AUGMENTATION_RATIO": 5,
    "MARGIN_RATIO": 0.15,
    "DATASET_PATH": "../Synthetic_Full_iHall.hdf5",
    "TRAIN_SIZE": 0.8,
    "FOLDER_PATH": "drive/MyDrive/Scattering_Novo/tmp/DIFDUAL/tests/AND75_4/",
    "FOLDER_DATA_PATH": "drive/MyDrive/Scattering_Novo/tmp/Without_Detection_Without_HAND/ND100/",
    "N_EPOCHS_TRAINING": 250,
    "INITIAL_EPOCH": 0,
    "TOTAL_MAX_EPOCHS": 250,
    "SNRdb": None # Noise level in dB
}

# Shortcuts for the values used throughout the notebook.
folderPath = configs["FOLDER_PATH"]
folderDataPath = configs["FOLDER_DATA_PATH"]
signalBaseLength = configs["SIGNAL_BASE_LENGTH"]
ngrids = configs["N_GRIDS"]
trainSize = configs["TRAIN_SIZE"]

# NOTE(review): unpickling can execute arbitrary code; only load data.p files
# that were produced by this project.
# The context manager closes the file handle as soon as the data is loaded.
with open(folderDataPath + "data.p", "rb") as data_file:
    dict_data = pickle.load(data_file) # Load data

Using TensorFlow backend.


## Choose best performing model

At this point, the model with the best performance on the validation set is chosen.

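To make this choice, the macro F1 scores obtained for each event type are averaged, and the fold with the highest average is selected. Note that the code below currently averages only the ON and OFF scores (see the TODO in `choose_model`).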

$$
F_1 = \frac{F1_{ON} + F1_{OFF} + F1_{NO\ EVENT}}{3}
$$

In [None]:
def choose_model(dict_data, folderPath):
    """Pick the cross-validation fold whose model scores best on its validation split.

    For each of the 10 folds, the fold's model is loaded and the validation
    signals are scaled with a ``MaxAbsScaler`` fitted on that fold's training
    signals. They are then passed through the scattering model and classified
    one example at a time. Macro F1 is computed separately for the examples
    whose event type is 0 and 1 (named ON and OFF in the accuracy section of
    this notebook), and the two scores are averaged.

    Parameters
    ----------
    dict_data : dict
        Must provide ``x_train``, ``y_train["classification"]`` and
        ``y_train["type"]``, all indexable with the stored fold indices.
    folderPath : str
        Folder (with trailing separator) holding ``scattering_model.h5`` and one
        sub-folder per fold (``1/`` ... ``10/``), each containing
        ``train_index.npy``, ``validation_index.npy`` and
        ``model_without_detection.h5``.

    Returns
    -------
    NumPy integer
        1-based number of the fold with the highest average macro F1.
    """
    # MaxAbsScaler is not part of the notebook's top-level import cell.
    from sklearn.preprocessing import MaxAbsScaler

    # Load the scattering model from the folder passed in, not from the global
    # configs, so the function evaluates the folder it was asked to evaluate.
    scattering_extract = ModelHandler.loadModel(folderPath + 'scattering_model.h5') # Load scattering model

    threshold = 0.5
    y_class = dict_data["y_train"]["classification"]
    y_type = dict_data["y_train"]["type"]

    f1_macro = []
    for fold in tqdm(range(1, 11)):
        foldFolderPath = folderPath + str(fold) + "/"

        train_index = np.load(foldFolderPath + "train_index.npy")
        validation_index = np.load(foldFolderPath + "validation_index.npy")

        bestModel = ModelHandler.loadModel(foldFolderPath + "model_without_detection.h5", type_weights=None) # Load model

        # Scale the raw signals with statistics from the training split only.
        scaler = MaxAbsScaler()
        scaler.fit(np.squeeze(dict_data["x_train"][train_index], axis=2))
        x_validation = np.expand_dims(scaler.transform(np.squeeze(dict_data["x_train"][validation_index], axis=2)), axis=2)

        x_validation_type, x_validation_class = scattering_extract.predict(x_validation)

        # Normalizing
        # NOTE(review): these scalers are fitted on the validation features
        # themselves; confirm this matches how the classifier was trained.
        x_validation_type = MaxAbsScaler().fit_transform(x_validation_type)
        x_validation_class = MaxAbsScaler().fit_transform(x_validation_class)

        final_prediction = []
        final_groundTruth = []
        for xi, xi_nd, yclass in zip(x_validation_type, x_validation_class, y_class[validation_index]):
            pred = bestModel.predict([np.expand_dims(xi, axis=0), np.expand_dims(xi_nd, axis=0)])
            # Without detection, output index 1 is the classification head;
            # take the maximum over axis 0 to get one score per class.
            final_prediction.append(np.max(pred[1][0], axis=0))
            final_groundTruth.append(np.max(yclass, axis=0))

        # One event-type id per validation example.
        event_type = np.min(np.argmax(y_type[validation_index], axis=2), axis=1)

        final_groundTruth = np.array(final_groundTruth)
        final_prediction = np.array(final_prediction)

        # TODO: Handle scenarios with and without negative examples (without events). The current approach only makes sense for scenarios without negative examples
        f1_macro.append([f1_score(final_groundTruth[event_type == 0] > threshold, final_prediction[event_type == 0] > threshold, average='macro', zero_division=0),
                         f1_score(final_groundTruth[event_type == 1] > threshold, final_prediction[event_type == 1] > threshold, average='macro', zero_division=0)])
        print(f"Fold {fold}: F1 Macro avg: {np.average(f1_macro[-1]) * 100:.1f}")

    # Folds are numbered from 1, argmax is 0-based.
    return np.argmax(np.average(f1_macro, axis=1)) + 1

fold = choose_model(dict_data, folderPath)




 10%|█         | 1/10 [00:32<04:54, 32.69s/it]

Fold 1: F1 Macro avg: 89.0


 20%|██        | 2/10 [00:56<03:41, 27.72s/it]

Fold 2: F1 Macro avg: 87.0


 30%|███       | 3/10 [01:20<02:59, 25.62s/it]

Fold 3: F1 Macro avg: 92.3


 40%|████      | 4/10 [01:44<02:30, 25.14s/it]

Fold 4: F1 Macro avg: 86.6


 50%|█████     | 5/10 [02:07<02:02, 24.48s/it]

Fold 5: F1 Macro avg: 88.9


 60%|██████    | 6/10 [02:30<01:36, 24.02s/it]

Fold 6: F1 Macro avg: 88.7


 70%|███████   | 7/10 [02:55<01:12, 24.21s/it]

Fold 7: F1 Macro avg: 89.3


 80%|████████  | 8/10 [03:23<00:50, 25.31s/it]

Fold 8: F1 Macro avg: 91.1


 90%|█████████ | 9/10 [03:47<00:25, 25.05s/it]

Fold 9: F1 Macro avg: 85.9


100%|██████████| 10/10 [04:11<00:00, 25.15s/it]

Fold 10: F1 Macro avg: 86.0





## Evaluates the identification

This step generates a dict with the ground truth and the prediction for each test example

In [None]:
from tqdm import tqdm
from sklearn.preprocessing import MaxAbsScaler

# Artefacts of the fold selected by choose_model.
foldFolderPath = f"{folderPath}{fold}/"

train_index, validation_index = (
    np.load(foldFolderPath + index_file)
    for index_file in ("train_index.npy", "validation_index.npy")
)

# Classifier of the selected fold, then the scattering feature extractor.
bestModel = ModelHandler.loadModel(foldFolderPath + "model_without_detection.h5", type_weights=None) # Load model
scattering_extract = ModelHandler.loadModel(configs["FOLDER_PATH"] + 'scattering_model.h5')

# The signal scaler only sees the training split of the selected fold.
scaler = MaxAbsScaler().fit(np.squeeze(dict_data["x_train"][train_index], axis=2))


def _scale_signals(signals):
    """Scale `signals` with `scaler`, dropping and restoring the singleton axis 2."""
    flattened = np.squeeze(signals, axis=2)
    return np.expand_dims(scaler.transform(flattened), axis=2)


x_train = _scale_signals(dict_data["x_train"][train_index])
x_validation = _scale_signals(dict_data["x_train"][validation_index])
x_test = _scale_signals(dict_data["x_test"])

# Two feature sets per test example, as returned by the scattering model.
x_test_type, x_test_class = scattering_extract.predict(x_test)

# Normalizing: each feature set gets its own freshly fitted scaler.
transformer = MaxAbsScaler()
x_test_type = transformer.fit_transform(x_test_type)

transformer = MaxAbsScaler()
x_test_class = transformer.fit_transform(x_test_class)

# Classify the test examples one by one and collect per-class maxima.
final_prediction, final_groundTruth = [], []
test_examples = zip(x_test_type, x_test_class, dict_data["y_test"]["classification"], dict_data["y_test"]["type"])
for type_features, class_features, class_labels, _ in test_examples:
    model_outputs = bestModel.predict([np.expand_dims(type_features, axis=0), np.expand_dims(class_features, axis=0)])
    # Output index 1 is used as the classification result.
    final_prediction.append(np.max(model_outputs[1][0], axis=0))
    final_groundTruth.append(np.max(class_labels, axis=0))

# Shallow copies, so later edits of the lists do not alter the stored results.
y = {"true": list(final_groundTruth), "pred": list(final_prediction)}



### F1 Score

#### F1 Macro:
$$
\begin{gather*}
F1_{Macro} = \frac{1}{Y} \sum_{i=1}^{Y} \frac{2 \cdot tp_i}{2 \cdot tp_i + fp_i + fn_i}
\end{gather*}
$$

#### F1 Micro:
$$
\begin{gather*}
F1_{Micro} = \frac{2 \cdot \sum_{i=1}^{Y} tp_i}{\sum_{i=1}^{Y} \left( 2 \cdot tp_i + fp_i + fn_i \right)}
\end{gather*}
$$

- $Y$: Number of appliances
- $tp_i$: True-positive classifications for appliance $i$
- $fp_i$: False-positive classifications for appliance $i$
- $fn_i$: False-negative classifications for appliance $i$

In [None]:
from sklearn.metrics import f1_score

threshold = 0.5

# Binarise ground truth and predictions once and reuse them for both averages.
y_true_bin = np.array(y["true"]) > threshold
y_pred_bin = np.array(y["pred"]) > threshold

f1_macro, f1_micro = (
    f1_score(y_true_bin, y_pred_bin, average=averaging)
    for averaging in ('macro', 'micro')
)

print(f"Fold {fold} - F1 Macro: {f1_macro * 100:.1f}, F1 Micro: {f1_micro * 100:.1f}")

Fold 3 - F1 Macro: 77.7, F1 Micro: 76.4


### Accuracy (ACC)

$$
\begin{gather*}
ACC_i = \frac{CCE_i}{TNE_i} \\ \\
ACC = \frac{1}{Y} \sum_{i = 1}^{Y} ACC_i
\end{gather*}
$$

- $ACC_i$: Accuracy for appliance $i$
- $CCE_i$: Number of connection events of appliance $i$ that were successfully identified
- $TNE_i$: Total number of connection events of appliance $i$

In [None]:
threshold = 0.5
n_class = configs["N_CLASS"]  # number of appliance classes (previously a hard-coded 26)

# Per-class counters, one (correct, total) pair per event type.
correct_on = np.zeros((n_class, 1))
total_on = np.zeros((n_class, 1))
correct_off = np.zeros((n_class, 1))
total_off = np.zeros((n_class, 1))
correct_no_event = np.zeros((n_class, 1))
total_no_event = np.zeros((n_class, 1))


def _mean_accuracy(correct, total):
    """Return the class-averaged ratio correct/total in percent.

    Classes with ``total == 0`` contribute 0 to the average. This matches the
    previous ``nan_to_num(correct / total)`` result, without triggering
    divide-by-zero RuntimeWarnings.
    """
    ratio = np.divide(correct, total, out=np.zeros_like(correct), where=total > 0)
    return 100 * np.average(ratio)


for ytype, ytrue, ypred in zip(dict_data["y_test"]["type"], y["true"], y["pred"]):
    event_type = np.min(np.argmax(ytype, axis=1))
    present = ytrue > threshold                              # classes active in the ground truth
    identified = np.bitwise_and(present, ypred > threshold)  # ... that were also predicted
    if event_type == 0:
        correct_on[identified] += 1
        total_on[present] += 1
    elif event_type == 1:
        correct_off[identified] += 1
        total_off[present] += 1
    else:
        correct_no_event[identified] += 1
        total_no_event[present] += 1

acc_on = _mean_accuracy(correct_on, total_on)
acc_off = _mean_accuracy(correct_off, total_off)
acc_no_event = _mean_accuracy(correct_no_event, total_no_event)
acc_total = _mean_accuracy(correct_on + correct_off + correct_no_event,
                           total_on + total_off + total_no_event)

print(f"Fold {fold} - Acc on: {acc_on:.1f}, Acc off: {acc_off:.1f}, Acc no event: {acc_no_event:.1f} Acc total: {acc_total:.1f}")

Fold 3 - Acc on: 74.1, Acc off: 75.8, Acc no event: 0.0 Acc total: 76.4




## Detection Metrics

### D
$$
\begin{gather*}
D = \frac{ \sum_{i=1}^{A} |d(i) - ev(i)|}{A}
\end{gather*}
$$

- `A`: Total number of events correctly detected ($\pm$ 10 semi-cycles tolerance)
- `d(i)`: Detection for the $i$-th correctly detected event
- `ev(i)`: Ground-truth detection for the $i$-th correctly detected event

### PC

$$
\begin{gather*}
PC = \frac{A}{N}
\end{gather*}
$$

- `A`: Total number of events correctly detected ($\pm$ 10 semi-cycles tolerance)
- `N`: Total number of events

In [None]:
from PostProcessing import PostProcessing
from DataHandler import DataHandler

# Project helpers, both configured from the notebook-wide settings.
postProcessing, dataHandler = PostProcessing(configs=configs), DataHandler(configs=configs)

y_test = dict_data["y_test"]
general_qtd_test = y_test["group"]

# Reload the artefacts of the selected fold.
foldFolderPath = f"{folderPath}{fold}/"
train_index = np.load(foldFolderPath + "train_index.npy")
bestModel = ModelHandler.loadModel(foldFolderPath + "model_without_detection.h5", type_weights=None) # Load model

# Scale the test signals with statistics of the fold's training split, then
# extract the two feature sets with the scattering model loaded earlier in the
# notebook (`scattering_extract`).
scaler = MaxAbsScaler().fit(np.squeeze(dict_data["x_train"][train_index], axis=2))
test_signals = np.squeeze(dict_data["x_test"], axis=2)
x_test = np.expand_dims(scaler.transform(test_signals), axis=2)
x_test_type, x_test_class = scattering_extract.predict(x_test)

# Normalizing: each feature set gets its own freshly fitted scaler.
transformer = MaxAbsScaler()
x_test_type = transformer.fit_transform(x_test_type)

transformer = MaxAbsScaler()
x_test_class = transformer.fit_transform(x_test_class)

print(f"-------------- FOLD {fold} ---------------")
# NOTE(review): checkModel2 is a project helper; its result is indexed as
# pcMetric[4][0..2] in the next cell. Confirm the layout against PostProcessing.
pcMetric = postProcessing.checkModel2(
    bestModel,
    x_test_type,
    x_test_class,
    y_test,
    general_qtd=general_qtd_test,
    print_error=False,
)

-------------- FOLD 3 ---------------
Total time: 30.28241867400152, Average Time: 0.03600763219262963
LIT-SYN-1 PCmetric: (1.0, 1.0, 1.0)
LIT-SYN-2 PCmetric: (0.9854014598540146, 0.8913043478260869, 0.9381818181818182)
LIT-SYN-3 PCmetric: (0.9463087248322147, 0.9056603773584906, 0.9253246753246753)
LIT-SYN-8 PCmetric: (0.9069767441860465, 0.5842696629213483, 0.7428571428571429)
LIT-SYN-All PCmetric: (0.9568345323741008, 0.8419811320754716, 0.8989298454221165)


In [None]:
# NOTE(review): entry 4 presumably corresponds to the aggregate "LIT-SYN-All"
# line printed by checkModel2; confirm against PostProcessing.
overall_pc = pcMetric[4]
pc_on, pc_off, pc_all = overall_pc[0], overall_pc[1], overall_pc[2]

In [None]:
# Collect the results into a single row (this cell only prints it; nothing is written to disk)

import tables
import numpy as np

# Accuracies are reported in percent above; convert them to fractions so that
# every entry of the row lies on the same 0-1 scale as the F1 and PC values.
accuracy_fractions = [percentage * 0.01 for percentage in (acc_on, acc_off, acc_total)]
row = accuracy_fractions + [f1_macro, f1_micro, pc_on, pc_off, pc_all]

print(np.array(row))




[0.74072448 0.75806357 0.76396519 0.77668157 0.76444444 0.95683453
 0.84198113 0.89892985]


In [None]:
# Display the fold number returned by choose_model, i.e. the model evaluated above.
fold

3