# Imports

In [None]:
import numpy as np
import pyarrow.parquet as pq
import pandas as pd
import pyarrow as pa
from pathlib import Path
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import importlib
import contextlib
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from tqdm.notebook import tqdm

# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, TensorDataset
import torch.nn.init as init
import copy

# Check if GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Utilities
import TEP_utils as TEP_utils
import PCA_chiang_utils as PCA_chiang_utils
from df_to_table_utils import dataframe_to_latex, highlight_table

import models as models
import linearization_utils as linearization_utils

# Update libraries without restarting the kernel
importlib.reload(models)
importlib.reload(linearization_utils)

%load_ext autoreload
%autoreload 2

# Introduction

The code bellow shows a step-by-step guide of using our proposed method. Additionally, at the end there is a loop for each fault that could be easily adapted to fit any dataset.

# 1 - Loading Dataset

This sections loads the dataset for one fault and plot one of its variables, also displaying the dataset shape.

In [5]:
# Dataset from Chiang
df_train_chiang, df_test_chiang = TEP_utils.load_dataset_chiang(fault=20)
px.scatter(df_train_chiang['XMEAS(41)'])

In [21]:
# Dataset from Chiang
# Load parquet dataset
df_train, df_val = TEP_utils.load_dataset_chiang(fault=20)

# Check the shape of the dataframes
print(f'Train shape: {df_train.shape}')
print(f'Validation shape: {df_val.shape}')

Train shape: (1940, 53)
Validation shape: (960, 53)


# 2 - Metrics

This section fits a simple model and calculates the metrics (FAR, MDR and TTD).

In [14]:
# The dataframes last column is the target class, 0 or 1
# Train a logistic regression model to predict the target class
# Train the model
model = LogisticRegression()
model.fit(df_train.iloc[:,:-1], df_train.iloc[:,-1])

# Predict the target class
y_train_pred = model.predict(df_train.iloc[:,:-1])
y_val_pred = model.predict(df_val.iloc[:,:-1])


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



In [15]:
fig = go.Figure()
fig.add_trace(go.Scatter(y=y_train_pred[0:2000], mode='markers', name='Train predictions'))
fig.add_trace(go.Scatter(y=df_train.iloc[:,-1][0:2000], mode='markers', name='Train true values'))

In [17]:
# Calculate the accuracy
train_acc = accuracy_score(df_train.iloc[:,-1], y_train_pred)
val_acc = accuracy_score(df_val.iloc[:,-1], y_val_pred)

print(f'Train accuracy: {train_acc}')
print(f'Validation accuracy: {val_acc}')

Train accuracy: 0.9329896907216495
Validation accuracy: 0.7583333333333333


In [22]:
# Calculate the false alarm rate (FAR), missed detection rate (MDR) and time to detection (TTD)
performance_logistic_regression = TEP_utils.calculate_FAR_TTD_MDR(df_train.iloc[:,-1], y_train_pred)
performance_logistic_regression

FAR (%)     1.438356
TTD        22.000000
MDR (%)    22.708333
dtype: float64

# 3 - Training with L1/L2

This section shows how the L1/L2 model can be trained, also analysing the weights and feature importante. Moreover, the is a sample use of the ANN linearization function.

## 3.1 - Dataloader

In [24]:
inputs_names = list(df_train.columns[:-1].values)
outputs_names = list([df_train.columns[-1]])

dataloaders, treino_scaled, val_scaled, teste_scaled, scalers = TEP_utils.get_dataloader(dataset_train_df=df_train, dataset_test_df=df_val, 
                                                                                              dataset_val_df=df_val,
                                                                                              outputs=outputs_names, inputs=inputs_names, 
                                                                                              batch_size=2**10, 
                                                                                              val_split=None,
                                                                                              shuffle_train_val=True, scale_output=False,
                                                                                              )
print('Treino:')
print(f'Total de amostras: {len(treino_scaled[0])}')
print(f'Tamanho do batch: {dataloaders[0].batch_size}')
print(f'Número de batches: {len(dataloaders[0])}')

Treino:
Total de amostras: 1940
Tamanho do batch: 1024
Número de batches: 2


## 3.2 - Training

In [25]:
melhor_modelo_l1_l2, melhor_epoch_l1_l2, last_model_l1_l2, epochs_plot_l1_l2, train_loss_array_l1_l2, val_loss_array_l1_l2, test_loss_array_l1_l2 = models.NN_L1_L2_training_class(
                dataloaders=dataloaders, dataset_train=treino_scaled, dataset_val=val_scaled, dataset_test=teste_scaled, scalers=scalers, 
                hidden_layer_size=500, learning_rate=1e-3, L1_weight=(1.77e-3)/10, L2_weight=(3.16e-4)/10, activation_function='tanh',
                num_epochs_save = 1, num_epochs = 200, 
                print_tqdm=True, print_results=True)

Using device: cuda:0


100%|██████████| 200/200 [00:04<00:00, 44.21it/s]


In [26]:
# Figure
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Plots
fig.add_trace(go.Scatter(x=epochs_plot_l1_l2, y=train_loss_array_l1_l2, name='Train loss'), secondary_y=False)
fig.add_trace(go.Scatter(x=epochs_plot_l1_l2, y=val_loss_array_l1_l2, name='Val loss'), secondary_y=True)
fig.add_vline(x=melhor_epoch_l1_l2, line_width=1, line_dash="dash", line_color="green")

# Title
fig.update_layout({'title': 'Training and Validation Losses'})

# axis
fig.update_xaxes(title_text="Epoch")
fig.update_yaxes(title_text="Train Loss", secondary_y=False)
fig.update_yaxes(title_text="Val loss", secondary_y=True)

# Show
fig.show()

In [29]:
y_val_gt = df_val.iloc[:,-1]
y_val_pred = np.round(TEP_utils.get_dataset_prediction_per_batch(melhor_modelo_l1_l2, teste_scaled[0]))
y_val_pred = pd.Series(y_val_pred, index=y_val_gt.index, dtype=np.float64)
performance_neural_network_l1_l2 =  TEP_utils.calculate_FAR_TTD_MDR(y_true = y_val_gt, y_pred = y_val_pred)
performance_neural_network_l1_l2

FAR (%)     0.0
TTD        69.0
MDR (%)    30.5
dtype: float64

## 3.3 - Wheight Analysis

In [30]:
# Extract feature importance
feature_importance = torch.square(melhor_modelo_l1_l2.fc1.weight).mean(axis=0).cpu().detach().numpy()

TEP_utils.plot_feature_importance(feature_importance, inputs_names)

## 3.4 - Linearization

### 3.4.1 - Comparison with linear model

### 3.4.2 - L1/L2 ANN linearization

In [None]:
modelo_linearizado = linearization_utils.linearize_ANN(melhor_modelo_l1_l2, treino_scaled, val_scaled, scalers, activation_function='tanh', mode='Classification', percent_erro=0.1, num_desvios=10)

Desvio: 0.00, Erro de validação: 0.6978 - Acc: 0.7458333333333333 - Num neurônios lineares: 0
Desvio: 0.68, Erro de validação: 0.7014 - Acc: 0.7447916666666666 - Num neurônios lineares: 180
Desvio: 1.37, Erro de validação: 0.7149 - Acc: 0.7427083333333333 - Num neurônios lineares: 325
Desvio: 2.05, Erro de validação: 0.7322 - Acc: 0.7416666666666667 - Num neurônios lineares: 434
Desvio: 2.74, Erro de validação: 0.7320 - Acc: 0.74375 - Num neurônios lineares: 464
Desvio: 3.42, Erro de validação: 0.7259 - Acc: 0.7354166666666667 - Num neurônios lineares: 480
Desvio: 4.10, Erro de validação: 0.7474 - Acc: 0.725 - Num neurônios lineares: 489
Desvio: 4.79, Erro de validação: 0.7802 - Acc: 0.7197916666666667 - Num neurônios lineares: 496
Desvio: 5.47, Erro de validação: 0.7695 - Acc: 0.7145833333333333 - Num neurônios lineares: 498
Desvio: 6.16, Erro de validação: 0.7713 - Acc: 0.715625 - Num neurônios lineares: 500


In [32]:
modelo_linearizado

Rede_linear_e_nao_linear(
  (fc1_linear): Linear(in_features=52, out_features=1, bias=True)
  (fc1_nao_linear): Linear(in_features=52, out_features=11, bias=True)
  (fc2): Linear(in_features=11, out_features=1, bias=True)
)

In [34]:
y_val_gt = df_val.iloc[:,-1]
y_val_pred = np.round(modelo_linearizado(teste_scaled[0]).cpu().detach().numpy().flatten())
y_val_pred = pd.Series(y_val_pred, index=y_val_gt.index, dtype=np.float64)
TEP_utils.calculate_FAR_TTD_MDR(y_true = y_val_gt, y_pred = y_val_pred)

FAR (%)     0.0
TTD        70.0
MDR (%)    33.0
dtype: float64

In [35]:
# Extract feature importance
feature_importance_linear = torch.square(modelo_linearizado.fc1_linear.weight).mean(axis=0).cpu().detach().numpy()
feature_importance_non_linear = torch.square(modelo_linearizado.fc1_nao_linear.weight).mean(axis=0).cpu().detach().numpy()
feature_importance = feature_importance_linear + feature_importance_non_linear

# TEP_utils.plot_feature_importance(feature_importance, inputs_names, title='Feature Importance')
TEP_utils.plot_feature_importance(feature_importance_linear, inputs_names, title='Feature Importance (Linear)')
TEP_utils.plot_feature_importance(feature_importance_non_linear, inputs_names, title='Feature Importance (Non-linear)')


# 4 - Loop for all fault types

This section presents a loop through all methods presented, resulting in our final proposed method. Run this section the to reproduce our results.

In [58]:
from contextlib import contextmanager,redirect_stderr,redirect_stdout
from os import devnull

@contextmanager
def suppress_stdout_stderr():
    """A context manager that redirects stdout and stderr to devnull"""
    with open(devnull, 'w') as fnull:
        with redirect_stderr(fnull) as err, redirect_stdout(fnull) as out:
            yield (err, out)

## 4.1 - Loop

In [61]:
# Data path
fault_data_path = Path('Data/Chiang/')

# Results folder
main_results_folder = Path('Results_Chiang')

# Number of repetitions
n_repetitions = 3

# NN params
neurons = 500
learning_rate = 1e-3
L1_weight = (1.77e-3)/1
L2_weight = (3.16e-4)/10
num_epochs = 2000

In [60]:
# Get all fault numbers
fault_numbers = [e.name[1:3] for e in list(fault_data_path.glob('*.dat')) if (('d00' not in e.name) and ('te' not in e.name))]
fault_numbers.sort()
fault_numbers = np.unique(np.array(fault_numbers)).astype(int)

# Loop for all fault types
for fault_number in fault_numbers:
    # Get the fault number and name
    fault_name = f'idv{fault_number:02d}'
    print(f'Current Processing Fault type: {fault_name}')

    # if there is a result/Performance_Comparison_Table.png file, skip this fault
    if (main_results_folder / fault_name / 'Performance_Comparison_Table.png').exists():
        print('\tSkipping')
        continue

    # Check if result folder exists
    result_folder = main_results_folder / fault_name
    result_folder.mkdir(parents=True, exist_ok=True)
                            
    # Load data
    print('\tLoading data')
    df_train, df_test = TEP_utils.load_dataset_chiang(int(fault_number))

    # Get dataloaders
    inputs_names = list(df_train.columns[:-1].values)
    outputs_names = list([df_train.columns[-1]])
    dataloaders, treino_scaled, val_scaled, teste_scaled, scalers = TEP_utils.get_dataloader(dataset_train_df=df_train, dataset_test_df=df_test,
                                                                                                outputs=outputs_names, inputs=inputs_names, 
                                                                                                batch_size=2**10, 
                                                                                                val_split=.2,
                                                                                                shuffle_train_val=True, scale_output=False,
                                                                                                )

    # Initialize array to store the results
    plot_df_array = []

    num_neurons_non_linear_array = []
    num_neurons_linear_array = []

    train_loss_array_l1_l2_array = []
    val_loss_array_l1_l2_array = []

    feature_importance_logistic_regression_array = []
    feature_importance_neural_network_l1_l2_array = []
    feature_importance_linearized_network_linear_array = []
    feature_importance_linearized_network_non_linear_array = []


    # Loop for all repetitions
    for repetition in tqdm(range(n_repetitions)):
        dataloaders, treino_scaled, val_scaled, teste_scaled, scalers = TEP_utils.get_dataloader(dataset_train_df=df_train, dataset_test_df=df_test,
                                                                                                outputs=outputs_names, inputs=inputs_names, 
                                                                                                batch_size=2**10, 
                                                                                                val_split=.2,
                                                                                                shuffle_train_val=True, scale_output=False,
                                                                                                )

        # print(f'\tRepetition {repetition+1}/{n_repetitions}')
        # Logistic regression
        # print('\tTraining Logistic Regression')
        # Train the model
        model_logistic_regression = LogisticRegression(verbose=0)
        with ignore_warnings(category=ConvergenceWarning):
            model_logistic_regression.fit(df_train.iloc[:,:-1], df_train.iloc[:,-1])

        # Predict the target class
        y_train_pred_logistic_regression = model_logistic_regression.predict(df_train.iloc[:,:-1])
        # y_val_pred_logistic_regression = model_logistic_regression.predict(df_val.iloc[:,:-1])
        y_test_pred_logistic_regression = model_logistic_regression.predict(df_test.iloc[:,:-1])

        performance_logistic_regression = TEP_utils.calculate_FAR_TTD_MDR(df_test.iloc[:,-1], y_test_pred_logistic_regression)

        # Get model weights
        feature_importance_logistic_regression = np.square(model_logistic_regression.coef_).flatten()
        feature_importance_logistic_regression_array.append(feature_importance_logistic_regression)
        
        # Neural Network
        # print('\tTraining Neural Network')
        # Training
        melhor_modelo_l1_l2, melhor_epoch_l1_l2, last_model_l1_l2, epochs_plot_l1_l2, train_loss_array_l1_l2, val_loss_array_l1_l2, test_loss_array_l1_l2 = models.NN_L1_L2_training_class(
                        dataloaders=dataloaders, dataset_train=treino_scaled, dataset_val=val_scaled, dataset_test=teste_scaled, scalers=scalers,
                        hidden_layer_size=neurons, learning_rate=learning_rate, L1_weight=L1_weight, L2_weight=L2_weight, activation_function='tanh',
                        num_epochs_save = 1, num_epochs = num_epochs, 
                        print_tqdm=False, print_results=False)
        
        # Save training loss plot
        train_loss_array_l1_l2_array.append(train_loss_array_l1_l2)
        val_loss_array_l1_l2_array.append(val_loss_array_l1_l2)

        # Calculate L1 L2 performance
        y_test_gt = df_test.iloc[:,-1]
        y_test_pred = np.round(TEP_utils.get_dataset_prediction_per_batch(melhor_modelo_l1_l2, teste_scaled[0]))
        y_test_pred = pd.Series(y_test_pred, index=y_test_gt.index, dtype=np.float64)
        performance_neural_network_l1_l2 = TEP_utils.calculate_FAR_TTD_MDR(y_true = y_test_gt, y_pred = y_test_pred)

        # Get importance
        feature_importance_neural_network_l1_l2 = torch.square(melhor_modelo_l1_l2.fc1.weight).mean(axis=0).cpu().detach().numpy()
        feature_importance_neural_network_l1_l2_array.append(feature_importance_neural_network_l1_l2)

        # Linearization
        # print('\tLinearizing Neural Network')
        with suppress_stdout_stderr():
            modelo_linearizado = linearization_utils.linearize_ANN(melhor_modelo_l1_l2, treino_scaled, val_scaled, scalers, activation_function='tanh', mode='Classification', percent_erro=0.1, num_desvios=10)

        # Save linearized model architecture
        if modelo_linearizado.fc1_nao_linear is not None:
            num_neurons_non_linear = modelo_linearizado.fc1_nao_linear.weight.shape[0]
        else:
            num_neurons_non_linear = 0
        num_neurons_non_linear_array.append(num_neurons_non_linear)

        if modelo_linearizado.fc1_linear is not None:
            num_neurons_linear = 1
        else:
            num_neurons_linear = 0
        num_neurons_linear_array.append(num_neurons_linear)

        # Calculate linearization performance
        y_test_gt = df_test.iloc[:,-1]
        y_test_pred = TEP_utils.get_dataset_prediction_per_batch(modelo_linearizado, teste_scaled[0])
        y_test_pred = pd.Series(y_test_pred, index=y_test_gt.index, dtype=np.float64)
        performance_linearization = TEP_utils.calculate_FAR_TTD_MDR(y_true = y_test_gt, y_pred = y_test_pred)

        # Extract feature importance
        linearization_utils.extract_feature_importance_from_linear_non_linear_model(modelo_linearizado, inputs_names)
        feature_importance_linearized_network_linear, feature_importance_linearized_network_non_linear = linearization_utils.extract_feature_importance_from_linear_non_linear_model(modelo_linearizado, inputs_names)
            
        feature_importance_linearized_network_linear_array.append(feature_importance_linearized_network_linear)
        feature_importance_linearized_network_non_linear_array.append(feature_importance_linearized_network_non_linear)

        # Plot performance_linearization, performance_neural_network_l1_l2 and performance_logistic_regression together in a single table
        performance_logistic_regression['Model'] = 'Logistic Regression'
        performance_neural_network_l1_l2['Model'] = 'Neural Network'
        performance_linearization['Model'] = 'Linearization'

        plot_df = pd.DataFrame(index = ['FAR (%)', 'TTD', 'MDR (%)'], columns = ['Logistic Regression', 'Neural Network', 'Linearization'])
        plot_df['Logistic Regression'] = performance_logistic_regression[['FAR (%)', 'TTD', 'MDR (%)']].values
        plot_df['Neural Network'] = performance_neural_network_l1_l2[['FAR (%)', 'TTD', 'MDR (%)']].values
        plot_df['Linearization'] = performance_linearization[['FAR (%)', 'TTD', 'MDR (%)']].values

        # Add to array
        plot_df_array.append(plot_df)
    
    # Save the mean architecture
    num_neurons_non_linear = np.mean(num_neurons_non_linear_array)
    num_neurons_linear = np.mean(num_neurons_linear_array)
    with open(result_folder/'Linearized_Model_Architecture.txt', 'w') as f:
        f.write(f'Initial number of neurons: {neurons}\n\tMean number of non-linear neurons: {num_neurons_non_linear}\n\tMean number of linear neurons: {num_neurons_linear}')

    # Save the mean training loss plot with std
    train_loss_array_l1_l2_array = np.array(train_loss_array_l1_l2_array)
    val_loss_array_l1_l2_array = np.array(val_loss_array_l1_l2_array)
    train_loss_mean = train_loss_array_l1_l2_array.mean(axis=0)
    val_loss_mean = val_loss_array_l1_l2_array.mean(axis=0)
    train_loss_std = train_loss_array_l1_l2_array.std(axis=0)
    val_loss_std = val_loss_array_l1_l2_array.std(axis=0)
    TEP_utils.plot_training_and_val_losses_with_std(epochs_plot_l1_l2, train_loss_mean, train_loss_std, val_loss_mean, val_loss_std, path=result_folder/'Training_Loss_Neural_Network_L1_L2.png')

    # Save the mean feature importance for logistic regression
    TEP_utils.plot_feature_importance_array(feature_importance_logistic_regression_array, inputs_names, title='Feature Importance (Logistic Regression)',
                                            plot_std=False, plot=False, save_path=result_folder/'Feature_Importance_Logistic_Regression.png'
                                        )
    TEP_utils.plot_feature_importance_array(feature_importance_logistic_regression_array, inputs_names, title='Feature Importance (Logistic Regression)',
                                            plot_std=True, plot=False, save_path=result_folder/'Feature_Importance_Logistic_Regression_std.png'
                                        )

    # Save the mean feature importance for neural network
    TEP_utils.plot_feature_importance_array(feature_importance_neural_network_l1_l2_array, inputs_names, title='Feature Importance (Neural Network)',
                                            plot_std=False, plot=False, save_path=result_folder/'Feature_Importance_Neural_Network_L1_L2.png'
                                        )
    TEP_utils.plot_feature_importance_array(feature_importance_neural_network_l1_l2_array, inputs_names, title='Feature Importance (Neural Network)',
                                            plot_std=True, plot=False, save_path=result_folder/'Feature_Importance_Neural_Network_L1_L2_std.png'
                                        )
    
    # Save the mean feature importance for linearized neurons
    TEP_utils.plot_feature_importance_array(feature_importance_linearized_network_linear_array, inputs_names, title='Feature Importance (Linear)',
                                            plot_std=False, plot=False, save_path=result_folder/'Feature_Importance_Linearized_Network_Linear.png'
                                        )
    TEP_utils.plot_feature_importance_array(feature_importance_linearized_network_linear_array, inputs_names, title='Feature Importance (Linear)',
                                            plot_std=True, plot=False, save_path=result_folder/'Feature_Importance_Linearized_Network_Linear_std.png'
                                        )
    
    # Save the mean feature importance for non linearized neurons
    TEP_utils.plot_feature_importance_array(feature_importance_linearized_network_non_linear_array, inputs_names, title='Feature Importance (Non-linear)',
                                            plot_std=False, plot=False, save_path=result_folder/'Feature_Importance_Linearized_Network_Non_Linear.png'
                                        )
    TEP_utils.plot_feature_importance_array(feature_importance_linearized_network_non_linear_array, inputs_names, title='Feature Importance (Non-linear)',
                                            plot_std=True, plot=False, save_path=result_folder/'Feature_Importance_Linearized_Network_Non_Linear_std.png'
                                        )


    # Calculate the mean of the performance
    plot_df = TEP_utils.calculate_mean_of_dataframe_array(plot_df_array)

    # Apply style
    plot_df = plot_df.style.set_properties(**{'background-color': 'black','color': 'white'}).apply(TEP_utils.apply_style, axis=1).set_caption(f'Performance Comparison ({fault_name})')

    # Save table as png
    import dataframe_image as dfi
    dfi.export(plot_df, result_folder/'Performance_Comparison_Table.png')

    # Calculate the mean and std of the performance
    plot_df = TEP_utils.calculate_mean_and_var_of_dataframe_array(plot_df_array)
    plot_df = plot_df.style.set_properties(**{'background-color': 'black','color': 'white'}).apply(TEP_utils.apply_style_std, axis=1)
    dfi.export(plot_df, result_folder/'Performance_Comparison_Table_Std.png')

    # Calculate the mean and std of the performance and save as csv
    plot_df = TEP_utils.calculate_mean_and_var_of_dataframe_array(plot_df_array)
    plot_df.to_csv(result_folder/'Performance_Comparison_Table.csv')
plot_df

Current Processing Fault type: idv01
	Skipping
Current Processing Fault type: idv02
	Skipping
Current Processing Fault type: idv03
	Skipping
Current Processing Fault type: idv04
	Skipping
Current Processing Fault type: idv05
	Skipping
Current Processing Fault type: idv06
	Skipping
Current Processing Fault type: idv07
	Skipping
Current Processing Fault type: idv08
	Skipping
Current Processing Fault type: idv09
	Skipping
Current Processing Fault type: idv10
	Skipping
Current Processing Fault type: idv11
	Skipping
Current Processing Fault type: idv12
	Skipping
Current Processing Fault type: idv13
	Skipping
Current Processing Fault type: idv14
	Skipping
Current Processing Fault type: idv15
	Skipping
Current Processing Fault type: idv16
	Skipping
Current Processing Fault type: idv17
	Skipping
Current Processing Fault type: idv18
	Skipping
Current Processing Fault type: idv19
	Skipping
Current Processing Fault type: idv20
	Skipping
Current Processing Fault type: idv21
	Skipping


Unnamed: 0,Logistic Regression,Neural Network,Linearization,RL (Arch1),RL (Arch2)
FAR (%),1.875,0.0,0.0,1.31,3.55
TTD,65.0,74.0,74.0,33.77,32.36
MDR (%),24.0,34.375,34.875,8.79,5.93


# 5 - Result Compilation and table formating

Bellow are several cells that export the results table as latex and solve errors of the pd.DataFrame to latex function.

In [18]:
# Getting neural results from folders
# Results folder
main_results_folder = Path('Results_Chiang')

results_MDR_neural = pd.DataFrame(index=range(1,21), columns=['Logistic Regression', 'Neural Network', 'Linearization'])
results_FAR_neural = pd.DataFrame(index=range(1,21), columns=['Logistic Regression', 'Neural Network', 'Linearization'])
results_TTD_neural = pd.DataFrame(index=range(1,21), columns=['Logistic Regression', 'Neural Network', 'Linearization'])

for fault_number in fault_numbers:
    # Get the fault number and name
    fault_name = f'idv{fault_number:02d}'

    # Check if result folder exists
    result_folder = main_results_folder / fault_name

    # Load the table
    plot_df = pd.read_csv(result_folder/'Performance_Comparison_Table.csv', index_col=0)

    results_MDR_neural.loc[fault_number] = plot_df.loc['MDR (%)'].values
    results_FAR_neural.loc[fault_number] = plot_df.loc['FAR (%)'].values
    results_TTD_neural.loc[fault_number] = plot_df.loc['TTD'].values

print('MDR:')
display(results_MDR_neural)

print('FAR:')
display(results_FAR_neural)

print('TTD: ')
display(results_TTD_neural)

MDR:


Unnamed: 0,Logistic Regression,Neural Network,Linearization
1,0.625,0.350 ± 0.175,0.350 ± 0.175
2,2.25,2.188 ± 0.151,2.200 ± 0.150
3,93.75,77.362 ± 2.383,77.787 ± 3.381
4,0.0,0.000 ± 0.000,0.000 ± 0.000
5,38.0,0.188 ± 0.084,0.175 ± 0.083
6,0.25,0.000 ± 0.000,0.000 ± 0.000
7,0.125,0.000 ± 0.000,0.000 ± 0.000
8,69.625,22.167 ± 2.777,25.167 ± 1.515
9,95.625,77.175 ± 1.429,77.513 ± 1.381
10,98.0,45.038 ± 1.519,44.575 ± 1.930


FAR:


Unnamed: 0,Logistic Regression,Neural Network,Linearization
1,0.0,0.000 ± 0.000,0.000 ± 0.000
2,0.0,0.875 ± 0.306,0.812 ± 0.400
3,4.375,30.688 ± 6.240,33.750 ± 7.032
4,0.0,0.000 ± 0.000,0.000 ± 0.000
5,7.5,0.000 ± 0.000,0.000 ± 0.000
6,0.0,0.000 ± 0.000,0.000 ± 0.000
7,0.0,0.000 ± 0.000,0.000 ± 0.000
8,4.375,0.000 ± 0.000,0.000 ± 0.000
9,10.0,46.000 ± 3.414,43.500 ± 2.880
10,0.0,0.188 ± 0.286,0.250 ± 0.415


TTD: 


Unnamed: 0,Logistic Regression,Neural Network,Linearization
1,5.0,3.000 ± 1.612,3.000 ± 1.612
2,18.0,17.900 ± 1.513,18.200 ± 1.600
3,538.0,inf ± nan,inf ± nan
4,0.0,0.000 ± 0.000,0.000 ± 0.000
5,26.0,5.000 ± 1.732,4.900 ± 1.700
6,4.0,0.000 ± 0.000,0.000 ± 0.000
7,1.0,0.000 ± 0.000,0.000 ± 0.000
8,18.0,23.667 ± 0.943,23.667 ± 0.943
9,618.0,inf ± nan,inf ± nan
10,inf,25.400 ± 1.744,23.500 ± 5.084


In [19]:
# Getting PCA results from function
results_MDR_PCA, results_FAR_PCA, results_TTD_PCA = PCA_chiang_utils.get_metrics_chiang_PCA_T2_and_Q(explained_variance=.85)

print('MDR:')
display(results_MDR_PCA)

print('FAR:')
display(results_FAR_PCA)

print('TTD: ')
display(results_TTD_PCA)

MDR:


Unnamed: 0,PCA (T²),PCA (Q)
1,0.00625,0.0025
2,0.0175,0.0125
3,0.99,0.98375
4,0.71,0.0225
5,0.755,0.76375
6,0.01,0.0
7,0.0,0.0
8,0.0275,0.0375
9,0.98875,0.975
10,0.6375,0.6


FAR:


Unnamed: 0,PCA (T²),PCA (Q)
1,0.0,0.0
2,0.001271,0.005038
3,0.0,0.35
4,0.004292,0.002551
5,0.005076,0.010471
6,0.0,0.002494
7,0.0,0.001248
8,0.0,0.0
9,0.181818,0.090909
10,0.0,0.0


TTD: 


Unnamed: 0,PCA (T²),PCA (Q)
1,21.0,9.0
2,45.0,33.0
3,inf,inf
4,447.0,15.0
5,33.0,21.0
6,27.0,3.0
7,3.0,3.0
8,69.0,66.0
9,inf,inf
10,294.0,144.0


In [20]:
# Concat all results
results_MDR = pd.concat([results_MDR_neural, results_MDR_PCA*100], axis=1)
results_FAR = pd.concat([results_FAR_neural, results_FAR_PCA*100], axis=1)
results_TTD = pd.concat([results_TTD_neural, results_TTD_PCA/3-1], axis=1)

print('FAR:')
display(results_FAR)

print('MDR:')
display(results_MDR)

print('TTD: ')
display(results_TTD)

FAR:


Unnamed: 0,Logistic Regression,Neural Network,Linearization,PCA (T²),PCA (Q)
1,0.0,0.000 ± 0.000,0.000 ± 0.000,0.0,0.0
2,0.0,0.875 ± 0.306,0.812 ± 0.400,0.127065,0.503778
3,4.375,30.688 ± 6.240,33.750 ± 7.032,0.0,35.0
4,0.0,0.000 ± 0.000,0.000 ± 0.000,0.429185,0.255102
5,7.5,0.000 ± 0.000,0.000 ± 0.000,0.507614,1.04712
6,0.0,0.000 ± 0.000,0.000 ± 0.000,0.0,0.249377
7,0.0,0.000 ± 0.000,0.000 ± 0.000,0.0,0.124844
8,4.375,0.000 ± 0.000,0.000 ± 0.000,0.0,0.0
9,10.0,46.000 ± 3.414,43.500 ± 2.880,18.181818,9.090909
10,0.0,0.188 ± 0.286,0.250 ± 0.415,0.0,0.0


MDR:


Unnamed: 0,Logistic Regression,Neural Network,Linearization,PCA (T²),PCA (Q)
1,0.625,0.350 ± 0.175,0.350 ± 0.175,0.625,0.25
2,2.25,2.188 ± 0.151,2.200 ± 0.150,1.75,1.25
3,93.75,77.362 ± 2.383,77.787 ± 3.381,99.0,98.375
4,0.0,0.000 ± 0.000,0.000 ± 0.000,71.0,2.25
5,38.0,0.188 ± 0.084,0.175 ± 0.083,75.5,76.375
6,0.25,0.000 ± 0.000,0.000 ± 0.000,1.0,0.0
7,0.125,0.000 ± 0.000,0.000 ± 0.000,0.0,0.0
8,69.625,22.167 ± 2.777,25.167 ± 1.515,2.75,3.75
9,95.625,77.175 ± 1.429,77.513 ± 1.381,98.875,97.5
10,98.0,45.038 ± 1.519,44.575 ± 1.930,63.75,60.0


TTD: 


Unnamed: 0,Logistic Regression,Neural Network,Linearization,PCA (T²),PCA (Q)
1,5.0,3.000 ± 1.612,3.000 ± 1.612,6.0,2.0
2,18.0,17.900 ± 1.513,18.200 ± 1.600,14.0,10.0
3,538.0,inf ± nan,inf ± nan,inf,inf
4,0.0,0.000 ± 0.000,0.000 ± 0.000,148.0,4.0
5,26.0,5.000 ± 1.732,4.900 ± 1.700,10.0,6.0
6,4.0,0.000 ± 0.000,0.000 ± 0.000,8.0,0.0
7,1.0,0.000 ± 0.000,0.000 ± 0.000,0.0,0.0
8,18.0,23.667 ± 0.943,23.667 ± 0.943,22.0,21.0
9,618.0,inf ± nan,inf ± nan,inf,inf
10,inf,25.400 ± 1.744,23.500 ± 5.084,97.0,47.0


In [21]:
# Example usage:
data = {'Col1': ['9.119 ± 1', '3 ± 0.5', '5 ± 2'],
        'Col2': ['2 ± 0.1', '9 ± 1', '1 ± 0.3'],
        'Col3': ['2', '4 ± 0.8', '123']} 
df = pd.DataFrame(data)
display(df)

# Display the styled DataFrame
highlight_table(df, df)

Unnamed: 0,Col1,Col2,Col3
0,9.119 ± 1,2 ± 0.1,2
1,3 ± 0.5,9 ± 1,4 ± 0.8
2,5 ± 2,1 ± 0.3,123


Unnamed: 0,Col1,Col2,Col3
0,9.12 ± 1.00,2.00 ± 0.10,2.00
1,3.00 ± 0.50,9.00 ± 1.00,4.00 ± 0.80
2,5.00 ± 2.00,1.00 ± 0.30,123.00


In [22]:
results_FAR_styled = highlight_table(results_FAR, results_FAR)
dataframe_to_latex(results_FAR_styled, filename='FAR.tex', label='tab:FAR', caption='FAR (\%) metric for the PCA and ANN-based approaches. The results with FAR greater than 5\% are shown in gray. For each fault, best results are bold and second best are have underline.')
display(results_FAR_styled)

Unnamed: 0,Logistic Regression,Neural Network,Linearization,PCA (T²),PCA (Q)
1,0.0,0.00 ± 0.00,0.00 ± 0.00,0.0,0.0
2,0.0,0.88 ± 0.31,0.81 ± 0.40,0.13,0.5
3,4.38,30.69 ± 6.24,33.75 ± 7.03,0.0,35.0
4,0.0,0.00 ± 0.00,0.00 ± 0.00,0.43,0.26
5,7.5,0.00 ± 0.00,0.00 ± 0.00,0.51,1.05
6,0.0,0.00 ± 0.00,0.00 ± 0.00,0.0,0.25
7,0.0,0.00 ± 0.00,0.00 ± 0.00,0.0,0.12
8,4.38,0.00 ± 0.00,0.00 ± 0.00,0.0,0.0
9,10.0,46.00 ± 3.41,43.50 ± 2.88,18.18,9.09
10,0.0,0.19 ± 0.29,0.25 ± 0.41,0.0,0.0


In [23]:
results_MDR_styled = highlight_table(results_MDR, color_df=results_FAR)
dataframe_to_latex(results_MDR_styled, filename='MDR.tex', label='tab:MDR', caption='MDR (\%) metric for the PCA and ANN-based approaches. The results with FAR greater than 5\% are shown in gray. For each fault, best results are bold and second best are have underline.')
display(results_MDR_styled)

Unnamed: 0,Logistic Regression,Neural Network,Linearization,PCA (T²),PCA (Q)
1,0.62,0.35 ± 0.17,0.35 ± 0.17,0.62,0.25
2,2.25,2.19 ± 0.15,2.20 ± 0.15,1.75,1.25
3,93.75,77.36 ± 2.38,77.79 ± 3.38,99.0,98.38
4,0.0,0.00 ± 0.00,0.00 ± 0.00,71.0,2.25
5,38.0,0.19 ± 0.08,0.17 ± 0.08,75.5,76.38
6,0.25,0.00 ± 0.00,0.00 ± 0.00,1.0,0.0
7,0.12,0.00 ± 0.00,0.00 ± 0.00,0.0,0.0
8,69.62,22.17 ± 2.78,25.17 ± 1.51,2.75,3.75
9,95.62,77.17 ± 1.43,77.51 ± 1.38,98.88,97.5
10,98.0,45.04 ± 1.52,44.58 ± 1.93,63.75,60.0


In [24]:
results_TTD_styled = highlight_table(results_TTD, color_df=results_FAR)
dataframe_to_latex(results_TTD_styled, filename='TTD.tex', label='tab:TTD', caption='TTD metric for the PCA and ANN-based approaches. The results with FAR greater than 5\% are shown in gray. For each fault, best results are bold and second best are have underline.')
display(results_TTD_styled)

Unnamed: 0,Logistic Regression,Neural Network,Linearization,PCA (T²),PCA (Q)
1,5.0,3.00 ± 1.61,3.00 ± 1.61,6.0,2.0
2,18.0,17.90 ± 1.51,18.20 ± 1.60,14.0,10.0
3,538.0,inf ± nan,inf ± nan,inf,inf
4,0.0,0.00 ± 0.00,0.00 ± 0.00,148.0,4.0
5,26.0,5.00 ± 1.73,4.90 ± 1.70,10.0,6.0
6,4.0,0.00 ± 0.00,0.00 ± 0.00,8.0,0.0
7,1.0,0.00 ± 0.00,0.00 ± 0.00,0.0,0.0
8,18.0,23.67 ± 0.94,23.67 ± 0.94,22.0,21.0
9,618.0,inf ± nan,inf ± nan,inf,inf
10,inf,25.40 ± 1.74,23.50 ± 5.08,97.0,47.0


In [35]:
# Go to each result folder from Chiang and get the Linearized_Model_Architecture.txt file
df_neurons = pd.DataFrame(index=range(1,21), columns=['Non-linear', 'Linear'])
for fault_number in fault_numbers:
    # Get the fault number and name
    fault_name = f'idv{fault_number:02d}'

    # Check if result folder exists
    result_folder = main_results_folder / fault_name

    # Load the table
    with open(result_folder/'Linearized_Model_Architecture.txt', 'r') as f:
        file = f.read()
    num_neurons_non_linear = float(file.split('\n')[1].split(': ')[1])
    num_neurons_linear = float(file.split('\n')[2].split(': ')[1])
    df_neurons.loc[fault_number] = [num_neurons_non_linear, num_neurons_linear]

# Put in the order 1,2,4,6,7,8,12,13,14 and 11,17,18 and 5,10,16,19,20,21 and 3,9,15
# df_neurons = df_neurons.loc[[1,2,4,6,7,8,12,13,14,11,17,18,5,10,16,19,20,21,3,9,15]]
df_neurons

Unnamed: 0,Non-linear,Linear
1,0.0,1.0
2,1.5,1.0
3,63.1,1.0
4,0.0,1.0
5,0.0,1.0
6,0.0,1.0
7,0.0,1.0
8,11.0,1.0
9,60.0,1.0
10,414.0,0.2


In [38]:
A = np.array([x.split(' ± ')[0] for x in results_MDR_styled.data.Linearization.values]).astype(float)
B = df_neurons['Non-linear'].values
px.scatter(x=A, y=B, labels={'x':'MDR (%)', 'y':'Number of Non-linear Neurons'}, title='MDR (%) vs Number of Non-linear Neurons')