In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
from anomaly_detection import HybridAnomalyDetector  # Ensure this import is correct
import json

# Load data
results_dir = Path("results")

# Read the ADAPT detection results
with open(results_dir / 'adapt_results.json', 'r') as adapt_file:
    adapt_results = json.load(adapt_file)

# Read the NGAFID detection results
with open(results_dir / 'ngafid_results.json', 'r') as ngafid_file:
    ngafid_results = json.load(ngafid_file)

# The three per-sample ADAPT arrays that are combined into one DataFrame below;
# they must share a single length for that to work.
adapt_series_keys = ('timeSeriesData', 'anomalyScores', 'anomalies')

# Print lengths of arrays to debug
for series_key in adapt_series_keys:
    print(f"Length of {series_key}:", len(adapt_results[series_key]))

# Cut every array down to the shortest one by keeping its leading entries.
# NOTE(review): this assumes the arrays are aligned at their first element;
# if the scores actually correspond to the trailing timestamps, the slice
# should be taken from the other end -- confirm against the detector.
min_length = min(len(adapt_results[series_key]) for series_key in adapt_series_keys)
for series_key in adapt_series_keys:
    adapt_results[series_key] = adapt_results[series_key][:min_length]

# Convert data into DataFrame (errors='coerce' turns unparseable timestamps
# into NaT instead of raising).
adapt_df = pd.DataFrame({
    "Time": pd.to_datetime(adapt_results['timeSeriesData'], errors='coerce'),
    "Anomaly Scores": adapt_results['anomalyScores'],
    "Anomalies": adapt_results['anomalies']
})

def _plot_adapt_scores_for_year(scores_df, year):
    """Build the ADAPT anomaly-score figure for a single calendar year.

    Draws the "Anomaly Scores" column against "Time" as a line, then overlays
    one marker per sample coloured red where the "Anomalies" column equals 1
    and blue otherwise.

    Args:
        scores_df: DataFrame with "Time", "Anomaly Scores" and "Anomalies"
            columns, already filtered to the year being plotted.
        year: Year shown in the figure title.

    Returns:
        The plotly figure (not yet displayed).
    """
    fig = px.line(
        scores_df,
        x="Time",
        y="Anomaly Scores",
        title=f"ADAPT Anomaly Scores Over Time ({year})",
        labels={"Time": "Time", "Anomaly Scores": "Anomaly Scores"},
        template="plotly_dark"
    )
    fig.add_trace(go.Scatter(
        x=scores_df["Time"],
        y=scores_df["Anomaly Scores"],
        mode="markers",
        marker=dict(
            # Per-point colours: anomalies stand out in red.
            color=np.where(scores_df["Anomalies"] == 1, 'red', 'blue'),
            size=8
        ),
        name="Anomalies"
    ))
    return fig


# Separate data into 2007 and 2008
adapt_df_2007 = adapt_df[adapt_df['Time'].dt.year == 2007]
adapt_df_2008 = adapt_df[adapt_df['Time'].dt.year == 2008]

# Visualize ADAPT anomaly scores for 2007
fig_adapt_2007 = _plot_adapt_scores_for_year(adapt_df_2007, 2007)
fig_adapt_2007.show()

# Visualize ADAPT anomaly scores for 2008
fig_adapt_2008 = _plot_adapt_scores_for_year(adapt_df_2008, 2008)
fig_adapt_2008.show()

# Visualize NGAFID anomaly scores per flight
ngafid_df = pd.DataFrame({
    "Flight Index": ngafid_results['flightData'],
    "Anomaly Scores": ngafid_results['anomalyScores'],
    "Anomalies": ngafid_results['anomalies']
})

# Plot from a copy whose flag column holds string labels. A colour *sequence*
# is handed out in order of first appearance, so with the sequence alone the
# blue/red meaning would flip whenever the first flight happens to be an
# anomaly. An explicit map pins each label to its colour; ngafid_df itself is
# left untouched.
ngafid_plot_df = ngafid_df.assign(Anomalies=ngafid_df["Anomalies"].astype(str))

fig_ngafid = px.scatter(
    ngafid_plot_df,
    x="Flight Index",
    y="Anomaly Scores",
    color="Anomalies",
    title="NGAFID Anomaly Scores by Flight",
    labels={"Flight Index": "Flight Index", "Anomaly Scores": "Anomaly Scores"},
    template="plotly_dark",
    # Normal flights blue, anomalous flights red -- same convention as the
    # ADAPT marker overlay.
    color_discrete_map={"False": "blue", "True": "red"},
    # Fallback for any label the map does not cover (e.g. flags stored as
    # something other than booleans).
    color_discrete_sequence=["blue", "red"]
)
fig_ngafid.show()

# Summary visualization
# Collect each dataset's 'summary' entry under its display name.
summary = {
    dataset_name: dataset_results['summary']
    for dataset_name, dataset_results in (
        ("ADAPT", adapt_results),
        ("NGAFID", ngafid_results),
    )
}

# One row per dataset; the dataset name moves from the index into a regular
# "Dataset" column so it can be used as the x axis.
summary_table = (
    pd.DataFrame.from_dict(summary, orient="index")
    .reset_index()
    .rename(columns={"index": "Dataset"})
)

# Summary fields drawn side by side for each dataset.
summary_metrics = ["totalAnomalies", "detectionRate", "healthScore"]

fig_summary = px.bar(
    summary_table,
    x="Dataset",
    y=summary_metrics,
    barmode="group",
    title="Summary of Anomaly Detection",
    labels={"value": "Count/Percentage", "variable": "Metric"},
    template="plotly_dark"
)
fig_summary.show()


Length of timeSeriesData: 2884
Length of anomalyScores: 2835
Length of anomalies: 2835


# Aircraft Health Monitoring: Anomaly Detection Evaluation

## 1. Setup and Imports
First, let's import the necessary libraries and set up logging:


In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
import json
import logging
from evaluation import AnomalyEvaluator
from anomaly_detection import HybridAnomalyDetector, load_adapt_data

# Set up logging: request INFO-level output so the logger.info progress
# messages emitted by the cells below are shown.
logging.basicConfig(level=logging.INFO)
# Shared logger used by the helper functions and the remaining cells.
logger = logging.getLogger(__name__)


## 2. Helper Functions
These functions help us load and validate the results:

In [3]:
def load_results(file_path):
    """Load a detection-results JSON file and convert its arrays to NumPy.

    Args:
        file_path: Path (str or path-like) of the JSON results file.

    Returns:
        dict: The decoded JSON object with 'anomalies' and 'anomalyScores'
        (and 'timeSeriesData', when present) replaced by ``np.ndarray``
        values. All other keys are returned as decoded.

    Raises:
        TypeError: If the top-level JSON value is not an object.
        KeyError: If 'anomalies' or 'anomalyScores' is missing.
    """
    required_keys = ('anomalies', 'anomalyScores')

    # Explicit encoding keeps decoding independent of the platform default;
    # the file is closed before any conversion work starts.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    if not isinstance(data, dict):
        raise TypeError(
            f"{file_path}: expected a JSON object, got {type(data).__name__}"
        )

    # Report every missing key at once, together with the offending file.
    missing = [key for key in required_keys if key not in data]
    if missing:
        raise KeyError(f"{file_path}: missing required key(s): {', '.join(missing)}")

    # Convert lists to numpy arrays
    for key in required_keys:
        data[key] = np.array(data[key])
    if 'timeSeriesData' in data:
        data['timeSeriesData'] = np.array(data['timeSeriesData'])
    return data

def validate_data(data, name):
    """Log the shapes of a loaded results dict and check basic consistency.

    Args:
        data: Results dict whose 'anomalies' and 'anomalyScores' values expose
            ``.shape`` and ``len()`` (as produced by ``load_results``);
            'timeSeriesData' is optional.
        name: Dataset label used in log and error messages.

    Raises:
        AssertionError: If either array is empty, or if the label and score
            arrays differ in length.
    """
    logger.info(f"Validating {name} data:")
    anomalies = data['anomalies']
    scores = data['anomalyScores']
    logger.info(f"Anomalies shape: {anomalies.shape}")
    logger.info(f"Scores shape: {scores.shape}")
    if 'timeSeriesData' in data:
        logger.info(f"Time series shape: {data['timeSeriesData'].shape}")

    # Ensure non-zero lengths. Raised explicitly (rather than via `assert`) so
    # the checks still run when Python is started with -O; the exception type
    # is unchanged for callers.
    if len(anomalies) == 0:
        raise AssertionError(f"No anomalies data for {name}")
    if len(scores) == 0:
        raise AssertionError(f"No scores data for {name}")

    # Labels and scores are evaluated pairwise, so they must line up.
    if len(anomalies) != len(scores):
        raise AssertionError(
            f"Length mismatch for {name}: {len(anomalies)} anomalies "
            f"vs {len(scores)} scores"
        )

    # A differing time-series length is only reported, not treated as fatal.
    if 'timeSeriesData' in data and len(data['timeSeriesData']) != len(scores):
        logger.warning(
            f"{name}: time series has {len(data['timeSeriesData'])} entries "
            f"but there are {len(scores)} scores"
        )

## 3. Load and Validate Results
Now we'll load the detection results from our models, validate their shapes, and run an initial evaluation of each dataset:

In [4]:
# Load both result files, validate them, and run the basic evaluation.
# Failures are logged with their traceback and then re-raised: the cells below
# depend on the names defined here, so continuing after an error would only
# surface later as a confusing failure on an undefined variable.
try:
    # Load results
    logger.info("Loading results files...")
    adapt_results = load_results("results/adapt_results.json")
    ngafid_results = load_results("results/ngafid_results.json")

    # Validate data
    validate_data(adapt_results, "ADAPT")
    validate_data(ngafid_results, "NGAFID")

    # Initialize evaluator
    evaluator = AnomalyEvaluator(output_dir="evaluation_results")

    # Basic evaluation with error checking
    logger.info("Performing ADAPT evaluation...")
    adapt_metrics = evaluator.evaluate(
        y_true=adapt_results['anomalies'],
        scores=adapt_results['anomalyScores'],
        dataset_name="ADAPT",
        save_results=True
    )

    logger.info("Performing NGAFID evaluation...")
    ngafid_metrics = evaluator.evaluate(
        y_true=ngafid_results['anomalies'],
        scores=ngafid_results['anomalyScores'],
        dataset_name="NGAFID",
        save_results=True
    )
except Exception as e:
    # logger.exception records the full traceback alongside the message.
    logger.exception(f"An error occurred: {e}")
    raise

INFO:__main__:Loading results files...
INFO:__main__:Validating ADAPT data:
INFO:__main__:Anomalies shape: (2835,)
INFO:__main__:Scores shape: (2835,)
INFO:__main__:Time series shape: (2884,)
INFO:__main__:Validating NGAFID data:
INFO:__main__:Anomalies shape: (11446,)
INFO:__main__:Scores shape: (11446,)
INFO:__main__:Performing ADAPT evaluation...
INFO:__main__:Performing NGAFID evaluation...


## 4. Initialize Evaluator
Set up the evaluator with output directory:

In [5]:
# Initialize evaluator
# Creates the AnomalyEvaluator used by the evaluation, cross-validation and
# summary cells that follow, passing "evaluation_results" as its output_dir.
# NOTE(review): the load-and-validate cell above already constructs an
# evaluator with the same argument; this statement replaces it.
evaluator = AnomalyEvaluator(output_dir="evaluation_results")

## 5. Basic Evaluation
Perform initial evaluation of both datasets:

In [6]:
# Basic evaluation with error checking
# Each dataset is scored against its own stored anomaly flags, and
# save_results=True is passed so the evaluator persists what it produces.

# --- ADAPT ---
logger.info("Performing ADAPT evaluation...")
adapt_labels = adapt_results['anomalies']
adapt_scores = adapt_results['anomalyScores']
adapt_metrics = evaluator.evaluate(
    y_true=adapt_labels,
    scores=adapt_scores,
    dataset_name="ADAPT",
    save_results=True,
)

# --- NGAFID ---
logger.info("Performing NGAFID evaluation...")
ngafid_labels = ngafid_results['anomalies']
ngafid_scores = ngafid_results['anomalyScores']
ngafid_metrics = evaluator.evaluate(
    y_true=ngafid_labels,
    scores=ngafid_scores,
    dataset_name="NGAFID",
    save_results=True,
)

INFO:__main__:Performing ADAPT evaluation...
INFO:__main__:Performing NGAFID evaluation...


## 6. Load Raw Data for Cross-Validation
Load the original datasets for cross-validation:

In [7]:
logger.info("Loading raw data for cross-validation...")

# ADAPT: a directory of processed files, read through the project loader.
adapt_data_dir = Path("../data/ADAPT/processed")
adapt_data = load_adapt_data(adapt_data_dir)

# NGAFID: a single processed flight-header CSV, read directly with pandas.
ngafid_data_path = Path("../data/NGAFID/processed/processed_flight_header.csv")
ngafid_data = pd.read_csv(ngafid_data_path)

INFO:__main__:Loading raw data for cross-validation...

'S' is deprecated and will be removed in a future version, please use 's' instead.


'S' is deprecated and will be removed in a future version, please use 's' instead.


'S' is deprecated and will be removed in a future version, please use 's' instead.


'S' is deprecated and will be removed in a future version, please use 's' instead.


'S' is deprecated and will be removed in a future version, please use 's' instead.


'S' is deprecated and will be removed in a future version, please use 's' instead.


'S' is deprecated and will be removed in a future version, please use 's' instead.


'S' is deprecated and will be removed in a future version, please use 's' instead.


'S' is deprecated and will be removed in a future version, please use 's' instead.


'S' is deprecated and will be removed in a future version, please use 's' instead.


'S' is deprecated and will be removed in a future version, please use 's' instead.


'S' is de

## 7. Cross-Validation
Perform cross-validation on both datasets:

In [8]:

# Initialize detector
# Feature count = every ADAPT column except the timestamp.
adapt_input_dim = len(adapt_data.columns) - 1  # Exclude timestamp
detector = HybridAnomalyDetector(adapt_input_dim=adapt_input_dim)

# ADAPT Cross-validation
# NOTE(review): nothing here checks that the feature matrix and the stored
# anomaly flags have the same number of rows, and the recorded run logged
# "inconsistent numbers of samples" errors from the folds -- confirm how
# cross_validate expects X and y to be aligned.
logger.info("Performing cross-validation for ADAPT...")
adapt_features = adapt_data.drop(columns=['Time']).values
adapt_labels = adapt_results['anomalies']
cv_scores_adapt = evaluator.cross_validate(
    anomaly_detector=detector,
    X=adapt_features,
    y=adapt_labels,
    n_splits=5,
    time_series=True,
)

# Reset detector for NGAFID
# NOTE(review): the fresh detector is still sized with the ADAPT feature count
# even though it is fitted on NGAFID columns below -- verify this is intended.
detector = HybridAnomalyDetector(adapt_input_dim=adapt_input_dim)

# NGAFID Cross-validation (numeric columns only)
logger.info("Performing cross-validation for NGAFID...")
ngafid_features = ngafid_data.select_dtypes(include=[np.number]).values
ngafid_labels = ngafid_results['anomalies']
cv_scores_ngafid = evaluator.cross_validate(
    anomaly_detector=detector,
    X=ngafid_features,
    y=ngafid_labels,
    n_splits=5,
    time_series=False,
)

INFO:__main__:Performing cross-validation for ADAPT...
INFO:evaluation:Training fold 1
INFO:anomaly_detection:ADAPT Epoch [5/50], Loss: 0.7400
INFO:anomaly_detection:ADAPT Epoch [10/50], Loss: 0.5444
INFO:anomaly_detection:ADAPT Epoch [15/50], Loss: 0.4324
INFO:anomaly_detection:ADAPT Epoch [20/50], Loss: 0.3514
INFO:anomaly_detection:ADAPT Epoch [25/50], Loss: 0.2893
INFO:anomaly_detection:ADAPT Epoch [30/50], Loss: 0.2378
INFO:anomaly_detection:ADAPT Epoch [35/50], Loss: 0.1948
INFO:anomaly_detection:ADAPT Epoch [40/50], Loss: 0.1611
INFO:anomaly_detection:ADAPT Epoch [45/50], Loss: 0.1356
INFO:anomaly_detection:ADAPT Epoch [50/50], Loss: 0.1157
ERROR:evaluation:Error in fold 1: Found input variables with inconsistent numbers of samples: [241, 192]
INFO:evaluation:Training fold 2
INFO:anomaly_detection:ADAPT Epoch [5/50], Loss: 0.1103
INFO:anomaly_detection:ADAPT Epoch [10/50], Loss: 0.0758
INFO:anomaly_detection:ADAPT Epoch [15/50], Loss: 0.0582
INFO:anomaly_detection:ADAPT Epoch [2

## 8. Print Results
Display comprehensive evaluation results:

In [9]:
def _print_cv_summary(cv_scores, metrics=('roc_auc', 'f1', 'mcc')):
    """Print mean ± standard deviation for each cross-validation metric.

    Metrics that are absent from ``cv_scores``, or that have no fold scores,
    are skipped silently.

    Args:
        cv_scores: Mapping from metric name to a sequence of per-fold scores.
        metrics: Metric names to report, in print order.
    """
    for metric in metrics:
        # A missing metric is skipped instead of raising KeyError.
        try:
            fold_scores = cv_scores[metric]
        except KeyError:
            continue
        # Size-based emptiness test: unlike plain truthiness it also works
        # when the scores are held in a NumPy array.
        if fold_scores is None or np.size(fold_scores) == 0:
            continue
        mean_val = np.mean(fold_scores)
        std_val = np.std(fold_scores)
        print(f"Mean {metric.upper()}: {mean_val:.3f} ± {std_val:.3f}")


# Print evaluation summaries
print("\nADAPT Dataset Evaluation:")
print("=" * 50)
evaluator.print_summary("ADAPT")
print("\nCross-validation Results (ADAPT):")
_print_cv_summary(cv_scores_adapt)

print("\nNGAFID Dataset Evaluation:")
print("=" * 50)
evaluator.print_summary("NGAFID")
print("\nCross-validation Results (NGAFID):")
_print_cv_summary(cv_scores_ngafid)


ADAPT Dataset Evaluation:
Confusion Matrix for ADAPT:
[[2691    2]
 [   0  142]]

Classification Report:
False:
  precision: 1.000
  recall: 0.999
  f1-score: 1.000
  support: 2693.000
True:
  precision: 0.986
  recall: 1.000
  f1-score: 0.993
  support: 142.000
accuracy: 0.999
macro avg:
  precision: 0.993
  recall: 1.000
  f1-score: 0.996
  support: 2835.000
weighted avg:
  precision: 0.999
  recall: 0.999
  f1-score: 0.999
  support: 2835.000

Additional Metrics:
ROC-AUC Score: 1.000
Average Precision Score: 1.000
Matthews Correlation Coefficient: 0.993
Specificity: 0.999
Sensitivity: 1.000
F1 Score: 0.993

Cross-validation Results (ADAPT):

NGAFID Dataset Evaluation:
Confusion Matrix for NGAFID:
[[    0 10301]
 [    1  1144]]

Classification Report:
False:
  precision: 0.000
  recall: 0.000
  f1-score: 0.000
  support: 10301.000
True:
  precision: 0.100
  recall: 0.999
  f1-score: 0.182
  support: 1145.000
accuracy: 0.100
macro avg:
  precision: 0.050
  recall: 0.500
  f1-score: 0

## 9. Save Results
Save comprehensive results to JSON:

In [10]:

def _to_jsonable(value):
    """``json`` fallback encoder for NumPy values.

    Arrays become nested lists and NumPy scalars become plain Python scalars.
    Any other type raises TypeError, as the ``default=`` protocol expects;
    returning the object unchanged would instead make the encoder report a
    misleading "Circular reference detected" error.
    """
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, np.generic):
        return value.item()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# Save comprehensive results
logger.info("Saving comprehensive results...")
results_summary = {
    'ADAPT': {
        'metrics': adapt_metrics,
        'cv_scores': cv_scores_adapt,
    },
    'NGAFID': {
        'metrics': ngafid_metrics,
        'cv_scores': cv_scores_ngafid,
    }
}

# Convert numpy arrays and scalars to plain Python values for JSON
# serialization; results_summary is rebound to the normalized structure.
results_summary = json.loads(json.dumps(results_summary, default=_to_jsonable))

# Make sure the target directory exists before writing into it.
comprehensive_results_path = Path("evaluation_results/comprehensive_results.json")
comprehensive_results_path.parent.mkdir(parents=True, exist_ok=True)

with open(comprehensive_results_path, 'w', encoding='utf-8') as f:
    json.dump(results_summary, f, indent=4)

logger.info("Evaluation completed successfully!")

INFO:__main__:Saving comprehensive results...
INFO:__main__:Evaluation completed successfully!


## 10. Visualization
The evaluator automatically generates several plots in the evaluation_results directory:
- Confusion matrices
- ROC curves
- Precision-Recall curves
- Score distributions
- Threshold sensitivity analysis

These visualizations are saved in the `evaluation_results` directory and follow the naming convention `<plot_type>_<DATASET>.png`, for example:
- `confusion_matrix_ADAPT.png`
- `roc_curve_ADAPT.png`
- `precision_recall_ADAPT.png`

and so on for the remaining plots.