## Coverage Calculations

## Import Libraries

In [1]:
import pandas as pd
import numpy as np

## Wind Farm A

In [2]:

# Wind Farm A: per-dataset coverage (F-beta) score of the smoothed predictions
# against the labelled event windows, followed by the farm-level average.

# One tuple per dataset: (dataset_id, event_start_id, event_end_id).
# Rows whose `id` lies in [event_start_id, event_end_id] (inclusive) are
# labelled as anomalous.
datasets = [
    (0, 52436, 54447),
    (10, 52611, 53591),
    (22, 51888, 52892),
    (26, 52261, 53269),
    (40, 51363, 55870),
    (42, 52303, 53309),
    (68, 52063, 54076),
    (72, 52497, 53505),
    (73, 52745, 53753),
]

# Status IDs
normal_status_id = 0                # predicted value that counts as "no anomaly"
abnormal_status_ids = [1, 3, 4, 5]  # ground-truth status types dropped before scoring; adjust based on your definition

# F-beta parameter: in the formula below FN is weighted by beta**2, so
# beta < 1 penalises false positives more than false negatives.
beta = 0.5

f_beta_scores = []

for dataset_id, event_start, event_end in datasets:
    print(f"\nðŸ“‚ Processing Dataset {dataset_id}...")

    # Construct file paths
    ground_truth_path = fr"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm A\Wind Farm A\datasets\{dataset_id}.csv"
    predicted_path = fr"D:\Master Thesis New Data Set\Final Processed Dataset\Wind Farm A\{dataset_id}_WindFarm_A_predictions_lgb_smoothed.csv"

    # Load CSVs (the ground-truth file is read with ';' as separator, the
    # prediction file with the pandas default).
    gt_df = pd.read_csv(ground_truth_path, delimiter=';')
    pred_df = pd.read_csv(predicted_path)

    # Keep only rows of the prediction period; for the ground truth also drop
    # rows whose status type is in `abnormal_status_ids`. Both conditions are
    # applied as one mask followed by a single .copy(), so the column
    # assignment below writes to an independent frame rather than to a
    # filtered slice (which would raise SettingWithCopyWarning).
    gt_keep = (
        gt_df['train_test'].str.lower().eq('prediction')
        & ~gt_df['status_type_id'].isin(abnormal_status_ids)
    )
    gt_df = gt_df.loc[gt_keep].copy()
    pred_df = pred_df.loc[pred_df['train_test'].str.lower().eq('prediction')].copy()

    # Assign anomaly labels: 1 inside the event window (bounds inclusive), else 0.
    gt_df['anomaly_label'] = gt_df['id'].between(event_start, event_end).astype(int)

    # Merge ground truth with predictions; the inner join keeps only ids
    # present in both frames.
    merged_df = pd.merge(
        gt_df[['id', 'anomaly_label']],
        pred_df[['id', 'predicted_status_type_binary_smooth']],
        on='id',
        how='inner'
    )

    # Map predictions to binary anomaly labels: anything other than the normal
    # status counts as a predicted anomaly (vectorised instead of row-wise apply).
    merged_df['pred_label'] = (
        merged_df['predicted_status_type_binary_smooth'] != normal_status_id
    ).astype(int)

    # Compute TP, FN, FP
    is_event = merged_df['anomaly_label'] == 1
    is_flagged = merged_df['pred_label'] == 1
    tp = np.sum(is_event & is_flagged)
    fn = np.sum(is_event & ~is_flagged)
    fp = np.sum(~is_event & is_flagged)

    # Compute F_beta score:
    #   (1 + beta^2) * TP / ((1 + beta^2) * TP + beta^2 * FN + FP)
    # A zero denominator (no TP, FN or FP at all) is scored as 0.
    numerator = (1 + beta ** 2) * tp
    denominator = numerator + (beta ** 2) * fn + fp
    coverage_score = numerator / denominator if denominator > 0 else 0

    f_beta_scores.append(coverage_score)

    print(f"âœ… Dataset {dataset_id}: F_{beta}-Score = {coverage_score:.4f} | TP={tp}, FN={fn}, FP={fp}")

# Average F_beta Score (unweighted mean over the datasets of this farm)
avg_f_beta_a = np.mean(f_beta_scores)
print(f"\nðŸ”¥ Average F_{beta}-Score across all datasets of wind farm A: {avg_f_beta_a:.4f}")



ðŸ“‚ Processing Dataset 0...
âœ… Dataset 0: F_0.5-Score = 0.5657 | TP=658, FN=1354, FP=293

ðŸ“‚ Processing Dataset 10...
âœ… Dataset 10: F_0.5-Score = 0.2975 | TP=89, FN=891, FP=40

ðŸ“‚ Processing Dataset 22...
âœ… Dataset 22: F_0.5-Score = 0.5589 | TP=203, FN=801, FP=0

ðŸ“‚ Processing Dataset 26...
âœ… Dataset 26: F_0.5-Score = 0.1330 | TP=30, FN=978, FP=0

ðŸ“‚ Processing Dataset 40...
âœ… Dataset 40: F_0.5-Score = 0.7625 | TP=1565, FN=2437, FP=0

ðŸ“‚ Processing Dataset 42...
âœ… Dataset 42: F_0.5-Score = 0.5804 | TP=218, FN=788, FP=0

ðŸ“‚ Processing Dataset 68...
âœ… Dataset 68: F_0.5-Score = 0.9286 | TP=1455, FN=559, FP=0

ðŸ“‚ Processing Dataset 72...
âœ… Dataset 72: F_0.5-Score = 0.0414 | TP=11, FN=997, FP=69

ðŸ“‚ Processing Dataset 73...
âœ… Dataset 73: F_0.5-Score = 0.4632 | TP=249, FN=759, FP=171

ðŸ”¥ Average F_0.5-Score across all datasets of wind farm A: 0.4812


## Wind Farm B

In [3]:

# Wind Farm B: per-dataset coverage (F-beta) score of the smoothed predictions
# against the labelled event windows, followed by the farm-level average.

# One tuple per dataset: (dataset_id, event_start_id, event_end_id).
# Rows whose `id` lies in [event_start_id, event_end_id] (inclusive) are
# labelled as anomalous.
datasets = [
    (7, 52703, 57167),
    (19, 52673, 55553),
    (27, 52619, 61403),
    (34, 52531, 55699),
    (53, 52559, 58606),
    (77, 52991, 61631),
]

# Status IDs
normal_status_id = 0                # predicted value that counts as "no anomaly"
abnormal_status_ids = [1, 3, 4, 5]  # ground-truth status types dropped before scoring; adjust based on your definition

# F-beta parameter: in the formula below FN is weighted by beta**2, so
# beta < 1 penalises false positives more than false negatives.
beta = 0.5

f_beta_scores = []

for dataset_id, event_start, event_end in datasets:
    print(f"\nðŸ“‚ Processing Dataset {dataset_id}...")

    # Construct file paths
    ground_truth_path = fr"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm B\Wind Farm B\datasets\{dataset_id}.csv"
    predicted_path = fr"D:\Master Thesis New Data Set\Final Processed Dataset\Wind Farm B\{dataset_id}_WindFarm_B_predictions_lgb_smoothed.csv"

    # Load CSVs (the ground-truth file is read with ';' as separator, the
    # prediction file with the pandas default).
    gt_df = pd.read_csv(ground_truth_path, delimiter=';')
    pred_df = pd.read_csv(predicted_path)

    # Keep only rows of the prediction period; for the ground truth also drop
    # rows whose status type is in `abnormal_status_ids`. Both conditions are
    # applied as one mask followed by a single .copy(), so the column
    # assignment below writes to an independent frame rather than to a
    # filtered slice (which would raise SettingWithCopyWarning).
    gt_keep = (
        gt_df['train_test'].str.lower().eq('prediction')
        & ~gt_df['status_type_id'].isin(abnormal_status_ids)
    )
    gt_df = gt_df.loc[gt_keep].copy()
    pred_df = pred_df.loc[pred_df['train_test'].str.lower().eq('prediction')].copy()

    # Assign anomaly labels: 1 inside the event window (bounds inclusive), else 0.
    gt_df['anomaly_label'] = gt_df['id'].between(event_start, event_end).astype(int)

    # Merge ground truth with predictions; the inner join keeps only ids
    # present in both frames.
    merged_df = pd.merge(
        gt_df[['id', 'anomaly_label']],
        pred_df[['id', 'predicted_status_type_binary_smooth']],
        on='id',
        how='inner'
    )

    # Map predictions to binary anomaly labels: anything other than the normal
    # status counts as a predicted anomaly (vectorised instead of row-wise apply).
    merged_df['pred_label'] = (
        merged_df['predicted_status_type_binary_smooth'] != normal_status_id
    ).astype(int)

    # Compute TP, FN, FP
    is_event = merged_df['anomaly_label'] == 1
    is_flagged = merged_df['pred_label'] == 1
    tp = np.sum(is_event & is_flagged)
    fn = np.sum(is_event & ~is_flagged)
    fp = np.sum(~is_event & is_flagged)

    # Compute F_beta score:
    #   (1 + beta^2) * TP / ((1 + beta^2) * TP + beta^2 * FN + FP)
    # A zero denominator (no TP, FN or FP at all) is scored as 0.
    numerator = (1 + beta ** 2) * tp
    denominator = numerator + (beta ** 2) * fn + fp
    coverage_score = numerator / denominator if denominator > 0 else 0

    f_beta_scores.append(coverage_score)

    print(f"âœ… Dataset {dataset_id}: F_{beta}-Score = {coverage_score:.4f} | TP={tp}, FN={fn}, FP={fp}")

# Average F_beta Score (unweighted mean over the datasets of this farm)
avg_f_beta_b = np.mean(f_beta_scores)
print(f"\nðŸ”¥ Average F_{beta}-Score across all datasets of wind farm B: {avg_f_beta_b:.4f}")



ðŸ“‚ Processing Dataset 7...
âœ… Dataset 7: F_0.5-Score = 0.4882 | TP=775, FN=3435, FP=157

ðŸ“‚ Processing Dataset 19...
âœ… Dataset 19: F_0.5-Score = 0.8093 | TP=2855, FN=0, FP=841

ðŸ“‚ Processing Dataset 27...
âœ… Dataset 27: F_0.5-Score = 0.6237 | TP=2092, FN=6312, FP=0

ðŸ“‚ Processing Dataset 34...
âœ… Dataset 34: F_0.5-Score = 0.6528 | TP=1754, FN=1292, FP=843

ðŸ“‚ Processing Dataset 53...
âœ… Dataset 53: F_0.5-Score = 0.9386 | TP=4303, FN=1408, FP=0

ðŸ“‚ Processing Dataset 77...
âœ… Dataset 77: F_0.5-Score = 0.9646 | TP=7862, FN=0, FP=361

ðŸ”¥ Average F_0.5-Score across all datasets of wind farm B: 0.7462


## Wind Farm C

In [4]:
import pandas as pd
import numpy as np

# Wind Farm C: per-dataset coverage (F-beta) score of the smoothed predictions
# against the labelled event windows, followed by the farm-level average.

# One tuple per dataset: (dataset_id, event_start_id, event_end_id).
# Rows whose `id` lies in [event_start_id, event_end_id] (inclusive) are
# labelled as anomalous.
datasets = [
    (4, 52992, 55728),
    (5, 52272, 52794),
    (9, 52992, 56028),
    (11, 52416, 55572),
    (12, 52560, 55818),
    (15, 51984, 54432),
    (16, 51264, 53423),
    (18, 51408, 51983),
    (28, 52704, 55629),
    (30, 52560, 55822),
    (31, 52848, 53868),
    (33, 52848, 55728),
    (35, 51696, 52614),
    (39, 52848, 53582),
    (44, 52704, 62138),
    (47, 52416, 53128),
    (49, 51840, 52437),
    (55, 52848, 55320),
    (66, 51696, 52638),
    (67, 52704, 61056),
    (70, 52560, 55461),
    (76, 51552, 51797),
    (78, 52560, 52857),
    (79, 52704, 52992),
    (81, 52704, 53067),
    (90, 52848, 54591),
    (91, 52704, 55599),
]

# Status IDs
normal_status_id = 0                # predicted value that counts as "no anomaly"
abnormal_status_ids = [1, 3, 4, 5]  # ground-truth status types dropped before scoring; adjust based on your definition

# F-beta parameter: in the formula below FN is weighted by beta**2, so
# beta < 1 penalises false positives more than false negatives.
beta = 0.5

f_beta_scores = []

for dataset_id, event_start, event_end in datasets:
    print(f"\nðŸ“‚ Processing Dataset {dataset_id}...")

    # Construct file paths
    ground_truth_path = fr"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm C\Wind Farm C\datasets\{dataset_id}.csv"
    predicted_path = fr"D:\Master Thesis New Data Set\Final Processed Dataset\Wind Farm C\{dataset_id}_WindFarm_C_predictions_lgb_smoothed.csv"

    # Load CSVs (the ground-truth file is read with ';' as separator, the
    # prediction file with the pandas default).
    gt_df = pd.read_csv(ground_truth_path, delimiter=';')
    pred_df = pd.read_csv(predicted_path)

    # Keep only rows of the prediction period; for the ground truth also drop
    # rows whose status type is in `abnormal_status_ids`. Both conditions are
    # applied as one mask followed by a single .copy(), so the column
    # assignment below writes to an independent frame rather than to a
    # filtered slice (which would raise SettingWithCopyWarning).
    gt_keep = (
        gt_df['train_test'].str.lower().eq('prediction')
        & ~gt_df['status_type_id'].isin(abnormal_status_ids)
    )
    gt_df = gt_df.loc[gt_keep].copy()
    pred_df = pred_df.loc[pred_df['train_test'].str.lower().eq('prediction')].copy()

    # Assign anomaly labels: 1 inside the event window (bounds inclusive), else 0.
    gt_df['anomaly_label'] = gt_df['id'].between(event_start, event_end).astype(int)

    # Merge ground truth with predictions; the inner join keeps only ids
    # present in both frames.
    merged_df = pd.merge(
        gt_df[['id', 'anomaly_label']],
        pred_df[['id', 'predicted_status_type_binary_smooth']],
        on='id',
        how='inner'
    )

    # Map predictions to binary anomaly labels: anything other than the normal
    # status counts as a predicted anomaly (vectorised instead of row-wise apply).
    merged_df['pred_label'] = (
        merged_df['predicted_status_type_binary_smooth'] != normal_status_id
    ).astype(int)

    # Compute TP, FN, FP
    is_event = merged_df['anomaly_label'] == 1
    is_flagged = merged_df['pred_label'] == 1
    tp = np.sum(is_event & is_flagged)
    fn = np.sum(is_event & ~is_flagged)
    fp = np.sum(~is_event & is_flagged)

    # Compute F_beta score:
    #   (1 + beta^2) * TP / ((1 + beta^2) * TP + beta^2 * FN + FP)
    # A zero denominator (no TP, FN or FP at all) is scored as 0.
    numerator = (1 + beta ** 2) * tp
    denominator = numerator + (beta ** 2) * fn + fp
    coverage_score = numerator / denominator if denominator > 0 else 0

    f_beta_scores.append(coverage_score)

    print(f"âœ… Dataset {dataset_id}: F_{beta}-Score = {coverage_score:.4f} | TP={tp}, FN={fn}, FP={fp}")

# Average F_beta Score (unweighted mean over the datasets of this farm)
avg_f_beta_c = np.mean(f_beta_scores)
print(f"\nðŸ”¥ Average F_{beta}-Score across all datasets of wind farm C: {avg_f_beta_c:.4f}")



ðŸ“‚ Processing Dataset 4...
âœ… Dataset 4: F_0.5-Score = 0.2289 | TP=236, FN=2359, FP=404

ðŸ“‚ Processing Dataset 5...
âœ… Dataset 5: F_0.5-Score = 0.5920 | TP=110, FN=135, FP=61

ðŸ“‚ Processing Dataset 9...
âœ… Dataset 9: F_0.5-Score = 0.5154 | TP=514, FN=2372, FP=11

ðŸ“‚ Processing Dataset 11...
âœ… Dataset 11: F_0.5-Score = 1.0000 | TP=2101, FN=0, FP=0

ðŸ“‚ Processing Dataset 12...
âœ… Dataset 12: F_0.5-Score = 0.9285 | TP=2949, FN=0, FP=284

ðŸ“‚ Processing Dataset 15...
âœ… Dataset 15: F_0.5-Score = 0.5908 | TP=608, FN=1818, FP=72

ðŸ“‚ Processing Dataset 16...
âœ… Dataset 16: F_0.5-Score = 0.7203 | TP=137, FN=166, FP=25

ðŸ“‚ Processing Dataset 18...
âœ… Dataset 18: F_0.5-Score = 0.7161 | TP=227, FN=326, FP=31

ðŸ“‚ Processing Dataset 28...
âœ… Dataset 28: F_0.5-Score = 0.3418 | TP=257, FN=2118, FP=89

ðŸ“‚ Processing Dataset 30...
âœ… Dataset 30: F_0.5-Score = 0.2727 | TP=74, FN=359, FP=157

ðŸ“‚ Processing Dataset 31...
âœ… Dataset 31: F_0.5-Score = 0.0647 | TP=32, FN=192

## Overall Coverage

In [5]:
# Overall coverage for all wind farms: the per-farm averages are combined as a
# dataset-weighted mean, so every dataset contributes equally regardless of
# which farm it belongs to.
#
# Each entry is (per-farm average, number of datasets scored for that farm).
# The counts mirror the lengths of the `datasets` lists in the farm cells
# (A: 9, B: 6, C: 27) and must be updated if those lists change. The total is
# derived from the counts instead of being hard-coded separately.
farm_averages = [
    (avg_f_beta_a, 9),
    (avg_f_beta_b, 6),
    (avg_f_beta_c, 27),
]
total_datasets = sum(n_datasets for _, n_datasets in farm_averages)

avg_f_beta_final = sum(avg * n_datasets for avg, n_datasets in farm_averages) / total_datasets
print(f"\nðŸ”¥ Average F_{beta}-Score across all datasets: {avg_f_beta_final:.4f}")


ðŸ”¥ Average F_0.5-Score across all datasets: 0.5023
