## Accuracy Calculations

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import os

## Wind Farm A

In [2]:

# === Constants ===
normal_status_id = 0
abnormal_status_ids = [1, 3, 4, 5]

# === Dataset event table ===
# One entry per evaluated dataset: the dataset number (used to build the file
# names) and the inclusive [start, end] range of row `id`s of its event window.
events = [
    {"dataset": 3, "start": 52185, "end": 55198},
    {"dataset": 13, "start": 50859, "end": 53865},
    {"dataset": 14, "start": 52584, "end": 53620},
    {"dataset": 17, "start": 52597, "end": 54513},
    {"dataset": 24, "start": 52720, "end": 54714},
    {"dataset": 25, "start": 52289, "end": 54135},
    {"dataset": 38, "start": 52723, "end": 54546},
    {"dataset": 51, "start": 52331, "end": 54003},
    {"dataset": 69, "start": 52364, "end": 54380},
    {"dataset": 71, "start": 52439, "end": 54455},
    {"dataset": 92, "start": 52437, "end": 53778},
]


# === Base paths ===
gt_base = r"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm A\Wind Farm A\datasets"  # CARE dataset for wind farm A
pred_base = r"D:\Master Thesis New Data Set\Final Processed Dataset\Wind Farm A"  # our processed dataset for wind farm A


def count_normal_tn_fp(gt_df, pred_df, event_start, event_end, normal_id, abnormal_ids):
    """Count true negatives and false positives for one dataset.

    Steps (same order as the original inline code):
      1. Keep only rows whose ``train_test`` equals 'prediction' (case-insensitive)
         in both frames.
      2. Drop ground-truth rows whose ``status_type_id`` is in ``abnormal_ids``.
         This happens *before* the event window is applied, so such rows are
         excluded even when they lie inside the window.
      3. Treat a remaining ground-truth row as normal when its ``id`` lies in the
         inclusive range [event_start, event_end] or its ``status_type_id``
         equals ``normal_id``.
      4. Inner-join those normal rows with the predictions on ``id`` and compare
         ``predicted_status_type_binary_smooth`` with ``normal_id``.

    NOTE(review): rows outside the event window whose status is neither
    ``normal_id`` nor listed in ``abnormal_ids`` are left out of both counts -
    confirm this is intended.
    NOTE(review): a missing (NaN) prediction is not equal to ``normal_id`` and is
    therefore counted as a false positive - confirm this is intended.

    Parameters
    ----------
    gt_df : pandas.DataFrame
        Ground truth with columns ``id``, ``train_test``, ``status_type_id``.
    pred_df : pandas.DataFrame
        Predictions with columns ``id``, ``train_test``,
        ``predicted_status_type_binary_smooth``.
    event_start, event_end : int
        Inclusive bounds of the event window, expressed as ``id`` values.
    normal_id : int
        Status / prediction value meaning "normal".
    abnormal_ids : list-like
        Status values removed from the ground truth before scoring.

    Returns
    -------
    tuple[int, int]
        ``(tn, fp)``: normal rows predicted normal, normal rows predicted otherwise.
    """
    gt_pred = gt_df[gt_df['train_test'].str.lower() == 'prediction']
    pred_pred = pred_df[pred_df['train_test'].str.lower() == 'prediction']

    gt_pred = gt_pred[~gt_pred['status_type_id'].isin(abnormal_ids)]

    in_event = gt_pred['id'].between(event_start, event_end)
    is_normal = in_event | (gt_pred['status_type_id'] == normal_id)

    merged = gt_pred.loc[is_normal, ['id']].merge(
        pred_pred[['id', 'predicted_status_type_binary_smooth']],
        on='id',
        how='inner',
    )

    predicted_normal = merged['predicted_status_type_binary_smooth'] == normal_id
    return int(predicted_normal.sum()), int((~predicted_normal).sum())


# === Storage for average computation ===
accuracies = []
total_tn = 0
total_fp = 0

# === Loop through all events ===
# NOTE(review): the Wind Farm B and C cells repeat this same evaluation logic;
# consider sharing a single helper across the three cells.
for event in events:
    dataset_id = event["dataset"]
    event_start = event["start"]
    event_end = event["end"]

    # Construct file paths
    gt_path = os.path.join(gt_base, f"{dataset_id}.csv")
    pred_path = os.path.join(pred_base, f"{dataset_id}_WindFarm_A_predictions_lgb_smoothed.csv")

    # Load CSVs (the ground-truth files are ';'-delimited)
    try:
        gt_df = pd.read_csv(gt_path, delimiter=';')
        pred_df = pd.read_csv(pred_path)
    except FileNotFoundError:
        print(f"⚠️ Files for dataset {dataset_id} not found. Skipping.")
        continue

    tn, fp = count_normal_tn_fp(
        gt_df, pred_df, event_start, event_end, normal_status_id, abnormal_status_ids
    )

    # A dataset without any evaluable row has no defined accuracy; recording it
    # as 0 would silently drag the farm average down, so skip it explicitly.
    if tn + fp == 0:
        print(f"⚠️ Dataset {dataset_id} has no evaluable rows (TN + FP = 0). Skipping.")
        continue

    accuracy = tn / (tn + fp)

    # Store results
    accuracies.append(accuracy)
    total_tn += tn
    total_fp += fp

    # Output per dataset
    print(f"✅ Dataset {dataset_id} | Accuracy: {accuracy:.4f} | TN: {tn}, FP: {fp}")

# === Final average results ===
# Unweighted mean of the per-dataset accuracies that were actually evaluated.
avg_accuracy_a = np.mean(accuracies) if accuracies else 0.0
print("\n📊 === Final Results ===")
print(f"Average Accuracy: {avg_accuracy_a:.4f}")
print(f"Total TN: {total_tn}, Total FP: {total_fp}")


✅ Dataset 3 | Accuracy: 0.9227 | TN: 2518, FP: 211
✅ Dataset 13 | Accuracy: 0.9523 | TN: 2996, FP: 150
✅ Dataset 14 | Accuracy: 0.8741 | TN: 1395, FP: 201
✅ Dataset 17 | Accuracy: 0.6494 | TN: 1343, FP: 725
✅ Dataset 24 | Accuracy: 0.9007 | TN: 1987, FP: 219
✅ Dataset 25 | Accuracy: 0.9686 | TN: 1881, FP: 61
✅ Dataset 38 | Accuracy: 0.8426 | TN: 1788, FP: 334
✅ Dataset 51 | Accuracy: 0.9247 | TN: 1793, FP: 146
✅ Dataset 69 | Accuracy: 0.2721 | TN: 560, FP: 1498
✅ Dataset 71 | Accuracy: 0.9653 | TN: 1837, FP: 66
✅ Dataset 92 | Accuracy: 0.9076 | TN: 1375, FP: 140

📊 === Final Results ===
Average Accuracy: 0.8346
Total TN: 19473, Total FP: 3751


## Wind Farm B

In [3]:

# === Constants ===
normal_status_id = 0
abnormal_status_ids = [1, 3, 4, 5]

# === Dataset event table ===
# One entry per evaluated dataset: the dataset number (used to build the file
# names) and the inclusive [start, end] range of row `id`s of its event window.
events = [
    {"dataset": 2, "start": 52703, "end": 54629},
    {"dataset": 21, "start": 52217, "end": 53513},
    {"dataset": 23, "start": 52559, "end": 53965},
    {"dataset": 52, "start": 52675, "end": 54691},
    {"dataset": 74, "start": 52817, "end": 54737},
    {"dataset": 82, "start": 52975, "end": 54703},
    {"dataset": 83, "start": 52329, "end": 65433},
    {"dataset": 86, "start": 52703, "end": 54621},
    {"dataset": 87, "start": 52475, "end": 54779},
]


# === Base paths ===
gt_base = r"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm B\Wind Farm B\datasets"  # CARE dataset for wind farm B
pred_base = r"D:\Master Thesis New Data Set\Final Processed Dataset\Wind Farm B"  # our processed dataset for wind farm B


def count_normal_tn_fp(gt_df, pred_df, event_start, event_end, normal_id, abnormal_ids):
    """Count true negatives and false positives for one dataset.

    Steps (same order as the original inline code):
      1. Keep only rows whose ``train_test`` equals 'prediction' (case-insensitive)
         in both frames.
      2. Drop ground-truth rows whose ``status_type_id`` is in ``abnormal_ids``.
         This happens *before* the event window is applied, so such rows are
         excluded even when they lie inside the window.
      3. Treat a remaining ground-truth row as normal when its ``id`` lies in the
         inclusive range [event_start, event_end] or its ``status_type_id``
         equals ``normal_id``.
      4. Inner-join those normal rows with the predictions on ``id`` and compare
         ``predicted_status_type_binary_smooth`` with ``normal_id``.

    NOTE(review): rows outside the event window whose status is neither
    ``normal_id`` nor listed in ``abnormal_ids`` are left out of both counts -
    confirm this is intended.
    NOTE(review): a missing (NaN) prediction is not equal to ``normal_id`` and is
    therefore counted as a false positive - confirm this is intended.

    Parameters
    ----------
    gt_df : pandas.DataFrame
        Ground truth with columns ``id``, ``train_test``, ``status_type_id``.
    pred_df : pandas.DataFrame
        Predictions with columns ``id``, ``train_test``,
        ``predicted_status_type_binary_smooth``.
    event_start, event_end : int
        Inclusive bounds of the event window, expressed as ``id`` values.
    normal_id : int
        Status / prediction value meaning "normal".
    abnormal_ids : list-like
        Status values removed from the ground truth before scoring.

    Returns
    -------
    tuple[int, int]
        ``(tn, fp)``: normal rows predicted normal, normal rows predicted otherwise.
    """
    gt_pred = gt_df[gt_df['train_test'].str.lower() == 'prediction']
    pred_pred = pred_df[pred_df['train_test'].str.lower() == 'prediction']

    gt_pred = gt_pred[~gt_pred['status_type_id'].isin(abnormal_ids)]

    in_event = gt_pred['id'].between(event_start, event_end)
    is_normal = in_event | (gt_pred['status_type_id'] == normal_id)

    merged = gt_pred.loc[is_normal, ['id']].merge(
        pred_pred[['id', 'predicted_status_type_binary_smooth']],
        on='id',
        how='inner',
    )

    predicted_normal = merged['predicted_status_type_binary_smooth'] == normal_id
    return int(predicted_normal.sum()), int((~predicted_normal).sum())


# === Storage for average computation ===
accuracies = []
total_tn = 0
total_fp = 0

# === Loop through all events ===
# NOTE(review): the Wind Farm A and C cells repeat this same evaluation logic;
# consider sharing a single helper across the three cells.
for event in events:
    dataset_id = event["dataset"]
    event_start = event["start"]
    event_end = event["end"]

    # Construct file paths
    gt_path = os.path.join(gt_base, f"{dataset_id}.csv")
    pred_path = os.path.join(pred_base, f"{dataset_id}_WindFarm_B_predictions_lgb_smoothed.csv")

    # Load CSVs (the ground-truth files are ';'-delimited)
    try:
        gt_df = pd.read_csv(gt_path, delimiter=';')
        pred_df = pd.read_csv(pred_path)
    except FileNotFoundError:
        print(f"⚠️ Files for dataset {dataset_id} not found. Skipping.")
        continue

    tn, fp = count_normal_tn_fp(
        gt_df, pred_df, event_start, event_end, normal_status_id, abnormal_status_ids
    )

    # A dataset without any evaluable row has no defined accuracy; recording it
    # as 0 would silently drag the farm average down, so skip it explicitly.
    if tn + fp == 0:
        print(f"⚠️ Dataset {dataset_id} has no evaluable rows (TN + FP = 0). Skipping.")
        continue

    accuracy = tn / (tn + fp)

    # Store results
    accuracies.append(accuracy)
    total_tn += tn
    total_fp += fp

    # Output per dataset
    print(f"✅ Dataset {dataset_id} | Accuracy: {accuracy:.4f} | TN: {tn}, FP: {fp}")

# === Final average results ===
# Unweighted mean of the per-dataset accuracies that were actually evaluated.
avg_accuracy_b = np.mean(accuracies) if accuracies else 0.0
print("\n📊 === Final Results ===")
print(f"Average Accuracy: {avg_accuracy_b:.4f}")
print(f"Total TN: {total_tn}, Total FP: {total_fp}")






✅ Dataset 2 | Accuracy: 1.0000 | TN: 2160, FP: 0
✅ Dataset 21 | Accuracy: 1.0000 | TN: 1269, FP: 0
✅ Dataset 23 | Accuracy: 0.7281 | TN: 1269, FP: 474
✅ Dataset 52 | Accuracy: 0.9993 | TN: 2728, FP: 2
✅ Dataset 74 | Accuracy: 0.9990 | TN: 3016, FP: 3
✅ Dataset 82 | Accuracy: 0.7382 | TN: 1762, FP: 625
✅ Dataset 83 | Accuracy: 0.8213 | TN: 10561, FP: 2298
✅ Dataset 86 | Accuracy: 1.0000 | TN: 2856, FP: 0
✅ Dataset 87 | Accuracy: 0.8081 | TN: 2371, FP: 563

📊 === Final Results ===
Average Accuracy: 0.8993
Total TN: 27992, Total FP: 3965


## Wind Farm C

In [4]:
import pandas as pd
import numpy as np
import os

# === Constants ===
normal_status_id = 0
abnormal_status_ids = [1, 3, 4, 5]

# === Dataset event table ===
# One entry per evaluated dataset: the dataset number (used to build the file
# names) and the inclusive [start, end] range of row `id`s of its event window.
events = [
    {"dataset": 1, "start": 51552, "end": 53136},
    {"dataset": 6, "start": 52560, "end": 54576},
    {"dataset": 8, "start": 52560, "end": 54513},
    {"dataset": 20, "start": 51696, "end": 53136},
    {"dataset": 29, "start": 52704, "end": 54720},
    {"dataset": 32, "start": 52560, "end": 54432},
    {"dataset": 36, "start": 52704, "end": 54439},
    {"dataset": 37, "start": 51552, "end": 53280},
    {"dataset": 41, "start": 51552, "end": 55521},
    {"dataset": 43, "start": 52992, "end": 54864},
    {"dataset": 46, "start": 52560, "end": 54556},
    {"dataset": 48, "start": 52848, "end": 54720},
    {"dataset": 50, "start": 52128, "end": 54144},
    {"dataset": 54, "start": 52992, "end": 54576},
    {"dataset": 56, "start": 51120, "end": 53127},
    {"dataset": 57, "start": 52992, "end": 54720},
    {"dataset": 58, "start": 52992, "end": 54432},
    {"dataset": 59, "start": 52128, "end": 54144},
    {"dataset": 60, "start": 52416, "end": 54000},
    {"dataset": 61, "start": 52992, "end": 55008},
    {"dataset": 62, "start": 52416, "end": 53447},
    {"dataset": 63, "start": 52704, "end": 54144},
    {"dataset": 64, "start": 52560, "end": 54000},
    {"dataset": 65, "start": 52992, "end": 55053},
    {"dataset": 75, "start": 52992, "end": 55728},
    {"dataset": 80, "start": 52560, "end": 54624},
    {"dataset": 85, "start": 50832, "end": 52272},
    {"dataset": 88, "start": 52704, "end": 54720},
    {"dataset": 89, "start": 51840, "end": 53712},
    {"dataset": 93, "start": 52704, "end": 55872},
    {"dataset": 94, "start": 52416, "end": 53856},
]



# === Base paths ===
gt_base = r"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm C\Wind Farm C\datasets"  # CARE dataset for wind farm C
pred_base = r"D:\Master Thesis New Data Set\Final Processed Dataset\Wind Farm C"  # our final processed dataset for wind farm C


def count_normal_tn_fp(gt_df, pred_df, event_start, event_end, normal_id, abnormal_ids):
    """Count true negatives and false positives for one dataset.

    Steps (same order as the original inline code):
      1. Keep only rows whose ``train_test`` equals 'prediction' (case-insensitive)
         in both frames.
      2. Drop ground-truth rows whose ``status_type_id`` is in ``abnormal_ids``.
         This happens *before* the event window is applied, so such rows are
         excluded even when they lie inside the window.
      3. Treat a remaining ground-truth row as normal when its ``id`` lies in the
         inclusive range [event_start, event_end] or its ``status_type_id``
         equals ``normal_id``.
      4. Inner-join those normal rows with the predictions on ``id`` and compare
         ``predicted_status_type_binary_smooth`` with ``normal_id``.

    NOTE(review): rows outside the event window whose status is neither
    ``normal_id`` nor listed in ``abnormal_ids`` are left out of both counts -
    confirm this is intended.
    NOTE(review): a missing (NaN) prediction is not equal to ``normal_id`` and is
    therefore counted as a false positive - confirm this is intended.

    Parameters
    ----------
    gt_df : pandas.DataFrame
        Ground truth with columns ``id``, ``train_test``, ``status_type_id``.
    pred_df : pandas.DataFrame
        Predictions with columns ``id``, ``train_test``,
        ``predicted_status_type_binary_smooth``.
    event_start, event_end : int
        Inclusive bounds of the event window, expressed as ``id`` values.
    normal_id : int
        Status / prediction value meaning "normal".
    abnormal_ids : list-like
        Status values removed from the ground truth before scoring.

    Returns
    -------
    tuple[int, int]
        ``(tn, fp)``: normal rows predicted normal, normal rows predicted otherwise.
    """
    gt_pred = gt_df[gt_df['train_test'].str.lower() == 'prediction']
    pred_pred = pred_df[pred_df['train_test'].str.lower() == 'prediction']

    gt_pred = gt_pred[~gt_pred['status_type_id'].isin(abnormal_ids)]

    in_event = gt_pred['id'].between(event_start, event_end)
    is_normal = in_event | (gt_pred['status_type_id'] == normal_id)

    merged = gt_pred.loc[is_normal, ['id']].merge(
        pred_pred[['id', 'predicted_status_type_binary_smooth']],
        on='id',
        how='inner',
    )

    predicted_normal = merged['predicted_status_type_binary_smooth'] == normal_id
    return int(predicted_normal.sum()), int((~predicted_normal).sum())


# === Storage for average computation ===
accuracies = []
total_tn = 0
total_fp = 0

# === Loop through all events ===
# NOTE(review): the Wind Farm A and B cells repeat this same evaluation logic;
# consider sharing a single helper across the three cells.
for event in events:
    dataset_id = event["dataset"]
    event_start = event["start"]
    event_end = event["end"]

    # Construct file paths
    gt_path = os.path.join(gt_base, f"{dataset_id}.csv")
    pred_path = os.path.join(pred_base, f"{dataset_id}_WindFarm_C_predictions_lgb_smoothed.csv")

    # Load CSVs (the ground-truth files are ';'-delimited)
    try:
        gt_df = pd.read_csv(gt_path, delimiter=';')
        pred_df = pd.read_csv(pred_path)
    except FileNotFoundError:
        print(f"⚠️ Files for dataset {dataset_id} not found. Skipping.")
        continue

    tn, fp = count_normal_tn_fp(
        gt_df, pred_df, event_start, event_end, normal_status_id, abnormal_status_ids
    )

    # A dataset without any evaluable row has no defined accuracy; recording it
    # as 0 would silently drag the farm average down, so skip it explicitly.
    if tn + fp == 0:
        print(f"⚠️ Dataset {dataset_id} has no evaluable rows (TN + FP = 0). Skipping.")
        continue

    accuracy = tn / (tn + fp)

    # Store results
    accuracies.append(accuracy)
    total_tn += tn
    total_fp += fp

    # Output per dataset
    print(f"✅ Dataset {dataset_id} | Accuracy: {accuracy:.4f} | TN: {tn}, FP: {fp}")

# === Final average results ===
# Unweighted mean of the per-dataset accuracies that were actually evaluated.
avg_accuracy_c = np.mean(accuracies) if accuracies else 0.0
print("\n📊 === Final Results ===")
print(f"Average Accuracy: {avg_accuracy_c:.4f}")
print(f"Total TN: {total_tn}, Total FP: {total_fp}")


✅ Dataset 1 | Accuracy: 0.8887 | TN: 1885, FP: 236
✅ Dataset 6 | Accuracy: 1.0000 | TN: 2118, FP: 0
✅ Dataset 8 | Accuracy: 0.6841 | TN: 1516, FP: 700
✅ Dataset 20 | Accuracy: 0.9691 | TN: 2415, FP: 77
✅ Dataset 29 | Accuracy: 0.6494 | TN: 1482, FP: 800
✅ Dataset 32 | Accuracy: 0.7191 | TN: 1743, FP: 681
✅ Dataset 36 | Accuracy: 1.0000 | TN: 2847, FP: 0
✅ Dataset 37 | Accuracy: 0.8426 | TN: 2040, FP: 381
✅ Dataset 41 | Accuracy: 0.6561 | TN: 2749, FP: 1441
✅ Dataset 43 | Accuracy: 0.7805 | TN: 1988, FP: 559
✅ Dataset 46 | Accuracy: 1.0000 | TN: 2554, FP: 0
✅ Dataset 48 | Accuracy: 0.8497 | TN: 2295, FP: 406
✅ Dataset 50 | Accuracy: 0.5126 | TN: 1748, FP: 1662
✅ Dataset 54 | Accuracy: 0.3563 | TN: 1054, FP: 1904
✅ Dataset 56 | Accuracy: 0.9846 | TN: 2109, FP: 33
✅ Dataset 57 | Accuracy: 0.9384 | TN: 2254, FP: 148
✅ Dataset 58 | Accuracy: 0.8885 | TN: 1650, FP: 207
✅ Dataset 59 | Accuracy: 0.7997 | TN: 2172, FP: 544
✅ Dataset 60 | Accuracy: 0.4444 | TN: 1056, FP: 1320
✅ Dataset 61 | Accu

## Overall Accuracy

In [5]:
# Overall accuracy: mean of the three per-farm averages, weighted by the number
# of entries in each farm's event table (A: 11, B: 9, C: 31; 51 in total).
# NOTE(review): these weights are hard-coded. If a farm cell skipped a dataset
# (e.g. its files were not found), that farm's average covers fewer datasets
# than the weight used here - confirm the counts before reporting this number.
farm_averages_and_weights = [
    (avg_accuracy_a, 11),
    (avg_accuracy_b, 9),
    (avg_accuracy_c, 31),
]
weighted_total = sum(farm_avg * weight for farm_avg, weight in farm_averages_and_weights)
avg_accuracy = weighted_total / 51
print("\n📊 === Final Results ===")
print(f"Average Accuracy: {avg_accuracy:.4f}")


📊 === Final Results ===
Average Accuracy: 0.81
