## Earliness Calculations

## Import Libraries

In [1]:
import pandas as pd
import numpy as np

## Wind Farm A

In [2]:

# Events evaluated for Wind Farm A (the scoring loop below treats every one
# of them as abnormal).
# Each tuple is (dataset_id, event_start, event_end), in the order the loop
# unpacks it: dataset_id is interpolated into the CSV file names, and
# event_start / event_end are inclusive bounds compared against the `id` column.
datasets = [
    (0, 52436, 54447),
    (10, 52611, 53591),
    (22, 51888, 52892),
    (26, 52261, 53269),
    (40, 51363, 55870),
    (42, 52303, 53309),
    (45, 52731, 53738),
    (68, 52063, 54076),
    (72, 52497, 53505),
    (73, 52745, 53753),
    (84, 52623, 53627),
]

def weight_function(relative_pos):
    """Return the earliness weight for a position inside an event window.

    Piecewise weighting (as per the author of the metric):

    * ``relative_pos <= 0.5``  -> full weight ``1.0``
    * ``relative_pos > 0.5``   -> ``2 * (1 - relative_pos)``, i.e. a straight
      line from 1 at the midpoint down to 0 at ``relative_pos == 1``

    Args:
        relative_pos: Scalar position within the event, where 0 is the event
            start and 1 is the event end. No clamping is applied, so values
            above 1 produce negative weights.

    Returns:
        The weight for that position.
    """
    in_first_half = relative_pos <= 0.5
    return 1.0 if in_first_half else 2 * (1 - relative_pos)

# Per-event Weighted Score (WS): the weight-adjusted share of timestamps inside
# the event window for which the prediction column is non-zero.
ws_scores = []

for dataset_id, event_start, event_end in datasets:
    # Ground-truth file is semicolon-separated; the prediction file uses the
    # pandas default separator.
    ground_truth_path = fr"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm A\Wind Farm A\datasets\{dataset_id}.csv"
    predicted_path = fr"D:\Master Thesis New Data Set\Final Processed Dataset\Wind Farm A\{dataset_id}_WindFarm_A_predictions_lgb_smoothed.csv"

    is_abnormal_event = True  # every listed event is treated as abnormal

    gt_df = pd.read_csv(ground_truth_path, delimiter=';')
    pred_df = pd.read_csv(predicted_path)

    # Keep only the rows marked as the prediction period (case-insensitive).
    gt_df = gt_df.loc[gt_df['train_test'].str.lower().eq('prediction')].copy()
    pred_df = pred_df.loc[pred_df['train_test'].str.lower().eq('prediction')].copy()

    # Ground-truth label: 1 inside the inclusive [event_start, event_end] id range.
    gt_df['anomaly_label'] = 0
    if is_abnormal_event:
        gt_df.loc[gt_df['id'].between(event_start, event_end), 'anomaly_label'] = 1

    # Inner join: only ids present in both files survive.
    merged_df = gt_df[['id', 'anomaly_label']].merge(
        pred_df[['id', 'predicted_status_type_binary_smooth']],
        on='id',
        how='inner',
    )

    # Binarise the smoothed prediction column: any non-zero value counts as anomaly.
    merged_df['pred_label'] = merged_df['predicted_status_type_binary_smooth'].ne(0).astype(int)

    # Restrict to the event window (bounds inclusive).
    in_event = merged_df['id'].between(event_start, event_end)
    event_df = merged_df.loc[in_event].copy()

    # Position of each row within the event (0 = start, 1 = end) and its weight.
    event_span = event_end - event_start
    event_df['relative_pos'] = (event_df['id'] - event_start) / event_span
    event_df['weight'] = event_df['relative_pos'].map(weight_function)

    # WS = sum(weight * predicted anomaly) / sum(weight) over the event window.
    weighted_hits = (event_df['weight'] * event_df['pred_label']).sum()
    total_weight = event_df['weight'].sum()

    if total_weight != 0:
        ws = weighted_hits / total_weight
    else:
        ws = 0  # no usable rows in the window: score the event as 0

    ws_scores.append(ws)

    print(f"Dataset {dataset_id} âœ¨ Weighted Score (WS) for Event [{event_start}â€“{event_end}]: {ws:.4f}")

# Unweighted mean of the per-event scores for this wind farm.
average_ws_a = np.mean(ws_scores)
print(f"\nðŸ”¥ Average Weighted Score (WS) across all datasets of wind farm A: {average_ws_a:.4f}")


Dataset 0 âœ¨ Weighted Score (WS) for Event [52436â€“54447]: 0.4361
Dataset 10 âœ¨ Weighted Score (WS) for Event [52611â€“53591]: 0.1181
Dataset 22 âœ¨ Weighted Score (WS) for Event [51888â€“52892]: 0.2090
Dataset 26 âœ¨ Weighted Score (WS) for Event [52261â€“53269]: 0.0388
Dataset 40 âœ¨ Weighted Score (WS) for Event [51363â€“55870]: 0.3821
Dataset 42 âœ¨ Weighted Score (WS) for Event [52303â€“53309]: 0.1929
Dataset 45 âœ¨ Weighted Score (WS) for Event [52731â€“53738]: 0.1462
Dataset 68 âœ¨ Weighted Score (WS) for Event [52063â€“54076]: 0.7275
Dataset 72 âœ¨ Weighted Score (WS) for Event [52497â€“53505]: 0.0138
Dataset 73 âœ¨ Weighted Score (WS) for Event [52745â€“53753]: 0.1481
Dataset 84 âœ¨ Weighted Score (WS) for Event [52623â€“53627]: 0.3331

ðŸ”¥ Average Weighted Score (WS) across all datasets of wind farm A: 0.2496


## Wind Farm B

In [3]:

# Events evaluated for Wind Farm B (the scoring loop below treats every one
# of them as abnormal).
# Each tuple is (dataset_id, event_start, event_end), in the order the loop
# unpacks it: dataset_id is interpolated into the CSV file names, and
# event_start / event_end are inclusive bounds compared against the `id` column.
datasets = [
    (7, 52703, 57167),
    (19, 52673, 55553),
    (27, 52619, 61403),
    (34, 52531, 55699),
    (53, 52559, 58606),
    (77, 52991, 61631),
]

def weight_function(relative_pos):
    """Return the earliness weight for a position inside an event window.

    Piecewise weighting (as per the author of the metric):

    * ``relative_pos <= 0.5``  -> full weight ``1.0``
    * ``relative_pos > 0.5``   -> ``2 * (1 - relative_pos)``, i.e. a straight
      line from 1 at the midpoint down to 0 at ``relative_pos == 1``

    Args:
        relative_pos: Scalar position within the event, where 0 is the event
            start and 1 is the event end. No clamping is applied, so values
            above 1 produce negative weights.

    Returns:
        The weight for that position.
    """
    in_first_half = relative_pos <= 0.5
    return 1.0 if in_first_half else 2 * (1 - relative_pos)

# Per-event Weighted Score (WS): the weight-adjusted share of timestamps inside
# the event window for which the prediction column is non-zero.
ws_scores = []

for dataset_id, event_start, event_end in datasets:
    # Ground-truth file is semicolon-separated; the prediction file uses the
    # pandas default separator.
    ground_truth_path = fr"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm B\Wind Farm B\datasets\{dataset_id}.csv"
    predicted_path = fr"D:\Master Thesis New Data Set\Final Processed Dataset\Wind Farm B\{dataset_id}_WindFarm_B_predictions_lgb_smoothed.csv"

    is_abnormal_event = True  # every listed event is treated as abnormal

    gt_df = pd.read_csv(ground_truth_path, delimiter=';')
    pred_df = pd.read_csv(predicted_path)

    # Keep only the rows marked as the prediction period (case-insensitive).
    gt_df = gt_df.loc[gt_df['train_test'].str.lower().eq('prediction')].copy()
    pred_df = pred_df.loc[pred_df['train_test'].str.lower().eq('prediction')].copy()

    # Ground-truth label: 1 inside the inclusive [event_start, event_end] id range.
    gt_df['anomaly_label'] = 0
    if is_abnormal_event:
        gt_df.loc[gt_df['id'].between(event_start, event_end), 'anomaly_label'] = 1

    # Inner join: only ids present in both files survive.
    merged_df = gt_df[['id', 'anomaly_label']].merge(
        pred_df[['id', 'predicted_status_type_binary_smooth']],
        on='id',
        how='inner',
    )

    # Binarise the smoothed prediction column: any non-zero value counts as anomaly.
    merged_df['pred_label'] = merged_df['predicted_status_type_binary_smooth'].ne(0).astype(int)

    # Restrict to the event window (bounds inclusive).
    in_event = merged_df['id'].between(event_start, event_end)
    event_df = merged_df.loc[in_event].copy()

    # Position of each row within the event (0 = start, 1 = end) and its weight.
    event_span = event_end - event_start
    event_df['relative_pos'] = (event_df['id'] - event_start) / event_span
    event_df['weight'] = event_df['relative_pos'].map(weight_function)

    # WS = sum(weight * predicted anomaly) / sum(weight) over the event window.
    weighted_hits = (event_df['weight'] * event_df['pred_label']).sum()
    total_weight = event_df['weight'].sum()

    if total_weight != 0:
        ws = weighted_hits / total_weight
    else:
        ws = 0  # no usable rows in the window: score the event as 0

    ws_scores.append(ws)

    print(f"Dataset {dataset_id} âœ¨ Weighted Score (WS) for Event [{event_start}â€“{event_end}]: {ws:.4f}")

# Unweighted mean of the per-event scores for this wind farm.
average_ws_b = np.mean(ws_scores)
print(f"\nðŸ”¥ Average Weighted Score (WS) across all datasets of wind farm B: {average_ws_b:.4f}")


Dataset 7 âœ¨ Weighted Score (WS) for Event [52703â€“57167]: 0.2177
Dataset 19 âœ¨ Weighted Score (WS) for Event [52673â€“55553]: 1.0000
Dataset 27 âœ¨ Weighted Score (WS) for Event [52619â€“61403]: 0.2486
Dataset 34 âœ¨ Weighted Score (WS) for Event [52531â€“55699]: 0.4286
Dataset 53 âœ¨ Weighted Score (WS) for Event [52559â€“58606]: 0.7809
Dataset 77 âœ¨ Weighted Score (WS) for Event [52991â€“61631]: 1.0000

ðŸ”¥ Average Weighted Score (WS) across all datasets of wind farm B: 0.6126


## Wind Farm C

In [4]:
# Events evaluated for Wind Farm C (the scoring loop below treats every one
# of them as abnormal).
# Each tuple is (dataset_id, event_start, event_end), in the order the loop
# unpacks it: dataset_id is interpolated into the CSV file names, and
# event_start / event_end are inclusive bounds compared against the `id` column.
datasets = [
    (4, 52992, 55728),
    (5, 52272, 52794),
    (9, 52992, 56028),
    (11, 52416, 55572),
    (12, 52560, 55818),
    (15, 51984, 54432),
    (16, 51264, 53423),
    (18, 51408, 51983),
    (28, 52704, 55629),
    (30, 52560, 55822),
    (31, 52848, 53868),
    (33, 52848, 55728),
    (35, 51696, 52614),
    (39, 52848, 53582),
    (44, 52704, 62138),
    (47, 52416, 53128),
    (49, 51840, 52437),
    (55, 52848, 55320),
    (66, 51696, 52638),
    (67, 52704, 61056),
    (70, 52560, 55461),
    (76, 51552, 51797),
    (78, 52560, 52857),
    (79, 52704, 52992),
    (81, 52704, 53067),
    (90, 52848, 54591),
    (91, 52704, 55599),
]

def weight_function(relative_pos):
    """Return the earliness weight for a position inside an event window.

    Piecewise weighting (as per the author of the metric):

    * ``relative_pos <= 0.5``  -> full weight ``1.0``
    * ``relative_pos > 0.5``   -> ``2 * (1 - relative_pos)``, i.e. a straight
      line from 1 at the midpoint down to 0 at ``relative_pos == 1``

    Args:
        relative_pos: Scalar position within the event, where 0 is the event
            start and 1 is the event end. No clamping is applied, so values
            above 1 produce negative weights.

    Returns:
        The weight for that position.
    """
    in_first_half = relative_pos <= 0.5
    return 1.0 if in_first_half else 2 * (1 - relative_pos)

# Per-event Weighted Score (WS): the weight-adjusted share of timestamps inside
# the event window for which the prediction column is non-zero.
ws_scores = []

for dataset_id, event_start, event_end in datasets:
    # Ground-truth file is semicolon-separated; the prediction file uses the
    # pandas default separator.
    ground_truth_path = fr"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm C\Wind Farm C\datasets\{dataset_id}.csv"
    predicted_path = fr"D:\Master Thesis New Data Set\Final Processed Dataset\Wind Farm C\{dataset_id}_WindFarm_C_predictions_lgb_smoothed.csv"

    is_abnormal_event = True  # every listed event is treated as abnormal

    gt_df = pd.read_csv(ground_truth_path, delimiter=';')
    pred_df = pd.read_csv(predicted_path)

    # Keep only the rows marked as the prediction period (case-insensitive).
    gt_df = gt_df.loc[gt_df['train_test'].str.lower().eq('prediction')].copy()
    pred_df = pred_df.loc[pred_df['train_test'].str.lower().eq('prediction')].copy()

    # Ground-truth label: 1 inside the inclusive [event_start, event_end] id range.
    gt_df['anomaly_label'] = 0
    if is_abnormal_event:
        gt_df.loc[gt_df['id'].between(event_start, event_end), 'anomaly_label'] = 1

    # Inner join: only ids present in both files survive.
    merged_df = gt_df[['id', 'anomaly_label']].merge(
        pred_df[['id', 'predicted_status_type_binary_smooth']],
        on='id',
        how='inner',
    )

    # Binarise the smoothed prediction column: any non-zero value counts as anomaly.
    merged_df['pred_label'] = merged_df['predicted_status_type_binary_smooth'].ne(0).astype(int)

    # Restrict to the event window (bounds inclusive).
    in_event = merged_df['id'].between(event_start, event_end)
    event_df = merged_df.loc[in_event].copy()

    # Position of each row within the event (0 = start, 1 = end) and its weight.
    event_span = event_end - event_start
    event_df['relative_pos'] = (event_df['id'] - event_start) / event_span
    event_df['weight'] = event_df['relative_pos'].map(weight_function)

    # WS = sum(weight * predicted anomaly) / sum(weight) over the event window.
    weighted_hits = (event_df['weight'] * event_df['pred_label']).sum()
    total_weight = event_df['weight'].sum()

    if total_weight != 0:
        ws = weighted_hits / total_weight
    else:
        ws = 0  # no usable rows in the window: score the event as 0

    ws_scores.append(ws)

    print(f"Dataset {dataset_id} âœ¨ Weighted Score (WS) for Event [{event_start}â€“{event_end}]: {ws:.4f}")

# Unweighted mean of the per-event scores for this wind farm.
average_ws_c = np.mean(ws_scores)
print(f"\nðŸ”¥ Average Weighted Score (WS) across all datasets of wind farm C: {average_ws_c:.4f}")


Dataset 4 âœ¨ Weighted Score (WS) for Event [52992â€“55728]: 0.1132
Dataset 5 âœ¨ Weighted Score (WS) for Event [52272â€“52794]: 0.6587
Dataset 9 âœ¨ Weighted Score (WS) for Event [52992â€“56028]: 0.1553
Dataset 11 âœ¨ Weighted Score (WS) for Event [52416â€“55572]: 1.0000
Dataset 12 âœ¨ Weighted Score (WS) for Event [52560â€“55818]: 1.0000
Dataset 15 âœ¨ Weighted Score (WS) for Event [51984â€“54432]: 0.2220
Dataset 16 âœ¨ Weighted Score (WS) for Event [51264â€“53423]: 0.8610
Dataset 18 âœ¨ Weighted Score (WS) for Event [51408â€“51983]: 0.2541
Dataset 28 âœ¨ Weighted Score (WS) for Event [52704â€“55629]: 0.2703
Dataset 30 âœ¨ Weighted Score (WS) for Event [52560â€“55822]: 0.8136
Dataset 31 âœ¨ Weighted Score (WS) for Event [52848â€“53868]: 0.6175
Dataset 33 âœ¨ Weighted Score (WS) for Event [52848â€“55728]: 0.0764
Dataset 35 âœ¨ Weighted Score (WS) for Event [51696â€“52614]: 0.1060
Dataset 39 âœ¨ Weighted Score (WS) for Event [52848â€“53582]: 0.6111
Dataset 44 âœ¨ Weighted Score (WS) fo

## Overall Earliness for All Wind Farms

In [5]:
# Overall earliness across all three wind farms: the per-farm averages are
# combined weighted by their number of events, which is the same as the plain
# mean over every individual event score.
# The counts must match the lengths of the per-farm `datasets` lists
# (Wind Farm A: 11, Wind Farm B: 6, Wind Farm C: 27).
n_events_a, n_events_b, n_events_c = 11, 6, 27
n_events_total = n_events_a + n_events_b + n_events_c
average_ws = ((average_ws_a * n_events_a) + (average_ws_b * n_events_b) + (average_ws_c * n_events_c)) / n_events_total
# The label previously said "wind farm C" (copied from the cell above) although
# the value printed here is the overall average.
print(f"\nðŸ”¥ Average Weighted Score (WS) across all datasets of all wind farms: {average_ws:.4f}")


ðŸ”¥ Average Weighted Score (WS) across all datasets of wind farm C: 0.3904
