## Reliability Score

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import os

## Wind Farm A

In [2]:

# === File paths ===
excel_events_path = r"D:\Master Thesis New Data Set\Wind Farm Events\Wind Farm A.xlsx"  # <-- Update this to the actual path to your Excel
gt_base_path = r"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm A\Wind Farm A\datasets"
pred_base_path = r"D:\Master Thesis New Data Set\Final Processed Dataset\Wind Farm A"

# === Constants ===
normal_status_id = 0
abnormal_status_ids = [1, 3, 4, 5]
criticality_threshold = 72  # per author

# === Load event metadata from Excel ===
events_df = pd.read_excel(excel_events_path)
print(events_df.columns)


# === Loop through each dataset in Excel ===
for _, row in events_df.iterrows():
    dataset_id = int(row['Data Set'])
    event_start = row['Event Start ID']
    event_end = row['Event End ID']
    is_abnormal_event = row['Event'].strip().lower() == 'Abnormal'

    ground_truth_path = os.path.join(gt_base_path, f"{dataset_id}.csv")
    predicted_path = os.path.join(pred_base_path, f"{dataset_id}_WindFarm_A_predictions_lgb_smoothed.csv")

    try:
        gt_df = pd.read_csv(ground_truth_path, delimiter=';')
        pred_df = pd.read_csv(predicted_path)
    except FileNotFoundError:
        print(f"⚠️ Missing file(s) for dataset {dataset_id}, skipping.")
        continue

    gt_df = gt_df[gt_df['train_test'].str.lower() == 'prediction'].copy()
    pred_df = pred_df[pred_df['train_test'].str.lower() == 'prediction'].copy()

    def map_to_binary_status(x):
        if x == normal_status_id:
            return 1
        elif x in abnormal_status_ids:
            return 0
        else:
            return 1  # Treat unknowns as normal

    gt_df['adjusted_status'] = gt_df['status_type_id'].apply(map_to_binary_status)

    # Apply override within event window
    event_mask = (gt_df['id'] >= event_start) & (gt_df['id'] <= event_end)
    gt_df.loc[event_mask, 'adjusted_status'] = 0 if is_abnormal_event else 1

    merged_df = pd.merge(
        gt_df[['id', 'adjusted_status']],
        pred_df[['id', 'predicted_status_type_binary_smooth']],
        on='id',
        how='inner'
    )

    merged_df['pred_label'] = merged_df['predicted_status_type_binary_smooth'].apply(
        lambda x: 0 if x == normal_status_id else 1
    )

    def calculate_criticality(gt_statuses, predictions):
        crit = np.zeros(len(gt_statuses) + 1, dtype=int)
        for i in range(len(gt_statuses)):
            if gt_statuses[i] == 0:
                if predictions[i] == 1:
                    crit[i + 1] = crit[i] + 1
                else:
                    crit[i + 1] = max(crit[i] - 1, 0)
            else:
                crit[i + 1] = crit[i]
        return crit[1:]

    criticality = calculate_criticality(
        merged_df['adjusted_status'].values,
        merged_df['pred_label'].values
    )
    max_crit = np.max(criticality)

    # === Final classification ===
    if max_crit >= criticality_threshold:
        print(f"🛑 Dataset {dataset_id} → Predicted: ABNORMAL | Criticality = {max_crit}")
    else:
        print(f"✅ Dataset {dataset_id} → Predicted: NORMAL | Criticality = {max_crit}")


Index(['Serial No.', 'Data Set', 'Event', 'Event Start ID', 'Event End ID'], dtype='object')
🛑 Dataset 0 → Predicted: ABNORMAL | Criticality = 436
✅ Dataset 3 → Predicted: NORMAL | Criticality = 20
✅ Dataset 10 → Predicted: NORMAL | Criticality = 0
✅ Dataset 13 → Predicted: NORMAL | Criticality = 38
✅ Dataset 14 → Predicted: NORMAL | Criticality = 46
✅ Dataset 17 → Predicted: NORMAL | Criticality = 54
✅ Dataset 22 → Predicted: NORMAL | Criticality = 0
✅ Dataset 24 → Predicted: NORMAL | Criticality = 44
✅ Dataset 25 → Predicted: NORMAL | Criticality = 9
🛑 Dataset 26 → Predicted: ABNORMAL | Criticality = 106
✅ Dataset 38 → Predicted: NORMAL | Criticality = 34
🛑 Dataset 40 → Predicted: ABNORMAL | Criticality = 141
✅ Dataset 42 → Predicted: NORMAL | Criticality = 19
✅ Dataset 45 → Predicted: NORMAL | Criticality = 68
✅ Dataset 51 → Predicted: NORMAL | Criticality = 10
✅ Dataset 68 → Predicted: NORMAL | Criticality = 0
✅ Dataset 69 → Predicted: NORMAL | Criticality = 30
✅ Dataset 71 → Predi

## Score Wind Farm A

In [3]:
import pandas as pd
from sklearn.metrics import fbeta_score

# Actual Event vs Light GBM prediction data
data = {
    'Event': [
        'Abnormal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Abnormal',
        'Normal', 'Abnormal', 'Abnormal', 'Abnormal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Abnormal', 'Abnormal',
        'Abnormal', 'Normal'
    ],
    'Light GBM': [
        'Abnormal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal','Normal', 'Normal', 'Abnormal',
        'Normal', 'Abnormal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal','Normal', 'Abnormal',
       'Abnormal', 'Normal'
    ]
}

# Create DataFrame
df = pd.DataFrame(data)

# Map labels to binary: Abnormal = 1, Normal = 0
label_map = {'Abnormal': 1, 'Normal': 0}
y_true = df['Event'].map(label_map)
y_pred = df['Light GBM'].map(label_map)

# Compute F-beta score (beta = 0.5)
beta = 0.5
f_beta = fbeta_score(y_true, y_pred, beta=beta)

# Output result
print(f"F-beta score (beta={beta}): {f_beta:.4f}")


F-beta score (beta=0.5): 0.8065


## Wind Farm B

In [4]:

# === File paths ===
excel_events_path = r"D:\Master Thesis New Data Set\Wind Farm Events\Wind Farm B.xlsx"  # <-- Update this to the actual path to your Excel
gt_base_path = r"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm B\Wind Farm B\datasets"
pred_base_path = r"D:\Master Thesis New Data Set\Final Processed Dataset\Wind Farm B"

# === Constants ===
normal_status_id = 0
abnormal_status_ids = [1, 3, 4, 5]
criticality_threshold = 72  # per author

# === Load event metadata from Excel ===
events_df = pd.read_excel(excel_events_path)
print(events_df.columns)


# === Loop through each dataset in Excel ===
for _, row in events_df.iterrows():
    dataset_id = int(row['Data Set'])
    event_start = row['Event Start ID']
    event_end = row['Event End ID']
    is_abnormal_event = row['Event'].strip().lower() == 'Abnormal'

    ground_truth_path = os.path.join(gt_base_path, f"{dataset_id}.csv")
    predicted_path = os.path.join(pred_base_path, f"{dataset_id}_WindFarm_B_predictions_lgb_smoothed.csv")

    try:
        gt_df = pd.read_csv(ground_truth_path, delimiter=';')
        pred_df = pd.read_csv(predicted_path)
    except FileNotFoundError:
        print(f"⚠️ Missing file(s) for dataset {dataset_id}, skipping.")
        continue

    gt_df = gt_df[gt_df['train_test'].str.lower() == 'prediction'].copy()
    pred_df = pred_df[pred_df['train_test'].str.lower() == 'prediction'].copy()

    def map_to_binary_status(x):
        if x == normal_status_id:
            return 1
        elif x in abnormal_status_ids:
            return 0
        else:
            return 1  # Treat unknowns as normal

    gt_df['adjusted_status'] = gt_df['status_type_id'].apply(map_to_binary_status)

    # Apply override within event window
    event_mask = (gt_df['id'] >= event_start) & (gt_df['id'] <= event_end)
    gt_df.loc[event_mask, 'adjusted_status'] = 0 if is_abnormal_event else 1

    merged_df = pd.merge(
        gt_df[['id', 'adjusted_status']],
        pred_df[['id', 'predicted_status_type_binary_smooth']],
        on='id',
        how='inner'
    )

    merged_df['pred_label'] = merged_df['predicted_status_type_binary_smooth'].apply(
        lambda x: 0 if x == normal_status_id else 1
    )

    def calculate_criticality(gt_statuses, predictions):
        crit = np.zeros(len(gt_statuses) + 1, dtype=int)
        for i in range(len(gt_statuses)):
            if gt_statuses[i] == 0:
                if predictions[i] == 1:
                    crit[i + 1] = crit[i] + 1
                else:
                    crit[i + 1] = max(crit[i] - 1, 0)
            else:
                crit[i + 1] = crit[i]
        return crit[1:]

    criticality = calculate_criticality(
        merged_df['adjusted_status'].values,
        merged_df['pred_label'].values
    )
    max_crit = np.max(criticality)

    # === Final classification ===
    if max_crit >= criticality_threshold:
        print(f"🛑 Dataset {dataset_id} → Predicted: ABNORMAL | Criticality = {max_crit}")
    else:
        print(f"✅ Dataset {dataset_id} → Predicted: NORMAL | Criticality = {max_crit}")


Index(['Serial No.', 'Data Set', 'Event', 'Event Start ID', 'Event End ID'], dtype='object')
✅ Dataset 2 → Predicted: NORMAL | Criticality = 0
✅ Dataset 7 → Predicted: NORMAL | Criticality = 63
✅ Dataset 19 → Predicted: NORMAL | Criticality = 23
✅ Dataset 21 → Predicted: NORMAL | Criticality = 0
🛑 Dataset 23 → Predicted: ABNORMAL | Criticality = 157
🛑 Dataset 27 → Predicted: ABNORMAL | Criticality = 864
✅ Dataset 34 → Predicted: NORMAL | Criticality = 21
✅ Dataset 52 → Predicted: NORMAL | Criticality = 0
✅ Dataset 53 → Predicted: NORMAL | Criticality = 0
✅ Dataset 74 → Predicted: NORMAL | Criticality = 0
🛑 Dataset 77 → Predicted: ABNORMAL | Criticality = 215
✅ Dataset 82 → Predicted: NORMAL | Criticality = 2
✅ Dataset 83 → Predicted: NORMAL | Criticality = 50
✅ Dataset 86 → Predicted: NORMAL | Criticality = 0
✅ Dataset 87 → Predicted: NORMAL | Criticality = 0


## Score Wind Farm B

In [5]:
import pandas as pd
from sklearn.metrics import fbeta_score

# Actual Event vs Light GBM prediction data
data = {
    'Event': [
        'Normal', 'Abnormal', 'Abnormal', 'Normal', 'Normal', 'Abnormal', 'Abnormal', 'Normal', 'Abnormal',
        'Normal', 'Abnormal',  'Normal', 'Normal', 'Normal',
        'Normal'
    ],
    'Light GBM': [
        'Normal', 'Normal', 'Normal', 'Normal','Abnormal', 'Abnormal', 'Normal', 'Normal', 'Normal',
        'Normal', 'Abnormal',  'Normal', 'Normal', 'Normal',
        'Normal'
    ]
}

# Create DataFrame
df = pd.DataFrame(data)

# Map labels to binary: Abnormal = 1, Normal = 0
label_map = {'Abnormal': 1, 'Normal': 0}
y_true = df['Event'].map(label_map)
y_pred = df['Light GBM'].map(label_map)

# Compute F-beta score (beta = 0.5)
beta = 0.5
f_beta = fbeta_score(y_true, y_pred, beta=beta)

# Output result
print(f"F-beta score (beta={beta}): {f_beta:.4f}")


F-beta score (beta=0.5): 0.5556


## Wind Farm C

In [2]:

# === File paths ===
excel_events_path = r"D:\Master Thesis New Data Set\Wind Farm Events\Wind Farm C.xlsx"  # <-- Update this to the actual path to your Excel
gt_base_path = r"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm C\Wind Farm C\datasets"
pred_base_path = r"D:\Master Thesis New Data Set\Final Processed Dataset\Wind Farm C"

# === Constants ===
normal_status_id = 0
abnormal_status_ids = [1, 3, 4, 5]
criticality_threshold = 72  # per author

# === Load event metadata from Excel ===
events_df = pd.read_excel(excel_events_path)
print(events_df.columns)


# === Loop through each dataset in Excel ===
for _, row in events_df.iterrows():
    dataset_id = int(row['Data Set'])
    event_start = row['Event Start ID']
    event_end = row['Event End ID']
    is_abnormal_event = row['Event'].strip().lower() == 'Abnormal'

    ground_truth_path = os.path.join(gt_base_path, f"{dataset_id}.csv")
    predicted_path = os.path.join(pred_base_path, f"{dataset_id}_WindFarm_C_predictions_lgb_smoothed.csv")

    try:
        gt_df = pd.read_csv(ground_truth_path, delimiter=';')
        pred_df = pd.read_csv(predicted_path)
    except FileNotFoundError:
        print(f"⚠️ Missing file(s) for dataset {dataset_id}, skipping.")
        continue

    gt_df = gt_df[gt_df['train_test'].str.lower() == 'prediction'].copy()
    pred_df = pred_df[pred_df['train_test'].str.lower() == 'prediction'].copy()

    def map_to_binary_status(x):
        if x == normal_status_id:
            return 1
        elif x in abnormal_status_ids:
            return 0
        else:
            return 1  # Treat unknowns as normal

    gt_df['adjusted_status'] = gt_df['status_type_id'].apply(map_to_binary_status)

    # Apply override within event window
    event_mask = (gt_df['id'] >= event_start) & (gt_df['id'] <= event_end)
    gt_df.loc[event_mask, 'adjusted_status'] = 0 if is_abnormal_event else 1

    merged_df = pd.merge(
        gt_df[['id', 'adjusted_status']],
        pred_df[['id', 'predicted_status_type_binary_smooth']],
        on='id',
        how='inner'
    )

    merged_df['pred_label'] = merged_df['predicted_status_type_binary_smooth'].apply(
        lambda x: 0 if x == normal_status_id else 1
    )

    def calculate_criticality(gt_statuses, predictions):
        crit = np.zeros(len(gt_statuses) + 1, dtype=int)
        for i in range(len(gt_statuses)):
            if gt_statuses[i] == 0:
                if predictions[i] == 1:
                    crit[i + 1] = crit[i] + 1
                else:
                    crit[i + 1] = max(crit[i] - 1, 0)
            else:
                crit[i + 1] = crit[i]
        return crit[1:]

    criticality = calculate_criticality(
        merged_df['adjusted_status'].values,
        merged_df['pred_label'].values
    )
    max_crit = np.max(criticality)

    # === Final classification ===
    if max_crit >= criticality_threshold:
        print(f"🛑 Dataset {dataset_id} → Predicted: ABNORMAL | Criticality = {max_crit}")
    else:
        print(f"✅ Dataset {dataset_id} → Predicted: NORMAL | Criticality = {max_crit}")


Index(['Serial No.', 'Data Set', 'Event', 'Event Start ID', 'Event End ID'], dtype='object')
✅ Dataset 1 → Predicted: NORMAL | Criticality = 1
🛑 Dataset 4 → Predicted: ABNORMAL | Criticality = 212
🛑 Dataset 5 → Predicted: ABNORMAL | Criticality = 74
✅ Dataset 6 → Predicted: NORMAL | Criticality = 0
✅ Dataset 8 → Predicted: NORMAL | Criticality = 0
✅ Dataset 9 → Predicted: NORMAL | Criticality = 5
🛑 Dataset 11 → Predicted: ABNORMAL | Criticality = 864
✅ Dataset 12 → Predicted: NORMAL | Criticality = 4
✅ Dataset 15 → Predicted: NORMAL | Criticality = 2
✅ Dataset 16 → Predicted: NORMAL | Criticality = 0
🛑 Dataset 18 → Predicted: ABNORMAL | Criticality = 581
✅ Dataset 20 → Predicted: NORMAL | Criticality = 0
🛑 Dataset 28 → Predicted: ABNORMAL | Criticality = 119
✅ Dataset 29 → Predicted: NORMAL | Criticality = 1
✅ Dataset 30 → Predicted: NORMAL | Criticality = 3
✅ Dataset 31 → Predicted: NORMAL | Criticality = 1
✅ Dataset 32 → Predicted: NORMAL | Criticality = 1
🛑 Dataset 33 → Predicted: A

## Score Wind Farm C

In [7]:
import pandas as pd
from sklearn.metrics import fbeta_score


# Actual Event vs Light GBM prediction data
data = {
    'Event': [
        'Normal', 'Abnormal', 'Abnormal', 'Normal', 'Normal', 'Abnormal', 'Abnormal', 'Abnormal', 'Abnormal', 'Abnormal',
        'Abnormal', 'Normal', 'Abnormal', 'Normal', 'Abnormal', 'Abnormal', 'Normal', 'Abnormal', 'Abnormal', 'Normal',
        'Normal', 'Abnormal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Abnormal', 'Normal', 'Abnormal', 'Normal',
        'Normal', 'Abnormal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal',
        'Normal', 'Normal', 'Abnormal', 'Abnormal', 'Abnormal', 'Normal', 'Abnormal', 'Abnormal', 'Abnormal', 'Normal',
        'Abnormal', 'Normal', 'Normal', 'Normal', 'Abnormal', 'Abnormal', 'Normal', 'Normal'
    ],
    'Light GBM': [
        'Normal', 'Abnormal', 'Abnormal', 'Normal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Normal','Normal',
        'Abnormal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Normal', 'Normal', 'Abnormal', 'Abnormal','Normal',
        'Normal', 'Normal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Normal', 'Abnormal','Normal',
        'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal','Normal',
        'Normal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Normal','Normal',
        'Normal', 'Normal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Abnormal'
    ]
}

# Create DataFrame
df = pd.DataFrame(data)

# Map labels to binary: Abnormal = 1, Normal = 0
label_map = {'Abnormal': 1, 'Normal': 0}
y_true = df['Event'].map(label_map)
y_pred = df['Light GBM'].map(label_map)

# Compute F-beta score (beta = 0.5)
beta = 0.5
f_beta = fbeta_score(y_true, y_pred, beta=beta)

# Output result
print(f"F-beta score (beta={beta}): {f_beta:.4f}")



F-beta score (beta=0.5): 0.7595


## Overall Reliability Score

In [8]:
import pandas as pd
from sklearn.metrics import fbeta_score

# Actual Event vs Light GBM prediction data
data = {
    'Event': [
        'Abnormal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Abnormal',
        'Normal', 'Abnormal', 'Abnormal', 'Abnormal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Abnormal', 'Abnormal',
        'Abnormal', 'Normal',
        'Normal', 'Abnormal', 'Abnormal', 'Normal', 'Normal', 'Abnormal', 'Abnormal', 'Normal', 'Abnormal',
        'Normal', 'Abnormal',  'Normal', 'Normal', 'Normal',
        'Normal',
        'Normal', 'Abnormal', 'Abnormal', 'Normal', 'Normal', 'Abnormal', 'Abnormal', 'Abnormal', 'Abnormal', 'Abnormal',
        'Abnormal', 'Normal', 'Abnormal', 'Normal', 'Abnormal', 'Abnormal', 'Normal', 'Abnormal', 'Abnormal', 'Normal',
        'Normal', 'Abnormal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Abnormal', 'Normal', 'Abnormal', 'Normal',
        'Normal', 'Abnormal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal',
        'Normal', 'Normal', 'Abnormal', 'Abnormal', 'Abnormal', 'Normal', 'Abnormal', 'Abnormal', 'Abnormal', 'Normal',
        'Abnormal', 'Normal', 'Normal', 'Normal', 'Abnormal', 'Abnormal', 'Normal', 'Normal'
        
    ],
    'Light GBM': [
        'Abnormal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Abnormal',
        'Normal', 'Abnormal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Abnormal',
        'Abnormal', 'Normal',
        'Normal', 'Normal', 'Normal', 'Normal','Abnormal', 'Abnormal', 'Normal', 'Normal', 'Normal',
        'Normal', 'Abnormal',  'Normal', 'Normal', 'Normal',
        'Normal',
        'Normal', 'Abnormal', 'Abnormal', 'Normal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Normal','Normal',
        'Abnormal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Normal', 'Normal', 'Abnormal', 'Abnormal','Normal',
        'Normal', 'Normal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Normal', 'Abnormal','Normal',
        'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal','Normal',
        'Normal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Normal','Normal',
        'Normal', 'Normal', 'Normal', 'Normal', 'Abnormal', 'Normal', 'Normal', 'Abnormal'
    ]
}

# Create DataFrame
df = pd.DataFrame(data)

# Map labels to binary: Abnormal = 1, Normal = 0
label_map = {'Abnormal': 1, 'Normal': 0}
y_true = df['Event'].map(label_map)
y_pred = df['Light GBM'].map(label_map)

# Compute F-beta score (beta = 0.5)
beta = 0.5
f_beta = fbeta_score(y_true, y_pred, beta=beta)

# Output result
print(f"F-beta score (beta={beta}): {f_beta:.4f}")


F-beta score (beta=0.5): 0.7422
