## Wind Farm A Code 3 (Strict for Front Row Turbines)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import fsolve
import os

# List of dataset IDs
dataset_ids = [0,13,14,22,24,26,38,45,51,71,72,73,84]   ##Based on Turbine Layout, these datasets belong to front row

# Paths
input_dir = r"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm A\Wind Farm A\datasets"
output_dir = r"D:\Master Thesis New Data Set\Final DataSet\Wind Farm A Code 3"
os.makedirs(output_dir, exist_ok=True)

# Ideal power curve function
def get_ideal_power(wind_speed, cut_in, rated_speed, rated_power):
    if wind_speed < cut_in:
        return 0
    elif wind_speed < rated_speed:
        return rated_power * ((wind_speed - cut_in) / (rated_speed - cut_in))**3
    else:
        return rated_power

# Loop through each dataset
for dataset_id in dataset_ids:
    try:
        print(f"\n📂 Processing dataset {dataset_id}...")

        # Load data
        file_path = os.path.join(input_dir, f"{dataset_id}.csv")
        df = pd.read_csv(file_path, delimiter=';')
        df_full = df.copy()  # Preserve original for export

        # Ensure positive power values
        df['sensor_50'] = df['sensor_50'].apply(lambda x: max(x, 0))

        # Step 1: Filter for normal operation (to calculate rated region)
        df_filtered = df[
            (df['sensor_5_avg'] <= 2) &
            (df['sensor_5_avg'] >= -5) &
            (df['sensor_2_avg'] >= -10) &
            (df['sensor_2_avg'] <= 10) &
            (df['status_type_id'] == 0) &
            (df['train_test'] == 'train')
        ].dropna(subset=['wind_speed_3_avg', 'sensor_50'])

        df_sorted = df_filtered.sort_values(by='wind_speed_3_avg')
        max_power = df_sorted['sensor_50'].max()

        # Rated band within 98%–102% of max power
        rated_band = df_sorted[
            (df_sorted['sensor_50'] >= max_power * 0.98) &
            (df_sorted['sensor_50'] <= max_power * 1.02) &
            (df_sorted['wind_speed_3_avg'] >= 11.2) &
            (df_sorted['wind_speed_3_avg'] <= 12)
        ]

        if rated_band.empty:
            raise ValueError("Rated band empty — check input data.")

        rated_speed = rated_band['wind_speed_3_avg'].min()
        rated_power = max_power

        # Step 2: Estimate cut-in speed using min power around 3 m/s
        wind_speed_3_df = df_sorted[
            (df_sorted['wind_speed_3_avg'] >= 2.98) &
            (df_sorted['wind_speed_3_avg'] <= 3.02)
        ]
        min_power_at_3 = wind_speed_3_df['sensor_50'].min()

        def equation(cut_in): 
            return rated_power * ((3 - cut_in) / (rated_speed - cut_in))**3 - min_power_at_3

        cut_in = fsolve(equation, 1.0)[0]

        # --- Status reassignment ---
        def reassign_status(row):
            wind_speed = row['wind_speed_3_avg']
            pitch_deg = row['sensor_5_avg']
            actual_power = row['sensor_50']
            current_status = row['status_type_id']

            if pd.isna(wind_speed) or pd.isna(pitch_deg) or pd.isna(actual_power):
                return current_status  # leave unchanged if missing

            if wind_speed < 3:
                return current_status
                
            # Compute tolerance multiplier based on pitch
            pitch_rad = np.deg2rad(pitch_deg)
            tol_mult = np.clip(np.cos(pitch_rad)**3, 0, 1)

            ideal_p = get_ideal_power(wind_speed, cut_in, rated_speed, rated_power)
            threshold = ideal_p * tol_mult

            # Special rule for very low power
            if current_status in [3, 4]:
                if actual_power <= 0.10*rated_power:
                    return current_status
                return 0 if actual_power >= threshold else 5

            return 0 if actual_power >= threshold else 5

        # Apply reclassification
        df['status_type_id'] = df.apply(reassign_status, axis=1)

        # Update original dataframe with new status
        df_full['status_type_id'] = df['status_type_id']

        # Save result
        output_path = os.path.join(output_dir, f"{dataset_id}_WindFarm_A.csv")
        df_full.to_csv(output_path, index=False)
        print(f"✅ Dataset {dataset_id} processed and saved.")

    except Exception as e:
        print(f"❌ Failed to process dataset {dataset_id}: {e}")



📂 Processing dataset 0...
✅ Dataset 0 processed and saved.

📂 Processing dataset 3...
✅ Dataset 3 processed and saved.

📂 Processing dataset 10...
✅ Dataset 10 processed and saved.

📂 Processing dataset 13...
✅ Dataset 13 processed and saved.

📂 Processing dataset 14...
✅ Dataset 14 processed and saved.

📂 Processing dataset 17...
✅ Dataset 17 processed and saved.

📂 Processing dataset 22...
✅ Dataset 22 processed and saved.

📂 Processing dataset 24...
✅ Dataset 24 processed and saved.

📂 Processing dataset 25...
✅ Dataset 25 processed and saved.

📂 Processing dataset 26...
✅ Dataset 26 processed and saved.

📂 Processing dataset 38...
✅ Dataset 38 processed and saved.

📂 Processing dataset 40...
✅ Dataset 40 processed and saved.

📂 Processing dataset 42...
✅ Dataset 42 processed and saved.

📂 Processing dataset 45...
✅ Dataset 45 processed and saved.

📂 Processing dataset 51...
✅ Dataset 51 processed and saved.

📂 Processing dataset 68...
✅ Dataset 68 processed and saved.

📂 Processin

  improvement from the last ten iterations.
  cut_in = fsolve(equation, 1.0)[0]


✅ Dataset 71 processed and saved.

📂 Processing dataset 72...
✅ Dataset 72 processed and saved.

📂 Processing dataset 73...
✅ Dataset 73 processed and saved.

📂 Processing dataset 84...
✅ Dataset 84 processed and saved.

📂 Processing dataset 92...
✅ Dataset 92 processed and saved.
