## Wind Farm C Code 3 (Strict for Front Row Turbines)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import fsolve
import os

# Folder the per-dataset CSV files are read from, and the folder the
# relabelled copies are written to (created up front if it does not exist).
input_dir = r"D:\Master Thesis New Data Set\CARE DATA SET\CARE_To_Compare\Wind Farm C\Wind Farm C\datasets"
output_dir = r"D:\Master Thesis New Data Set\Final DataSet\Wind Farm C Code 3"
os.makedirs(output_dir, exist_ok=True)

# Dataset IDs to process. Per the determined wind-farm layout, these
# datasets belong to the front row.
dataset_ids = [
    8, 11, 12, 28, 32, 33,
    36, 39, 43, 44, 54, 55,
    61, 63, 75, 90, 91, 93,
]

# Function to calculate ideal power
def get_ideal_power(wind_speed, cut_in, rated_speed, rated_power):
    """Return the ideal power for a scalar wind speed on a cubic power curve.

    The curve is 0 below ``cut_in``, follows
    ``rated_power * ((wind_speed - cut_in) / (rated_speed - cut_in)) ** 3``
    from ``cut_in`` up to (but excluding) ``rated_speed``, and equals
    ``rated_power`` from ``rated_speed`` upwards.

    Args:
        wind_speed: Scalar wind speed to evaluate.
        cut_in: Wind speed below which the ideal power is 0.
        rated_speed: Wind speed at and above which ``rated_power`` is returned.
        rated_power: Power returned at and above ``rated_speed``.

    Returns:
        The ideal power in the same units as ``rated_power``, or NaN when
        ``wind_speed`` is NaN.
    """
    # NaN is the only value that is not equal to itself. Every ordering
    # comparison with NaN is False, so without this guard a missing wind
    # speed would fall through both branches below and be reported as full
    # rated power.
    if wind_speed != wind_speed:
        return float("nan")

    if wind_speed < cut_in:
        return 0
    if wind_speed < rated_speed:
        return rated_power * ((wind_speed - cut_in) / (rated_speed - cut_in)) ** 3
    return rated_power

# Loop through each dataset: fit a cubic reference power curve on the filtered
# training rows, relabel status_type_id against that curve, and save a copy of
# the full table with the new labels.
for dataset_id in dataset_ids:
    # The whole per-dataset pipeline runs under one try so that a single bad
    # dataset is reported and skipped instead of aborting the batch.
    try:
        print(f"\n📂 Processing dataset {dataset_id}...")

        # Load data; df_2 keeps every original column for the final export.
        file_path = os.path.join(input_dir, f"{dataset_id}.csv")
        df = pd.read_csv(file_path, delimiter=';')
        df_2 = df.copy()

        # Select relevant columns. The explicit copy makes df an independent
        # frame, so the column assignments below never write into a slice of
        # the loaded table (avoids pandas' chained-assignment pitfalls).
        cols = ['power_2_avg','power_6_avg', 'sensor_76_avg', 'sensor_124_avg', 'status_type_id', 'train_test', 'wind_speed_236_avg']
        df = df[cols].copy()
        # Clamp negative power readings to zero (NaN stays NaN).
        df['power_6_avg'] = df['power_6_avg'].clip(lower=0)

        # Reference subset used to fit the curve: training rows with status 0,
        # sensor_76_avg in [-5, 2] and sensor_124_avg in [-2, 2] (treated as
        # pitch and yaw angles in degrees by the relabelling step below), and
        # with both wind speed and power present.
        df_filtered = df[
            (df['sensor_76_avg'] <= 2) & 
            (df['sensor_76_avg'] >= -5) & 
            (df['sensor_124_avg'] >= -2) & 
            (df['sensor_124_avg'] <= 2) & 
            (df['status_type_id'] == 0) &
            (df['train_test'] == 'train')
        ].dropna(subset=['wind_speed_236_avg', 'power_6_avg'])
        if df_filtered.empty:
            raise ValueError("no training rows left after filtering")

        # Power is normalised by the largest power in the reference subset, so
        # rated power is 1 by construction.
        original_max_power = df_filtered['power_6_avg'].max()
        if not original_max_power > 0:
            raise ValueError(
                f"maximum reference power is {original_max_power}; cannot normalise"
            )
        train_wind = df_filtered['wind_speed_236_avg']
        train_power = df_filtered['power_6_avg'] / original_max_power

        # Rated speed: lowest wind speed in [10, 12] at which the normalised
        # power lies within [0.98, 1.02].
        rated_mask = (
            (train_power >= 0.98) &
            (train_power <= 1.02) &
            (train_wind >= 10) &
            (train_wind <= 12)
        )
        if not rated_mask.any():
            raise ValueError(
                "no reference rows near rated power for wind speeds in [10, 12]"
            )
        rated_speed = train_wind[rated_mask].min()
        rated_power = 1

        # Lowest normalised power observed at (approximately) 3 wind-speed
        # units; this anchors the low end of the curve.
        near_3_mask = (train_wind >= 2.98) & (train_wind <= 3.02)
        if not near_3_mask.any():
            raise ValueError("no reference rows with wind speed in [2.98, 3.02]")
        min_power_at_3 = train_power[near_3_mask].min()

        # Solve for cut-in speed. The condition
        #     rated_power * ((3 - c) / (rated_speed - c))**3 == min_power_at_3
        # has the exact solution c = (3 - k * rated_speed) / (1 - k) with
        # k = cbrt(min_power_at_3 / rated_power). The closed form replaces an
        # unchecked iterative root search, which is ill-conditioned when
        # min_power_at_3 is 0 (the root c = 3 is then a triple root).
        k = np.cbrt(min_power_at_3 / rated_power)
        if not k < 1:
            raise ValueError(
                "minimum power at wind speed 3 already equals rated power; "
                "cut-in speed is undefined"
            )
        cut_in = (3 - k * rated_speed) / (1 - k)

        # Normalize full dataset
        df['power_6_avg_norm'] = df['power_6_avg'] / original_max_power

        # Status reassignment
        def reassign_status(row):
            """Return the new status_type_id for one row of df.

            Rows with wind speed below 3 keep their status. So do rows whose
            status is 3 or 4 and whose normalised power is at most 0.10.
            Every other row becomes 0 when its normalised power reaches the
            threshold and 5 otherwise. The threshold is the ideal curve power
            scaled by cos(sensor_76_avg)**3, with the angle taken in degrees
            and the factor clipped to [0, 1].
            """
            current_status = row['status_type_id']
            if row['wind_speed_236_avg'] < 3:
                return current_status

            actual_power = row['power_6_avg_norm']
            if current_status in (3, 4) and actual_power <= 0.10:
                return current_status

            # Only sensor_76_avg scales the threshold; sensor_124_avg is used
            # solely in the reference filter above.
            pitch_rad = np.deg2rad(row['sensor_76_avg'])
            tol_mult = np.clip(np.cos(pitch_rad) ** 3, 0, 1)
            ideal_power = get_ideal_power(
                row['wind_speed_236_avg'], cut_in, rated_speed, rated_power
            )
            threshold = ideal_power * tol_mult
            return 0 if actual_power >= threshold else 5

        df['status_type_id_original'] = df['status_type_id']
        print("🔍 Original status_type_id distribution:")
        print(df['status_type_id'].value_counts())

        df['status_type_id'] = df.apply(reassign_status, axis=1)

        print("🔁 New status_type_id distribution:")
        print(df['status_type_id'].value_counts())

        # Save result: the full original table with only status_type_id
        # replaced (assignment aligns on the shared row index).
        df_2['status_type_id'] = df['status_type_id']
        output_path = os.path.join(output_dir, f"{dataset_id}_WindFarm_C.csv")
        df_2.to_csv(output_path, index=False)
        print(f"✅ Dataset {dataset_id} processed and saved.")
    
    except Exception as e:
        # Deliberate batch boundary: report the failure and continue with the
        # next dataset.
        print(f"❌ Failed to process dataset {dataset_id}: {e}")




📂 Processing dataset 1...
🔍 Original status_type_id distribution:
status_type_id
0    47264
3     3375
5     2168
4      762
Name: count, dtype: int64
🔁 New status_type_id distribution:
status_type_id
0    34279
5    15422
3     3343
4      525
Name: count, dtype: int64
✅ Dataset 1 processed and saved.

📂 Processing dataset 4...
🔍 Original status_type_id distribution:
status_type_id
0    49760
3     6076
5      400
4      213
Name: count, dtype: int64
🔁 New status_type_id distribution:
status_type_id
0    35768
5    14505
3     6057
4      119
Name: count, dtype: int64
✅ Dataset 4 processed and saved.

📂 Processing dataset 5...
🔍 Original status_type_id distribution:
status_type_id
0    47640
3     4252
5      520
4      383
Name: count, dtype: int64
🔁 New status_type_id distribution:
status_type_id
0    33568
5    14724
3     4150
4      353
Name: count, dtype: int64
✅ Dataset 5 processed and saved.

📂 Processing dataset 6...
🔍 Original status_type_id distribution:
status_type_id
0  