### Forecasting Wind Turbine Power Output

#### Week 1

##### Data
The Wind Turbine Scada Dataset (year 2018).



In [4]:
# Importing Dataset
import pandas as pd
turbine_data = pd.read_csv('WindTurbine_Data.csv')
turbine_data.head(10)

Unnamed: 0,Date/Time,LV ActivePower (kW),Wind Speed (m/s),Theoretical_Power_Curve (KWh),Wind Direction (°)
0,01 01 2018 00:00,380.047791,5.311336,416.328908,259.994904
1,01 01 2018 00:10,453.769196,5.672167,519.917511,268.641113
2,01 01 2018 00:20,306.376587,5.216037,390.900016,272.564789
3,01 01 2018 00:30,419.645904,5.659674,516.127569,271.258087
4,01 01 2018 00:40,380.650696,5.577941,491.702972,265.674286
5,01 01 2018 00:50,402.391998,5.604052,499.436385,264.578613
6,01 01 2018 01:00,447.605713,5.793008,557.372363,266.163605
7,01 01 2018 01:10,387.242188,5.30605,414.898179,257.949493
8,01 01 2018 01:20,463.651215,5.584629,493.677652,253.480698
9,01 01 2018 01:30,439.725708,5.523228,475.706783,258.723785


##### Preprocessing

In [5]:
turbine_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50530 entries, 0 to 50529
Data columns (total 5 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Date/Time                      50530 non-null  object 
 1   LV ActivePower (kW)            50530 non-null  float64
 2   Wind Speed (m/s)               50530 non-null  float64
 3   Theoretical_Power_Curve (KWh)  50530 non-null  float64
 4   Wind Direction (°)             50530 non-null  float64
dtypes: float64(4), object(1)
memory usage: 1.9+ MB


In [6]:
# Find and remove Null values
turbine_data.isnull().sum()

Unnamed: 0,0
Date/Time,0
LV ActivePower (kW),0
Wind Speed (m/s),0
Theoretical_Power_Curve (KWh),0
Wind Direction (°),0


In [7]:
# Duplicate values
turbine_data.duplicated().sum()

np.int64(0)

In [None]:
# # Unit check
# turbine_data['LV ActivePower (kW)'	'Wind Speed (m/s)'	'Theoretical_Power_Curve (KWh)'	'Wind Direction (°)'].unique()

In [8]:
# Converting Date/time same format.
turbine_data['Date/Time'] = pd.to_datetime(turbine_data['Date/Time'], format='%d %m %Y %H:%M')
# keeping it time ordered
turbine_data = turbine_data.sort_values(by='Date/Time')

display(turbine_data.head())

Unnamed: 0,Date/Time,LV ActivePower (kW),Wind Speed (m/s),Theoretical_Power_Curve (KWh),Wind Direction (°)
0,2018-01-01 00:00:00,380.047791,5.311336,416.328908,259.994904
1,2018-01-01 00:10:00,453.769196,5.672167,519.917511,268.641113
2,2018-01-01 00:20:00,306.376587,5.216037,390.900016,272.564789
3,2018-01-01 00:30:00,419.645904,5.659674,516.127569,271.258087
4,2018-01-01 00:40:00,380.650696,5.577941,491.702972,265.674286


##### Feature Engineering

In [11]:
# wind speed cubed
turbine_data['Wind_Speed_Cubed'] = turbine_data['Wind Speed (m/s)'] ** 3  # Physics: P ∝ v³

#Dir_Sin & Dir_Cos for 0 to 360 degree to radinas
import numpy as np
turbine_data['Dir_Sin'] = np.sin(np.deg2rad(turbine_data['Wind Direction (°)']))
turbine_data['Dir_Cos'] = np.cos(np.deg2rad(turbine_data['Wind Direction (°)']))

#Rolling spreed to smooth guts
turbine_data['Rolling_Speed'] = turbine_data['Wind Speed (m/s)'].rolling(window=3, min_periods=1, center=True).mean()

#Theoretical_kW: Unit fix (kWh *6 for 10-min → kW)
turbine_data['Theoretical_kW'] = turbine_data['Theoretical_Power_Curve (KWh)'] * 6

#Deficit & Efficiency: Post-pred diagnostics (add now for EDA)
turbine_data['Power_Deficit'] = turbine_data['Theoretical_kW'] - turbine_data['LV ActivePower (kW)']
turbine_data['Efficiency (%)'] = (turbine_data['LV ActivePower (kW)'] / turbine_data['Theoretical_kW']) * 100
turbine_data['Efficiency (%)'] = turbine_data['Efficiency (%)'].clip(0, 100)  # Bound

print(turbine_data[['Wind Speed (m/s)', 'Wind_Speed_Cubed', 'Dir_Sin', 'Dir_Cos', 'Rolling_Speed', 'Theoretical_kW', 'Power_Deficit', 'Efficiency (%)']].head())
# Save: turbine_data.to_csv('New_turbine_data.csv', index=False)


   Wind Speed (m/s)  Wind_Speed_Cubed   Dir_Sin   Dir_Cos  Rolling_Speed  \
0          0.210717          0.009356  0.012605  0.999921       0.217875   
1          0.225032          0.011396  0.013024  0.999915       0.214229   
2          0.206936          0.008862  0.013214  0.999913       0.218835   
3          0.224537          0.011320  0.013151  0.999914       0.217589   
4          0.221294          0.010837  0.012880  0.999917       0.222720   

   Theoretical_kW  Power_Deficit  Efficiency (%)  
0        0.693882       0.588248       15.223513  
1        0.866529       0.740538       14.539776  
2        0.651500       0.566211       13.091135  
3        0.860213       0.743644       13.551093  
4        0.819505       0.713705       12.910189  
