In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

In [27]:
def preprocessing(generator_file_path, anemometer_file_path):
    generator_data = pd.read_csv(generator_file_path)
    wind_speed_data = pd.read_csv(anemometer_file_path)
    
    generator_data.drop(columns=['name', 'TURBINEID', 'DEVICETYPE', 'DEVICEID'], inplace=True)
    wind_speed_data.drop(columns=['name', 'windspeed_MW'], inplace=True)
    
    generator_data = generator_data.drop_duplicates(subset='time')
    wind_speed_data = wind_speed_data.drop_duplicates(subset='time')
    
    # Merging both DataFrames based on 'time' column, keeping only the intersection
    merged_df = pd.merge(wind_speed_data, generator_data, on='time', how='inner')
    
    # Filtering MPPT Data
    merged_df['IREG_SPEED'] = pd.to_numeric(merged_df['IREG_SPEED'], errors='coerce')
    filtered_data = merged_df[(merged_df['IREG_SPEED'] >= 400) & (merged_df['IREG_SPEED'] <= 750)]
    
    filtered_data.reset_index(drop=True, inplace=True)
    data=filtered_data.drop(columns=['time'])
    
    data['IREG_UBUS'] = pd.to_numeric(data['IREG_UBUS'], errors='coerce')
    data['IREG_IBUS'] = pd.to_numeric(data['IREG_IBUS'], errors='coerce')
    data['IREG_PWM'] = pd.to_numeric(data['IREG_PWM'], errors='coerce')
    data['IREG_CHOPPER_PWM'] = pd.to_numeric(data['IREG_CHOPPER_PWM'], errors='coerce')
    data['IREG_IGBT_TEMP'] = pd.to_numeric(data['IREG_IGBT_TEMP'], errors='coerce')
    data['IREG_MOTOR_TEMP'] = pd.to_numeric(data['IREG_MOTOR_TEMP'], errors='coerce')
    data['windspeed'] = pd.to_numeric(data['windspeed'], errors='coerce')
    
    X = data.drop(columns=['IREG_CHOPPER_PWM', 'IREG_IGBT_TEMP', 'IREG_MOTOR_TEMP'])
    
    y = X['IREG_SPEED']
    y = pd.DataFrame(y)
    X = X.drop(columns=['IREG_SPEED'])
    
    return X, y

def standardScaling(X, y):
    X_scaler = StandardScaler()
    X_scaler.fit(X)
    X_scaled = X_scaler.transform(X)

    y_scaler = StandardScaler()
    y_scaler.fit(y)
    y_scaled = y_scaler.transform(y)
    
    X_scaled = pd.DataFrame(X_scaled, columns=X.columns)
    y_scaled = pd.DataFrame(y_scaled, columns=y.columns)
    
    return X_scaled, y_scaled, X_scaler, y_scaler

def splitting_dataset(X, y, test_size):
    train_X, val_X, train_y, val_y = train_test_split(X, y, test_size=test_size, random_state=42)
    return train_X, val_X, train_y, val_y

def training(train_X, train_y):
    xgb_model = xgb.XGBRegressor(objective="reg:squarederror", random_state=42)
    xgb_model.fit(train_X, train_y.values.ravel())
    return xgb_model

In [46]:
def full_training(generator_data, anemometer_file_path):
    X, y = preprocessing(generator_data, anemometer_file_path)
    X_scaled, y_scaled, X_scaler, y_scaler = standardScaling(X, y)
    train_X, val_X, train_y, val_y = splitting_dataset(X_scaled, y_scaled, 0.4)
    model = training(train_X, train_y)
    
    preds = model.predict(val_X)
    preds = pd.DataFrame(preds)
    val_y = pd.DataFrame(val_y)
    val_y.reset_index(drop=True, inplace=True)
    
    val_y_unscaled = pd.DataFrame(y_scaler.inverse_transform(val_y))
    preds_unscaled = pd.DataFrame(y_scaler.inverse_transform(preds))
    
    combined_df = pd.concat([val_y_unscaled, preds_unscaled], axis=1)
    combined_df.rename(columns={'IREG_SPEED': 'Actual Turbine Speed', 0: 'Predicted Turbine Speed'}, inplace=True)
#     print("Actual vs Predicted")
#     print(combined_df)
#     print("\n")
    print("Evaluation Metrics")
    print("MAE: ", mean_absolute_error(val_y, preds))
    print("RMSE: ",np.sqrt(mean_squared_error(val_y, preds)))
    print("R2 Score: ", r2_score(val_y, preds))
    
    return model, X_scaler, y_scaler

In [47]:
anemometer_file_path = "moweapi00080_2023_11_16\WindgeberHD.csv"

## Generator 1

In [48]:
generator1 = 'moweapi00080_2023_11_16\HDGeneratorIREG01.csv'
model1, X_scaler1, y_scaler1 = full_training(generator1, anemometer_file_path)

Evaluation Metrics
MAE:  0.04944329288960195
RMSE:  0.08265456310978701
R2 Score:  0.9931783931047864


## Generator 2

In [49]:
generator2 = 'moweapi00080_2023_11_16\HDGeneratorIREG02.csv'
model2, X_scaler2, y_scaler2 = full_training(generator2, anemometer_file_path)

Evaluation Metrics
MAE:  0.04570119019134613
RMSE:  0.0704726732628834
R2 Score:  0.9949168818210984


## Generator 3

In [50]:
generator3 = 'moweapi00080_2023_11_16\HDGeneratorIREG03.csv'
model3, X_scaler3, y_scaler3 = full_training(generator3, anemometer_file_path)

Evaluation Metrics
MAE:  0.05408798233818737
RMSE:  0.07756228297018383
R2 Score:  0.994006810642999


## Generator 4

In [51]:
generator4 = 'moweapi00080_2023_11_16\HDGeneratorIREG04.csv'
model4, X_scaler4, y_scaler4 = full_training(generator4, anemometer_file_path)

Evaluation Metrics
MAE:  0.054768256459593004
RMSE:  0.0758204366341659
R2 Score:  0.9942807407026413


## Generator 5

In [52]:
generator5 = 'moweapi00080_2023_11_16\HDGeneratorIREG01.csv'
model5, X_scaler5, y_scaler5 = full_training(generator5, anemometer_file_path)

Evaluation Metrics
MAE:  0.04944329288960195
RMSE:  0.08265456310978701
R2 Score:  0.9931783931047864


## Generator 6

In [53]:
generator6 = 'moweapi00080_2023_11_16\HDGeneratorIREG01.csv'
model6, X_scaler6, y_scaler6 = full_training(generator6, anemometer_file_path)

Evaluation Metrics
MAE:  0.04944329288960195
RMSE:  0.08265456310978701
R2 Score:  0.9931783931047864


## Generator 7

In [54]:
generator7 = 'moweapi00080_2023_11_16\HDGeneratorIREG01.csv'
model7, X_scaler7, y_scaler7 = full_training(generator7, anemometer_file_path)

Evaluation Metrics
MAE:  0.04944329288960195
RMSE:  0.08265456310978701
R2 Score:  0.9931783931047864


## Generator 8

In [55]:
generator8 = 'moweapi00080_2023_11_16\HDGeneratorIREG01.csv'
model8, X_scaler8, y_scaler8 = full_training(generator8, anemometer_file_path)

Evaluation Metrics
MAE:  0.04944329288960195
RMSE:  0.08265456310978701
R2 Score:  0.9931783931047864


## Generator 9

In [56]:
generator9 = 'moweapi00080_2023_11_16\HDGeneratorIREG09.csv'
model9, X_scaler9, y_scaler9 = full_training(generator9, anemometer_file_path)

Evaluation Metrics
MAE:  0.04527723590985904
RMSE:  0.0719335744589181
R2 Score:  0.9948728165925834


## Generator 10

In [57]:
generator10 = 'moweapi00080_2023_11_16\HDGeneratorIREG10.csv'
model10, X_scaler10, y_scaler10 = full_training(generator10, anemometer_file_path)

Evaluation Metrics
MAE:  0.044852974237620795
RMSE:  0.06692326231629744
R2 Score:  0.9954934458433474


## Generator 11

In [58]:
generator11 = 'moweapi00080_2023_11_16\HDGeneratorIREG01.csv'
model11, X_scaler11, y_scaler11 = full_training(generator11, anemometer_file_path)

Evaluation Metrics
MAE:  0.04944329288960195
RMSE:  0.08265456310978701
R2 Score:  0.9931783931047864


## Generator 12

In [59]:
generator12 = 'moweapi00080_2023_11_16\HDGeneratorIREG12.csv'
model12, X_scaler12, y_scaler12 = full_training(generator12, anemometer_file_path)

Evaluation Metrics
MAE:  0.04312243188939635
RMSE:  0.06212077509254867
R2 Score:  0.9960861463614747


## Generator 13

In [60]:
generator13 = 'moweapi00080_2023_11_16\HDGeneratorIREG13.csv'
model13, X_scaler13, y_scaler13 = full_training(generator13, anemometer_file_path)

Evaluation Metrics
MAE:  0.08913554370597797
RMSE:  0.17417877060602646
R2 Score:  0.970117830211267


## Generator 14

In [61]:
generator14 = 'moweapi00080_2023_11_16\HDGeneratorIREG14.csv'
model14, X_scaler14, y_scaler14 = full_training(generator14, anemometer_file_path)

Evaluation Metrics
MAE:  0.10012145768361556
RMSE:  0.1873399605950894
R2 Score:  0.9646839238268143


## Generator 15

In [62]:
generator15 = 'moweapi00080_2023_11_16\HDGeneratorIREG15.csv'
model15, X_scaler15, y_scaler15 = full_training(generator15, anemometer_file_path)

Evaluation Metrics
MAE:  0.06837777202626666
RMSE:  0.13655949746096605
R2 Score:  0.9816240512561802


## Generator 16

In [63]:
generator16 = 'moweapi00080_2023_11_16\HDGeneratorIREG16.csv'
model16, X_scaler16, y_scaler16 = full_training(generator16, anemometer_file_path)

Evaluation Metrics
MAE:  0.05973248159103547
RMSE:  0.1261700629481125
R2 Score:  0.9832166447762833
