In [1]:
import os
import numpy as np
import pandas as pd
import polars as pl

In [2]:
# Absolute path of the data folder, resolved from the current working directory.
parent_dir = os.path.abspath(r'../data/')

# Sub-folders, below parent_dir, that lead to the modeling parquet files.
_output_subfolders = ('processed_data', 'RawData_ModelingData_1')
output_dir = os.path.join(parent_dir, *_output_subfolders)

In [None]:
# NOTE(review): everything below is a single triple-quoted string literal, so
# none of the statements inside it execute; the cell only evaluates to that
# string. The text sketches a parameterized variant of the path setup:
# `parent_dir_param` / `output_dir_param` would override the data locations,
# and leaving both as None falls back to the same '../data/' and
# 'processed_data/RawData_ModelingData_1' locations.
# Presumably kept for a parameterized-run tool — TODO confirm, otherwise delete.
'''import os

parent_dir_param = None

if parent_dir_param is None:
    parent_dir = os.path.abspath(r'../data/') 
else:
    parent_dir = os.path.abspath(parent_dir_param)

output_dir_param = None

if output_dir_param is None:
    output_dir = os.path.join(parent_dir, 'processed_data', 'RawData_ModelingData_1')
else:
    output_dir = os.path.abspath(output_dir_param)'''


In [3]:

# Names of all parquet files located directly inside output_dir.
parquet_files = [name for name in os.listdir(output_dir) if name.endswith('.parquet')]

# Load every file with pandas, keyed by its file name without the extension.
dataframes = {
    os.path.splitext(name)[0]: pd.read_parquet(os.path.join(output_dir, name))
    for name in parquet_files
}

# The two frames the rest of the notebook works with.
big_raw_train_df, big_raw_test_df = (
    dataframes[key] for key in ('big_raw_train_df', 'big_raw_test_df')
)



In [None]:

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from catboost import CatBoostRegressor


# Columns used as model inputs, and the column the models predict.
input_columns = [
    'Pitch_Angle_B1', 'Pitch_Angle_B2', 'Pitch_Angle_B3',
    'Rotorposition', 'Generator_Speed', 'Rotor_Speed',
    'Generator_Torque', 'Power', 'YAW_Bearing_THRUST',
]
target_column = 'Wind_Speed'

# Targets are taken as plain arrays and left unscaled.
target_train = big_raw_train_df[target_column].values
target_test = big_raw_test_df[target_column].values

# The scaler is fitted on the training inputs only and then reused for the
# test inputs, so no test-set statistics enter the scaling.
scaler = MinMaxScaler()
input_features_train = scaler.fit_transform(big_raw_train_df[input_columns].values)
input_features_test = scaler.transform(big_raw_test_df[input_columns].values)

window_size = 100   # number of consecutive rows that form one model input
future_steps = 15   # number of horizons; one separate model is trained per horizon


def _build_windows(features, window, max_horizon):
    """Return all flattened look-back windows that are followed by a later row.

    Row ``i`` of the result equals ``features[i : i + window].flatten()`` for
    ``i`` in ``range(len(features) - window)``. The last possible window is
    left out because no row follows it that could serve as its target.

    Parameters
    ----------
    features : 2-D numpy array, one row per time step.
    window : number of consecutive rows per window.
    max_horizon : largest number of horizons that will be sliced from the
        result; used only to validate that ``features`` is long enough.

    Returns
    -------
    2-D numpy array of shape ``(len(features) - window, window * n_columns)``.

    Raises
    ------
    ValueError
        If ``features`` has fewer than ``window + max_horizon`` rows, in which
        case at least one horizon would end up with no samples.
    """
    n_windows = len(features) - window
    if n_windows < max_horizon:
        raise ValueError(
            f"Need at least {window + max_horizon} rows to build windows of "
            f"{window} rows for {max_horizon} horizons, got {len(features)}."
        )
    # sliding_window_view appends the window axis last, giving
    # (len - window + 1, n_columns, window); swap the last two axes so that
    # flattening runs row by row, exactly like slicing and calling .flatten().
    view = np.lib.stride_tricks.sliding_window_view(features, window, axis=0)
    flat = view[:n_windows].transpose(0, 2, 1).reshape(n_windows, -1)
    return np.ascontiguousarray(flat)


# The windows do not depend on the horizon, so each matrix is built once and
# then sliced per horizon instead of being rebuilt in every loop iteration.
train_windows = _build_windows(input_features_train, window_size, future_steps)
test_windows = _build_windows(input_features_test, window_size, future_steps)

models = []

for step in range(future_steps):
    # Window i is paired with the target `step` rows after the window's end,
    # i.e. target_train[i + window_size + step]; the last `step` windows have
    # no such target and are dropped.
    X_train = train_windows[:len(train_windows) - step]
    y_train = target_train[window_size + step:]

    model = CatBoostRegressor(
        iterations=1000,
        learning_rate=0.1,
        depth=6,
        random_seed=0,   # pin the seed so reruns are reproducible
        verbose=100,     # log every 100th iteration instead of all 1000
    )
    model.fit(X_train, y_train)

    models.append(model)

y_preds = []

for step in range(future_steps):
    # Same truncation as in training, so y_preds[step] lines up with
    # target_test[window_size + step:].
    X_test = test_windows[:len(test_windows) - step]

    y_pred = models[step].predict(X_test)
    y_preds.append(y_pred)

# Report error metrics separately for every forecast horizon.
for step in range(future_steps):
    # Targets for this horizon start `window_size + step` rows into the test set.
    y_true = target_test[window_size + step:]
    y_hat = y_preds[step]

    mae = mean_absolute_error(y_true, y_hat)
    mse = mean_squared_error(y_true, y_hat)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_hat)

    print(
        f"Step {step + 1}:",
        f"Mean Absolute Error (MAE): {mae}",
        f"Mean Squared Error (MSE): {mse}",
        f"Root Mean Squared Error (RMSE): {rmse}",
        f"R-squared (R2) Score: {r2}",
        sep="\n",
    )