In [1]:
%load_ext autoreload
%autoreload 2

In [73]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error


In [None]:
from projectwind.clean import add_timestamps
from projectwind.sampling import get_clean_sequences
from projectwind.pipeline import get_pipeline

In [3]:
from projectwind.data import get_data, split_fit_data, split_test_data, get_samples, get_pipeline

In [4]:
def get_data():
    # Take the parent dirname for the raw data
    parentdir = os.path.dirname(os.path.abspath("__file__"))
    rawdir = os.path.join(parentdir,"raw_data")

    # Output dict
    all_WTG_data = {}
    fit_data = pd.DataFrame()

    # Append all csv data files to a dict("WTG_number" : dataframe)
    for root, directory, file in os.walk(rawdir):
        for WTG_number in range (5):
            print(WTG_number)
            # Train/Val/Test dataset
            # Output format: Dataframe per WTG assembled in a dict("WTG_number": dataframe)
            data = pd.read_csv(root +'/' +file[WTG_number],
                                index_col=0,
                                parse_dates=True,
                                dayfirst=True)

            data.rename(columns={"Desalineación Nacelle y Dirección de Viento Media 10M\n(°)": "Misalignment",
                                    "Media de Potencia Activa 10M\n(kW)": "Power",
                                    "Posición Nacelle Media 10M\n(°)":"Nacelle Orientation",
                                    "Velocidad Rotor Media 10M\n(rpm)":"Rotor Speed",
                                    "Velocidad Viento Media 10M\n(m/s)":"Wind Speed",
                                    "Ángulo Pitch Media 10M\n(°)":"Blade Pitch"},
                                    inplace=True)

            all_WTG_data[WTG_number] = data

    # Prepare df containing scaler fit data (no need for cleaning as there are no outliers)
    # Format: timesteps concatenated / only 6 columns
    for WTG_number in all_WTG_data:
        fit_data = pd.concat((fit_data,all_WTG_data[WTG_number]),ignore_index=True)


    return all_WTG_data, fit_data


In [5]:
data, fit_data = get_data()

0
1
2
3
4


In [58]:
historical_power = data[0]['Power']

In [68]:
historical_power = pd.DataFrame(data=historical_power)
historical_power

Unnamed: 0_level_0,Power,Prediction
Fecha,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-05-01 00:00:00,1159.76,
2019-05-01 00:10:00,1103.73,
2019-05-01 00:20:00,1119.01,
2019-05-01 00:30:00,1281.76,
2019-05-01 00:40:00,1121.60,
...,...,...
2021-09-30 23:10:00,52.39,413.432917
2021-09-30 23:20:00,159.25,415.798889
2021-09-30 23:30:00,246.98,419.390000
2021-09-30 23:40:00,88.34,420.777778


In [60]:
historical_power['Prediction'] = historical_power['Power'].rolling(window = 72).mean()


In [66]:
historical_power.isna().sum()

Power           1
Prediction    143
dtype: int64

In [69]:
baseline = historical_power.dropna()

In [70]:
baseline

Unnamed: 0_level_0,Power,Prediction
Fecha,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-05-01 11:50:00,1907.57,1287.083750
2019-05-01 12:00:00,1638.74,1293.736250
2019-05-01 12:10:00,1410.02,1297.990278
2019-05-01 12:20:00,1094.71,1297.652778
2019-05-01 12:30:00,1440.44,1299.856667
...,...,...
2021-09-30 23:10:00,52.39,413.432917
2021-09-30 23:20:00,159.25,415.798889
2021-09-30 23:30:00,246.98,419.390000
2021-09-30 23:40:00,88.34,420.777778


In [83]:
y_true = baseline.Power
y_pred = baseline.Prediction

mae = mean_absolute_error(y_true, y_pred)

std = baseline.Power.std()

print('mae =' ,mae)
print('Standard deviation =', std)

mae = 424.48346258814735
Standard deviation = 661.4035157315992
