In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from solar_forecasting.utils import simulate_realtime_forecast
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [2]:
# Input location: the CSV file name, and the (relative) directory that contains it.
point_forecast_data = "system_efficient_results.csv"
data_path = Path("../data")

In [3]:
# Load the CSV named by `point_forecast_data` from `data_path` into the working frame.
df = pd.read_csv(data_path.joinpath(point_forecast_data))

In [4]:
# Preview the first five rows of the frame exactly as loaded from the CSV.
df.head()

Unnamed: 0,timestamp,pv_output_kw,irradiance_direct,irradiance_diffuse,temperature_pv,hour,month,season,temperature_wx,humidity,...,cloudiness,zenith,azimuth_sun,GHI,DNI,DHI,POA_irradiance,Pcs_kW,Pccs_kW,Ppccs_kW
0,2019-01-01 00:00:00+00:00,0.0,0.0,0.0,0.912,0,1,Winter,0.68,98.9,...,0.5029,153.467842,22.640819,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2019-01-01 01:00:00+00:00,0.0,0.0,0.0,0.864,1,1,Winter,0.64,98.94,...,0.6432,147.611733,48.311134,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2019-01-01 02:00:00+00:00,0.0,0.0,0.0,0.833,2,1,Winter,0.64,98.91,...,1.0,139.159465,66.745701,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2019-01-01 03:00:00+00:00,0.0,0.0,0.0,0.793,3,1,Winter,0.64,98.84,...,1.0,129.564773,80.686691,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2019-01-01 04:00:00+00:00,0.0,0.0,0.0,0.63,4,1,Winter,0.5,98.56,...,1.0,119.592585,92.303776,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Weather covariates passed to the forecast simulation as its feature columns.
features = ["temperature_wx", "cloudiness", "humidity"]

# Parse `timestamp` and promote it to the index without mutating the loaded frame in place.
# The guard makes the cell safe to re-run: after the first execution `timestamp` is no longer
# a column, so the unguarded version raised KeyError on a second run.
if "timestamp" in df.columns:
    df = df.assign(timestamp=pd.to_datetime(df["timestamp"])).set_index("timestamp")
elif df.index.name != "timestamp":
    # Neither a column nor the index: keep failing loudly, as the original cell did.
    raise KeyError("expected a 'timestamp' column (or an index named 'timestamp') in df")

# Fail early, with a readable message, if a requested feature is absent from the frame.
missing_features = [col for col in features if col not in df.columns]
assert not missing_features, f"Missing feature columns: {missing_features}"

In [6]:
# Preview again: `timestamp` should now appear as the index rather than as a regular column.
df.head()

Unnamed: 0_level_0,pv_output_kw,irradiance_direct,irradiance_diffuse,temperature_pv,hour,month,season,temperature_wx,humidity,wind_speed,...,cloudiness,zenith,azimuth_sun,GHI,DNI,DHI,POA_irradiance,Pcs_kW,Pccs_kW,Ppccs_kW
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-01 00:00:00+00:00,0.0,0.0,0.0,0.912,0,1,Winter,0.68,98.9,1.38,...,0.5029,153.467842,22.640819,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-01-01 01:00:00+00:00,0.0,0.0,0.0,0.864,1,1,Winter,0.64,98.94,1.36,...,0.6432,147.611733,48.311134,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-01-01 02:00:00+00:00,0.0,0.0,0.0,0.833,2,1,Winter,0.64,98.91,1.42,...,1.0,139.159465,66.745701,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-01-01 03:00:00+00:00,0.0,0.0,0.0,0.793,3,1,Winter,0.64,98.84,1.45,...,1.0,129.564773,80.686691,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-01-01 04:00:00+00:00,0.0,0.0,0.0,0.63,4,1,Winter,0.5,98.56,1.47,...,1.0,119.592585,92.303776,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Summarise the index range, column dtypes and non-null counts of the indexed frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8760 entries, 2019-01-01 00:00:00+00:00 to 2019-12-31 23:00:00+00:00
Data columns (total 22 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   pv_output_kw             8760 non-null   float64
 1   irradiance_direct        8760 non-null   float64
 2   irradiance_diffuse       8760 non-null   float64
 3   temperature_pv           8760 non-null   float64
 4   hour                     8760 non-null   int64  
 5   month                    8760 non-null   int64  
 6   season                   8760 non-null   object 
 7   temperature_wx           8760 non-null   float64
 8   humidity                 8760 non-null   float64
 9   wind_speed               8760 non-null   float64
 10  wind_direction           8760 non-null   float64
 11  mean_sea_level_pressure  8760 non-null   float64
 12  cloudiness               8760 non-null   float64
 13  zenith                   8760 

In [8]:
# Run the forecast simulation on the indexed frame.
# NOTE(review): the preview of `results_df` below starts at 2019-01-14 even though
# start_date is 2019-01-01, and `lambda_hat` is empty in those rows — presumably a warm-up
# window inside simulate_realtime_forecast; TODO confirm against its implementation.
results_df = simulate_realtime_forecast(
    df=df,
    # Column roles (semantics assumed from the parameter names — verify in the helper).
    target_col="pv_output_kw",
    reference_col="Ppccs_kW",
    output_col="Ppf_kW",
    feature_cols=features,
    # Simulation window.
    start_date="2019-01-01",
    end_date="2019-12-31",
)

100%|██████████| 365/365 [00:12<00:00, 30.05it/s]


In [9]:
# Preview the simulation output: `pv_output_kw`, `Ppccs_kW`, `Ppf_kW` and `lambda_hat`.
results_df.head()

Unnamed: 0,pv_output_kw,Ppccs_kW,Ppf_kW,lambda_hat
2019-01-14 00:00:00,0.0,0.0,0.0,
2019-01-14 01:00:00,0.0,0.0,0.0,
2019-01-14 02:00:00,0.0,0.0,0.0,
2019-01-14 03:00:00,0.0,0.0,0.0,
2019-01-14 04:00:00,0.0,0.0,0.0,


In [10]:
# Point-forecast error over every row of `results_df`: the `pv_output_kw` column
# compared against the `Ppf_kW` forecast column.
observed = results_df["pv_output_kw"]
predicted = results_df["Ppf_kW"]

mae = mean_absolute_error(observed, predicted)
rmse = np.sqrt(mean_squared_error(observed, predicted))
print(f"RMSE: {rmse:.3f}, MAE: {mae:.3f}")

RMSE: 0.444, MAE: 0.235


TODO:
- Validate that the results are in kW, not W
- Re-validate the error metrics once the unit (kW or W) is confirmed
- Evaluate forecast skill relative to a persistence baseline
- Evaluate the probabilistic forecast