## Performance Analysis for Timeseries (where possible)

Caleb Phillips (caleb.phillips@nrel.gov) and Jenna Ruzekowicz (jenna.ruzekowicz@nrel.gov)

The purpose of this notebook is to read in computed wind speed predictions and compare them to actual observations by computing error metrics.

In [None]:
from common import *
import pandas as pd
import numpy as np
from tqdm import tqdm
import glob
import re
from dw_tap.power_output import estimate_power_output
import os.path
import importlib
import power_output
import site_index
import plotly.express as px

### Load Ground Truth (Bergey Timeseries Data)

Since this data starts in 2018, we can only use WTK LED as the wind source.

In [None]:
# Load the observed Bergey timeseries, standardise the column names used for
# joining/comparison, and parse timestamps as timezone-aware UTC values.
# The timestamp parsing is the slow step of this cell.
bergey_timeseries = (
    pd.read_csv("01 Bergey Turbine Data/prepared_and_combined.csv.bz2")
    .rename(columns={"packet_date": "datetime", "windspeed_mps": "ws-observed"})
    .assign(datetime=lambda frame: pd.to_datetime(frame["datetime"], utc=True))
)
bergey_timeseries.head()

In [None]:
# Build the project-local site index; its `tids(...)` method is used further
# down to enumerate the `tid` values whose model outputs are loaded.
index = site_index.SiteIndex()

In [None]:
# Experiment grid: every (provider, model, wind source, obstacle group, tid)
# combination is probed on disk, and whichever output files exist are loaded.
providers = ["bergey"]
models = ["perera","anl"]
wind_sources = ["wtk_led_2018","wtk_led_2019","wtk_led_bc"]
obs_groups = ["all","bldgsonly"]

# Columns kept from every model-output file.
output_columns = ["model","obsgroup","wind_source","provider","tid","datetime","ws-adjusted"]

# Perera output files carry two additional adjusted-speed columns; each one is
# re-published below as its own "model" so it can be compared side by side.
perera_variants = (("shelter", "ws-adjusted-2"), ("shelter+", "ws-adjusted-3"))

dfs = []

for provider in providers:
    for model in models:
        for wind_source in wind_sources:
            for og in obs_groups:
                for tid in index.tids(True):
                    # Buildings-only runs are stored with a filename suffix.
                    if og == "bldgsonly":
                        fname = f"03 Model Outputs/{provider}_{model}_{tid}_{wind_source}_bldgsonly.csv.bz2"
                    else:
                        fname = f"03 Model Outputs/{provider}_{model}_{tid}_{wind_source}.csv.bz2"

                    # Not every combination has been computed; missing files are skipped.
                    if os.path.exists(fname):
                        print(fname)

                        # Tag each row with the combination it came from.
                        d = pd.read_csv(fname).assign(
                            model=model,
                            wind_source=wind_source,
                            provider=provider,
                            tid=tid,
                            obsgroup=og,
                        )
                        dfs.append(d[output_columns])

                        if model == "perera":
                            for variant_name, variant_column in perera_variants:
                                variant = d.copy()
                                variant["ws-adjusted"] = variant[variant_column]
                                variant["model"] = variant_name
                                dfs.append(variant[output_columns])

In [None]:
# Baseline ("no model") data: the wind-source speeds themselves, loaded so they
# can be scored against observations alongside the model outputs.
#
# Each supported wind source maps to (file path, column renames) so that every
# frame ends up with a "ws-adjusted" column like the model outputs have.
baseline_sources = {
    "wtk_led_2018": ("01 Bergey Turbine Data/wtk_led_2018.csv.bz2",
                     {"ws":"ws-adjusted","packet_date":"datetime"}),
    "wtk_led_2019": ("01 Bergey Turbine Data/wtk_led_2019.csv.bz2",
                     {"ws":"ws-adjusted","packet_date":"datetime"}),
    "wtk_led_bc": ("02 Bias Correction/wtk_led_bc.csv.bz2",
                   {"ws_bc":"ws-adjusted"}),
}

for provider in providers:
    for wind_source in tqdm(wind_sources):
        if wind_source not in baseline_sources:
            # Skip instead of falling through: continuing would either raise a
            # NameError (no frame loaded yet) or silently re-append the previous
            # wind source's frame under this wind source's label.
            print("Unsupported wind source")
            continue

        path, column_renames = baseline_sources[wind_source]
        d = pd.read_csv(path).rename(columns=column_renames)

        # "none" marks rows that had no obstacle model / obstacle group applied.
        d["wind_source"] = wind_source
        d["model"] = "none"
        d["provider"] = provider
        d["obsgroup"] = "none"
        dfs.append(d[["model","wind_source","provider","obsgroup","tid","datetime","ws-adjusted"]])

In [None]:
# Stack every loaded frame (model outputs and baselines) row-wise into a single
# long-format table, then preview its first rows.
bigdf = pd.concat(dfs, axis=0)
bigdf.head(5)

In [None]:
# Parse the prediction timestamps as timezone-aware UTC values using an explicit
# format; this step is slow.
# Note: converting speeds to power (power_output.Bergey10.windspeed_to_kw) and
# deriving a per-day `date` column are intentionally not done here at present.
parsed_datetimes = pd.to_datetime(
    bigdf['datetime'],
    format="%Y-%m-%d %H:%M:%S",
    utc=True,
)
bigdf['datetime'] = parsed_datetimes
bigdf.head()

### Create Merged Dataframe

In [None]:
# Pair each prediction with the observation for the same `tid` and timestamp.
# The default (inner) join keeps only rows present in both tables.
merged_df = pd.merge(
    bigdf,
    bergey_timeseries,
    on=['tid', 'datetime'],
)
merged_df.head()

In [None]:
# Signed error: predicted minus observed wind speed, so positive values mean
# the prediction is higher than the observation.
merged_df['error'] = merged_df['ws-adjusted'].sub(merged_df['ws-observed'])
merged_df.describe()

### Performance Plots: Overall

In [None]:
# Observed vs. predicted wind speed, one panel per (model, wind source).
def _strip_facet_prefix(annotation):
    """Shorten a facet title such as "model=anl" to just "anl"."""
    annotation.update(text=annotation.text.split("=")[-1])

fig = px.scatter(
    merged_df,
    x="ws-observed",
    y="ws-adjusted",
    facet_row="model",
    facet_col="wind_source",
    labels={"ws-observed":"Obs(mps)","ws-adjusted":"Pred(mps)"},
)
# Unlink the axes so each panel is scaled independently.
fig.update_xaxes(matches=None)
fig.update_yaxes(matches=None)
fig.for_each_annotation(_strip_facet_prefix)
fig.show()

In [None]:
# Density of observed vs. predicted wind speed, one panel per (model, wind source).
def _strip_facet_prefix(annotation):
    """Shorten a facet title such as "model=anl" to just "anl"."""
    annotation.update(text=annotation.text.split("=")[-1])

fig = px.density_contour(
    merged_df,
    x="ws-observed",
    y="ws-adjusted",
    facet_row="model",
    facet_col="wind_source",
    labels={"ws-observed":"Obs(mps)","ws-adjusted":"Pred(mps)"},
)
# Fix both axes to the same 0-15 window in every panel.
fig.update_yaxes(range=[0, 15])
fig.update_xaxes(range=[0, 15])
# Filled, labelled contours.
fig.update_traces(contours_coloring="fill", contours_showlabels = True)
fig.for_each_annotation(_strip_facet_prefix)
fig.show()

In [None]:
# Distribution of the signed error (predicted - observed), one panel per
# (model, wind source).
# The label mapping targets the plotted "error" column; the previous mapping
# named columns that are not on this figure, so the axis was left unlabelled
# with units. The wording matches the error box plot in this notebook.
fig = px.histogram(merged_df,x="error",
                 facet_row="model",facet_col="wind_source",
                 labels={"error":"Error (mps)"},
                 nbins=200)
# Shorten facet titles such as "model=anl" to just "anl".
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
# Common x window across panels; counts (y) are scaled independently per panel.
fig.update_xaxes(range=[-20, 20])
fig.update_yaxes(matches=None)
fig.show()

In [None]:
# Summary statistics applied to the signed error within each group:
# 25th/75th percentiles, root-mean-square error, and mean absolute error.
p25 = lambda x: x.quantile(0.25)
p75 = lambda x: x.quantile(0.75)
rmse = lambda x: np.sqrt(np.mean(np.power(x,2)))
mae = lambda x: np.mean(np.abs(x))

# Named aggregation gives each output column its final name directly, rather
# than relying on the auto-generated "<lambda_N>" names (and their order) and
# renaming them afterwards. The dict is unpacked because names such as
# "error_25%ile" are not valid Python keyword identifiers.
error_aggregations = {
    "error_mean": ("error", "mean"),
    "error_median": ("error", "median"),
    "error_25%ile": ("error", p25),
    "error_75%ile": ("error", p75),
    "error_rmse": ("error", rmse),
    "error_mae": ("error", mae),
}

# One row per (model, wind_source, obsgroup) with the group keys as columns.
perf_summary = (
    merged_df
    .groupby(["model","wind_source","obsgroup"])
    .agg(**error_aggregations)
    .reset_index()
)
perf_summary

In [None]:
# RMSE per model, one row of panels per wind source and one column per
# obstacle group.
# perf_summary holds one row per (model, wind_source, obsgroup). Without
# separating obsgroup, rows that share a model and wind source are stacked into
# a single bar whose height is the sum of their RMSEs, which is not a
# meaningful quantity. Faceting by obsgroup gives each bar exactly one RMSE.
fig = px.bar(perf_summary,x="model",y="error_rmse",
             facet_row="wind_source",facet_col="obsgroup",color="model",
             labels={"error_rmse":"RMSE (mps)","model":"Model"})
# Shorten facet titles such as "wind_source=wtk_led_bc" to just "wtk_led_bc".
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.show()

In [None]:
# Box plot of the signed error for each model, coloured by wind source.
# Individual points are suppressed (points=False); only the boxes are drawn.
fig = px.box(
    merged_df,
    x="model",
    y="error",
    color="wind_source",
    points=False,
    labels={"error":"Error (mps)","model":"Model"},
)
fig.show()