## Compute Metrics

Caleb Phillips (caleb.phillips@nrel.gov) and Jenna Ruzekowicz (jenna.ruzekowicz@nrel.gov)

This notebook reads in computed model predictions, compares them to actual observations, and computes error metrics. It produces the following summary files:

> error_metrics_summary_wtk|wtk_led_2019|wtk_led_2018.csv
  
> provider,site,height_m,model,metric,value
  
It also produces hour, month, and sector summaries:

> results_hms_{metric}.csv
  
> provider,site,height_m,model,hour,month,sector_deg,value

In [1]:
from common import *
import pandas as pd
import numpy as np
from tqdm import tqdm
import glob
import re
from dw_tap.power_output import estimate_power_output

FileNotFoundError: [Errno 2] No such file or directory: '../bergey/bergey_excel10_powercurve.xlsx'

In [2]:
# Compute wind-speed error metrics for every WTK-LED 2019 model output file and
# save one summary row per (turbine, model) file.


def _turbine_id_from_filename(path):
    """Return the turbine id embedded in ``path`` as "_tNNN_" (e.g. "t123"), or None."""
    match = re.search("_t[0-9]{3}_", path)
    return match.group().replace("_", "") if match else None


def _model_from_filename(path):
    """Return "lanl" or "anl", whichever appears in ``path``, or None if neither does."""
    # "lanl" has to be tested first because "anl" is a substring of it.
    for candidate in ("lanl", "anl"):
        if candidate in path:
            return candidate
    return None


#Two empty dataframes to store the final metrics into
error_dataframe_wtk_2019 = pd.DataFrame()
error_dataframe_wtk = pd.DataFrame()  #Not filled in yet (see the disabled draft below)

#Directory path of model data
directoryPath = "03 Model Outputs/"

#Ground truth is identical for every model file, so it is read once (lazily, on
#the first file that needs it) instead of once per file
ground_truth_all = None

#One single-row dataframe per processed file; concatenated once after the loop
error_metrics_frames = []

#For every file with model results (both wtk and wtk-led)
for file in glob.glob(directoryPath + '*.csv.bz2'):
    #Only wtk_led files for 2019 have hourly ws prediction data; skip everything
    #else before paying for the read/decompression
    if "wtk_led_2019.csv" not in file:
        continue

    #Pull the turbine id and model name from the name of the file
    tid = _turbine_id_from_filename(file)
    model = _model_from_filename(file)
    if tid is None or model is None:
        #Without this guard a missing id raised AttributeError and a missing model
        #name silently reused the label of the previously processed file
        print("Skipping " + file + ": could not determine turbine id or model from the file name")
        continue

    model_data_file = pd.read_csv(file)

    #Ground truth data comes from reversing the measured power output to windspeed
    if ground_truth_all is None:
        ground_truth_all = pd.read_csv("01 Bergey Turbine Data/prepared_and_combined.csv.bz2")
    ground_truth = ground_truth_all.loc[ground_truth_all["tid"] == tid]

    #Merge the two data sources (ground truth and wtk-led predicted) into single dataframe
    turbine_data = model_data_file.merge(ground_truth, on="packet_date")

    #Cuts out turbine ids with no ground truth data present (192 and 207 as of 4/16/23)
    if len(turbine_data["windspeed_mps"]) == 0:
        continue

    turbine_data["error"] = turbine_data["ws-adjusted"] - turbine_data["windspeed_mps"]

    #Bin timeseries data by hour, month, and sector (used by the optional plots below)
    packet_datetime = turbine_data['packet_date'].astype('datetime64[ns]')
    turbine_data['hour'] = packet_datetime.dt.hour
    turbine_data['month'] = packet_datetime.dt.month
    turbine_data['sector'] = sectorize(turbine_data['wd'])

    error_metrics_dict = error_metrics(turbine_data["ws-adjusted"], turbine_data["windspeed_mps"])
    error_metrics_dict["source"] = "wtk_led_2019"
    error_metrics_dict["tid"] = tid
    error_metrics_dict["model"] = model
    #index=[0] keeps the same row index the summary file has always been written with
    error_metrics_frames.append(pd.DataFrame(error_metrics_dict, index=[0]))

    #Visualizations
    #Scatter and histogram plots for individual turbine ids error metrics
    #plot_scatter_and_hist(turbine_data["ws-adjusted"],turbine_data["windspeed_mps"],tid = tid, axrange=[0,16])

    #Polar plot for individual turbine ids error metrics
    #plotpolar(turbine_data)

    #12x24 heatmap for individual turbine ids error metrics
    #heat_table = plot1224heatmap(turbine_data)

    #Save site-specific 12x24 summaries (heat_table), can only save if previous viz line is run
    #heat_table.to_csv("04 Error Metrics/1224summary_wtk_led_2019_" + tid + '_' + model + ".csv.bz2")

#Build the summary in one concat; pd.concat raises on an empty list, so the empty
#dataframe created above is kept when no file produced metrics
if error_metrics_frames:
    error_dataframe_wtk_2019 = pd.concat(error_metrics_frames)

#TODO: processing for wtk (<2018) and wtk_led_2018 files is not implemented yet.
#The authors' disabled draft is kept below for reference.
#NOTE(review): the date filter in the draft keeps rows with date < '2017-12-31',
#while its comment says it gets rid of everything before 2018 -- confirm the
#intended direction before enabling it.
#
#    if file.find("wtk.csv") != -1:
#        #Do processing for wtk data (<2018)
#        tid = re.search("_t[0-9]{3}_", file).group().replace("_", "")
#        if file.find("lanl") != -1: model = "lanl"
#        elif file.find("anl") != -1: model = "anl"
#        print(model_data_file)
#        #Read in the ground truth data for daily summaries
#        ground_truth = pd.read_csv("01 Bergey Turbine Data/daily_summaries.csv.bz2")
#        #Get rid of everything that is before 2018
#        ground_truth = ground_truth[~(ground_truth['date'] >= '2017-12-31')]
#
#        print(ground_truth)
#
#    elif file.find("wtk_led_2018") != -1:
#        #Do processing for wtk_led_2018 data

print(error_dataframe_wtk_2019)


#Save error_dataframe_wtk_2019
error_dataframe_wtk_2019.to_csv("04 Error Metrics/error_metrics_summary_wtk_led_2019.csv.bz2")

KeyboardInterrupt: 

## Results for WTK LED 2019 Bergey Sites (ANL Model)