## Run Legacy Models (Perera)

Caleb Phillips (caleb.phillips@nrel.gov) and Dmitry Duplyakin (dmitry.duplyakin@nrel.gov)

The purpose of this notebook is to run the Legacy "LOM"s which form a baseline for more complex models. These include:

 - Vanilla Perera: The original Perera with infinite length obstacles
 - Perera 2: A version of Perera proposed in the WAsP paper with finite length obstacles
 - Perera 3: A version of Perera proposed in the WAsP paper with asymmetric finite length obstacles
 
In practice, all three models are quite similar.

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
import matplotlib.pyplot as plt
from dw_tap.lom import run_lom
import os
import seaborn as sns
import glob
from dw_tap.data_processing import _LatLon_To_XY, filter_obstacles

from pyproj import Transformer
from shapely.geometry import LineString, Polygon, MultiPolygon, Point, MultiPoint, shape
from shapely.ops import split, nearest_points
import fiona
import perera
import pickle
import common

# Render figures inline in the notebook, using the high-resolution "retina" figure format.
%matplotlib inline
%config InlineBackend.figure_format='retina'

# Directory holding the per-site obstacle GeoJSON files ("<tid>v2.json"). The combined
# obstacle files and the pickled Perera features produced further down are written here too.
obstacle_data_dir = "01 Bergey Turbine Data/3dbuildings_geojson"

In [None]:
# Site index table. Later cells look sites up by "APRS ID" and read the
# "Hub Height (m)", "Latitude" and "Longitude" columns from it.
sites_csv = "01 Bergey Turbine Data/bergey_sites.csv"
index = pd.read_csv(sites_csv)
index.head()

### Select the sites to process and the wind data sources

In [None]:
# By default every site listed in the index is processed.
# For a small test, replace this list with a few IDs instead, e.g. ["t133", "t135"].
all_site_ids = index["APRS ID"].tolist()

# These 2 sites currently don't have obstacle descriptions with the heights based on
# lidar data, so they are left out.
sites_without_lidar_heights = ("t007", "t074")
selected = [site_id for site_id in all_site_ids if site_id not in sites_without_lidar_heights]
print(selected)

# Wind data sources to run. One or more of:
# ["wtk", "wtk_bc", "wtk_led_2018", "wtk_led_2019", "wtk_led_bc"]
wind_sources = ["wtk", "wtk_bc", "wtk_led_2018", "wtk_led_2019", "wtk_led_bc"]

### Load wind data

In [None]:
# Supported wind sources: name -> (CSV path, {destination column: source column}).
# The column mapping is applied to each per-site frame after filtering:
#   - the LED files carry their timestamp in "packet_date"; it is copied to "datetime"
#   - the bias-corrected files overwrite "ws" with the corrected "ws_bc" so that all
#     further steps actually use the bias-corrected wind speeds
WIND_SOURCE_SPECS = {
    "wtk": ("01 Bergey Turbine Data/wtk.csv.bz2", {}),
    "wtk_led_2018": ("01 Bergey Turbine Data/wtk_led_2018.csv.bz2", {"datetime": "packet_date"}),
    "wtk_led_2019": ("01 Bergey Turbine Data/wtk_led_2019.csv.bz2", {"datetime": "packet_date"}),
    "wtk_bc": ("02 Bias Correction/wtk_bc.csv.bz2", {"ws": "ws_bc"}),
    "wtk_led_bc": ("02 Bias Correction/wtk_led_bc.csv.bz2", {"ws": "ws_bc"}),
}


def load_wind_source(wind_source, site_ids):
    """Load one wind data source and split it into per-site dataframes.

    Parameters
    ----------
    wind_source : str
        Key of WIND_SOURCE_SPECS naming the CSV file to read.
    site_ids : iterable of str
        Site IDs to extract; matched against the "tid" column of the file.

    Returns
    -------
    dict
        Maps each requested site ID to a dataframe holding only that site's rows,
        with a fresh 0..n-1 index. A site with no rows in the file gets an empty
        dataframe rather than being left out.

    Raises
    ------
    KeyError
        If wind_source is not a key of WIND_SOURCE_SPECS.
    """
    csv_path, column_aliases = WIND_SOURCE_SPECS[wind_source]
    source_df = pd.read_csv(csv_path)

    dfs_by_tid = {}
    for tid in site_ids:
        # reset_index returns a new frame, so the column assignments below
        # never write into source_df.
        site_df = source_df[source_df["tid"] == tid].reset_index(drop=True)
        for dest_col, src_col in column_aliases.items():
            site_df[dest_col] = site_df[src_col]
        dfs_by_tid[tid] = site_df
    return dfs_by_tid


# wind source -> {site ID -> dataframe of wind records for that site}
atmospheric_inputs = {}

for wind_source in tqdm(wind_sources):
    if wind_source not in WIND_SOURCE_SPECS:
        # Unknown sources are reported and left out of atmospheric_inputs entirely.
        print("Unsupported wind_source selected:", wind_source)
        continue

    atmospheric_inputs[wind_source] = load_wind_source(wind_source, selected)

### Load obstacle data

In [None]:
# NOTE(review): this list is never populated or read in the cells visible here;
# it is kept only in case other code expects the name to exist.
sites_with_tall_blgs = []

# Same directory as configured at the top of the notebook; set once here instead of
# on every loop iteration.
obstacle_data_dir = "01 Bergey Turbine Data/3dbuildings_geojson"

# site ID -> filtered obstacle frame for that site
obstacle_inputs = {}
for tid in selected:

    index_row = index[index["APRS ID"] == tid].iloc[0]
    z_turbine = index_row["Hub Height (m)"]

    obstacle_data_file = "%s/%sv2.json" % (obstacle_data_dir, tid)

    # Sites without an obstacle file are reported and left out of obstacle_inputs.
    if not os.path.exists(obstacle_data_file):
        print("Can't access: %s. Skipping" % obstacle_data_file)
        continue

    obstacle_df = filter_obstacles(gpd.read_file(obstacle_data_file),
                                   include_trees=True,
                                   turbine_height_for_checking=z_turbine)
    # Tag every obstacle with its site so the rows stay attributable after concatenation.
    obstacle_df["tid"] = tid
    obstacle_inputs[tid] = obstacle_df

# pd.concat raises an uninformative ValueError on an empty collection; fail with a
# message that names the actual problem instead.
if not obstacle_inputs:
    raise ValueError("No obstacle files found in %s for the selected sites" % obstacle_data_dir)

all_obstacle_inputs = pd.concat(list(obstacle_inputs.values()))
display(all_obstacle_inputs)

In [None]:
# Write the combined, filtered obstacles to disk twice: first as they are, then
# reprojected to EPSG:3740 (the file that the Perera feature calculation opens).
combined_path = "%s/all_obstacles.json" % (obstacle_data_dir)
projected_path = "%s/all_obstacles_epsg3740.json" % (obstacle_data_dir)

all_obstacle_inputs.to_file(combined_path, driver="GeoJSON", index=False)

obstacles_epsg3740 = all_obstacle_inputs.to_crs(3740)
obstacles_epsg3740.to_file(projected_path, driver="GeoJSON", index=False)

In [None]:
# Quick vis: one figure per site, titled with the site ID so that each plot
# can be matched to its site.
for tid, obstacle_df in obstacle_inputs.items():
    ax = obstacle_df.plot()
    ax.set_title(tid)

### Calculate and Save Perera Features

In [None]:
# Site coordinates in the index are latitude/longitude (EPSG:4326), while the combined
# obstacle file opened below was written in EPSG:3740, so every site location is
# projected before features are computed. No always_xy flag is passed, so the
# transformer is called with (latitude, longitude) in that order.
transformer = Transformer.from_crs("epsg:4326", "epsg:3740")

# site ID -> Perera features computed for that site
features = {}

# Context managers make sure the obstacle collection and the output file are closed
# (and the pickle fully flushed) even if a site fails part-way through.
with fiona.open("%s/all_obstacles_epsg3740.json" % (obstacle_data_dir)) as buildings:
    for tid in tqdm(selected):
        row = index[index["APRS ID"] == tid].iloc[0]
        # The transform returns projected coordinates, not degrees.
        site_x, site_y = transformer.transform(row["Latitude"], row["Longitude"])
        features[tid] = perera.calculate_perera_features(Point(site_x, site_y), buildings)

with open("%s/perera_features.p" % (obstacle_data_dir,), "wb") as features_file:
    pickle.dump(features, features_file)

### Run Perera Model

In [None]:
# When True, result files already present in dest_dir are overwritten; when False,
# a site/wind-source combination whose result file exists is skipped.
overwrite = False

# Both tags are embedded in the result filenames.
site_type = "bergey"
model_type = "perera"

# Directory that receives the model outputs. exist_ok=True creates it only when missing,
# without a separate existence check that could race with another process.
dest_dir = "03 Model Outputs"
os.makedirs(dest_dir, exist_ok=True)

In [None]:
# Sanity check: number of wind records loaded per site (rows) and wind source (columns).
# The table is built from whatever was actually loaded, so it keeps working when only a
# few sites or wind sources are selected. Combinations with zero records are skipped
# when the model is run.
pd.DataFrame({
    wind_source: {tid: len(site_df) for tid, site_df in dfs_by_tid.items()}
    for wind_source, dfs_by_tid in atmospheric_inputs.items()
})

In [None]:
# Reload the per-site Perera features from the pickle file in obstacle_data_dir.
# NOTE: unpickling can execute arbitrary code; only load a file produced by this notebook.
with open("%s/perera_features.p" % (obstacle_data_dir,), "rb") as features_file:
    features = pickle.load(features_file)

# Output column -> Perera model function that fills it.
perera_models = {
    "ws-adjusted": perera.perera,
    "ws-adjusted-2": perera.perera2,
    "ws-adjusted-3": perera.perera3,
}

for tid in tqdm(selected):

    for wind_source in wind_sources:

        # Skip combinations without data: a wind source rejected during loading has no
        # entry at all, and a site with no records has an empty dataframe.
        site_df = atmospheric_inputs.get(wind_source, {}).get(tid)
        if site_df is None or len(site_df) == 0:
            continue

        dest_filename = "%s/%s_%s_%s_%s.csv.bz2" % (dest_dir, site_type, model_type, tid, wind_source)
        if (not overwrite) and os.path.exists(dest_filename):
            print("Found previously saved %s; overwrite flag is off. Skipping to next config." % (dest_filename))
            continue

        # site_df is the same object that is stored in atmospheric_inputs, so the columns
        # added below also appear there.
        # NOTE(review): sectorize is fed the wind *speed* column ("ws"); if sectors are
        # meant to be wind-direction bins, a direction column should be passed -- confirm.
        site_df["sector"] = common.sectorize(site_df["ws"])

        # Every model receives the same three input columns, one row at a time.
        model_inputs = site_df[["tid", "sector", "ws"]]
        for column, model in perera_models.items():
            site_df[column] = model_inputs.apply(model, axis=1, args=(features,))

        site_df.to_csv(dest_filename, index=False)

In [None]:
# Quick vis of data in produced files: one small scatter plot per result file, showing
# the input wind speed ("ws") against the first adjusted wind speed ("ws-adjusted").
for f in sorted(glob.iglob("%s/*perera*" % dest_dir)):
    df = pd.read_csv(f)

    # A fresh figure per file. Reusing plt.gcf() without showing it drew every file onto
    # the same axes, leaving only the last file's title visible.
    fig, ax = plt.subplots(figsize=(2.5, 2.5))
    sns.scatterplot(x=df["ws"], y=df["ws-adjusted"], alpha=0.2, ax=ax)
    ax.set(title=os.path.basename(f))

    plt.show()
    # Release the figure so a long list of result files does not accumulate open figures.
    plt.close(fig)