## Step 1: Be able to read in the temperature, pressure, wind speed, wind direction, and time into a pandas dataframe. 


In [2]:
import numpy as np
import pandas as pd
import time
import h5pyd
import geopandas as gpd
#from dw_tap.power_output import estimate_power_output
from dw_tap.lom import run_lom

In [19]:
def estimate_power_output(df, temp, pres, ws_column="ws-adjusted"): 
    """
    Inputs: dataframe, temperature series and pressure series
    Outputs: total kw predicted over time period, instances with wind speed above possible generation, instances with wind speed below possible generation, lists of wind speeds above and below those marks
    """
    df_copy = df.copy()
    
    air_density = (pres) / (287.05 * temp)
    df_copy[ws_column] = (df_copy[ws_column] * ((air_density/1.225)**(1/3)))
    kw = Goldwind871500.windspeed_to_kw(df_copy, ws_column)
    above_curve_counter = Goldwind871500.above_curve_counter
    below_curve_counter = Goldwind871500.below_curve_counter
    above_curve_list = Goldwind871500.above_curve_list
    below_curve_list = Goldwind871500.below_curve_list
    return kw, above_curve_counter, below_curve_counter, above_curve_list, below_curve_list

class Goldwind871500(object):
    
    # Load data and minimal preprocessing
    raw_data = pd.read_excel("../powercurves/Goldwind871500.xlsx")
    raw_data.rename(columns={"Wind Speed (m/s)": "ws", "Turbine Output": "kw"}, inplace=True)
    
    # Create vectors for interpolation
    interp_x = raw_data.ws
    interp_y = raw_data.kw
    
    # Counters for cases outside of the real curve
    below_curve_counter = 0
    above_curve_counter = 0
    # Keeping windspeeds that are higher than what is in the curve
    above_curve_list = []
    below_curve_list = []
    
    max_ws = max(raw_data.ws)
    
    @classmethod
    def windspeed_to_kw(cls, df, ws_column="ws-adjusted"):
        """ Converts wind speed to kw """
        kw = pd.Series(np.interp(df[ws_column], cls.interp_x, cls.interp_y))
        ws = df[ws_column]
        for i in range(len(kw)):
            if kw.loc[i] <= 0: 
                cls.below_curve_counter += 1
                cls.below_curve_list.append(tuple((df["timestamp"][i], kw[i])))
            if ws.loc[i] > cls.max_ws:
                cls.above_curve_counter += 1
                cls.above_curve_list.append(tuple((df["timestamp"][i], ws[i])))
                kw.loc[i] = 0
        
        return kw
    
    @classmethod
    def reset_counters(cls):
        """ Sets counters and lists back to 0 and empty """
        cls.below_curve_counter = 0
        cls.above_curve_counter = 0
        cls.above_curve_list = []
        cls.below_curve_list = []

In [4]:
#Using 79m wind speed and direction (hub height is 80m for turbines in this area)
atmospheric_df = pd.read_excel("../data/lidar_marion_OH.xlsx", header=3, usecols="A,L,M,AY,AZ")
atmospheric_df.rename(columns={'Date (UTC)':'datetime', 'Air Temp. Cel.':'temp', 'Pressure (mbar)':'pres', 'Wind Direction (deg) at 79m':'wd', 'Horizontal Wind Speed (m/s) at 79m':'ws'}, inplace=True)
atmospheric_df['temp'] = atmospheric_df['temp'].apply(lambda x : x + 273.15) #convert to K from C
atmospheric_df['pres'] = atmospheric_df['pres'].apply(lambda x : x * 100) #convert to Pascals from mbar
print(atmospheric_df)

                 datetime    temp     pres       wd     ws
0     2017-08-15 14:50:00  304.36  97990.0  272.334  2.110
1     2017-08-15 15:00:00  304.84  97990.0  289.350  2.485
2     2017-08-15 15:10:00  305.71  98000.0  291.621  3.112
3     2017-08-15 15:20:00  306.42  98000.0  298.223  3.143
4     2017-08-15 15:30:00  307.01  98010.0  278.997  4.026
...                   ...     ...      ...      ...    ...
52647 2018-08-16 14:40:00  295.08  98420.0  178.480  8.749
52648 2018-08-16 14:50:00  295.03  98430.0  179.000  8.429
52649 2018-08-16 15:00:00  295.17  98430.0  178.750  8.544
52650 2018-08-16 15:10:00  295.24  98440.0  178.223  9.819
52651 2018-08-16 15:20:00  295.25  98440.0  178.199  9.053

[52652 rows x 5 columns]


In [5]:
#W1 coordinates
z_turbine = 80
lat, lon = 40.591555, -83.182092
obstacle_file = "../sites/simple_marion_obstacles.geojson"
obstacles_df = gpd.read_file(obstacle_file)
obstacles_df = obstacles_df[["height", "geometry"]]
x1_turbine, y1_turbine = lat, lon
xy_turbine = [np.array([x1_turbine, y1_turbine])]

In [6]:
predictions_df = \
    run_lom(atmospheric_df, obstacles_df, xy_turbine, z_turbine).join(atmospheric_df["wd"])

2023-01-28 09:00:52.900865: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


LOM time : 3.24  min


In [7]:
temp = atmospheric_df['temp']
predictions_df = predictions_df.join(temp)

In [8]:
pres = atmospheric_df['pres']
predictions_df = predictions_df.join(pres)

## Step 2: Read in the actual generated kw production from the same time period.

One issue is that the lidar data spans from mid-august of 2017 to mid-august of 2018. It looks like we only have wind production data from 2018 onwards. 

In [9]:
power_output_df = pd.read_excel("../data/marion/turbine.oneenergy.00.20180131.000000.marion.w1.xlsx", header=1, usecols="B, C, M")
power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180228.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180331.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180430.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180531.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180630.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180731.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180831.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
power_output_df.rename(columns={'Time':'timestamp', 'Wind Turbine Energy yield(kWh)':'measured_production', 'Avg Wind Speed(m/s)':'measured_ws'}, inplace=True)

  power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180228.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
  power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180331.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
  power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180430.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
  power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180531.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
  power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180630.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
  power_output_df = power_output_df.append(pd.read_excel("../data/marion/turbine.oneenergy.00.20180731.000000.marion.w1.xlsx", header=1, usecols="B, C, M"))
  power_output_df = power_output_df.append(pd.read_excel("

In [10]:
print(power_output_df)

               timestamp  measured_ws  measured_production
0    2018-01-01 00:00:00         5.92                   52
1    2018-01-01 00:10:00         6.01                   58
2    2018-01-01 00:20:00         5.96                   56
3    2018-01-01 00:30:00         6.01                   58
4    2018-01-01 00:40:00         5.82                   52
...                  ...          ...                  ...
4153 2018-08-31 23:10:00         6.24                   56
4154 2018-08-31 23:20:00         5.99                   49
4155 2018-08-31 23:30:00         6.40                   63
4156 2018-08-31 23:40:00         6.26                   58
4157 2018-08-31 23:50:00         5.53                   33

[34080 rows x 3 columns]


In [11]:
pre_analysis = predictions_df.merge(power_output_df[['timestamp', 'measured_production', 'measured_ws']], on='timestamp', how='left')
print(pre_analysis)

                timestamp  ws-adjusted     ws       wd    temp     pres  \
0     2017-08-15 14:50:00        2.110  2.110  272.334  304.36  97990.0   
1     2017-08-15 15:00:00        2.485  2.485  289.350  304.84  97990.0   
2     2017-08-15 15:10:00        3.112  3.112  291.621  305.71  98000.0   
3     2017-08-15 15:20:00        3.143  3.143  298.223  306.42  98000.0   
4     2017-08-15 15:30:00        4.026  4.026  278.997  307.01  98010.0   
...                   ...          ...    ...      ...     ...      ...   
52647 2018-08-16 14:40:00        8.749  8.749  178.480  295.08  98420.0   
52648 2018-08-16 14:50:00        8.429  8.429  179.000  295.03  98430.0   
52649 2018-08-16 15:00:00        8.544  8.544  178.750  295.17  98430.0   
52650 2018-08-16 15:10:00        9.819  9.819  178.223  295.24  98440.0   
52651 2018-08-16 15:20:00        9.053  9.053  178.199  295.25  98440.0   

       measured_production  measured_ws  
0                      NaN          NaN  
1              

In [12]:
pre_analysis = pre_analysis.dropna()
print(pre_analysis)

                timestamp  ws-adjusted     ws       wd    temp     pres  \
19885 2018-01-01 00:00:00        2.852  2.852  302.343  259.85  99780.0   
19886 2018-01-01 00:10:00        2.566  2.566  305.654  259.79  99790.0   
19887 2018-01-01 00:20:00        1.967  1.967  303.185  259.73  99790.0   
19888 2018-01-01 00:30:00        1.774  1.774  301.331  259.44  99800.0   
19889 2018-01-01 00:40:00        2.039  2.039  296.283  259.05  99820.0   
...                   ...          ...    ...      ...     ...      ...   
52616 2018-08-16 09:30:00        6.802  6.802  182.113  293.15  98320.0   
52617 2018-08-16 09:40:00        5.719  5.719  185.084  293.22  98330.0   
52618 2018-08-16 09:50:00        6.585  6.585  170.531  293.35  98340.0   
52619 2018-08-16 10:00:00        6.719  6.719  168.171  293.36  98340.0   
52620 2018-08-16 10:10:00        7.121  7.121  163.370  293.36  98350.0   

       measured_production  measured_ws  
19885                 52.0         5.92  
19886          

In [22]:
#pre_analysis = pre_analysis.reset_index()
kw, above_curve, below_curve, above_curve_list, below_curve_list = \
    estimate_power_output(pre_analysis, pre_analysis["temp"], pre_analysis["pres"])
print(pre_analysis)

       level_0  index           timestamp  ws-adjusted     ws       wd  \
0            0  19885 2018-01-01 00:00:00        2.852  2.852  302.343   
1            1  19886 2018-01-01 00:10:00        2.566  2.566  305.654   
2            2  19887 2018-01-01 00:20:00        1.967  1.967  303.185   
3            3  19888 2018-01-01 00:30:00        1.774  1.774  301.331   
4            4  19889 2018-01-01 00:40:00        2.039  2.039  296.283   
...        ...    ...                 ...          ...    ...      ...   
29375    29375  52616 2018-08-16 09:30:00        6.802  6.802  182.113   
29376    29376  52617 2018-08-16 09:40:00        5.719  5.719  185.084   
29377    29377  52618 2018-08-16 09:50:00        6.585  6.585  170.531   
29378    29378  52619 2018-08-16 10:00:00        6.719  6.719  168.171   
29379    29379  52620 2018-08-16 10:10:00        7.121  7.121  163.370   

         temp     pres  measured_production  measured_ws  
0      259.85  99780.0                 52.0         

In [23]:
print(kw)

0         23.593601
1          7.705506
2          0.000000
3          0.000000
4          0.000000
            ...    
29375    493.596463
29376    290.202307
29377    445.267952
29378    474.710793
29379    565.265831
Length: 29380, dtype: float64
