In [1]:
import os
from copy import deepcopy
from pathlib import Path
from pprint import pprint

import numpy as np
import pandas as pd

from openoa.types.plant_v2 import PlantDataV3, PlantMetaData

In [2]:
# Folder holding the La Haute Borne example CSV files. Set the OPENOA_DATA_DIR
# environment variable to run this notebook on another machine; when it is not
# set, the original author's local path is used so existing runs are unchanged.
fpath = Path(
    os.environ.get(
        "OPENOA_DATA_DIR",
        "/Users/rhammond/Documents/GitHub/OpenOA/examples/data/la_haute_borne",
    )
)
if not fpath.is_dir():
    # Fail early with an actionable message instead of a bare read_csv error.
    raise FileNotFoundError(
        f"Example data folder not found: {fpath}. "
        "Set the OPENOA_DATA_DIR environment variable to its location."
    )

# Input file locations. The meter and curtailment tables are both read from
# `plant_data.csv`.
fn_scada = fpath / "la-haute-borne-data-2014-2015.csv"
fn_meter = fpath / "plant_data.csv"
fn_curtail = fpath / "plant_data.csv"
fn_reanalysis_merra2 = fpath / "merra2_la_haute_borne.csv"
fn_reanalysis_era5 = fpath / "era5_wind_la_haute_borne.csv"
fn_asset = fpath / "la-haute-borne_asset_table.csv"

# Load every table up front so the plant object can also be built directly
# from in-memory DataFrames later on.
scada = pd.read_csv(fn_scada)
meter = pd.read_csv(fn_meter)
curtail = pd.read_csv(fn_curtail)
reanalysis_era5 = pd.read_csv(fn_reanalysis_era5)
reanalysis_merra2 = pd.read_csv(fn_reanalysis_merra2)
asset = pd.read_csv(fn_asset)

## TODO
 - [x] read data from spark, csv, pandas
 - [x] read metadata from json, yaml, dict, and pre-loaded object
 - [ ] automatically calculate wind direction from u/v windspeed
 - [x] call the PlanetOS API if an API key is provided
   - [ ] validate this works
 - [x] support flags indicating the input source (CSV, PlanetOS, data object, etc.)
 - datetime column frequency checks
    - [ ] check against the provided metadata
    - [ ] validate against the analysis requirements
 - [x] expand metadata to contain plant-level identifiers (latitude, longitude)
 - check against the IEC 61400-25 namings and (likely) adopt that naming convention for the plant data
   - [ ] update internal column naming convention to the IEC 61400-25 schema (Eric/Lewis)
   - [ ] map the input column names to the internal names, and provide a method to return them under their original input names
 - [ ] get the 0 notebook working, or at least as a means to understand what will be required for refactoring

In [3]:
# Metadata describing each input table passed to `PlantDataV3`. Keys are the
# field names used in this metadata; values are the column names expected in
# the corresponding input table. `frequency` entries are frequency strings
# (presumably pandas offset aliases -- TODO confirm).
plant_meta = {
    "scada": {
        "time": "Date_time",
        "id": "Wind_turbine_name",
        "power": "P_avg",
        "windspeed": "Ws_avg",
        # "wtur_wspd": "Ws_avg",  # TODO: adopt the -25 naming
        "wind_direction": "Wa_avg",
        # "status": "?",
        "pitch": "Ba_avg",
        "temperature": "Ot_avg",
        "frequency": "10T",
    },
    "meter": {
        "time": "time_utc",
        "energy": "net_energy_kwh",
    },
    "curtail": {
        "time": "time_utc",
        "curtailment": "curtailment_kwh",
        "availability": "availability_kwh",
        "net_energy": "net_energy_kwh",
        "frequency": "10T",
    },
    "reanalysis": {
        # NOTE(review): the recorded validation error for analysis_type="all"
        # lists `rho` as missing for era5, while the loaded frame has a
        # `dens_100m` column -- confirm whether `rho` should be mapped here.
        "era5": {
            "time": "datetime",
            "windspeed": "ws_100m",
            "windspeed_u": "u_100",
            "windspeed_v": "v_100",
            "frequency": "H",
        },
        # NOTE(review): the same recorded error reports `windspeed_ms` and
        # `rho_kgm` as missing from the merra2 data -- verify these names
        # against the CSV header.
        "merra2": {
            "time": "datetime",
            "windspeed": "windspeed_ms",
            "windspeed_u": "u_ms",
            "windspeed_v": "v_ms",
            "temperature": "temperature_K",
            "rho": "rho_kgm",
            "frequency": "H",
        },
    },
    "asset": {
        "id": "id",
        "latitude": "latitude",
        "longitude": "longitude",
        "rated_power": "rated_power_kw",
        # "type": "?",
    },
}

# Disabled snippet that writes `plant_meta` to "plant_meta.yml" and
# "plant_meta.json", the two file names the notebook later loads metadata from.
# import yaml
# import json

# with open("plant_meta.yml", "w") as f:
#     yaml.safe_dump(plant_meta, f, default_flow_style=False)

# with open("plant_meta.json", "w") as f:
#     json.dump(plant_meta, f, indent=4)

In [4]:
# Build the metadata object three ways: from the in-memory dict and from the
# JSON and YAML files in the current working directory.
meta_from_dict = PlantMetaData.from_dict(plant_meta)
meta_from_json = PlantMetaData.from_json("plant_meta.json")
meta_from_yaml = PlantMetaData.from_yaml("plant_meta.yml")

# Cell output: whether all three objects compare equal, and the resulting type.
(
    meta_from_dict == meta_from_json and meta_from_json == meta_from_yaml,
    type(meta_from_dict),
)

(True, openoa.types.plant_v2.PlantMetaData)

In [5]:
def _plant_from_files(metadata_file):
    """Build a "MonteCarloAEP" `PlantDataV3` from the CSV file paths.

    Args:
        metadata_file: Path of the metadata file passed as `metadata`.

    Returns:
        The `PlantDataV3` constructed from `fn_scada`, `fn_meter`, `fn_curtail`,
        both reanalysis file paths and `fn_asset`.
    """
    return PlantDataV3(
        metadata=metadata_file,
        scada=fn_scada,
        meter=fn_meter,
        curtail=fn_curtail,
        # preferred, and enable API pulling
        reanalysis={"merra2": fn_reanalysis_merra2, "era5": fn_reanalysis_era5},
        asset=fn_asset,
        analysis_type="MonteCarloAEP",
    )


# Variant 1: already-loaded DataFrames plus the pre-built metadata object.
plant_from_data = PlantDataV3(
    metadata=meta_from_dict,
    scada=scada,
    meter=meter,
    curtail=curtail,
    # preferred, and enable API pulling
    reanalysis={"merra2": reanalysis_merra2, "era5": reanalysis_era5},
    asset=asset,
    analysis_type="MonteCarloAEP",
)

# Variants 2 and 3: file paths for every table, with YAML and JSON metadata.
plant_from_file1 = _plant_from_files("plant_meta.yml")
plant_from_file2 = _plant_from_files("plant_meta.json")

type(plant_from_data)

openoa.types.plant_v2.PlantDataV3

In [6]:
# Rebuild the plant with analysis_type="all". The recorded run of this cell
# ends in a ValueError listing missing and wrongly-typed columns, so the error
# is caught and printed: the report stays visible and "Restart & Run All" can
# continue. When construction fails, `plant_from_data` keeps the object built
# earlier in the notebook, exactly as it did when the exception propagated.
try:
    plant_from_data = PlantDataV3(
        metadata=meta_from_dict,
        scada=scada,
        meter=meter,
        curtail=curtail,
        # preferred, and enable API pulling
        reanalysis={"merra2": reanalysis_merra2, "era5": reanalysis_era5},
        asset=asset,
        analysis_type="all",
    )
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: `scada` data is missing the following columns: ['status']
`meter` data is missing the following columns: ['power']
`tower` data is missing the following columns: ['time', 'id']
`asset` data is missing the following columns: ['rated_power_kw', 'id', 'latitude', 'type', 'longitude']
`status` data is missing the following columns: ['time', 'id', 'status_id', 'status_code', 'status_text']
`reanalysis-merra2` data is missing the following columns: ['rho_kgm', 'wind_direction', 'windspeed_ms']
`reanalysis-era5` data is missing the following columns: ['rho', 'wind_direction']
`scada` data columns were of the wrong type: ['status']
`meter` data columns were of the wrong type: ['power']
`tower` data columns were of the wrong type: ['time', 'id']
`asset` data columns were of the wrong type: ['id', 'latitude', 'longitude', 'rated_power_kw', 'type']
`status` data columns were of the wrong type: ['time', 'id', 'status_id', 'status_code', 'status_text']
`reanalysis-merra2` data columns were of the wrong type: ['windspeed_ms', 'wind_direction', 'rho_kgm']
`reanalysis-era5` data columns were of the wrong type: ['wind_direction', 'rho']

In [7]:
# Work on an independent deep copy so that changing `analysis_type` on `plant`
# in the following cells leaves `plant_from_data` untouched.
plant = deepcopy(plant_from_data)

In [9]:
# Switch the copy to analysis_type="all" and re-run validation. The recorded
# run of this cell ends in a ValueError, so the error is caught and printed to
# keep the report visible without stopping "Restart & Run All". Both statements
# are inside the `try` because SOURCE does not show which of them raises.
try:
    plant.analysis_type = "all"
    plant.validate()
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: `scada` data is missing the following columns: ['status']
`meter` data is missing the following columns: ['power']
`tower` data is missing the following columns: ['time', 'id']
`asset` data is missing the following columns: ['rated_power_kw', 'id', 'latitude', 'type', 'longitude']
`status` data is missing the following columns: ['time', 'id', 'status_id', 'status_code', 'status_text']
`reanalysis-merra2` data is missing the following columns: ['rho_kgm', 'wind_direction', 'windspeed_ms']
`reanalysis-era5` data is missing the following columns: ['rho', 'wind_direction']
`scada` data columns were of the wrong type: ['status']
`meter` data columns were of the wrong type: ['power']
`tower` data columns were of the wrong type: ['time', 'id']
`asset` data columns were of the wrong type: ['id', 'latitude', 'longitude', 'rated_power_kw', 'type']
`status` data columns were of the wrong type: ['time', 'id', 'status_id', 'status_code', 'status_text']
`reanalysis` data columns were of the wrong type: ['era5', 'merra2']
`reanalysis-merra2` data columns were of the wrong type: ['windspeed_ms', 'wind_direction', 'rho_kgm']
`reanalysis-era5` data columns were of the wrong type: ['wind_direction', 'rho']

In [10]:
# Print the column dtypes of the "era5" reanalysis table held by `plant`, then
# show its first rows as the cell output.
print(plant.reanalysis["era5"].dtypes)
plant.reanalysis["era5"].head()

Unnamed: 0                  int64
datetime      datetime64[ns, UTC]
u_100                     float64
v_100                     float64
t_2m                      float64
surf_pres                 float64
ws_100m                   float64
dens_100m                 float64
dtype: object


Unnamed: 0.1,Unnamed: 0,datetime,u_100,v_100,t_2m,surf_pres,ws_100m,dens_100m
0,0,1999-01-01 00:00:00+00:00,-4.456234,4.999991,277.496492,97020.784132,6.697606,1.216004
1,1,1999-01-01 01:00:00+00:00,-5.006666,4.540059,277.330357,97002.61796,6.758612,1.216525
2,2,1999-01-01 02:00:00+00:00,-5.101353,4.032729,276.968939,97035.490081,6.502823,1.218571
3,3,1999-01-01 03:00:00+00:00,-5.787051,2.111372,276.721193,97021.649188,6.160183,1.219518
4,4,1999-01-01 04:00:00+00:00,-6.349969,1.693571,276.29128,97005.213128,6.571932,1.221259


In [11]:
# Re-run validation with the analysis type set to "TurbineLongTermGrossEnergy".
# No output is recorded for this cell.
plant.analysis_type = "TurbineLongTermGrossEnergy"
plant.validate()

In [12]:
# Re-run validation with the analysis type set to "ElectricalLosses".
# No output is recorded for this cell.
plant.analysis_type = "ElectricalLosses"
plant.validate()

In [None]:
# Frequency codes shared by several entries below (presumably pandas offset
# aliases -- TODO confirm). Tuples are immutable, so sharing them is safe.
_PLANT_LEVEL_FREQS = ("MS", "D", "H", "T")  # used by the meter and curtail entries
_SCADA_FREQS = ("D", "H", "T")  # used by the scada entries

# Per-analysis data requirements: analysis name -> data set name -> the
# required "columns", plus, where given, the accepted "freq" codes and the
# "conditional_columns" listed under an option name.
ANALYSIS_REQUIREMENTS = {
    "MonteCarloAEP": {
        "meter": {"columns": ["energy"], "freq": _PLANT_LEVEL_FREQS},
        "curtail": {
            "columns": ["availability", "curtailment"],
            "freq": _PLANT_LEVEL_FREQS,
        },
        "reanalysis": {
            "columns": ["windspeed", "rho"],
            "conditional_columns": {
                "reg_temperature": ["temperature"],
                "reg_winddirection": ["windspeed_u", "windspeed_v"],
            },
        },
    },
    "TurbineLongTermGrossEnergy": {
        "scada": {
            # TODO: wtur_W_avg vs energy_kwh ?
            "columns": ["id", "windspeed", "power"],
            "freq": _SCADA_FREQS,
        },
        "reanalysis": {"columns": ["windspeed", "wind_direction", "rho"]},
    },
    "ElectricalLosses": {
        "scada": {"columns": ["energy"], "freq": _SCADA_FREQS},
        "meter": {"columns": ["energy"], "freq": _PLANT_LEVEL_FREQS},
    },
}