In [1]:
import itertools
from pathlib import Path
from pprint import pprint

import numpy as np
import pandas as pd

from openoa.types.plant_v2 import PlantDataV3, PlantMetaData

In [2]:
# Directory holding the La Haute Borne example CSV files.
# NOTE(review): this is an absolute, user-specific path; the notebook only runs
# on a machine where this exact directory exists.
fpath = Path("/Users/rhammond/Documents/GitHub/OpenOA/examples/data/la_haute_borne")

# File name for each data category. Meter and curtailment data are both read
# from the same file (`plant_data.csv`).
fn_scada = "la-haute-borne-data-2014-2015.csv"
fn_meter = "plant_data.csv"
fn_curtail = "plant_data.csv"
fn_reanalysis = "merra2_la_haute_borne.csv"
fn_asset = "la-haute-borne_asset_table.csv"

# Read every file into its own DataFrame, in the order listed above.
scada, meter, curtail, reanalysis, asset = (
    pd.read_csv(fpath / file_name)
    for file_name in (fn_scada, fn_meter, fn_curtail, fn_reanalysis, fn_asset)
)

In [3]:
# Maps, per data category, each field name used in this notebook's metadata to
# the column name found in the corresponding CSV file. Fields shown only as
# comments have no source column assigned yet.
column_mapping = {
    "scada": {
        "time": "Date_time",
        "id": "Wind_turbine_name",
        "power": "P_avg",
        "windspeed": "Ws_avg",
        "wind_direction": "Wa_avg",
        # "status": not mapped yet
        "pitch": "Ba_avg",
        "temperature": "Ot_avg",
        "frequency": "10T",
    },
    "meter": {
        "time": "time_utc",
        # "power": not mapped yet
        "energy": "net_energy_kwh",
    },
    "curtail": {
        "time": "time_utc",
        "curtailment": "curtailment_kwh",
        "availability": "availability_kwh",
        "net_energy": "net_energy_kwh",
        "frequency": "10T",
    },
    "reanalysis": {
        "time": "datetime",
        "windspeed": "windspeed_ms",
        "windspeed_u": "u_ms",
        "windspeed_v": "v_ms",
        "temperature": "temperature_K",
        "rho": "rho_kgm",
        "frequency": "10T",
        # "product": "merra2" (currently disabled)
    },
    "asset": {
        "id": "id",
        "latitude": "latitude",
        "longitude": "longitude",
        "rated_power": "rated_power_kw",
        # "type": not mapped yet
    },
}

In [4]:
# Build a PlantMetaData object from the column mapping dictionary.
# NOTE(review): `plant_meta` is not referenced again in the visible cells; the
# PlantDataV3 call that follows is given `column_mapping` directly -- confirm
# whether this object is still needed.
plant_meta = PlantMetaData.from_dict(column_mapping)

In [5]:
# Raw tables, keyed by the keyword argument each one is passed as.
plant_tables = {
    "scada": scada,
    "meter": meter,
    "curtail": curtail,
    "reanalysis": reanalysis,
    "asset": asset,
}

# Create the plant object for the "MonteCarloAEP" analysis type, handing over
# the column mapping as its metadata.
# NOTE(review): the recorded output of this cell ends in a ValueError about
# columns of the wrong type, so this call does not currently succeed.
plant = PlantDataV3(
    metadata=column_mapping,
    **plant_tables,
    analysis_type="MonteCarloAEP",
)

{'dtype': {'asset': ['id', 'latitude', 'longitude', 'rated_power_kw', 'type'],
           'curtail': [],
           'meter': ['power'],
           'scada': ['status'],
           'status': ['time', 'id', 'status_id', 'status_code', 'status_text'],
           'tower': ['time', 'id']},
 'missing': {'asset': ['id', 'type', 'rated_power_kw', 'latitude', 'longitude'],
             'curtail': [],
             'meter': ['power'],
             'scada': ['status'],
             'status': ['time',
                        'id',
                        'status_id',
                        'status_code',
                        'status_text'],
             'tower': ['time', 'id']}}
{'dtype': {'asset': ['id', 'latitude', 'longitude', 'rated_power_kw', 'type'],
           'curtail': [],
           'meter': ['power'],
           'scada': ['status'],
           'status': ['time', 'id', 'status_id', 'status_code', 'status_text'],
           'tower': ['time', 'id']},
 'missing': {'asset': set(),
        

ValueError: `scada` data columns were of the wrong type: ['status']
`meter` data columns were of the wrong type: ['power']
`tower` data columns were of the wrong type: ['time', 'id']
`status` data columns were of the wrong type: ['time', 'id', 'status_id', 'status_code', 'status_text']
`asset` data columns were of the wrong type: ['id', 'latitude', 'longitude', 'rated_power_kw', 'type']

In [None]:
# Accepted frequency codes shared by several entries below.
# NOTE(review): presumably pandas offset aliases (month start, day, hour,
# minute) -- confirm against the code that consumes "freq".
FREQ_MS_D_H_T = ("MS", "D", "H", "T")
FREQ_D_H_T = ("D", "H", "T")

# For each analysis type: the data categories it needs and, per category, the
# required columns, the accepted frequencies (where given), and any
# conditionally required columns.
ANALYSIS_REQUIREMENTS = {
    "MonteCarloAEP": dict(
        meter=dict(columns=["energy"], freq=FREQ_MS_D_H_T),
        curtail=dict(columns=["availability", "curtailment"], freq=FREQ_MS_D_H_T),
        reanalysis=dict(
            columns=["windspeed", "rho"],
            conditional_columns=dict(
                reg_temperature=["temperature"],
                reg_winddirection=["windspeed_u", "windspeed_v"],
            ),
        ),
    ),
    "TurbineLongTermGrossEnergy": dict(
        # TODO: wtur_W_avg vs energy_kwh ?
        scada=dict(columns=["id", "windspeed", "power"], freq=FREQ_D_H_T),
        reanalysis=dict(columns=["windspeed", "wind_direction", "rho"]),
    ),
    "ElectricalLosses": dict(
        scada=dict(columns=["energy"], freq=FREQ_D_H_T),
        meter=dict(columns=["energy"], freq=FREQ_MS_D_H_T),
    ),
}

In [None]:
# Data categories to report on.
categories = ("scada", "meter", "tower", "curtail", "reanalysis", "asset")

# Consider every analysis type defined in ANALYSIS_REQUIREMENTS.
analysis_types = [*ANALYSIS_REQUIREMENTS]
requirements = {key: ANALYSIS_REQUIREMENTS[key] for key in analysis_types}

# For each category, concatenate the "columns" lists of all analysis types.
# Categories an analysis does not mention contribute nothing, and a column
# required by several analyses appears once per analysis (duplicates are kept).
# Needs `itertools`, which is imported in the notebook's import cell.
column_requirements = {
    cat: list(
        itertools.chain.from_iterable(
            r.get(cat, {}).get("columns", []) for r in requirements.values()
        )
    )
    for cat in categories
}
column_requirements

In [None]:
# `requirements` is keyed by analysis type, not by data category, so asking it
# for "scada" directly always yields the empty default. Show the "scada"
# requirements of each analysis type instead ({} where an analysis has none).
{name: spec.get("scada", {}) for name, spec in requirements.items()}

In [None]:
# Sample validation result: per category, the columns reported with a wrong
# dtype and the columns reported as missing.
error_dict = {
    'dtype': {
        'asset': ['id', 'latitude', 'longitude', 'rated_power_kw', 'type'],
        'curtail': [],
        'meter': ['power'],
        'scada': ['status'],
    },
    'missing': {
        'asset': ['longitude', 'type', 'latitude', 'id', 'rated_power_kw'],
        'curtail': [],
        'meter': ['power'],
        'scada': ['status'],
    },
}

categories = ("scada", "meter", "tower", "curtail", "reanalysis", "asset")
analysis_types = [*ANALYSIS_REQUIREMENTS]
requirements = {key: ANALYSIS_REQUIREMENTS[key] for key in analysis_types}

# Unique set of required columns per category across all analysis types.
# Needs `itertools`, which is imported in the notebook's import cell.
column_requirements = {
    cat: set(
        itertools.chain.from_iterable(
            r.get(cat, {}).get("columns", []) for r in requirements.values()
        )
    )
    for cat in categories
}

# Keep only the missing columns that some analysis actually requires.
# `categories` contains entries ("tower", "reanalysis") that have no key in
# error_dict["missing"]; treat those as having no missing columns rather than
# raising a KeyError.
error_dict["missing"] = {
    key: values.intersection(error_dict["missing"].get(key, []))
    for key, values in column_requirements.items()
}
error_dict["missing"]