In [None]:
import pandas as pd
import os
from pathlib import Path
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import json
import predict_energy_behavior.data.read
import predict_energy_behavior.data.constants
import predict_energy_behavior.features
from predict_energy_behavior.models.production.solar_output_regression import SolarOutputRegression, Parameter
from predict_energy_behavior.visualize.inspect_solar_output import inspect_solar_output_predictions
from predict_energy_behavior.data.constants import product_type_to_name
import numpy as np
import polars as pl
from scipy.optimize import curve_fit
from sklearn.metrics import mean_absolute_error

%load_ext autoreload
%autoreload 2

In [None]:
# Project directory layout, derived from the current working directory.
# NOTE(review): assumes the notebook is executed from a direct sub-folder of the
# repository root (the root is taken to be the parent of the cwd) — confirm.
PATH_REPO_ROOT = Path.cwd().resolve().parent
PATH_DATA = PATH_REPO_ROOT.joinpath("data")
PATH_DATA_RAW = PATH_DATA.joinpath("raw")
PATH_DATA_PROCESSED = PATH_DATA.joinpath("processed")
PATH_DATA_RAW

In [None]:
# Load the pre-computed training features, drop every row that has a missing
# value, and keep only the rows where is_consumption == 0
# (presumably the production rows — TODO confirm).
df_features = pd.read_parquet(PATH_DATA_PROCESSED / "train" / "make_features" / "df_features.parquet").dropna()
df_features = df_features.loc[df_features["is_consumption"] == 0]

# County id -> display name. JSON object keys are strings, so they are cast to
# int to match the "county" column. The encoding is given explicitly so that
# decoding does not depend on the platform's default locale.
with open(PATH_DATA_RAW / "county_id_to_name_map.json", "r", encoding="utf-8") as f:
    county_id_to_name = {int(k): v.lower().capitalize() for k, v in json.load(f).items()}

# Human-readable labels next to the numeric ids.
df_features["product_name"] = df_features["product_type"].map(product_type_to_name)
df_features["county_name"] = df_features["county"].map(county_id_to_name)

# Cell output: every available column name.
list(df_features.columns)

### Model target(I): target as a function of shortwave radiation

In [None]:
# Summary statistics of the shortwave radiation column.
df_features.loc[:, "shortwave_radiation"].describe()

In [None]:
# Summary statistics of the target restricted to rows with exactly zero
# shortwave radiation (row mask and column selected in a single .loc call).
no_radiation = df_features["shortwave_radiation"] == 0.0
df_features.loc[no_radiation, "target"].describe()

In [None]:
def get_shortwave_df(df_features: pd.DataFrame, ws: int = 50):
    """Rolling mean of capacity-normalised target, ordered by shortwave radiation.

    Rows are indexed and sorted by ``shortwave_radiation``; only rows with no
    historical rain, no historical snowfall and strictly positive radiation are
    kept. ``target`` is divided by ``installed_capacity`` and smoothed with a
    rolling mean over ``ws`` consecutive rows (in radiation order).

    Args:
        df_features: Frame with the columns ``shortwave_radiation``,
            ``rain_historical``, ``snowfall_historical``, ``target`` and
            ``installed_capacity``. It is not modified.
        ws: Rolling window size, in rows.

    Returns:
        Series named ``target`` with the incomplete leading windows dropped,
        indexed by ``shortwave_radiation / 1000``
        (presumably a W/m^2 -> kW/m^2 conversion — TODO confirm units).
    """
    by_radiation = df_features.set_index("shortwave_radiation").sort_index()

    is_dry = (by_radiation["rain_historical"] == 0) & (by_radiation["snowfall_historical"] == 0)
    has_radiation = by_radiation.index > 0.0
    selected = by_radiation.loc[is_dry & has_radiation]

    # Normalise by installed capacity; keep the series name "target".
    normalised = (selected["target"] / selected["installed_capacity"]).rename("target")

    smoothed = normalised.rolling(ws).mean().dropna()
    smoothed.index = smoothed.index / 1000
    return smoothed

def explore_target_from_shortwave_rad(df_features: pd.DataFrame, ws: int = 50):
    """Plot the smoothed, capacity-normalised target against shortwave radiation.

    Args:
        df_features: Feature frame forwarded to ``get_shortwave_df``.
        ws: Rolling window size forwarded to ``get_shortwave_df``.

    Returns:
        Tuple ``(fig, series)``: a plotly figure holding a single scatter trace
        (x = series index, y = series values) and the plotted series itself.
    """
    smoothed = get_shortwave_df(df_features, ws)
    trace = go.Scatter(x=smoothed.index, y=smoothed)
    return go.Figure(data=[trace]), smoothed

# Smooth over a 10-row window; `df` (the smoothed series) is kept for the fit below.
fig, df = explore_target_from_shortwave_rad(df_features, ws=10)
fig

In [None]:
def f(I, k_1, thr_1, k_2, thr_2, k_3):
    """Continuous three-segment piecewise-linear curve through the origin.

    Segments (slopes ``k_1``, ``k_2``, ``k_3``; breakpoints ``thr_1``, ``thr_2``):

    * ``I <= thr_1``          -> ``k_1 * I``
    * ``thr_1 < I < thr_2``   -> ``y(thr_1) + k_2 * (I - thr_1)``
    * ``I >= thr_2``          -> ``y(thr_2) + k_3 * (I - thr_2)``

    The offset of the last segment is the value the middle segment reaches at
    ``thr_2``, i.e. ``thr_1*k_1 + (thr_2 - thr_1)*k_2``. Using ``thr_2*k_2``
    there instead would make the curve jump by ``thr_1*k_2`` at ``thr_2``.

    Args:
        I: Scalar or array-like of x values.
        k_1, k_2, k_3: Slopes of the first, middle and last segment.
        thr_1, thr_2: Breakpoints; the formula assumes ``thr_1 <= thr_2``.

    Returns:
        ``numpy.ndarray`` of curve values with the broadcast shape of ``I``.
    """
    y_at_thr_1 = thr_1 * k_1
    y_at_thr_2 = y_at_thr_1 + (thr_2 - thr_1) * k_2
    return np.where(
        I <= thr_1,
        I * k_1,
        np.where(
            I >= thr_2,
            (I - thr_2) * k_3 + y_at_thr_2,
            (I - thr_1) * k_2 + y_at_thr_1,
        ),
    )
# Fit f to the smoothed series. Parameter order follows f's signature:
# (k_1, thr_1, k_2, thr_2, k_3); every parameter is constrained to [0, 1].
initial_guess = (0.3, 0.1, 0.75, 0.6, 0.4)
lower_bounds = (0., 0., 0., 0.0, 0.0)
upper_bounds = (1, 1, 1, 1, 1)
popt, pcov = curve_fit(
    f,
    xdata=df.index,
    ydata=df,
    p0=initial_guess,
    bounds=(lower_bounds, upper_bounds),
)
print(popt)

# Dense grid on which the fitted curve is drawn.
x = np.linspace(0.0, 0.8, 100000)

# Overlay: smoothed data first, fitted curve second. The final add_trace call is
# the cell's last expression, so its return value is what the cell renders.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df))
fig.add_trace(go.Scatter(x=x, y=f(x, *popt)))

### First-order model

In [None]:
# Baseline: estimator constructed without explicit parameters, reading the
# historical-weather columns. Keys are the names the model uses for its inputs,
# values are the df_features columns that supply them.
estimator = SolarOutputRegression(
    regressors=dict(
        temperature="temperature_historical",
        dewpoint="dewpoint_historical",
        shortwave_radiation="shortwave_radiation",
        snowfall="snowfall_historical",
        rain="rain_historical",
        windspeed_10m="windspeed_10m_historical",
        installed_capacity="installed_capacity",
    )
)

estimator.fit(df_features, df_features["target"])

# Final objective value reported by the optimiser, then the fitted weights.
print(f"Score: {estimator.loss.__name__}={estimator.optim_result.fun}")
print(estimator.weights)

In [None]:
# Model input name -> df_features column, using historical weather columns.
hist_regressors = dict(
    temperature="temperature_historical",
    dewpoint="dewpoint_historical",
    shortwave_radiation="shortwave_radiation",
    snowfall="snowfall_historical",
    rain="rain_historical",
    windspeed_10m="windspeed_10m_historical",
    installed_capacity="installed_capacity",
)

# Same model inputs, fed from the forecast columns instead.
# NOTE(review): installed_capacity points at "installed_capacity_forecast" here,
# while the forecast-fit cell further down uses plain "installed_capacity" —
# confirm which column is intended.
fut_regressors = dict(
    temperature="temperature_forecast",
    dewpoint="dewpoint_forecast",
    shortwave_radiation="surface_solar_radiation_downwards_forecast",
    snowfall="snowfall_forecast",
    rain="rain_forecast",
    windspeed_10m="windspeed_10m_forecast",
    installed_capacity="installed_capacity_forecast",
)

# Starting values and box bounds handed to the estimator, one
# (name, value, bounds) row per Parameter.
_parameters = [
    Parameter(name=name, value=value, bounds=bounds)
    for name, value, bounds in (
        ("C_area", 4.0, (0.0, 10.0)),
        ("C_fog", 0.14, (0.0, 1.0)),
        ("C_dew", 0.65, (0.0, 1.0)),
        ("C_rain", 0.1, (0.1, 1.0)),
        ("C_rad_tot_k1", 0.3, (0.0, 1.0)),
        ("C_rad_tot_k2", 0.74, (0.0, 1.0)),
        ("C_rad_tot_k3", 0.4, (0.0, 1.0)),
        ("C_rad_tot_b1", 0.097, (0.0, 0.2)),
        ("C_rad_tot_b2", 0.6, (0.21, 0.7)),
        ("C_snow_thr", 0.3, (0.2, 0.5)),
        ("C_snow_const", 0.0014, (0.0, 1.0)),
    )
]

# Build the estimator from the explicit starting parameters, reading the
# historical-weather columns. The mapping comes from hist_regressors (defined
# earlier in this cell) instead of a second hand-written copy of the same dict.
# A copy is passed so hist_regressors itself stays unchanged no matter how the
# estimator stores or later replaces its regressor mapping.
estimator = SolarOutputRegression.from_params(
    params=_parameters,
    regressors=dict(hist_regressors),
)

# Fit on the historical-weather inputs and report the optimiser's final
# objective value together with the fitted weights.
estimator.fit(df_features, df_features["target"])
print(f"Score (hist): {estimator.loss.__name__}={estimator.optim_result.fun}")
print(estimator.weights)

# Re-point the already fitted estimator at the forecast columns and score its
# predictions against the same target.
estimator.set_regressors(fut_regressors)
preds_f = estimator.predict(df_features)
# sklearn's signature is (y_true, y_pred). The label names the metric that is
# actually computed here, which is always mean_absolute_error regardless of
# what estimator.loss is.
mae_fut = mean_absolute_error(df_features["target"], preds_f)
print(f"Score (fut): {mean_absolute_error.__name__}={mae_fut}")

In [None]:
# Fresh set of starting values and box bounds for the forecast-based fit, one
# (name, value, bounds) row per Parameter.
_parameters = [
    Parameter(name=name, value=value, bounds=bounds)
    for name, value, bounds in (
        ("C_area", 4.0, (0.0, 10.0)),
        ("C_fog", 0.14, (0.0, 1.0)),
        ("C_dew", 0.65, (0.0, 1.0)),
        ("C_rain", 0.1, (0.1, 1.0)),
        ("C_rad_tot_k1", 0.3, (0.0, 1.0)),
        ("C_rad_tot_k2", 0.74, (0.0, 1.0)),
        ("C_rad_tot_k3", 0.4, (0.0, 1.0)),
        ("C_rad_tot_b1", 0.097, (0.0, 0.2)),
        ("C_rad_tot_b2", 0.6, (0.21, 0.7)),
        ("C_snow_thr", 0.3, (0.2, 0.5)),
        ("C_snow_const", 0.0014, (0.0, 1.0)),
    )
]

# Same model, but fitted directly on the forecast-weather columns.
# NOTE(review): installed_capacity is read from the plain "installed_capacity"
# column here, whereas fut_regressors (defined in an earlier cell) points at
# "installed_capacity_forecast" — confirm which one is intended.
estimator = SolarOutputRegression.from_params(
    params=_parameters,
    regressors=dict(
        temperature="temperature_forecast",
        dewpoint="dewpoint_forecast",
        shortwave_radiation="surface_solar_radiation_downwards_forecast",
        snowfall="snowfall_forecast",
        rain="rain_forecast",
        windspeed_10m="windspeed_10m_forecast",
        installed_capacity="installed_capacity",
    ),
)

estimator.fit(df_features, df_features["target"])

# Final objective value reported by the optimiser, then the fitted weights.
print(f"Score (fut): {estimator.loss.__name__}={estimator.optim_result.fun}")
print(estimator.weights)