# Predict the power production using sunshine duration

## Init, Load

In [None]:
import os
import pandas as pd

from src.config import DATA_RAW_DIR, POWER_OPENMETEO_WEATHER_FILENAME
from src.transformation import prepare_aggregate_openmeteo_data

In [None]:
# Location of the raw power/weather export inside the raw-data directory.
raw_csv_path = os.path.join(DATA_RAW_DIR, POWER_OPENMETEO_WEATHER_FILENAME)

# Only these columns are read from the file; every other column is skipped.
raw_columns = [
    "installation",
    "timestamp",
    "sol_prod",
    "cloud_cover",
    "snow_depth",
    "sunshine_duration",
    "is_day",
    "direct_radiation",
    "weather_description",
]

# The export is semicolon-separated.
df_raw = pd.read_csv(raw_csv_path, sep=";", usecols=raw_columns)

In [None]:
# Aggregate the raw rows with the project helper. The following cells read a
# "sol_prod" column from the result and select a "date" column from
# df_doy.reset_index().
# NOTE(review): presumably this yields one row per day -- confirm in
# src.transformation.
df_doy = prepare_aggregate_openmeteo_data(df_raw)

## Split

In [None]:
# Target is the "sol_prod" column; the features are every other column.
# reset_index() turns the index of df_doy into a regular column so that it can
# be selected by name later on.
y = df_doy["sol_prod"]
X = df_doy.reset_index().drop(["sol_prod"], axis=1)

# Hold out the last 365 rows (the last year, assuming one row per day).
#
# Positional slicing (.iloc) is used on purpose:
#   * label-based .loc slices include their end label, so the previous
#     .loc[:split_point] / .loc[split_point + 1:] pair left only 364 rows in
#     the holdout;
#   * X has a fresh RangeIndex after reset_index() while y keeps the original
#     index of df_doy, so integer labels are not guaranteed to address the
#     same rows in both objects. Row order is identical, so positions are.
split_point = len(X) - 365
X_train, X_test = X.iloc[:split_point], X.iloc[split_point:]
y_train, y_test = y.iloc[:split_point], y.iloc[split_point:]

## Training

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from src.transformers import DayOfYearTransformer

### Pipeline

In [None]:
# Columns treated as numeric features. The one-hot weather columns are included
# because each of them holds the share of one weather type over the whole day,
# so they can be scaled like any other number. The target is never a feature.
# NOTE(review): sum_cols, mean_cols and ohe are not defined in any cell shown
# above -- confirm they are in scope before this cell runs.
candidate_cols = sum_cols + mean_cols + list(ohe.columns)
num_cols = [col for col in candidate_cols if col != "sol_prod"]

# Numeric features are min-max scaled (scikit-learn default feature range).
num_pipe = Pipeline([("scale", MinMaxScaler())])

# The "date" column is encoded twice by the project's DayOfYearTransformer:
# once constructed with "sin" and once with "cos".
doy_sin_pipe = Pipeline([("trig", DayOfYearTransformer("sin"))])
doy_cos_pipe = Pipeline([("trig", DayOfYearTransformer("cos"))])

# Route each group of columns to its pipeline; any column that is not listed
# here is dropped from the model input.
column_steps = [
    ("num", num_pipe, num_cols),
    ("sin", doy_sin_pipe, ["date"]),
    ("cos", doy_cos_pipe, ["date"]),
]
preprocessor = ColumnTransformer(column_steps, remainder="drop")

# Emit DataFrames instead of bare arrays from transform().
preprocessor.set_output(transform="pandas")

### Fit

In [None]:
# Ridge regression with regularisation strength alpha=1.0.
reg = Ridge(alpha=1.0)

# Chain preprocessing and regressor into one estimator and fit it on the
# training split.
model = Pipeline([("prep", preprocessor), ("reg", reg)])
model.fit(X=X_train, y=y_train)

## Predict

In [None]:
# Predict the target for the held-out rows with the fitted pipeline
# (preprocessing is applied to X_test as part of predict()).
y_pred = model.predict(X_test)

## Evaluation

In [None]:
from src.model_evaluation.regressor_evaluation import evaluate_regressor
from datetime import datetime

# Score the holdout predictions with the project's evaluation helper.
# The result is used below like a mapping of metric name -> value.
results = evaluate_regressor(
    regressor=reg,
    y_true=y_test,
    y_pred=y_pred,
    timestamp=datetime.now(),
    model_purpose="predict",
    special_features="mult-weather,feateng-doytrig",
)

# Print the known metrics in a fixed order, skipping any the helper did not
# return.
print("Evaluation Results:")
for metric in ("MAE", "MSE", "RMSE", "MAPE", "MedAE", "R2", "ExplainedVar"):
    if metric not in results:
        continue
    print(f"  {metric}: {results.get(metric):.4f}")

## Save Model And Results

In [None]:
import pickle
import json
import os

from src.config import MODELS_DIR

# Every artefact of this run is written to <MODELS_DIR>/<model_name>/ and
# shares the file stem <model_name>.
model_name = results["model_name"]

folder = os.path.join(MODELS_DIR, model_name)
filename = os.path.join(folder, model_name)
os.makedirs(folder, exist_ok=True)

# Binary artefacts: the regressor and the preprocessor are pickled separately.
# Pickle stores classes by reference, so loading the preprocessor requires
# src.transformers.DayOfYearTransformer to be importable. The plain-text and
# JSON dumps below describe the same configuration without that dependency.
# NOTE(review): the previous comment claimed that only config and results are
# saved because of pickling issues with custom transformers, yet both pickles
# are written -- confirm which behaviour is intended.
with open(f"{filename}.model.pkl", "wb") as f:
    pickle.dump(reg, f)

with open(f"{filename}.pipeline.pkl", "wb") as f:
    pickle.dump(preprocessor, f)

# Human-readable artefacts. The encoding is given explicitly so the files do
# not depend on the platform's default text encoding.
with open(f"{filename}.model.txt", "w", encoding="utf-8") as f:
    f.write(str(reg))

with open(f"{filename}.model_params.json", "w", encoding="utf-8") as f:
    json.dump(reg.get_params(), f, indent=2)

with open(f"{filename}.pipeline_params.txt", "w", encoding="utf-8") as f:
    f.write(str(preprocessor.get_params()))

# NOTE(review): this requires every value in results to be JSON-serialisable;
# confirm evaluate_regressor does not return the datetime passed as
# `timestamp` unconverted.
with open(f"{filename}.results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)
