# Create different baseline models for total solar production

## Init, Load

In [None]:
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

from src.config import DATA_RAW_DIR, DATA_RAW_FILENAME

In [None]:
# Read the raw semicolon-separated export; the "timestamp" column becomes the index.
raw_path = os.path.join(DATA_RAW_DIR, DATA_RAW_FILENAME)
df_raw = pd.read_csv(
    raw_path,
    sep=";",
    index_col=["timestamp"],
    date_format="%Y-%m-%d %H:%M",
)

# Where "sol_prod" is missing, rebuild it as sol_prod_1 + sol_prod_2.
# The mask is computed once, before any value is overwritten.
missing_total = df_raw["sol_prod"].isna()
df_raw.loc[missing_total, "sol_prod"] = (
    df_raw.loc[missing_total, "sol_prod_1"]
    + df_raw.loc[missing_total, "sol_prod_2"]
)

display(df_raw.head(5))

## Date Feature Regressor

In [None]:
from sklearn.base import BaseEstimator, RegressorMixin


class DateFeatureRegressor(BaseEstimator, RegressorMixin):
    """Baseline regressor that predicts daily production from the day of year.

    ``fit`` sums the target per calendar day (day of year + year) and then
    averages those daily totals over all years, which yields one mean daily
    total per day of year. ``predict`` looks that mean up for every date it
    is given.

    Attributes
    ----------
    means_ : pandas.DataFrame
        Set by ``fit``. Indexed by ``day_of_year`` with a single ``target``
        column holding the mean daily total.
    """

    def _make_date_features(self, X):
        """Derive ``year`` and ``day_of_year`` from the first column of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame whose first column can be converted by ``pd.to_datetime``.

        Returns
        -------
        pandas.DataFrame
            Columns ``year`` and ``day_of_year``, one row per row of ``X``.
        """
        ts = pd.to_datetime(X.iloc[:, 0])

        df_feat = pd.DataFrame(
            {
                "year": ts.dt.year,
                "day_of_year": ts.dt.dayofyear,
            }
        )

        return df_feat

    def fit(self, X, y):
        """Learn the mean daily total for each day of the year.

        Parameters
        ----------
        X : array-like or pandas.DataFrame
            Timestamps; only the first column is used.
        y : array-like or pandas.Series
            Target values, matched to ``X`` by position (not by index).

        Returns
        -------
        DateFeatureRegressor
            The fitted estimator (``self``).

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        X = pd.DataFrame(X) if not isinstance(X, pd.DataFrame) else X.copy()
        y = pd.Series(y) if not isinstance(y, pd.Series) else y.copy()

        # Features and target are joined purely by position below, so a
        # length mismatch would silently pad with NaN and distort the means.
        if len(X) != len(y):
            raise ValueError(
                "X and y must have the same number of samples, "
                f"got {len(X)} and {len(y)}."
            )

        y.name = "target"

        X_feat = self._make_date_features(X)
        # Drop both indexes so the concat aligns row i of X with row i of y.
        X = pd.concat([X_feat.reset_index(drop=True), y.reset_index(drop=True)], axis=1)

        # Sum up each day's production.
        # NOTE(review): sum() skips NaN, so a day with only missing readings
        # contributes a total of 0 to the mean below - confirm this is intended.
        X_sum = X.groupby(["day_of_year", "year"]).sum().reset_index()

        # Average the daily totals across years for each day of the year.
        self.means_ = (
            X_sum.sort_values(by=["day_of_year", "year"])
            .drop("year", axis=1)
            .groupby("day_of_year")
            .mean()
        )

        return self

    def predict(self, X):
        """Return the learned mean daily total for each timestamp in ``X``.

        Parameters
        ----------
        X : array-like or pandas.DataFrame
            Timestamps; only the first column is used.

        Returns
        -------
        numpy.ndarray
            One value per row of ``X``, in input order. Days of the year that
            were not present during ``fit`` yield NaN.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        # Local import keeps this class self-contained within its cell.
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, "means_")

        X = pd.DataFrame(X) if not isinstance(X, pd.DataFrame) else X.copy()
        X_feat = self._make_date_features(X)

        # "day_of_year" is a column on the left and the index name on the
        # right; a left merge keeps the input row order.
        X = pd.merge(X_feat, self.means_, on="day_of_year", how="left")
        return X["target"].values



## Predict

In [None]:
# Fit the day-of-year baseline on the full raw series:
# timestamps come from the index, the target is the "sol_prod" column.
reg = DateFeatureRegressor()

reg.fit(X=df_raw.index, y=df_raw["sol_prod"])

In [None]:
# Collapse the raw readings to one total per calendar date.
df_sum = (
    df_raw.reset_index()
    .assign(date=lambda frame: frame["timestamp"].dt.date)
    .groupby("date")[["sol_prod"]]
    .sum()
)

# One baseline prediction per date, in the same order as the index.
df_sum["pred"] = reg.predict(df_sum.index)
display(df_sum)

## Plot

In [None]:
def _draw_panel(ax, title, ylabel, series):
    """Draw each (column, label) pair of df_sum against "date" on one axes."""
    ax.set_title(title)
    for column, label in series:
        sns.lineplot(
            data=df_sum,
            x="date",
            y=column,
            label=label,
            linewidth=0.5,
            alpha=0.8,
            ax=ax,
        )
    # Set after plotting so it replaces the label seaborn derives from the column.
    ax.set_ylabel(ylabel)


# Derived columns: signed error, and pred/actual ratio where actual > 0 (NaN elsewhere).
df_sum["diff"] = df_sum["pred"] - df_sum["sol_prod"]
df_sum["ratio"] = (df_sum["pred"] / df_sum["sol_prod"]).where(df_sum["sol_prod"] > 0)

fig, axes = plt.subplots(nrows=3, sharex=True, figsize=(12, 6))

_draw_panel(
    axes[0],
    "Actual Solar Production vs Predicted",
    "Solar Production (Wh)",
    [("sol_prod", "Actual"), ("pred", "Predicted")],
)
_draw_panel(
    axes[1],
    "Difference between Predicted and Actual",
    "Difference (Wh)",
    [("diff", "Difference")],
)
_draw_panel(
    axes[2],
    "Ratio of Predicted to Actual",
    "Ratio",
    [("ratio", "Ratio")],
)

# Fixed limits, then a log axis for the ratio panel.
axes[2].set_ylim(0.1, 100)
axes[2].set_yscale("log")

plt.tight_layout()
plt.show()

## Evaluate

In [None]:
from src.model_evaluation.regressor_evaluation import evaluate_regressor
from datetime import datetime

# Score the baseline's daily predictions against the daily totals.
# NOTE(review): special_features is "reg-from-rollmean", but this model uses
# day-of-year means - confirm the tag is intended.
results = evaluate_regressor(
    regressor=reg,
    y_true=df_sum["sol_prod"],
    y_pred=df_sum["pred"],
    timestamp=datetime.now(),
    model_purpose="baseline",
    special_features="reg-from-rollmean",
)

print("Evaluation Results:")
# Report the metrics in a fixed order, skipping any that were not returned.
for key in ("MAE", "MSE", "RMSE", "MAPE", "MedAE", "R2", "ExplainedVar"):
    if key not in results:
        continue
    print(f"  {key}: {results.get(key):.4f}")