# Forecasting WS (Wind Speed - Velocidade do Vento) - Diário
## Preparação dos Dados

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

from sklearn.linear_model import Ridge
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error
import io

def load_daily_csv(path_or_str: str) -> pd.Series:
    """Load a daily wind-speed CSV and return it as a date-indexed float Series.

    Args:
        path_or_str: Either raw CSV text (detected by the presence of a
            newline character in a string) or anything ``pd.read_csv``
            accepts directly, such as a file path.

    Returns:
        The ``ws`` column (renamed from ``ws2m_mean`` when present) cast to
        float, indexed by the parsed ``date`` column and sorted by date.
    """
    # A string containing a newline is treated as inline CSV content.
    is_inline_csv = isinstance(path_or_str, str) and "\n" in path_or_str
    source = io.StringIO(path_or_str) if is_inline_csv else path_or_str

    # Read dates as text so the explicit YYYYMMDD format below applies.
    frame = pd.read_csv(source, dtype={"date": str})
    frame["date"] = pd.to_datetime(frame["date"], format="%Y%m%d")

    wind = (
        frame.rename(columns={"ws2m_mean": "ws"})
        .set_index("date")
        .sort_index()["ws"]
    )
    return wind.astype(float)

# Number of lagged observations used as features; also the default `lags`
# argument of make_supervised and forecast_next_n_ridge.
LAGS = 7
H = 3  # forecast horizon: number of trailing observations held out as test

# Load the full series, then hold out the last H observations for evaluation.
s = load_daily_csv("ws_days.csv")
train_s, test_s = s.iloc[:-H], s.iloc[-H:]

print(f"Tamanho treino: {len(train_s)} | Tamanho teste: {len(test_s)}")

## Ridge com Lags + Weekday + Rolling Mean

In [None]:
def make_supervised(series: pd.Series, lags=LAGS) -> pd.DataFrame:
    """Turn a series into a supervised-learning table of target plus features.

    Args:
        series: Series whose index exposes ``.weekday`` (e.g. a DatetimeIndex).
        lags: Number of lag columns (``lag_1`` .. ``lag_<lags>``) to build.

    Returns:
        DataFrame with columns ``y``, ``lag_1..lag_<lags>``, ``rm7`` and
        ``weekday``; rows containing any missing value are dropped.
    """
    columns = {"y": series}
    columns.update({f"lag_{k}": series.shift(k) for k in range(1, lags + 1)})
    # Rolling mean over up to 7 previous values; shift(1) excludes the
    # current row's own value from its feature.
    columns["rm7"] = series.shift(1).rolling(7, min_periods=1).mean()

    table = pd.DataFrame(columns)
    table["weekday"] = series.index.weekday
    return table.dropna()

def fit_ridge_with_cv(df_sup: pd.DataFrame):
    """Fit a Ridge pipeline, choosing ``alpha`` by time-series cross-validation.

    Args:
        df_sup: Supervised table with a target column ``y`` and a ``weekday``
            column; every other column is treated as a numeric feature.

    Returns:
        A tuple ``(best_estimator, feature_cols)``: the best pipeline found by
        the grid search and the list of feature column names (all but ``y``).
    """
    cat_cols = ["weekday"]
    feature_cols = [name for name in df_sup.columns if name != "y"]
    num_cols = [name for name in feature_cols if name not in cat_cols]

    # Numeric features are standardized; weekday is one-hot encoded, and
    # categories unseen during fit are ignored at transform time.
    scaler_step = ("num", StandardScaler(), num_cols)
    onehot_step = (
        "cat",
        OneHotEncoder(handle_unknown="ignore", drop=None, sparse_output=False),
        cat_cols,
    )
    preprocess = ColumnTransformer(
        transformers=[scaler_step, onehot_step],
        remainder="drop",
    )
    model = Pipeline([("pre", preprocess), ("ridge", Ridge())])

    # Two folds by default, three once len(df_sup) // 10 reaches 3.
    n_folds = min(3, max(2, len(df_sup) // 10))
    search = GridSearchCV(
        model,
        {"ridge__alpha": [0.1, 0.3, 1.0, 3.0, 10.0]},
        cv=TimeSeriesSplit(n_splits=n_folds),
        scoring="neg_mean_squared_error",
    )
    search.fit(df_sup.drop(columns=["y"]), df_sup["y"].values)
    return search.best_estimator_, feature_cols

def forecast_next_n_ridge(series: pd.Series, n: int, pipe, lags=LAGS) -> pd.Series:
    """Forecast the next ``n`` daily steps recursively with a fitted pipeline.

    Each prediction is appended to the history so that later steps use
    earlier predictions as their lag and rolling-mean inputs.

    Args:
        series: Observed series; its last index value anchors the forecast dates.
        n: Number of consecutive days to forecast.
        pipe: Fitted estimator exposing ``predict`` on a one-row DataFrame with
            columns ``lag_1..lag_<lags>``, ``rm7`` and ``weekday``.
        lags: Number of lag features the estimator expects.

    Returns:
        Series named ``Ridge_pred`` indexed by the ``n`` days following the
        last index value of ``series``.
    """
    first_day = series.index[-1] + pd.Timedelta(days=1)
    future_index = pd.date_range(first_day, periods=n, freq="D")

    observed = list(series.values)            # full history, feeds rm7
    recent = list(series.iloc[-lags:].values)  # last `lags` values, feeds lag_k
    forecasts = []

    for day in future_index:
        row = {}
        for k in range(1, lags + 1):
            row[f"lag_{k}"] = recent[-k]
        row["rm7"] = pd.Series(observed).tail(7).mean()
        row["weekday"] = day.weekday()

        estimate = float(pipe.predict(pd.DataFrame([row], index=[day]))[0])

        # Feed the prediction back in so the next step can build on it.
        forecasts.append(estimate)
        observed.append(estimate)
        recent = (recent + [estimate])[-lags:]

    return pd.Series(forecasts, index=future_index, name="Ridge_pred")

# Build the supervised table on the full series, then keep only rows dated
# before the first test observation. Features come from shifted (past) values,
# so the rows kept for training never contain a test-period value.
df_sup_full = make_supervised(s, lags=LAGS)
split_date = test_s.index[0]
df_sup_train = df_sup_full[df_sup_full.index < split_date]

# Tune/fit on the training rows and forecast H steps past the end of train_s.
best_pipe, feat_cols = fit_ridge_with_cv(df_sup_train)
pred_3 = forecast_next_n_ridge(train_s, H, best_pipe, lags=LAGS)

# Score the forecast against the held-out values, aligning predictions to the
# test index by date.
mae = mean_absolute_error(test_s.values, pred_3.reindex(test_s.index).values)
rmse = np.sqrt(mean_squared_error(test_s.values, pred_3.reindex(test_s.index).values))
print(f"Ridge (lags+weekday+rm7) -> MAE: {mae:.2f} | RMSE: {rmse:.2f}")
print(pd.DataFrame({"Real": test_s, "Ridge_pred": pred_3}).round(2))

# Plot the full series, with the held-out actuals and the forecast overlaid.
plt.figure(figsize=(11,4))
plt.plot(s.index, s.values, marker="o", label="Histórico")
plt.plot(test_s.index, test_s.values, color="black", marker="x", label="Real (3d)")
plt.plot(pred_3.index, pred_3.values, marker="o", label="Ridge (3d)")
plt.title("Previsão diária (3 dias) — Ridge regularizado com lags")
plt.xlabel("Data"); plt.ylabel("WS (m/s)")
plt.legend(); plt.tight_layout(); plt.show()

## Função Final de Previsão

In [None]:
def prever_3_dias_ridge_csv(csv_input, lags=7) -> str:
    """Fit the Ridge model on a whole CSV series and forecast the next 3 days.

    Args:
        csv_input: CSV source passed straight to ``load_daily_csv``.
        lags: Number of lag features used for both training and forecasting.

    Returns:
        The three predictions formatted with two decimals and joined by
        commas, e.g. ``"1.23,4.56,7.89"``.
    """
    history = load_daily_csv(csv_input)
    model, _ = fit_ridge_with_cv(make_supervised(history, lags=lags))
    forecast = forecast_next_n_ridge(history, 3, model, lags=lags)
    return ",".join([format(value, ".2f") for value in forecast.values])

# Train on the complete file (no hold-out) and print the 3-day forecast as a
# comma-separated string.
out = prever_3_dias_ridge_csv("ws_days.csv")
print(out)