In [None]:
import pandas as pd
import requests
from datetime import date, timedelta, datetime
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
def fetch_precip_hourly_openmeteo(lat: float, lon: float, start: date, end: date) -> pd.DataFrame:
    """
    Download hourly precipitation from the Open-Meteo historical archive API.

    Parameters
    ----------
    lat, lon : float
        Coordinates sent as the ``latitude`` / ``longitude`` query parameters.
    start, end : date
        First and last day of the requested period (sent as ISO dates).

    Returns
    -------
    pd.DataFrame
        One row per hour with the columns returned under the ``"hourly"`` key
        of the response (``time`` parsed to datetime, ``precipitation``).
        When the response carries no hourly data, an empty frame with the
        columns ``['time', 'precipitation']`` is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # Let requests build and encode the query string instead of formatting it by hand.
    response = requests.get(
        "https://archive-api.open-meteo.com/v1/archive",
        params={
            "latitude": lat,
            "longitude": lon,
            "start_date": start.isoformat(),
            "end_date": end.isoformat(),
            "hourly": "precipitation",
            "timezone": "auto",
        },
        timeout=60,
    )
    response.raise_for_status()

    # `or {}` also covers a payload where "hourly" is present but null.
    hourly = response.json().get("hourly") or {}
    df = pd.DataFrame(hourly)

    if df.empty:
        print("⚠ Sem dados retornados para:", start, end)
        # Typed columns keep the empty result consistent with the non-empty
        # path, so `.dt` accessors and concatenation behave the same way.
        return pd.DataFrame(
            {
                "time": pd.Series(dtype="datetime64[ns]"),
                "precipitation": pd.Series(dtype="float64"),
            }
        )

    df["time"] = pd.to_datetime(df["time"])
    return df


In [None]:
def fetch_10_years_hourly(lat: float, lon: float, years: int = 10) -> pd.DataFrame:
    """
    Download hourly precipitation for roughly the last ``years`` years.

    The period starts ``365 * years`` days before today (leap days are not
    compensated) and is requested in consecutive slices of at most 366 days,
    one HTTP request per slice.

    Parameters
    ----------
    lat, lon : float
        Coordinates forwarded to ``fetch_precip_hourly_openmeteo``.
    years : int, default 10
        Length of the history to download.

    Returns
    -------
    pd.DataFrame
        All non-empty slices concatenated with a fresh integer index. If no
        slice returned data, an empty frame with the columns
        ``['time', 'precipitation']``.
    """
    today = date.today()
    current_start = today - timedelta(days=365 * years)

    chunks = []
    # `<=` so that a slice boundary landing exactly on today is still fetched.
    while current_start <= today:
        current_end = min(current_start + timedelta(days=365), today)

        print(f"Baixando {current_start} → {current_end}")
        chunk = fetch_precip_hourly_openmeteo(lat, lon, current_start, current_end)
        # Empty slices carry no rows and could only disturb the dtypes of the
        # concatenated result, so they are left out.
        if not chunk.empty:
            chunks.append(chunk)

        # Both ends are inclusive, so the next slice starts on the following day.
        current_start = current_end + timedelta(days=1)

    if not chunks:
        # pd.concat raises on an empty list; return a well-formed empty frame.
        return pd.DataFrame(
            {
                "time": pd.Series(dtype="datetime64[ns]"),
                "precipitation": pd.Series(dtype="float64"),
            }
        )

    return pd.concat(chunks, ignore_index=True)


In [None]:
def hourly_to_daily(df_hourly):
    """
    Aggregate hourly precipitation into daily totals (mm/day).

    Parameters
    ----------
    df_hourly : pd.DataFrame
        Must contain a ``time`` column (datetime, or anything
        ``pd.to_datetime`` can parse) and a ``precipitation`` column.
        The frame is not modified.

    Returns
    -------
    pd.DataFrame
        Columns ``['ds', 'y']``: one row per calendar day between the first
        and last observed day, ``ds`` as midnight timestamps and ``y`` as the
        daily sum. Days without any hourly row get ``0.0``. An empty input
        yields an empty frame with the same two columns.
    """
    empty_result = pd.DataFrame(
        {"ds": pd.Series(dtype="datetime64[ns]"), "y": pd.Series(dtype="float64")}
    )
    if df_hourly.empty:
        return empty_result

    timestamps = pd.to_datetime(df_hourly["time"])
    if timestamps.dt.tz is not None:
        # Group by the local wall-clock day, as `.dt.date` would.
        timestamps = timestamps.dt.tz_localize(None)

    # Work on a derived frame so the caller's DataFrame gains no helper column.
    daily = (
        df_hourly[["precipitation"]]
        .assign(ds=timestamps.dt.normalize())
        .groupby("ds")["precipitation"]
        .sum()
    )
    if daily.empty:
        # Every timestamp was NaT: there is no day range to build.
        return empty_result

    # Guarantee a continuous daily index; missing days count as zero rain.
    full_range = pd.date_range(daily.index.min(), daily.index.max(), freq="D")
    daily = daily.reindex(full_range, fill_value=0.0)

    return pd.DataFrame({"ds": full_range, "y": daily.to_numpy(dtype=float)})


In [None]:
# Coordinates of the location to download (passed on as latitude / longitude).
LAT = -16.68
LON = -49.25

# Raw hourly history, then its aggregation into one row per day.
df_hourly_10y = fetch_10_years_hourly(LAT, LON, years=10)
df_daily_10y = hourly_to_daily(df_hourly_10y)

# Only the last expression of a cell is rendered, so the preview lives here
# (a bare `.head()` in the middle of the cell is evaluated and discarded).
df_daily_10y.head()


In [None]:
# Line chart of the daily rainfall totals over the downloaded period.
fig, ax = plt.subplots(figsize=(14, 4))
ax.plot(df_daily_10y["ds"], df_daily_10y["y"])
ax.set_title("Chuva diária acumulada - últimos 10 anos")
ax.set_xlabel("Data")
ax.set_ylabel("mm")
ax.grid(alpha=0.3)
plt.show()


In [None]:
# Daily series indexed by date: datetime index, explicit daily frequency,
# and any day introduced by asfreq filled with zero rain.
df = (
    df_daily_10y
    .copy()
    .set_index("ds")
    .pipe(lambda frame: frame.set_axis(pd.to_datetime(frame.index), axis=0))
    .asfreq("D")
    .fillna({"y": 0.0})
)
df.head()


In [None]:
def create_rain_features(df, target_col="y", lags=(1, 2, 3, 7, 14), windows=(3, 7, 14, 30)):
    """
    Build a supervised-learning table for next-day rainfall prediction.

    Parameters
    ----------
    df : pd.DataFrame
        Daily data indexed by date (or by anything ``pd.to_datetime`` can
        parse) containing ``target_col``. The input is not modified.
    target_col : str, default "y"
        Column holding the daily rainfall.
    lags : iterable of int, default (1, 2, 3, 7, 14)
        For each value ``k`` a column ``lag_k`` with the rainfall ``k`` days
        earlier is added.
    windows : iterable of int, default (3, 7, 14, 30)
        For each value ``w`` the columns ``roll_mean_w``, ``roll_sum_w`` and
        ``roll_max_w`` are added, computed over the ``w`` days that end on the
        previous day (the current day is never part of its own window).

    Returns
    -------
    pd.DataFrame
        The original columns followed by the lag columns, the rolling columns,
        ``month`` / ``day_of_year`` / ``day_of_week`` and the target ``y_next``
        (rainfall of the following day). Rows containing any NaN are dropped,
        which removes the warm-up rows at the start and the final day, whose
        ``y_next`` is unknown.
    """
    df = df.copy()
    df.index = pd.to_datetime(df.index)
    df = df.asfreq("D")
    df[target_col] = df[target_col].astype(float).fillna(0.0)

    target = df[target_col]

    # 1. Lags: rainfall observed k days before each row.
    for lag in lags:
        df[f"lag_{lag}"] = target.shift(lag)

    # 2. Rolling statistics over past days only (shifted once, reused by all windows).
    past = target.shift(1)
    for w in windows:
        window = past.rolling(w)
        df[f"roll_mean_{w}"] = window.mean()
        df[f"roll_sum_{w}"] = window.sum()
        df[f"roll_max_{w}"] = window.max()

    # 3. Calendar / seasonality fields.
    df["month"] = df.index.month
    df["day_of_year"] = df.index.dayofyear
    df["day_of_week"] = df.index.dayofweek

    # 4. Target: rainfall of the next day.
    df["y_next"] = target.shift(-1)

    # 5. Drop rows left incomplete by the lags, the windows or the target shift.
    return df.dropna()


In [None]:
# Feature table (lags, rolling statistics, calendar fields and the y_next target).
df_features = create_rain_features(df)
#df_features.head()


In [None]:
# (rows, columns) of the feature table after the NaN rows were dropped.
df_features.shape


In [None]:
#!pip install seaborn
import seaborn as sns

# Pairwise correlation of every column in the feature table (target included),
# drawn on a diverging palette centred at zero.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(df_features.corr(), cmap="coolwarm", center=0, ax=ax)
plt.show()


Preparar X e y (treino/teste temporal)

In [None]:
# Preview of the first rows of the feature table before selecting X and y.
df_features.head()


In [None]:
TARGET = "y_next"

# Model inputs: every column except the target; a "ds" column, if present,
# is a date label rather than a feature and is left out too.
feature_cols = [col for col in df_features.columns if col not in (TARGET, "ds")]

X = df_features.loc[:, feature_cols]
y = df_features.loc[:, TARGET]

X.shape, y.shape


In [None]:
# Chronological split: the first 80% of the rows train the model and the
# remaining, most recent rows are held out for testing (no shuffling).
split_ratio = 0.8
split_idx = int(split_ratio * len(df_features))

X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

len(X_train), len(X_test)


In [None]:
# Show the first and last index value of each partition to confirm that the
# test period comes strictly after the training period.
print("Treino:", X_train.index.min(), "→", X_train.index.max())
print("Teste :", X_test.index.min(),  "→", X_test.index.max())


In [None]:
# Total number of missing values in each feature matrix; both should be 0
# because create_rain_features ends with dropna().
X_train.isna().sum().sum(), X_test.isna().sum().sum()


Treinar LightGBM

In [None]:
!pip install lightgbm


In [None]:
import lightgbm as lgb
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt


In [None]:
# Early stopping must not be driven by the test set: the number of boosting
# rounds would then be tuned on the very rows used to report MAE/RMSE.
# Instead, the most recent 20% of the training window is held out as the
# validation set (temporal order preserved); X_test / y_test stay untouched.
val_start = int(len(X_train) * 0.8)

train_data = lgb.Dataset(X_train.iloc[:val_start], label=y_train.iloc[:val_start])
valid_data = lgb.Dataset(
    X_train.iloc[val_start:],
    label=y_train.iloc[val_start:],
    reference=train_data,  # reuse the binning of the training dataset
)


In [None]:
# LightGBM training configuration, handed unchanged to lgb.train.
params = dict(
    objective="regression",
    metric="rmse",
    learning_rate=0.03,
    num_leaves=63,
    # Row / column subsampling settings.
    feature_fraction=0.8,
    bagging_fraction=0.8,
    bagging_freq=5,
    # L2 and L1 regularisation strengths.
    lambda_l2=2.0,
    lambda_l1=0.5,
    verbose=-1,
)


In [None]:
# Train the booster for up to 2000 rounds, evaluating on both datasets under
# the names "train" and "valid".
lgb_model = lgb.train(
    params,
    train_data,
    valid_sets=[train_data, valid_data],
    valid_names=["train","valid"],
    num_boost_round=2000,

    # Early stopping and periodic logging are configured through callbacks,
    # which replace the older 'early_stopping_rounds' / 'verbose_eval' arguments.
    callbacks=[
        lgb.early_stopping(stopping_rounds=100),
        lgb.log_evaluation(period=100)
    ]
)


In [None]:
# Predicted next-day rainfall for every row of the held-out test period.
y_pred = lgb_model.predict(X_test)


In [None]:
# Test-set error in the unit of the target (mm): mean absolute error and
# root mean squared error (square root of sklearn's MSE).
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("MAE  :", mae)
print("RMSE :", rmse)


In [None]:
# Observed vs. predicted next-day rainfall over the test period.
fig, ax = plt.subplots(figsize=(14, 5))
for series, legend_label in ((y_test.values, "Real"), (y_pred, "Previsto")):
    ax.plot(y_test.index, series, label=legend_label, alpha=0.7)
ax.legend()
ax.set_title("Previsão de chuva - LightGBM (Teste)")
ax.grid(alpha=0.3)
plt.show()


Previsão de chuva para AMANHÃ

In [None]:
def get_tomorrow_features(df_features, feature_cols):
    """
    Return the feature values of the last row of ``df_features`` as a
    ``(1, n_features)`` array, ordered as in ``feature_cols``.

    NOTE(review): when ``df_features`` comes from ``create_rain_features``,
    the final calendar day has already been dropped (its ``y_next`` is NaN),
    so the last row describes the day before the last observation — confirm
    that this is the intended input for a "tomorrow" forecast.
    """
    latest = df_features.iloc[-1]
    return latest.loc[feature_cols].to_numpy().reshape(1, -1)


In [None]:
# Single-row feature matrix taken from the last row of the feature table.
X_tomorrow = get_tomorrow_features(df_features, feature_cols)
X_tomorrow


In [None]:
# predict() returns one value per input row; take the only one (rainfall in mm).
y_pred_tomorrow = lgb_model.predict(X_tomorrow)[0]
y_pred_tomorrow


In [None]:
def interpret_prediction(mm):
    """
    Turn a predicted rainfall amount (mm) into a human-readable message.

    Bands (upper bound exclusive): below 0.1 no rain, below 2 very light,
    below 10 moderate, anything else heavy. The amount is shown with two
    decimals.
    """
    bands = (
        (0.1, "NÃO deve chover amanhã (previsão ≈ {:.2f} mm)."),
        (2, "Chuva MUITO fraca prevista amanhã (≈ {:.2f} mm)."),
        (10, "Chuva moderada prevista amanhã (≈ {:.2f} mm)."),
    )
    for upper_bound, template in bands:
        if mm < upper_bound:
            return template.format(mm)
    # No band matched: heavy rain.
    return "⚠️ Chuva forte prevista amanhã (≈ {:.2f} mm).".format(mm)

# Print the message for the single-step prediction computed earlier.
print(interpret_prediction(y_pred_tomorrow))



Previsão para vários dias (multi-step: 3, 5, 7 dias)

In [None]:
import numpy as np
import pandas as pd

def build_features_for_time(y_series: pd.Series, current_date: pd.Timestamp, feature_cols):
    """
    Assemble the feature vector describing day ``current_date``.

    Parameters
    ----------
    y_series : pd.Series
        Daily rainfall indexed by date; may already contain earlier
        predictions appended after the observed history.
    current_date : pd.Timestamp
        Day whose state is described.
    feature_cols : list of str
        Column names in the order used for training. Supported names: ``y``,
        ``lag_<k>``, ``roll_mean_<w>``, ``roll_sum_<w>``, ``roll_max_<w>``,
        ``month``, ``day_of_year`` and ``day_of_week``.

    Returns
    -------
    np.ndarray
        Array of shape ``(1, len(feature_cols))``.

    Raises
    ------
    ValueError
        If a name in ``feature_cols`` is not one of the supported patterns.
    """
    daily = y_series.asfreq("D")
    # Rolling statistics look only at days before the current one.
    previous_days = daily.shift(1)

    rolling_stats = {"roll_mean_": "mean", "roll_sum_": "sum", "roll_max_": "max"}
    calendar_attrs = {"month": "month", "day_of_year": "dayofyear", "day_of_week": "dayofweek"}

    def feature_value(name):
        """Value of one named feature at current_date."""
        if name == "y":
            return daily.loc[current_date]
        if name.startswith("lag_"):
            offset = pd.Timedelta(days=int(name.split("_")[1]))
            return daily.loc[current_date - offset]
        for prefix, stat in rolling_stats.items():
            if name.startswith(prefix):
                width = int(name.split("_")[2])
                return getattr(previous_days.rolling(width), stat)().loc[current_date]
        if name in calendar_attrs:
            return getattr(current_date, calendar_attrs[name])
        raise ValueError(f"Coluna de feature não tratada: {name}")

    return np.array([feature_value(name) for name in feature_cols]).reshape(1, -1)


In [None]:
def forecast_next_days(df_daily, lgb_model, feature_cols, steps=7):
    """
    Recursive multi-step rainfall forecast.

    At each step the state of day t is turned into features, the model
    predicts the rainfall of day t+1, and that prediction is appended to the
    series so it feeds the lags and rolling windows of the following step.

    Parameters
    ----------
    df_daily : pd.DataFrame
        Daily data with a datetime-like index and a ``y`` column. Not modified.
    lgb_model :
        Object exposing ``predict(X)`` that returns one value per row.
    feature_cols : list of str
        Feature names, in the order the model was trained with.
    steps : int, default 7
        Number of days to forecast.

    Returns
    -------
    pd.DataFrame
        Columns ``['ds', 'y_pred']``, one row per forecast day; empty (but
        with both columns) when ``steps`` is zero or negative.
    """
    # Private copy of the rainfall series, regularised to daily frequency.
    y_ext = df_daily["y"].copy()
    y_ext.index = pd.to_datetime(y_ext.index)
    y_ext = y_ext.asfreq("D").fillna(0.0)

    results = []
    # Start from the last day present in the series.
    current_date = y_ext.index.max()

    for _ in range(steps):
        X_feat = build_features_for_time(y_ext, current_date, feature_cols)

        # A regression output can dip below zero, but rainfall cannot; clamp it
        # so impossible values are neither reported nor fed back as history.
        y_next_pred = max(float(lgb_model.predict(X_feat)[0]), 0.0)

        next_date = current_date + pd.Timedelta(days=1)
        y_ext.loc[next_date] = y_next_pred
        results.append({"ds": next_date, "y_pred": y_next_pred})

        # The forecast day becomes "today" for the next iteration.
        current_date = next_date

    # Explicit columns keep the schema stable even when nothing was forecast.
    return pd.DataFrame(results, columns=["ds", "y_pred"])


In [None]:
# Forecasts for 3, 5 and 7 days ahead, each printed under its own header.
horizon_frames = {}
for position, horizon in enumerate((3, 5, 7)):
    horizon_frames[horizon] = forecast_next_days(df, lgb_model, feature_cols, steps=horizon)
    header = f"Previsão {horizon} dias:"
    # Every header after the first is preceded by a blank line.
    print(header if position == 0 else "\n" + header)
    print(horizon_frames[horizon])

# Keep the individual names used by later cells.
forecast_3, forecast_5, forecast_7 = (horizon_frames[h] for h in (3, 5, 7))


In [None]:
def classify_rain(mm):
    """
    Map a rainfall amount (mm) to an intensity label.

    Bands (upper bound exclusive): below 0.1 "sem chuva", below 2
    "chuva muito fraca", below 10 "chuva moderada", otherwise "chuva forte".
    """
    bands = (
        (0.1, "sem chuva"),
        (2, "chuva muito fraca"),
        (10, "chuva moderada"),
    )
    for upper_bound, label in bands:
        if mm < upper_bound:
            return label
    return "chuva forte"

# Label every forecast day with its intensity class; this adds the column
# "classe" to forecast_7 in place, then displays the frame.
forecast_7["classe"] = forecast_7["y_pred"].apply(classify_rain)
forecast_7


In [None]:
import matplotlib.pyplot as plt

# Bar chart of the predicted rainfall for each of the next seven days.
fig, ax = plt.subplots(figsize=(10, 4))
ax.bar(forecast_7["ds"], forecast_7["y_pred"])
ax.set_title("Previsão de chuva (mm) - próximos 7 dias")
ax.set_xlabel("Data")
ax.set_ylabel("mm previstos")
ax.grid(alpha=0.3)
plt.show()
