In [1]:
from modulos import ann, eda, ml_func
import keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

2025-08-26 22:24:58.178347: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# ML - Bacia do Rio Negro 2

Este notebook usa a metodologia do "ML - Bacia do Rio Negro 1", acrescentando como entrada as cotas das estações a montante.

### Estações

1. Rio Negro

2. Rio Branco

3. Rio Tiquié + Uaupés

In [2]:
# Folder that holds the station CSV files read by eda.data_processing below.
path = "Cotas extraidas"

# First and last date of the period passed to eda.data_processing below.
date_start, date_end = "1985-01-01", "2023-12-31"

In [3]:
# Daily-record CSV files of the three stations grouped under `negro`.
# The last one (14990000) is the station later used as the reference/target
# in eda.cross_corr and train_test_model.
arquivos = [
    'dados_diarios_estacao_14480002.csv',
    'dados_diarios_estacao_14840000.csv',
    'dados_diarios_estacao_14990000.csv'
]

# Load the listed files from `path` for the configured period. The rendered
# result has one column per station code and is indexed by date ("Data").
negro = eda.data_processing(path, date_start, date_end, arquivos)
# Bare last expression so the notebook renders the frame.
negro

Unnamed: 0_level_0,14480002,14840000,14990000
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1985-01-01,409.0,669.0,2169.0
1985-01-02,411.0,677.0,2178.0
1985-01-03,413.0,681.0,2186.0
1985-01-04,416.0,685.0,2194.0
1985-01-05,427.0,691.0,2201.0
...,...,...,...
2023-12-27,325.0,413.0,1807.0
2023-12-28,329.0,421.0,1822.0
2023-12-29,331.0,427.0,1834.0
2023-12-30,333.0,436.0,1849.0


In [4]:
# Daily-record CSV files of the three stations grouped under `branco`.
# The last one (14790000) is the station later used as the reference in
# eda.cross_corr.
# NOTE(review): this reassigns the `arquivos` name used for the `negro` files above.
arquivos = [
    'dados_diarios_estacao_14620000.csv',
    'dados_diarios_estacao_14710000.csv',
    'dados_diarios_estacao_14790000.csv'
]

# Load the listed files from `path` for the configured period. The rendered
# result has one column per station code and is indexed by date ("Data").
branco = eda.data_processing(path, date_start, date_end, arquivos)
# Bare last expression so the notebook renders the frame.
branco

Unnamed: 0_level_0,14620000,14710000,14790000
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1985-01-01,285.0,281.0,509.0
1985-01-02,309.0,285.0,503.0
1985-01-03,310.0,290.0,498.0
1985-01-04,260.0,293.0,504.0
1985-01-05,259.0,280.0,510.0
...,...,...,...
2023-12-27,83.0,126.0,340.0
2023-12-28,84.0,131.0,339.0
2023-12-29,78.0,137.0,345.0
2023-12-30,76.0,135.0,354.0


### Correlação cruzada
Aplicar lags às séries das estações a montante para verificar qual defasagem apresenta a maior correlação com a cota atual da estação de referência.

In [5]:
# Cross-correlate the other `negro` stations against station 14990000; the call
# prints the best lag and its correlation per station. The returned value is
# later passed as `shifts` to train_test_model, which indexes it by column
# position — presumably a sequence of lags in column order; verify in eda.cross_corr.
lags_negro = eda.cross_corr(negro, '14990000')

Estação 14480002: Melhor lag = 12; Correlação = 0.7487
Estação 14840000: Melhor lag = 4; Correlação = 0.8751


In [6]:
# Cross-correlate the other `branco` stations against station 14790000; the call
# prints the best lag and its correlation per station.
# NOTE(review): the return value is presumably a sequence of lags in column
# order, like lags_negro — verify in eda.cross_corr.
lags_branco = eda.cross_corr(branco, '14790000')

Estação 14620000: Melhor lag = 8; Correlação = 0.8376
Estação 14710000: Melhor lag = 6; Correlação = 0.8908


### Aprendizado de Máquina

##### Separação dos conjuntos

In [7]:
date_split = '2015-01-01'

# Chronological hold-out: rows dated before date_split go to the training set,
# rows dated on or after it go to the test set.
negro_treino = negro.loc[negro.index < date_split]
negro_teste = negro.loc[negro.index >= date_split]

branco_treino = branco.loc[branco.index < date_split]
branco_teste = branco.loc[branco.index >= date_split]

##### Funções

In [None]:
def train_test_model(models, name_models, df, name_column, split, janela, steps, shifts,
                     epochs=100, patience=10, validation_split=0.1):
    """Train each model on lagged, windowed station data and plot the last forecast step.

    The target column is kept unshifted and every other column of ``df`` is
    shifted by its lag. Rows left incomplete by the shifts are dropped, each
    column is windowed with ``ml_func.create_multi_step_dataset``, the samples
    are split into train/test, min-max scaled (scalers fitted on the training
    part only) and every model is fitted, evaluated with R² on the test part
    and plotted against the observed values of forecast step ``steps``.

    Args:
        models: Sequence of objects exposing Keras-style ``fit``/``predict``.
        name_models: Display names, one per model, in the same order.
        df: DataFrame with one column per station.
        name_column: Name of the target column in ``df``.
        split: Row position that separates training data from test data.
        janela: Window size forwarded to ``ml_func.create_multi_step_dataset``.
        steps: Number of forecast steps forwarded to the same helper; the
            last step (``steps``) is the one reported and plotted.
        shifts: Lags indexed by column position in ``df.columns``; the entry
            at the target column's position is never read.
        epochs: Maximum number of training epochs per model.
        patience: Early-stopping patience, in epochs.
        validation_split: Fraction of the training samples held out by
            ``fit`` for validation. When falsy, early stopping monitors the
            training loss instead of ``val_loss``.

    Returns:
        None. Metrics are printed and the figure is shown.

    Raises:
        ValueError: If ``models`` and ``name_models`` differ in length, or if
            ``split - janela`` does not leave both a training and a test part.
    """
    models = list(models)
    name_models = list(name_models)
    if len(models) != len(name_models):
        raise ValueError("models and name_models must have the same length")

    # Lagged feature table: target column untouched, every other column shifted.
    lagged = {}
    for pos, col in enumerate(df.columns):
        if col == name_column:
            lagged[col] = df[col]
        else:
            lagged[f"{col} - lag {shifts[pos]}"] = df[col].shift(shifts[pos])

    # Rows made incomplete by the shifts are discarded (new frame, no in-place mutation).
    # NOTE(review): `split` is used below as if it were a row position in this
    # reduced frame, although callers compute it on `df`; the effective boundary
    # is therefore later by the number of rows dropped here — confirm intended.
    x_shifts = pd.DataFrame(lagged, index=df.index).dropna()

    # Window the target first, then append the windows of each lagged column.
    X, y = ml_func.create_multi_step_dataset(x_shifts[name_column].values, janela, steps)
    feature_blocks = [X]
    for col in x_shifts.columns:
        if col == name_column:
            continue
        x_col, _ = ml_func.create_multi_step_dataset(x_shifts[col].values, janela, steps)
        feature_blocks.append(x_col)
    # Single concatenation instead of growing the array inside the loop.
    X = np.concatenate(feature_blocks, axis=1)

    # Train/test separation.
    cut = split - janela
    if not 0 < cut < len(X):
        raise ValueError(
            f"split - janela = {cut} must fall strictly inside the {len(X)} available samples"
        )
    x_train, x_test = X[:cut], X[cut:]
    y_train, y_test = y[:cut], y[cut:]

    # Scalers are fitted on the training part only, so no test information leaks in.
    x_scale = MinMaxScaler()
    y_scale = MinMaxScaler()

    x_train = x_scale.fit_transform(x_train)
    x_test = x_scale.transform(x_test)

    y_train = y_scale.fit_transform(y_train)

    # `val_loss` only exists when fit() is given validation data.
    monitor = 'val_loss' if validation_split else 'loss'

    predicts = []
    for model, name in zip(models, name_models):
        # One fresh callback per model keeps early-stopping state independent.
        early_stop = keras.callbacks.EarlyStopping(
            monitor=monitor,
            patience=patience,
            restore_best_weights=True
        )

        # Training
        model.fit(
            x_train,
            y_train,
            epochs=epochs,
            verbose=0,
            validation_split=validation_split or 0.0,
            callbacks=[early_stop]
        )

        # Prediction, mapped back to the original target scale
        pred = model.predict(x_test, verbose=0)
        pred_inv = y_scale.inverse_transform(pred)
        predicts.append(pred_inv)

        # R² metric
        print(f"\nModelo: {name}")
        print(f"Média R²: {r2_score(y_test, pred_inv):.4f}")
        print(f"R² (Dia {steps}): {r2_score(y_test[:, steps-1], pred_inv[:, steps-1]):.4f}")

    # Plot of observed vs. predicted values for the last forecast step
    max_year, min_year = eda._max_min_year_(df[name_column])

    last_step = steps - 1
    # Dates come from the frame the windows were built on (`x_shifts`), not from
    # an outer-scope variable. Assumes test sample j's step-`steps` target sits
    # at row split + steps - 1 + j of x_shifts — TODO confirm against
    # ml_func.create_multi_step_dataset.
    first_row = split + last_step
    step_index = x_shifts.index[first_row:first_row + len(y_test)]

    step_day = y_test[:, last_step].reshape(len(y_test),)
    step_std = np.std(step_day)

    fig, ax = plt.subplots(figsize=(16, 6))
    ax.plot(step_index, step_day, linestyle='-', label='Observado')
    ax.fill_between(step_index, step_day + step_std, step_day - step_std, alpha=0.2, label='Desvio Padrão')

    for name, pred_inv in zip(name_models, predicts):
        ax.plot(step_index, pred_inv[:, last_step].reshape(len(y_test),), linestyle='--', label=name)

    ax.axhline(y=np.mean(df[name_column].loc[max_year]), linestyle='--', color='red', label='Média Máxima')
    ax.axhline(y=np.mean(df[name_column].loc[min_year]), linestyle='--', color='black', label='Média Mínima')

    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.18), ncol=2)
    ax.set_title(f'Estação {name_column} - Dia {steps}')
    ax.set_xlabel('Tempo')
    ax.set_ylabel('Cota (cm)')
    ax.grid(True)
    fig.tight_layout()
    plt.show()

In [19]:
# NOTE(review): 'a' and 'b' are placeholder arguments, not models/names.
# train_test_model calls .fit() on every element of `models`, so this call
# fails once it reaches the training loop. Replace them with the real model
# list and the matching display names before running the notebook end to end.
train_test_model('a', 'b', negro, '14990000', negro_treino.shape[0], 15, 15, lags_negro)

            14480002 - lag 12  14840000 - lag 4  14990000
Data                                                     
1985-01-13              409.0             744.0    2273.0
1985-01-14              411.0             755.0    2284.0
1985-01-15              413.0             765.0    2296.0
1985-01-16              416.0             774.0    2308.0
1985-01-17              427.0             783.0    2318.0
...                       ...               ...       ...
2023-12-27              253.0             374.0    1807.0
2023-12-28              259.0             388.0    1822.0
2023-12-29              265.0             396.0    1834.0
2023-12-30              272.0             405.0    1849.0
2023-12-31              279.0             413.0    1864.0

[14232 rows x 3 columns]
[[2273. 2284. 2296. ...  854.  859.  863.]
 [2284. 2296. 2308. ...  859.  863.  866.]
 [2296. 2308. 2318. ...  863.  866.  868.]
 ...
 [1448. 1457. 1465. ...  318.  322.  322.]
 [1457. 1465. 1472. ...  322.  322.  322.]


##### MLP

##### LSTM