# GRU Simples - Morbidade Circulatória (Previsão da Próxima Semana)

Este notebook implementa o modelo GRU simples para previsão da **próxima semana** das taxas de morbidade circulatória em municípios brasileiros.

- **Modelo:** GRU Simples
- **Dataset:** Morbidade Circulatória (`morb_circ`)
- **Alvo:** Próxima semana (previsão de 1 passo)
- **Input:** sequência de 12 semanas (shape: [batch, 12, 1])
- **Arquitetura:** GRU(32, return_sequences=False) → Dense(1)
- **Perda:** MAE
- **Todo o código é modular e importado dos módulos `src/`.**

In [1]:
import sys
import os

# Resolve the project root, taken to be the parent of the current working directory
parent_of_cwd = os.path.join(os.getcwd(), '..')
project_root = os.path.abspath(parent_of_cwd)
print(f"Project root: {project_root}")

# Register the root at the front of the import search path, but only once,
# so that re-running this cell does not pile up duplicate entries
root_is_missing = project_root not in sys.path
if root_is_missing:
    sys.path.insert(0, project_root)
    print(f"Added {project_root} to sys.path")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import tensorflow as tf

# Import custom modules
from src.preprocessing import load_city_data, prepare_data_for_model, filter_city, clean_timeseries
from src.models import build_gru
from src.train import train_model, evaluate_model, save_metrics
from src.utils import plot_forecast, plot_actual_vs_predicted_scatter, plot_training_history

# Seed the NumPy and TensorFlow random generators with the same fixed value
# so that repeated runs of the notebook are comparable
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

print("Imports and setup complete.")

# Ensure the output folder for the morb_circ experiment exists
# (exist_ok=True makes re-running the cell harmless)
results_dir = os.path.join('results', 'morb_circ_gru_simple')
os.makedirs(results_dir, exist_ok=True)

Project root: c:\Users\pedro\OneDrive - Unesp\Documentos\GitHub\cities-models\cities-models
Added c:\Users\pedro\OneDrive - Unesp\Documentos\GitHub\cities-models\cities-models to sys.path
Imports and setup complete.
Imports and setup complete.


In [None]:
# -----------------------------------------------------------------------------
# Experiment settings: every tunable value used by the later cells lives here
# -----------------------------------------------------------------------------

# Municipality under study (code and display name must describe the same city)
CD_MUN_SELECTED = 3550308  # São Paulo
CITY_NAME = "São Paulo"

# Columns: the simple GRU is univariate, so the target is also the only feature
TARGET_COLUMN = 'target'
FEATURE_COLUMNS = ['target']

# Hold-out sizes, in weeks: the last year for testing, the year before it for validation
TEST_SIZE = 52
VAL_SIZE = 52

# Windowing and scaling
SEQUENCE_LENGTH = 12      # weeks of history fed to the model
FORECAST_HORIZON = 1      # one-step-ahead forecast
NORMALIZATION = 'zscore'  # accepted values per the original note: 'zscore' or 'minmax'

# Optimisation schedule
EPOCHS = 100
BATCH_SIZE = 32
PATIENCE = 15

# Network definition
GRU_UNITS = 32
LOSS_FUNCTION = 'mae'

# Input file and output folder (the folder is created if it does not exist yet)
DATA_PATH = '../data/df_base_morb_circ.csv'
RESULTS_DIR = os.path.join('results', 'morb_circ_gru_simple')
os.makedirs(RESULTS_DIR, exist_ok=True)

# Echo the key settings so they are recorded in the cell output
print(
    "--- Configuration ---",
    f"City: {CITY_NAME} ({CD_MUN_SELECTED})",
    f"Target Column: {TARGET_COLUMN}",
    f"Sequence Length: {SEQUENCE_LENGTH}",
    f"Test Size: {TEST_SIZE}, Val Size: {VAL_SIZE}",
    "-" * 20,
    sep="\n",
)

--- Configuration ---
City: São Paulo (3550308)
Target Column: target
Sequence Length: 12
Test Size: 52, Val Size: 52
--------------------
Full dataset loaded: (6344064, 11)
Full dataset loaded: (6344064, 11)


Unnamed: 0,CD_MUN,target,week,PIB,DENS,URB,CO2,CH4,N2O,LAT,LON
0,1100015,0.199872,1999-01-01,3469.14,3.541043,0.000611,550.985905,92.946598,6.657747,-12.883213,-62.39
1,1100015,1.304184,1999-01-08,3469.14,3.541043,0.000611,550.985905,92.946598,6.657747,-12.883213,-62.39
2,1100015,2.495194,1999-01-15,3469.14,3.541043,0.000611,550.985905,92.946598,6.657747,-12.883213,-62.39
3,1100015,3.538533,1999-01-22,3469.14,3.541043,0.000611,550.985905,92.946598,6.657747,-12.883213,-62.39
4,1100015,11.927224,1999-01-29,3469.14,3.541043,0.000611,550.985905,92.946598,6.657747,-12.883213,-62.39


In [None]:
# -----------------------------------------------------------------------------
# Load the dataset referenced by DATA_PATH
# -----------------------------------------------------------------------------
df = load_city_data(DATA_PATH)

# Report the size of what was loaded
dataset_shape = df.shape
print(f"Full dataset loaded: {dataset_shape}")

# Show the first rows as a quick sanity check of the columns
preview = df.head()
display(preview)

In [None]:
# =============================================================================
# Filter and Clean Data for Selected City
# =============================================================================
# The municipality code comes from the configuration cell (CD_MUN_SELECTED).
# It is deliberately NOT reassigned here: a local override would silently
# diverge from CITY_NAME, which labels the printout below as well as the
# plots and output files produced by the other cells.
df_city = filter_city(df, cd_mun=CD_MUN_SELECTED)

# Clean the selected city's series with respect to the target column
df_city = clean_timeseries(df_city, target_column=TARGET_COLUMN)
print(f"Data for {CITY_NAME}: {df_city.shape}")

Data for São Paulo: (1152, 11)


In [None]:
# =============================================================================
# Save Results
# =============================================================================
# Derive the dates of the test predictions here instead of relying on the
# visualization cell having been run first: take the trailing index entries
# of the test frame, one per available prediction.
test_dates = original_test_df.index[-len(y_true_denorm):]

# One row per forecasted step: date, observed value, predicted value
preds_df = pd.DataFrame({
    'date': test_dates,
    'y_true': y_true_denorm,
    'y_pred': y_pred_denorm
})

preds_file = os.path.join(RESULTS_DIR, f'{CITY_NAME}_predictions.csv')
preds_df.to_csv(preds_file, index=False)
print(f"Predictions saved to: {preds_file}")

# Keep only the scalar scores; `metrics` also carries the denormalized arrays,
# which are already stored in the predictions file above
metrics_to_save = {k: v for k, v in metrics.items() if k in ['mae', 'rmse', 'r2']}

# Run settings stored next to the scores so the result can be traced back
params_to_save = {
    'city_name': CITY_NAME,
    'cd_mun': CD_MUN_SELECTED,
    'sequence_length': SEQUENCE_LENGTH,
    'forecast_horizon': FORECAST_HORIZON,
    'epochs': EPOCHS,
    'batch_size': BATCH_SIZE,
    'gru_units': GRU_UNITS
}

metrics_file = save_metrics(
    metrics=metrics_to_save,
    city_name=CITY_NAME,
    model_name='gru_simple',
    output_dir=RESULTS_DIR,
    params=params_to_save
)
print(f"Metrics saved to: {metrics_file}")

In [None]:
# =============================================================================
# Visualize Forecasts
# =============================================================================

# Position of the target among the feature columns; computed once and reused
target_idx = FEATURE_COLUMNS.index(TARGET_COLUMN)


def _denormalize_target(values):
    """Map normalized target values back to the original scale.

    The scaler works on ``scaler.n_features_in_`` columns, so the values are
    placed in the target column of a zero-filled matrix, inverse-transformed,
    and the target column is read back.

    Args:
        values: array-like of normalized target values (any shape; flattened).

    Returns:
        1-D array with one denormalized value per input value.
    """
    padded = np.zeros((len(values), scaler.n_features_in_))
    padded[:, target_idx] = np.asarray(values).flatten()
    return scaler.inverse_transform(padded)[:, target_idx]


# --- 1. Plot Test Set Forecast ---
# Trailing index entries of the test frame, one per available prediction
test_dates = original_test_df.index[-len(y_true_denorm):]

fig_test = plot_forecast(
    dates=test_dates,
    true_values=y_true_denorm,
    predictions=y_pred_denorm,
    title=f'{CITY_NAME} - Test Set Forecast',
    metrics=metrics
)
plt.savefig(os.path.join(RESULTS_DIR, f'{CITY_NAME}_test_forecast.png'), dpi=300)
fig_test.show()


# --- 2. Plot Validation + Test Set Forecast ---
y_val_pred_normalized = model.predict(X_val)
y_val_pred_denorm = _denormalize_target(y_val_pred_normalized)
y_val_true_denorm = _denormalize_target(y_val)

full_true = np.concatenate([y_val_true_denorm, y_true_denorm])
full_pred = np.concatenate([y_val_pred_denorm, y_pred_denorm])

val_dates = original_val_df.index[-len(y_val_true_denorm):]
# Concatenate rather than take the set union: union sorts and de-duplicates,
# which could make the date axis shorter than the concatenated value arrays.
full_dates = val_dates.append(test_dates)

fig_full = plot_forecast(
    dates=full_dates,
    true_values=full_true,
    predictions=full_pred,
    title=f'{CITY_NAME} - Full Validation + Test Forecast'
)
# The dashed line sits at the first test date, i.e. where validation ends
plt.axvline(test_dates[0], color='black', linestyle='--', label='Validation/Test Split')
plt.legend()
plt.savefig(os.path.join(RESULTS_DIR, f'{CITY_NAME}_full_forecast.png'), dpi=300)
fig_full.show()


# --- 3. Scatter Plot of Test Set ---
fig_scatter = plot_actual_vs_predicted_scatter(
    true_values=y_true_denorm,
    predictions=y_pred_denorm,
    title=f'{CITY_NAME} - Test Set: Actual vs. Predicted',
    metrics=metrics
)
plt.savefig(os.path.join(RESULTS_DIR, f'{CITY_NAME}_scatter_plot.png'), dpi=300)
fig_scatter.show()

In [None]:
# -----------------------------------------------------------------------------
# Score the trained model on the test split
# -----------------------------------------------------------------------------
# evaluate_model needs to know which scaler column holds the target
target_position = FEATURE_COLUMNS.index(TARGET_COLUMN)
metrics = evaluate_model(
    model=model, X_test=X_test, y_test=y_test,
    scaler=scaler, target_idx=target_position,
)

# Pull out the arrays stored under the *_denorm keys; later cells plot and save them
y_true_denorm, y_pred_denorm = metrics['y_true_denorm'], metrics['y_pred_denorm']

# Summary of the scalar scores, four decimals each
print(
    f"--- Test Set Metrics for {CITY_NAME} ---",
    f"MAE:  {metrics['mae']:.4f}",
    f"RMSE: {metrics['rmse']:.4f}",
    f"R²:   {metrics['r2']:.4f}",
    sep="\n",
)

In [None]:
# -----------------------------------------------------------------------------
# Model construction and fitting
# -----------------------------------------------------------------------------
# Shape of X_train without its first axis, as expected by build_gru
input_shape = X_train.shape[1:]
model = build_gru(input_shape=input_shape, units=GRU_UNITS, loss=LOSS_FUNCTION)
model.summary()

# Training schedule taken from the configuration constants
fit_options = dict(epochs=EPOCHS, batch_size=BATCH_SIZE, patience=PATIENCE, verbose=1)
history = train_model(
    model=model,
    X_train=X_train, y_train=y_train,
    X_val=X_val, y_val=y_val,
    **fit_options,
)

# Draw the training history, store it as a PNG in the results folder, then display it
fig_history = plot_training_history(history, title=f'GRU Model Training History - {CITY_NAME}')
history_png = os.path.join(RESULTS_DIR, f'{CITY_NAME}_training_history.png')
plt.savefig(history_png, dpi=300)
plt.show()

In [None]:
# -----------------------------------------------------------------------------
# Turn the city series into model-ready train / validation / test arrays
# -----------------------------------------------------------------------------
prep_options = {
    'df': df_city,
    'target_column': TARGET_COLUMN,
    'feature_columns': FEATURE_COLUMNS,
    'sequence_length': SEQUENCE_LENGTH,
    'forecast_horizon': FORECAST_HORIZON,
    'test_size': TEST_SIZE,
    'val_size': VAL_SIZE,
    'normalization': NORMALIZATION,
}
data_dict = prepare_data_for_model(**prep_options)

# Model inputs and targets, grouped by kind
X_train, X_val, X_test = data_dict['X_train'], data_dict['X_val'], data_dict['X_test']
y_train, y_val, y_test = data_dict['y_train'], data_dict['y_val'], data_dict['y_test']

# Fitted scaler, needed later to map predictions back to the original scale
scaler = data_dict['scaler']

# Original frames of each split, kept for their indices when plotting and saving
original_train_df = data_dict['original_train_df']
original_val_df = data_dict['original_val_df']
original_test_df = data_dict['original_test_df']

# Report array shapes as a sanity check on the windowing
print(
    "--- Data Shapes ---",
    f"X_train: {X_train.shape}, y_train: {y_train.shape}",
    f"X_val:   {X_val.shape}, y_val:   {y_val.shape}",
    f"X_test:  {X_test.shape}, y_test:  {y_test.shape}",
    sep="\n",
)

## Conclusão

Este notebook demonstrou um fluxo de trabalho limpo e modular para treinar, avaliar e visualizar um modelo GRU simples para previsão de séries temporais.

- **Configuração Centralizada:** Todos os parâmetros são definidos em um único local, facilitando a experimentação.
- **Código Modular:** Toda a lógica de pré-processamento, modelagem, treinamento e avaliação é importada do diretório `src/`, mantendo o notebook limpo e focado na orquestração do fluxo de trabalho.
- **Avaliação Robusta:** A função `evaluate_model` lida com a desnormalização e o cálculo de métricas de forma consistente.
- **Visualização Clara:** As visualizações mostram o desempenho do modelo nos conjuntos de teste e validação, fornecendo insights sobre a generalização do modelo.

O desempenho deste modelo GRU simples serve como uma linha de base para modelos mais complexos.