In [17]:
# Imports
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import load_model

# allow importing local modules (preprocess)
        # allow importing local modules (preprocess)
        # __file__ is not available in notebooks — locate repo root by searching upward for src/models/preprocess.py
import pathlib
import sys

repo_root = pathlib.Path().cwd()
found = False
p = repo_root

for _ in range(10):
    candidate = p / 'src' / 'models' / 'preprocess.py'
    
    if candidate.exists():
        repo_root = p
        found = True
        break
    
    if p.parent == p:  # chegou na raiz do SO
        break
    
    p = p.parent

if not found:
    # fallback to cwd
    repo_root = pathlib.Path().cwd()

# add local models folder to sys.path
sys.path.append(str(repo_root / 'src' / 'models'))

from preprocess import prepare_dataset

In [None]:
src
data
Soja
df_soja.parquet
src
models
output_test
src
models

In [19]:
# Load data (uses the same preprocessing as treino)
X_train, X_val, X_test, y_train, y_val, y_test, scaler, feature_names = prepare_dataset(parquet_path)
print("X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_test shape: {X_test.shape}, y_test shape: {y_test.shape}


In [20]:
# Load model (prefer .keras native format, fallback to .h5)
if os.path.exists(model_keras):
    model = load_model(model_keras)
    print("Loaded model from .keras")
elif os.path.exists(model_h5):
    model = load_model(model_h5)
    print("Loaded model from .h5 checkpoint")
else:
    raise FileNotFoundError(
        "Nenhum modelo encontrado em {model_keras} ou {model_h5}. Rode o treino primeiro."
    )

FileNotFoundError: Nenhum modelo encontrado em {model_keras} ou {model_h5}. Rode o treino primeiro.

In [None]:
# Load stored metrics if present
if os.path.exists(metrics_path):
    stored_metrics = joblib.load(metrics_path)
    print("Stored metrics:")
    print(stored_metrics)
else:
    stored_metrics = None
    print("No stored metrics found at {metrics_path}")

In [None]:
# Predict on test set
preds = model.predict(X_test)
# preds is a list of arrays (one per output). Stack into (n_samples, n_outputs)
y_pred = np.vstack([p.ravel() for p in preds]).T
y_true = y_test
print("Prediction shape: {y_pred.shape}")

In [None]:
# Build DataFrame with true and predicted values (first 500 rows for CSV)
targets = ["Toneladas", "Quilograma Líquido", "Valor US$ FOB"]
df_pred = pd.DataFrame()
for i, t in enumerate(targets):
    df_pred[f"y_true_{t}"] = y_true[:, i]
    df_pred[f"y_pred_{t}"] = y_pred[:, i]

# Save sample CSV (first 500 rows)
df_pred.head(500).to_csv(pred_csv, index=False)
print("Saved predictions sample to {pred_csv}")

In [None]:
# Plotting: scatter y_true vs y_pred and residuals for each target
sns.set(style="whitegrid")
fig, axes = plt.subplots(3, 2, figsize=(12, 14))
for i, t in enumerate(targets):
    ax_scatter = axes[i, 0]
    ax_hist = axes[i, 1]

    y_t = y_true[:, i]
    y_p = y_pred[:, i]
    res = y_t - y_p

    # Scatter
    ax_scatter.scatter(y_t, y_p, alpha=0.4, s=10)
    lims = [np.nanmin(np.concatenate([y_t, y_p])), np.nanmax(np.concatenate([y_t, y_p]))]
    ax_scatter.plot(lims, lims, color="red", linestyle="--")
    ax_scatter.set_title(f"{t}: True vs Pred")
    ax_scatter.set_xlabel("True")
    ax_scatter.set_ylabel("Pred")

    # Residual histogram + KDE
    sns.histplot(res, kde=True, ax=ax_hist, bins=50)
    ax_hist.set_title(f"{t}: Residuals (true - pred)")

    # Print metrics
    mse = mean_squared_error(y_t, y_p)
    mae = mean_absolute_error(y_t, y_p)
    print(f"{t} - MSE: {mse:.4f}, MAE: {mae:.4f}")

plt.tight_layout()
fig.savefig(pred_fig, dpi=150)
print("Saved figure to {pred_fig}")
plt.show()

## Localização dos artefatos gerados

- Modelo salvo: `src/models/output_test/soja_model_saved.keras` (ou `soja_model_best.h5`)
- Scaler: `src/models/output_test/scaler.joblib`
- Métricas: `src/models/output_test/metrics.joblib`
- Previsões (amostra): `src/models/output_test/predictions_sample.csv`
- Figura com previsões: `src/models/output_test/soja_predictions.png`