# Comparação de Modelos Clássicos

Este notebook executa diversos algoritmos clássicos de classificação utilizando as features clássicas combinadas às features quânticas previstas pela rede neural profunda. O objetivo é verificar quais modelos se beneficiam do conjunto expandido de atributos.


## Dependências


In [None]:
from pathlib import Path

import math
from typing import List, Optional, Sequence

import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display
from matplotlib import pyplot as plt
from sklearn.base import clone
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    balanced_accuracy_score,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
    precision_score,
    r2_score,
    recall_score,
    roc_auc_score,
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Default chart style: seaborn "whitegrid" theme, then the figure size and
# axes title/label font sizes used by matplotlib for every later plot.
sns.set_style("whitegrid")
plt.rcParams["figure.figsize"] = (10, 5)
plt.rcParams["axes.titlesize"] = 14
plt.rcParams["axes.labelsize"] = 12


## Carregamento dos dados


In [None]:
# Locate the per-fold CSV exports. `sorted` makes the order of `fold_frames`
# reproducible (lexicographic by path) instead of filesystem-dependent.
data_dir = Path("features")
fold_paths = sorted(data_dir.glob("features_y_fold*.csv"))
if not fold_paths:
    raise FileNotFoundError("Nenhum arquivo `features_y_fold*.csv` foi encontrado na pasta `features/`.")

# Load each fold into its own DataFrame, tagged with the stem of its file.
fold_frames = []
for path in fold_paths:
    frame = pd.read_csv(path)
    frame["fold_name"] = path.stem
    # Normalise the split label once, on the stored frame. The summary below
    # has always counted labels case-insensitively, while the later cells
    # filter with exact `== "train"` / `== "test"` comparisons; lower-casing
    # here keeps both views of the data in agreement.
    frame["set"] = frame["set"].str.lower()
    fold_frames.append(frame)

# Row counts per (fold, fold_name), with one column per split label.
fold_summary = (
    pd.concat(
        [df[["fold", "fold_name", "set"]] for df in fold_frames],
        ignore_index=True,
    )
    .value_counts()
    .unstack(fill_value=0)
)
fold_summary


## Preparação das features


In [None]:
def _columns_with_prefix(frame, prefix):
    """Return the column names of ``frame`` that start with ``prefix``, in order."""
    return [name for name in frame.columns if name.startswith(prefix)]


# Feature groups are identified by column-name prefix in the first fold.
classical_features = _columns_with_prefix(fold_frames[0], "class_")
quantum_features = _columns_with_prefix(fold_frames[0], "qf_")


# Every metric takes (y_true, y_score, y_pred) so the evaluation loop can call
# them uniformly; each one uses only the arguments it needs.
def _auc(y_true, y_score, y_pred):
    """ROC AUC of the scores, or NaN when ``y_true`` contains a single class."""
    if len(np.unique(y_true)) > 1:
        return roc_auc_score(y_true, y_score)
    return np.nan


def _f1_overall(y_true, y_score, y_pred):
    """F1 score of the hard predictions."""
    return f1_score(y_true, y_pred)


def _balanced_accuracy(y_true, y_score, y_pred):
    """Balanced accuracy of the hard predictions."""
    return balanced_accuracy_score(y_true, y_pred)


def _for_class(scorer, label):
    """Build a metric that applies ``scorer`` with ``label`` as the positive class."""
    def metric(y_true, y_score, y_pred):
        return scorer(y_true, y_pred, pos_label=label)
    return metric


metric_functions = {
    "AUC": _auc,
    "F1 Score Overall": _f1_overall,
    "Balanced Accuracy": _balanced_accuracy,
    "Precision Class 0": _for_class(precision_score, 0),
    "Precision Class 1": _for_class(precision_score, 1),
    "Recall Class 0": _for_class(recall_score, 0),
    "Recall Class 1": _for_class(recall_score, 1),
}

# Rede neural para prever as features quânticas
class DeepNumpyMLPRegressor:
    """Small NumPy multi-layer perceptron regressor trained with Adam.

    Hidden layers use ReLU and the output layer is linear. Training minimises
    the squared error, summed over outputs and averaged over each mini-batch.

    Parameters
    ----------
    hidden_layers
        Width of each hidden layer, in order from input to output.
    learning_rate
        Adam step size.
    epochs
        Number of full passes over the training data.
    batch_size
        Samples per mini-batch (the last batch of an epoch may be smaller).
    random_state
        Seed used both for weight initialisation and for batch shuffling.
    beta1, beta2
        Exponential decay rates of Adam's first and second moment estimates.
    eps
        Constant added to the denominator of the Adam update.
    """

    def __init__(
        self,
        hidden_layers: Sequence[int] = (256, 128, 64),
        learning_rate: float = 1e-3,
        epochs: int = 100,
        batch_size: int = 256,
        random_state: Optional[int] = 42,
        beta1: float = 0.9,
        beta2: float = 0.999,
        eps: float = 1e-8,
    ) -> None:
        self.hidden_layers = tuple(hidden_layers)
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.random_state = random_state
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        # Per-layer parameters; empty until `fit` has been called.
        self.weights: List[np.ndarray] = []
        self.biases: List[np.ndarray] = []
        # Adam first (m_*) and second (v_*) moment buffers, one per parameter.
        self.m_w: List[np.ndarray] = []
        self.v_w: List[np.ndarray] = []
        self.m_b: List[np.ndarray] = []
        self.v_b: List[np.ndarray] = []
        # Number of Adam updates applied so far (drives bias correction).
        self._step = 0

    def _initialise(self, n_features: int, n_outputs: int) -> None:
        """Create fresh parameters and zeroed Adam buffers for every layer.

        Weights are drawn uniformly from ``[-limit, limit]`` with
        ``limit = sqrt(6 / (fan_in + fan_out))``; biases start at zero.
        Any state left over from a previous ``fit`` is discarded.
        """
        layer_sizes = [n_features, *self.hidden_layers, n_outputs]
        rng = np.random.default_rng(self.random_state)
        self.weights.clear()
        self.biases.clear()
        self.m_w.clear()
        self.v_w.clear()
        self.m_b.clear()
        self.v_b.clear()
        for in_dim, out_dim in zip(layer_sizes[:-1], layer_sizes[1:]):
            limit = math.sqrt(6.0 / (in_dim + out_dim))
            weight = rng.uniform(-limit, limit, size=(in_dim, out_dim)).astype(np.float64)
            bias = np.zeros(out_dim, dtype=np.float64)
            self.weights.append(weight)
            self.biases.append(bias)
            self.m_w.append(np.zeros_like(weight))
            self.v_w.append(np.zeros_like(weight))
            self.m_b.append(np.zeros_like(bias))
            self.v_b.append(np.zeros_like(bias))
        self._step = 0

    @staticmethod
    def _relu(values: np.ndarray) -> np.ndarray:
        """Element-wise ``max(0, x)``."""
        return np.maximum(0.0, values)

    @staticmethod
    def _relu_grad(values: np.ndarray) -> np.ndarray:
        """Derivative of ReLU: 1 where the input is strictly positive, else 0."""
        grad = np.zeros_like(values)
        grad[values > 0.0] = 1.0
        return grad

    def _forward(self, batch: np.ndarray):
        """Run a forward pass.

        Returns a pair ``(pre_activations, activations)``. ``activations[0]``
        is the input batch and ``activations[-1]`` the network output;
        ``pre_activations[i]`` is the linear output of layer ``i`` before its
        non-linearity.
        """
        activations = [batch]
        pre_activations: List[np.ndarray] = []
        current = batch
        last_layer = len(self.weights) - 1
        for idx, (weight, bias) in enumerate(zip(self.weights, self.biases)):
            linear = current @ weight + bias
            pre_activations.append(linear)
            # The output layer stays linear; every hidden layer applies ReLU.
            current = linear if idx == last_layer else self._relu(linear)
            activations.append(current)
        return pre_activations, activations

    def _adam_step(self, grads_w: List[np.ndarray], grads_b: List[np.ndarray]) -> None:
        """Apply one bias-corrected Adam update to every weight and bias."""
        self._step += 1
        lr = self.learning_rate
        # The bias-correction factors depend only on the step count, so they
        # are computed once per update rather than once per layer.
        correction1 = 1 - self.beta1 ** self._step
        correction2 = 1 - self.beta2 ** self._step
        for idx, (grad_w, grad_b) in enumerate(zip(grads_w, grads_b)):
            self.m_w[idx] = self.beta1 * self.m_w[idx] + (1 - self.beta1) * grad_w
            self.v_w[idx] = self.beta2 * self.v_w[idx] + (1 - self.beta2) * (grad_w ** 2)
            self.m_b[idx] = self.beta1 * self.m_b[idx] + (1 - self.beta1) * grad_b
            self.v_b[idx] = self.beta2 * self.v_b[idx] + (1 - self.beta2) * (grad_b ** 2)
            m_hat_w = self.m_w[idx] / correction1
            v_hat_w = self.v_w[idx] / correction2
            m_hat_b = self.m_b[idx] / correction1
            v_hat_b = self.v_b[idx] / correction2
            self.weights[idx] -= lr * m_hat_w / (np.sqrt(v_hat_w) + self.eps)
            self.biases[idx] -= lr * m_hat_b / (np.sqrt(v_hat_b) + self.eps)

    def fit(self, features: np.ndarray, targets: np.ndarray) -> None:
        """Train the network from scratch on ``features`` -> ``targets``.

        Parameters
        ----------
        features
            2D array-like of shape ``(n_samples, n_features)``.
        targets
            2D array-like of shape ``(n_samples, n_outputs)``.

        Raises
        ------
        ValueError
            If either input is not 2D, the row counts differ, there are no
            samples, or ``batch_size`` is not positive.
        """
        features = np.asarray(features, dtype=np.float64)
        targets = np.asarray(targets, dtype=np.float64)
        if features.ndim != 2:
            raise ValueError('As features devem ser uma matriz 2D.')
        if targets.ndim != 2:
            raise ValueError('Os alvos devem ser uma matriz 2D.')
        # Without these checks a row mismatch is silently truncated (or fails
        # with a bare IndexError), and an empty training set or a negative
        # batch size leaves a randomly initialised network behind.
        if features.shape[0] != targets.shape[0]:
            raise ValueError('As features e os alvos devem ter o mesmo número de amostras.')
        if features.shape[0] == 0:
            raise ValueError('É necessário pelo menos uma amostra para o treino.')
        if self.batch_size <= 0:
            raise ValueError('O tamanho do lote deve ser positivo.')

        self._initialise(features.shape[1], targets.shape[1])
        # Shuffling uses its own generator, seeded with the same random_state
        # as the weight initialisation.
        rng = np.random.default_rng(self.random_state)
        n_samples = features.shape[0]
        n_layers = len(self.weights)
        indices = np.arange(n_samples)
        for _ in range(self.epochs):
            rng.shuffle(indices)
            for start in range(0, n_samples, self.batch_size):
                batch_indices = indices[start:start + self.batch_size]
                batch_features = features[batch_indices]
                batch_targets = targets[batch_indices]
                pre_acts, activations = self._forward(batch_features)
                errors = activations[-1] - batch_targets
                grads_w = [None] * n_layers
                grads_b = [None] * n_layers
                # Gradient of the batch-averaged squared error w.r.t. the output.
                delta = 2.0 * errors / batch_features.shape[0]
                for layer_idx in reversed(range(n_layers)):
                    grads_w[layer_idx] = activations[layer_idx].T @ delta
                    grads_b[layer_idx] = np.sum(delta, axis=0)
                    if layer_idx > 0:
                        # Propagate through this layer's weights and the ReLU
                        # of the layer below.
                        delta = (delta @ self.weights[layer_idx].T) * self._relu_grad(pre_acts[layer_idx - 1])
                self._adam_step(grads_w, grads_b)

    def predict(self, features: np.ndarray) -> np.ndarray:
        """Return the network output for ``features``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted. With no layers the forward pass
            would otherwise hand the input back unchanged.
        """
        if not self.weights:
            raise RuntimeError('O modelo deve ser treinado com `fit` antes de chamar `predict`.')
        features = np.asarray(features, dtype=np.float64)
        _, activations = self._forward(features)
        return activations[-1]

# One output column per quantum feature, holding the network's estimate of it.
dl_predicted_quantum_features = ['dl_pred_' + feature for feature in quantum_features]

for fold_df in fold_frames:
    # Fit on the training rows of this fold only, then predict for every row
    # (train and test alike) so the whole fold gets the extra columns.
    train_mask = fold_df["set"] == "train"
    train_inputs = fold_df.loc[train_mask, classical_features].values
    train_targets = fold_df.loc[train_mask, quantum_features].values
    regressor = DeepNumpyMLPRegressor(
        hidden_layers=(256, 128, 64),
        learning_rate=5e-4,
        epochs=150,
        batch_size=128,
    )
    regressor.fit(train_inputs, train_targets)
    predicted_values = regressor.predict(fold_df[classical_features].values)
    # Column i of the prediction matrix corresponds to the i-th quantum feature.
    for position, column_name in enumerate(dl_predicted_quantum_features):
        fold_df[column_name] = predicted_values[:, position]

# Expanded feature set: original classical columns followed by the predictions.
dl_feature_set = [*classical_features, *dl_predicted_quantum_features]


## Avaliação dos algoritmos


In [None]:
def _scaled(classifier):
    """Wrap ``classifier`` in a pipeline that standardises the features first."""
    return Pipeline([("scaler", StandardScaler()), ("model", classifier)])


# Candidate classifiers. The two tree ensembles are used without a scaling
# step; every other model is preceded by a StandardScaler.
algorithm_estimators = {
    "Logistic Regression": _scaled(LogisticRegression(max_iter=1000, random_state=42)),
    "KNN (k=15)": _scaled(KNeighborsClassifier(n_neighbors=15)),
    "SVM (RBF)": _scaled(SVC(kernel="rbf", probability=True, random_state=42)),
    "Random Forest": RandomForestClassifier(n_estimators=300, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "MLP Classifier": _scaled(
        MLPClassifier(hidden_layer_sizes=(256, 128, 64), max_iter=400, random_state=42)
    ),
}

comparison_results = []      # one dict per (algorithm, fold, metric)
comparison_predictions = []  # one DataFrame per (algorithm, fold)
for algorithm_name, estimator in algorithm_estimators.items():
    for fold_idx, fold_df in enumerate(fold_frames):
        train_df = fold_df[fold_df['set'] == 'train']
        test_df = fold_df[fold_df['set'] == 'test']
        X_train, y_train = train_df[dl_feature_set], train_df['y']
        X_test, y_test = test_df[dl_feature_set], test_df['y']

        # Fit a fresh copy so nothing learned on one fold carries over.
        model = clone(estimator)
        model.fit(X_train, y_train)

        if not hasattr(model, 'predict_proba'):
            # No probabilities available: min-max rescale the decision values
            # into [0, 1); the small constant avoids a division by zero.
            decision = model.decision_function(X_test)
            lowest, highest = decision.min(), decision.max()
            y_proba = (decision - lowest) / (highest - lowest + 1e-9)
        else:
            # Second probability column is used as the score.
            y_proba = model.predict_proba(X_test)[:, 1]
        y_pred = model.predict(X_test)

        fold_name = fold_df['fold_name'].iat[0]
        comparison_results.extend(
            {
                'fold': fold_idx,
                'fold_name': fold_name,
                'algorithm': algorithm_name,
                'metric': metric_name,
                'value': metric_fn(y_test, y_proba, y_pred),
            }
            for metric_name, metric_fn in metric_functions.items()
        )

        # Per-row predictions for this algorithm on this fold's test split.
        fold_predictions = pd.DataFrame({
            'fold': fold_idx,
            'fold_name': fold_name,
            'row_id': test_df['row_id'].values,
            'algorithm': algorithm_name,
            'y_true': y_test.values,
            'y_pred': y_pred,
            'y_proba': y_proba,
        })
        comparison_predictions.append(fold_predictions)

comparison_results_df = pd.DataFrame(comparison_results)
comparison_predictions_df = pd.concat(comparison_predictions, ignore_index=True)
comparison_results_df.head()


## Métricas agregadas


In [None]:
# NaN-aware reducers; folds whose metric is NaN are ignored.
def _nan_median(values):
    """Median of ``values`` ignoring NaN."""
    return np.nanmedian(values)


def _nan_q1(values):
    """First quartile of ``values`` ignoring NaN."""
    return np.nanquantile(values, 0.25)


def _nan_q3(values):
    """Third quartile of ``values`` ignoring NaN."""
    return np.nanquantile(values, 0.75)


# Across-fold median and quartiles for each (algorithm, metric) pair.
values_by_algorithm_metric = comparison_results_df.groupby(['algorithm', 'metric'])['value']
comparison_summary = values_by_algorithm_metric.agg(
    median=_nan_median,
    q1=_nan_q1,
    q3=_nan_q3,
).reset_index()

# Interquartile range, plus a printable "median (IQR: q1-q3)" label.
comparison_summary['iqr'] = comparison_summary['q3'].sub(comparison_summary['q1'])
comparison_summary['median_iqr'] = [
    f"{median:.3f} (IQR: {q1:.3f}-{q3:.3f})"
    for median, q1, q3 in zip(
        comparison_summary['median'],
        comparison_summary['q1'],
        comparison_summary['q3'],
    )
]
comparison_summary


## Comparação visual


In [None]:
# Order in which the metrics appear along the x-axis.
metric_order = [
    'AUC',
    'F1 Score Overall',
    'Balanced Accuracy',
    'Precision Class 0',
    'Precision Class 1',
    'Recall Class 0',
    'Recall Class 1',
]

# Wide table: one row per metric, one column per algorithm, holding medians.
median_by_metric = comparison_summary.pivot(index='metric', columns='algorithm', values='median')
plot_ready = median_by_metric.reindex(metric_order)

# Grouped bar chart: a cluster of bars per metric, one bar per algorithm.
ax = plot_ready.plot.bar(figsize=(14, 6))
ax.set(
    ylabel='Mediana por fold',
    xlabel='Métrica',
    title='Desempenho por algoritmo com features expandidas',
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
ax.legend(loc='upper right', ncol=2)
plt.tight_layout()
plt.show()
