# Classificação - 5 Braços Experimentais

Este notebook treina classificadores (SVM e SRC) para cada um dos 5 braços experimentais:

1. **Baseline CNN**
2. **ViT Puro**
3. **ViT + Contrastive**
4. **ViT + MIM**
5. **ViT + Sparse**

## Métricas Calculadas

- **Acurácia**
- **F1-Score (macro)**
- **Silhouette Score** (para análise de clusters)

## Estrutura de Saída

Os resultados serão salvos em:
```
results/
├── classifications/
│   ├── baseline_cnn_svm_results.json
│   ├── baseline_cnn_src_results.json
│   └── ...
```


In [None]:
import os
import sys
import json
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score, classification_report, silhouette_score
from sklearn.preprocessing import StandardScaler

# MLflow para rastreamento de experimentos
import mlflow
import mlflow.sklearn
from mlflow import log_metric, log_param, log_artifacts, log_model

# Importar SRCClassifier do notebook original
# (Você pode copiar a classe SRCClassifier do SRCClassifier.ipynb aqui ou importá-la)
from sklearn.decomposition import DictionaryLearning
from sklearn.linear_model import Lasso

# ============================================
# ENVIRONMENT DETECTION (COLAB OR LOCAL)
# ============================================
# Importing google.colab only succeeds inside Colab; an ImportError anywhere in
# this block means the notebook is treated as running locally.
try:
    import google.colab
    IN_COLAB = True
    from google.colab import drive
    drive.mount('/content/drive')
    print("‚úÖ Google Colab detectado - Drive montado")
except ImportError:
    IN_COLAB = False
    print("‚úÖ Ambiente local detectado")

# Only the project root differs between the two environments; every other
# directory is derived from it.
BASE_DIR = Path("/content/drive/MyDrive/Mestrado_TCC") if IN_COLAB else Path("../")
FEATURES_DIR = BASE_DIR / "features"
RESULTS_DIR = BASE_DIR / "results" / "classifications"
MLRUNS_DIR = BASE_DIR / "mlruns"

if IN_COLAB:
    # Switch the working directory to the framework folder when it exists.
    FRAMEWORK_DIR = BASE_DIR / "Framework"
    if FRAMEWORK_DIR.exists():
        os.chdir(FRAMEWORK_DIR)

# Create the output directory for classification results.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# MLflow configuration: make sure the tracking directory exists and point the
# tracking URI at its absolute path.
MLRUNS_DIR.mkdir(exist_ok=True)
mlflow.set_tracking_uri(str(MLRUNS_DIR.absolute()))

# Names of the experimental arms; each one is used as a sub-directory name
# when features are loaded and as a file-name prefix when results are saved.
EXPERIMENTAL_ARMS = [
    "baseline_cnn",
    "vit_pure",
    "vit_contrastive",
    "vit_mim",
    "vit_sparse",
]

# Report the resolved directory layout.
print(f"\nüìÅ Diret√≥rios configurados:")
print(f"   Features: {FEATURES_DIR}")
print(f"   Resultados: {RESULTS_DIR}")
print(f"   MLflow: {MLRUNS_DIR}")


## Classe SRCClassifier

Classe para classificação baseada em Representação Esparsa (adaptada do notebook original).


In [None]:
class SRCClassifier:
    """Sparse-representation classifier with one learned dictionary per class.

    ``fit`` learns a dictionary for every class label found in the training
    labels. ``predict`` sparse-codes each sample against every class dictionary
    with LASSO and assigns the class whose dictionary yields the smallest
    reconstruction error.
    """

    def __init__(self, n_atoms_per_class=50, alpha=0.001, output_dir='./src_output'):
        """
        Initialize the SRC classifier.

        Args:
            n_atoms_per_class (int): Number of atoms learned for each class dictionary.
            alpha (float): LASSO regularization strength used in ``predict``;
                controls the sparsity of the codes.
            output_dir (str): Directory created on construction (if missing).
                Nothing in this class writes to it.
        """
        self.n_atoms_per_class = n_atoms_per_class
        self.alpha = alpha
        self.dictionaries = {}
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

    def fit(self, X_train, y_train):
        """Learn one dictionary per class from the training data.

        Args:
            X_train: Training samples, one row per sample.
            y_train: Class label of each training sample.

        Returns:
            SRCClassifier: ``self``, so calls can be chained.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)

        # Start from a clean slate so a refit never keeps dictionaries that
        # belong to labels absent from the new training set.
        self.dictionaries = {}

        # np.unique already returns the labels in sorted order.
        for class_label in np.unique(y_train):
            X_class = X_train[y_train == class_label]
            if X_class.shape[0] == 0:
                print(f"Aviso: Nenhuma amostra de treino para a classe {class_label}. Pulando.")
                continue

            print(f"  Aprendendo dicion√°rio para a classe {class_label}...")
            dict_learner = DictionaryLearning(
                n_components=self.n_atoms_per_class,
                fit_algorithm='lars',
                transform_algorithm='lasso_lars',
                random_state=42,
                n_jobs=-1
            )
            dict_learner.fit(X_class)
            self.dictionaries[class_label] = dict_learner.components_

        return self

    def predict(self, X_test):
        """Classify samples by minimum reconstruction error.

        Args:
            X_test: Samples to classify, one row per sample.

        Returns:
            np.ndarray: Predicted class label for each sample.

        Raises:
            RuntimeError: If no dictionary has been learned yet.
        """
        if not self.dictionaries:
            raise RuntimeError("Os dicion√°rios devem ser aprendidos primeiro. Chame o m√©todo .fit().")

        X_test = np.asarray(X_test)

        # The reconstruction below is ``atoms @ coef_`` with no offset term, so
        # the LASSO must be fitted without an intercept; otherwise the fitted
        # intercept would be missing from the reconstruction and skew the error.
        lasso = Lasso(alpha=self.alpha, fit_intercept=False)

        # Transpose each dictionary once: the atoms become the columns of the
        # design matrix, shape (n_features, n_atoms).
        atoms_by_class = {
            class_label: dictionary.T
            for class_label, dictionary in self.dictionaries.items()
        }

        predictions = []
        for x_sample in X_test:
            min_error = float('inf')
            predicted_class = None

            for class_label, atoms in atoms_by_class.items():
                lasso.fit(atoms, x_sample)
                reconstructed = atoms @ lasso.coef_
                error = np.linalg.norm(x_sample - reconstructed)

                if error < min_error:
                    min_error = error
                    predicted_class = class_label

            predictions.append(predicted_class)

        return np.array(predictions)

# Cell-level confirmation that the class definition above was executed.
print("Classe SRCClassifier definida!")


## Funções Auxiliares


## Visualização no MLflow UI

Após executar o pipeline, você pode visualizar os resultados no MLflow UI:

```bash
mlflow ui --backend-store-uri ../mlruns
```

Ou se estiver usando o tracking URI configurado:

```bash
cd ..
mlflow ui
```

Acesse `http://localhost:5000` no navegador para ver todos os experimentos.


In [None]:
def load_features(arm_name, split="train"):
    """
    Load the saved feature and label arrays of one experimental arm.

    The files read are ``<split>_features.npy`` and ``<split>_labels.npy``
    inside ``FEATURES_DIR / arm_name``.

    Args:
        arm_name: Name of the experimental arm (sub-directory of FEATURES_DIR).
        split: Dataset split used as the file-name prefix
            ("train", "val", "test").

    Returns:
        features, labels: The arrays returned by ``np.load``, or
        ``(None, None)`` when either file does not exist.
    """
    arm_dir = FEATURES_DIR / arm_name
    features_file = arm_dir / f"{split}_features.npy"
    labels_file = arm_dir / f"{split}_labels.npy"

    # Both files are required; bail out early if either one is missing.
    if not (features_file.exists() and labels_file.exists()):
        print(f"‚ö†Ô∏è  Arquivos n√£o encontrados para {arm_name} - {split}")
        return None, None

    features, labels = np.load(features_file), np.load(labels_file)

    print(f"‚úÖ Carregado {arm_name} - {split}: {features.shape}")
    return features, labels

def train_svm_classifier(train_features, train_labels, val_features, val_labels, 
                         test_features, test_labels, arm_name):
    """
    Train an RBF-kernel SVM with grid search and evaluate it on val/test.

    Features are standardized with a scaler fitted on the training split only.
    ``C`` and ``gamma`` are selected by 5-fold cross-validated grid search on
    the training split, scored with macro F1. The best estimator is then
    evaluated on the validation and test splits.

    Args:
        train_features, train_labels: Training split used for scaling and
            model selection.
        val_features, val_labels: Validation split (evaluation only).
        test_features, test_labels: Test split (evaluation only).
        arm_name: Experimental arm name, used in log output and in the result.

    Returns:
        dict: Arm name, classifier tag, best hyper-parameters, validation and
        test accuracy / macro F1, silhouette score (``None`` when it cannot be
        computed) and the test classification report as a dict.
    """
    print(f"\n{'='*60}")
    print(f"Treinando SVM para {arm_name}")
    print(f"{'='*60}")
    
    # Standardization: fit on train only, then apply to every split.
    scaler = StandardScaler()
    train_features_scaled = scaler.fit_transform(train_features)
    val_features_scaled = scaler.transform(val_features)
    test_features_scaled = scaler.transform(test_features)
    
    # Grid search. Probability estimates are not requested: only ``predict`` is
    # used below, and enabling them adds an internal cross-validation to every
    # single fit of the search.
    svm = SVC(kernel='rbf', class_weight='balanced')
    param_grid = {
        'C': [0.01, 0.1, 1, 10, 100],
        'gamma': ['scale', 'auto', 0.01, 0.1, 1]
    }
    
    print("  Executando Grid Search...")
    grid_search = GridSearchCV(
        svm,
        param_grid,
        scoring='f1_macro',
        cv=5,
        n_jobs=-1,
        verbose=1
    )
    grid_search.fit(train_features_scaled, train_labels)
    
    print(f"  Melhores par√¢metros: {grid_search.best_params_}")
    
    # Evaluation with the refitted best estimator.
    best_svm = grid_search.best_estimator_
    
    # Validation split
    val_predictions = best_svm.predict(val_features_scaled)
    val_accuracy = accuracy_score(val_labels, val_predictions)
    val_f1 = f1_score(val_labels, val_predictions, average='macro')
    
    # Test split
    test_predictions = best_svm.predict(test_features_scaled)
    test_accuracy = accuracy_score(test_labels, test_predictions)
    test_f1 = f1_score(test_labels, test_predictions, average='macro')
    
    # Silhouette score of the scaled test features grouped by PREDICTED label.
    # It is best-effort: it cannot be computed when, for example, every sample
    # gets the same prediction. Only ``Exception`` is caught so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        silhouette = silhouette_score(test_features_scaled, test_predictions)
    except Exception as exc:
        print(f"  Silhouette Score ignorado: {exc}")
        silhouette = None
    
    results = {
        'arm': arm_name,
        'classifier': 'SVM',
        'best_params': grid_search.best_params_,
        'val_accuracy': float(val_accuracy),
        'val_f1_macro': float(val_f1),
        'test_accuracy': float(test_accuracy),
        'test_f1_macro': float(test_f1),
        'silhouette_score': float(silhouette) if silhouette is not None else None,
        'classification_report': classification_report(test_labels, test_predictions, output_dict=True)
    }
    
    print(f"  Valida√ß√£o - Acur√°cia: {val_accuracy:.4f}, F1: {val_f1:.4f}")
    print(f"  Teste - Acur√°cia: {test_accuracy:.4f}, F1: {test_f1:.4f}")
    # Compare against None explicitly: a score of exactly 0.0 is valid output.
    if silhouette is not None:
        print(f"  Silhouette Score: {silhouette:.4f}")
    
    return results

def train_src_classifier(train_features, train_labels, val_features, val_labels,
                        test_features, test_labels, arm_name, n_atoms=50, alpha=0.1):
    """
    Train an SRC classifier and evaluate it on the validation and test splits.

    Features are standardized with a scaler fitted on the training split only,
    then an ``SRCClassifier`` is fitted on the scaled training data and used to
    predict both evaluation splits.

    Args:
        train_features, train_labels: Training split used for scaling and for
            learning the per-class dictionaries.
        val_features, val_labels: Validation split (evaluation only).
        test_features, test_labels: Test split (evaluation only).
        arm_name: Experimental arm name, used in log output and in the result.
        n_atoms: Number of dictionary atoms per class passed to SRCClassifier.
        alpha: LASSO regularization strength passed to SRCClassifier.

    Returns:
        dict: Arm name, classifier tag, ``n_atoms``/``alpha``, validation and
        test accuracy / macro F1, silhouette score (``None`` when it cannot be
        computed) and the test classification report as a dict.
    """
    print(f"\n{'='*60}")
    print(f"Treinando SRC para {arm_name}")
    print(f"{'='*60}")
    
    # Standardization: fit on train only, then apply to every split.
    scaler = StandardScaler()
    train_features_scaled = scaler.fit_transform(train_features)
    val_features_scaled = scaler.transform(val_features)
    test_features_scaled = scaler.transform(test_features)
    
    # Learn the per-class dictionaries.
    print("  Aprendendo dicion√°rios...")
    src = SRCClassifier(n_atoms_per_class=n_atoms, alpha=alpha)
    src.fit(train_features_scaled, train_labels)
    
    # Evaluation
    print("  Fazendo predi√ß√µes...")
    val_predictions = src.predict(val_features_scaled)
    test_predictions = src.predict(test_features_scaled)
    
    val_accuracy = accuracy_score(val_labels, val_predictions)
    val_f1 = f1_score(val_labels, val_predictions, average='macro')
    test_accuracy = accuracy_score(test_labels, test_predictions)
    test_f1 = f1_score(test_labels, test_predictions, average='macro')
    
    # Silhouette score of the scaled test features grouped by PREDICTED label.
    # It is best-effort: it cannot be computed when, for example, every sample
    # gets the same prediction. Only ``Exception`` is caught so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        silhouette = silhouette_score(test_features_scaled, test_predictions)
    except Exception as exc:
        print(f"  Silhouette Score ignorado: {exc}")
        silhouette = None
    
    results = {
        'arm': arm_name,
        'classifier': 'SRC',
        'n_atoms_per_class': n_atoms,
        'alpha': alpha,
        'val_accuracy': float(val_accuracy),
        'val_f1_macro': float(val_f1),
        'test_accuracy': float(test_accuracy),
        'test_f1_macro': float(test_f1),
        'silhouette_score': float(silhouette) if silhouette is not None else None,
        'classification_report': classification_report(test_labels, test_predictions, output_dict=True)
    }
    
    print(f"  Valida√ß√£o - Acur√°cia: {val_accuracy:.4f}, F1: {val_f1:.4f}")
    print(f"  Teste - Acur√°cia: {test_accuracy:.4f}, F1: {test_f1:.4f}")
    # Compare against None explicitly: a score of exactly 0.0 is valid output.
    if silhouette is not None:
        print(f"  Silhouette Score: {silhouette:.4f}")
    
    return results

# Cell-level confirmation that the helper functions above were defined.
print("Fun√ß√µes auxiliares criadas!")


## Pipeline de Classificação Completo

Executa classificação para todos os braços experimentais.


In [None]:
def run_classification_pipeline():
    """
    Run the full classification pipeline for every experimental arm.

    For each arm in ``EXPERIMENTAL_ARMS`` the train/val/test features are
    loaded, an SVM and an SRC classifier are trained and evaluated, and each
    result dict is written to ``RESULTS_DIR / "<arm>_<svm|src>_results.json"``.
    A failure in one classifier is reported and does not stop the others.
    Finally a summary table is written to ``RESULTS_DIR / "summary_results.csv"``
    and printed.

    Returns:
        tuple: ``(all_results, summary_df)`` - the list of result dicts of
        every classifier that finished, and the summary ``DataFrame``.
    """
    # (display label, file-name tag, training function, extra keyword args)
    classifiers = (
        ("SVM", "svm", train_svm_classifier, {}),
        ("SRC", "src", train_src_classifier, {"n_atoms": 50, "alpha": 0.1}),
    )
    all_results = []
    
    for arm in EXPERIMENTAL_ARMS:
        print(f"\n{'#'*60}")
        print(f"Processando: {arm}")
        print(f"{'#'*60}")
        
        # Load features
        train_features, train_labels = load_features(arm, "train")
        val_features, val_labels = load_features(arm, "val")
        test_features, test_labels = load_features(arm, "test")
        
        # Every split is required. Checking only the training split would let
        # a missing val/test split surface later as a misleading training
        # error for both classifiers.
        if train_features is None or val_features is None or test_features is None:
            print(f"‚ö†Ô∏è  Pulando {arm} - features n√£o encontradas")
            continue
        
        for label, tag, trainer, extra_kwargs in classifiers:
            # Best-effort boundary: one failing classifier must not abort the
            # remaining classifiers or arms.
            try:
                results = trainer(
                    train_features, train_labels,
                    val_features, val_labels,
                    test_features, test_labels,
                    arm,
                    **extra_kwargs
                )
                all_results.append(results)
                
                # Save this classifier's results
                output_path = RESULTS_DIR / f"{arm}_{tag}_results.json"
                with open(output_path, 'w', encoding='utf-8') as f:
                    json.dump(results, f, indent=2)
                print(f"‚úÖ Resultados {label} salvos em: {output_path}")
                
            except Exception as e:
                print(f"‚ùå Erro ao treinar {label} para {arm}: {e}")
    
    # Build the summary table
    summary_data = []
    for result in all_results:
        silhouette = result['silhouette_score']
        summary_data.append({
            'Arm': result['arm'],
            'Classifier': result['classifier'],
            'Val Accuracy': f"{result['val_accuracy']:.4f}",
            'Val F1': f"{result['val_f1_macro']:.4f}",
            'Test Accuracy': f"{result['test_accuracy']:.4f}",
            'Test F1': f"{result['test_f1_macro']:.4f}",
            # Compare against None explicitly so a score of exactly 0.0 is
            # reported as a number instead of "N/A".
            'Silhouette': f"{silhouette:.4f}" if silhouette is not None else "N/A"
        })
    
    summary_df = pd.DataFrame(summary_data)
    summary_path = RESULTS_DIR / "summary_results.csv"
    summary_df.to_csv(summary_path, index=False)
    print(f"\n‚úÖ Tabela resumo salva em: {summary_path}")
    print("\n" + summary_df.to_string())
    
    return all_results, summary_df

# Run the pipeline. The call is left commented out so that executing this cell
# only defines the pipeline; uncomment the next line to actually run it.
# results, summary = run_classification_pipeline()
print("Pipeline de classifica√ß√£o preparado. Descomente a √∫ltima linha para executar.")
