In [3]:

import os
import numpy as np
from PIL import Image
import torch
from transformers import AutoImageProcessor, ViTModel
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, roc_auc_score
from scipy import stats

# --------------------------
# Dataset settings
# --------------------------
DIR_MELANOMA = r'/content/melanoma'
DIR_NAEVUS   = r'/content/naevus'
EXT          = ('.jpg', '.png', '.jpeg')  # adjust if needed

# --------------------------
# Model settings
# --------------------------
MODEL_NAME = "google/vit-base-patch16-224-in21k"
BATCH_SIZE = 8  # tune to fit your GPU/CPU
SEED       = 42
NFOLDS     = 5  # stratified K-Fold

# Seed the torch and NumPy global RNGs with the same value.
torch.manual_seed(SEED)
np.random.seed(SEED)

# --------------------------
# Carregar lista de imagens + rótulos
# --------------------------
def list_images_from_dir(d, exts):
    """Return the sorted paths of the image files located directly in ``d``.

    Args:
        d: Directory to scan (not recursive).
        exts: A single extension or any iterable of extensions
            (e.g. ``('.jpg', '.png')``); matching is case-insensitive.

    Returns:
        Sorted list of ``os.path.join(d, name)`` for every regular file whose
        name ends with one of ``exts``.
    """
    # str.endswith accepts only a str or a tuple, so normalise lists/sets.
    suffixes = (exts,) if isinstance(exts, str) else tuple(exts)
    suffixes = tuple(s.lower() for s in suffixes)
    candidates = (
        os.path.join(d, name)
        for name in os.listdir(d)
        if name.lower().endswith(suffixes)
    )
    # Drop sub-directories that merely carry an image-like name; they would
    # otherwise fail later when opened as images.
    return sorted(p for p in candidates if os.path.isfile(p))

# Collect the image paths of each class.
paths_melanoma = list_images_from_dir(DIR_MELANOMA, EXT)
paths_naevus   = list_images_from_dir(DIR_NAEVUS, EXT)

# Melanoma paths come first, so labels are 1 (melanoma) followed by 0 (naevus).
X_paths = np.array(paths_melanoma + paths_naevus)
y       = np.array([1]*len(paths_melanoma) + [0]*len(paths_naevus))

print(f"# Imagens: {len(X_paths)} (melanoma={len(paths_melanoma)}, naevus={len(paths_naevus)})")

# --------------------------
# Load the ViT model and its image processor
# --------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
image_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
model = ViTModel.from_pretrained(MODEL_NAME).to(device)
model.eval()  # switch the module to evaluation mode

# --------------------------
# Extrair last_hidden_state para todas as imagens
# Saída: numpy array shape (N, tokens, hidden_dim)
# --------------------------
def load_image_rgb(path):
    """Open the image at ``path`` and return it converted to RGB.

    ``Image.open`` keeps the underlying file open; using it as a context
    manager releases the handle right after ``convert`` has produced the
    returned copy, instead of leaving it to the garbage collector.
    """
    with Image.open(path) as img:
        return img.convert('RGB')

def extract_last_hidden_states(paths, batch_size=8):
    """Run the ViT over ``paths`` in batches and collect ``last_hidden_state``.

    Args:
        paths: Sequence of image paths.
        batch_size: Number of images sent through the model at once.

    Returns:
        NumPy array obtained by concatenating every batch's
        ``last_hidden_state`` (B, tokens, hidden_dim) along axis 0.
    """
    chunks = []
    batch_starts = range(0, len(paths), batch_size)
    with torch.no_grad():  # feature extraction only, no gradients needed
        for start in batch_starts:
            images = [load_image_rgb(p) for p in paths[start:start + batch_size]]
            encoded = image_processor(images, return_tensors="pt").to(device)
            hidden = model(**encoded).last_hidden_state
            chunks.append(hidden.detach().cpu().numpy())
    return np.concatenate(chunks, axis=0)

print("Extraindo last_hidden_state...")
LH = extract_last_hidden_states(X_paths, batch_size=BATCH_SIZE)
# LH shape: (N, tokens, hidden_dim) -> tokens=197, hidden_dim=768 for ViT-Base
N, T, D = LH.shape
print(f"last_hidden_state shape = {LH.shape}")

# --------------------------
# Métodos de agregação
# Cada função retorna vetor (D,) por imagem
# Por padrão, excetuando 'cls', agregações usam APENAS patches (tokens[1:])
# --------------------------
def agg_cls(x):
    """Return the [CLS] token, i.e. row 0 of ``x`` (shape (tokens, D))."""
    cls_token = x[0]
    return cls_token

def agg_mean(x):
    """Per-dimension mean over the patch tokens (row 0 / CLS is skipped)."""
    patches = x[1:]
    return np.mean(patches, axis=0)

def agg_median(x):
    """Per-dimension median over the patch tokens (row 0 / CLS is skipped)."""
    patches = x[1:]
    return np.median(patches, axis=0)

def agg_max(x):
    """Per-dimension maximum over the patch tokens (row 0 / CLS is skipped)."""
    patches = x[1:]
    return np.max(patches, axis=0)

def agg_min(x):
    """Per-dimension minimum over the patch tokens (row 0 / CLS is skipped)."""
    patches = x[1:]
    return np.min(patches, axis=0)

def agg_l2_norm(x):
    """Per-dimension L2 norm over the patch tokens: sqrt(sum_j x_j^2).

    Row 0 (CLS) is excluded from the sum.
    """
    patches = x[1:]
    sum_of_squares = np.sum(np.square(patches), axis=0)
    return np.sqrt(sum_of_squares)

def agg_energy(x):
    """Per-dimension energy over the patch tokens: sum_j x_j^2.

    Row 0 (CLS) is excluded from the sum.
    """
    squared_patches = np.square(x[1:])
    return np.sum(squared_patches, axis=0)

def agg_gem(x, p=3.0):
    """GeM pooling per dimension over patch tokens: (mean(x^p))^(1/p).

    Row 0 (CLS) is excluded. Negative activations are clipped to 0 before
    exponentiation so that fractional ``p`` never sees a negative base.
    """
    non_negative = np.clip(x[1:], a_min=0.0, a_max=None)
    mean_of_powers = np.power(non_negative, p).mean(axis=0)
    return np.power(mean_of_powers, 1.0/p)

def agg_logsumexp(x):
    """LogSumExp per dimension over patch tokens: log(sum(exp(x))).

    Row 0 (CLS) is excluded. The per-dimension maximum is subtracted before
    exponentiating and added back afterwards for numerical stability.
    """
    patches = x[1:]
    peak = np.max(patches, axis=0, keepdims=True)
    shifted_total = np.sum(np.exp(patches - peak), axis=0, keepdims=True)
    stabilised = peak + np.log(shifted_total)
    return stabilised.ravel()

# Registry: aggregation name -> callable taking one (tokens, D) array.
# The insertion order is the order in which methods are evaluated and reported.
AGG_FUNCS = {
    "cls": agg_cls,
    "mean": agg_mean,
    "median": agg_median,
    "max": agg_max,
    "min": agg_min,
    "l2_norm": agg_l2_norm,
    "energy": agg_energy,
    "gem_p3": lambda x: agg_gem(x, p=3.0),
    "logsumexp": agg_logsumexp,
}

# --------------------------
# Pré-computar features agregadas (N, D) para cada método
# --------------------------
def apply_aggregation_all(LH, agg_func):
    """Apply ``agg_func`` to every sample of ``LH`` and stack the results.

    Args:
        LH: Array whose first axis indexes samples.
        agg_func: Callable applied to each ``LH[i]``.

    Returns:
        ``np.stack`` of the per-sample outputs along a new axis 0.
    """
    per_sample = [agg_func(sample) for sample in LH]
    return np.stack(per_sample, axis=0)

print("Agregando features por método...")
# One aggregated feature matrix per method, keyed by method name.
agg_features = {name: apply_aggregation_all(LH, fn) for name, fn in AGG_FUNCS.items()}
# NOTE: this loop binds the module-level names `k` and `v`.
for k, v in agg_features.items():
    print(f"  {k}: {v.shape}")

# --------------------------
# K-Fold estratificado + LDA
# Métrica: AUC ROC (positiva=classe 1) + Accuracy
# --------------------------
def evaluate_lda_kfold(X, y, n_splits=5, seed=42):
    """Evaluate an LDA classifier with shuffled stratified K-Fold.

    Args:
        X: Feature matrix indexed by sample along axis 0.
        y: Label vector aligned with ``X``.
        n_splits: Number of folds.
        seed: ``random_state`` for the fold shuffling.

    Returns:
        Tuple ``(aucs, accs)`` of NumPy arrays with one ROC AUC and one
        accuracy value per fold.
    """
    folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    fold_aucs = []
    fold_accs = []
    for train_idx, test_idx in folds.split(X, y):
        train_labels = y[train_idx]
        test_labels = y[test_idx]

        # Standardise per fold: statistics come from the training part only.
        scaler = StandardScaler()
        train_feats = scaler.fit_transform(X[train_idx])
        test_feats = scaler.transform(X[test_idx])

        classifier = LinearDiscriminantAnalysis(solver='svd', shrinkage=None)
        classifier.fit(train_feats, train_labels)
        # Last predict_proba column is used as the score for the AUC.
        scores = classifier.predict_proba(test_feats)[:, -1]
        predictions = classifier.predict(test_feats)

        fold_aucs.append(roc_auc_score(test_labels, scores))
        fold_accs.append(accuracy_score(test_labels, predictions))
    return np.array(fold_aucs), np.array(fold_accs)

print("Avaliando LDA em K-Fold (estratificado)...")
# Per-method fold metrics: results[name] = {"auc": array, "acc": array}.
results = {}
for name, X in agg_features.items():
    aucs, accs = evaluate_lda_kfold(X, y, n_splits=NFOLDS, seed=SEED)
    results[name] = {"auc": aucs, "acc": accs}
    print(f"  {name}: AUC média={aucs.mean():.4f} ± {aucs.std():.4f} | ACC média={accs.mean():.4f} ± {accs.std():.4f}")

# --------------------------
# Friedman test (using the per-fold AUC)
# --------------------------
# Arrange the AUCs per method -> shape: (k methods, N folds)
methods = list(results.keys())
k = len(methods)
auc_matrix = np.stack([results[m]["auc"] for m in methods], axis=0)  # (k, N)
# NOTE: rebinds the module-level N (previously LH.shape[0]) to the fold count.
N = auc_matrix.shape[1]

# Friedman (scipy.stats performs the ranking internally)
friedman_stat, friedman_p = stats.friedmanchisquare(*[auc_matrix[i] for i in range(k)])
print("\n# Teste de Friedman sobre AUCs (folds como blocos)")
print(f"  estatística={friedman_stat:.4f}, p-valor={friedman_p:.4f}")

# --------------------------
# Nemenyi (pós-hoc) com ranks médios
# CD = q_alpha * sqrt(k*(k+1)/(6*N)), q_alpha da distribuição studentized range
# Tenta usar scipy.stats.studentized_range; se não houver, usa tabela aproximada
# --------------------------
def mean_ranks(matrix):
    """Return each method's mean rank across blocks (rank 1 = largest value).

    Args:
        matrix: Array-like of shape (k, n_blocks); one row per method, one
            column per block (fold).

    Returns:
        NumPy array of shape (k,) with the average rank of every row. Ties
        inside a column share the average of the ranks they span.
    """
    # Work in float so that negation is safe for any input dtype and tied
    # ranks such as 1.5 are never truncated by an integer buffer.
    scores = np.asarray(matrix, dtype=float)
    # rankdata ranks ascending; negate so the best (largest) score gets rank 1.
    # Ranking runs down each column, i.e. within the block taken from the
    # argument itself rather than from a module-level fold count.
    ranks = stats.rankdata(-scores, method='average', axis=0)
    return ranks.mean(axis=1)  # (k,)

def q_alpha_nemenyi(k, alpha=0.05):
    """Return the Nemenyi critical value q_alpha for ``k`` compared methods.

    This is the quantity used in ``CD = q_alpha * sqrt(k*(k+1)/(6*N))``
    (Demsar 2006): the upper-``alpha`` quantile of the studentized range
    distribution with infinite degrees of freedom, divided by sqrt(2).

    Args:
        k: Number of methods being compared.
        alpha: Significance level.

    Returns:
        The critical value as a float.
    """
    try:
        from scipy.stats import studentized_range
    except ImportError:
        # Older SciPy without studentized_range: fall back to the tabulated
        # values (Demsar 2006, infinite df), which exist for alpha=0.05 only.
        import warnings

        table = {
            2: 1.960,
            3: 2.343,
            4: 2.569,
            5: 2.728,
            6: 2.850,
            7: 2.948,
            8: 3.031,
            9: 3.102,
            10: 3.164,
        }
        if alpha != 0.05 or k not in table:
            # q grows with k, so reusing the k=10 entry for larger k makes
            # the test too liberal; tell the user rather than stay silent.
            warnings.warn(
                "q_alpha_nemenyi: no tabulated value for "
                f"k={k}, alpha={alpha}; returning an approximation."
            )
        return table.get(k, table[10])
    # The raw studentized-range quantile must be divided by sqrt(2) to match
    # the tabulated q_alpha that the CD formula expects.
    return float(studentized_range.isf(alpha, k, np.inf) / np.sqrt(2.0))

avg_ranks = mean_ranks(auc_matrix)  # (k,)
order = np.argsort(avg_ranks)       # lowest (best) rank first
sorted_methods = [methods[i] for i in order]
sorted_ranks   = avg_ranks[order]

# NOTE(review): this CD formula expects q_alpha to be the studentized-range
# quantile already divided by sqrt(2) (Demsar 2006) - confirm that
# q_alpha_nemenyi returns it in that form.
q_alpha = q_alpha_nemenyi(k, alpha=0.05)
CD = q_alpha * np.sqrt(k*(k+1)/(6.0*N))

print("\n# Nemenyi pós-hoc (baseado em ranks médios de AUC)")
print(f"  k={k}, N(folds)={N}, q_alpha≈{q_alpha:.4f}, CD={CD:.4f}")
print("  Métodos ordenados por rank médio (menor=melhor):")
for m, r in zip(sorted_methods, sorted_ranks):
    print(f"    {m}: rank médio={r:.3f}")

# A pair differs significantly when |rank_i - rank_j| > CD
print("\n  Pares com diferença de rank > CD (significativos a ~5%):")
sig_pairs = []
for i in range(k):
    for j in range(i+1, k):
        diff = abs(avg_ranks[i] - avg_ranks[j])
        if diff > CD:
            sig_pairs.append((methods[i], methods[j], diff))
            print(f"    {methods[i]} vs {methods[j]}: Δrank={diff:.3f} > CD")

if not sig_pairs:
    print("    Nenhum par significativo a ~5% pelo Nemenyi.")


# --------------------------
# Observações:
# - As agregações (exceto 'cls') consideram somente patches (tokens[1:]).
# - LDA é treinado por fold com StandardScaler aplicado (fit no treino, transform no teste).
# - Métrica principal usada para estatística é AUC; você pode trocar para ACC se preferir.
# - Caso tenha muitos dados, considere salvar/recuperar LH pré-computado para evitar reextração.
# --------------------------




# Imagens: 170 (melanoma=70, naevus=100)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Extraindo last_hidden_state...
last_hidden_state shape = (170, 197, 768)
Agregando features por método...
  cls: (170, 768)
  mean: (170, 768)
  median: (170, 768)
  max: (170, 768)
  min: (170, 768)
  l2_norm: (170, 768)
  energy: (170, 768)
  gem_p3: (170, 768)
  logsumexp: (170, 768)
Avaliando LDA em K-Fold (estratificado)...
  cls: AUC média=0.8743 ± 0.0339 | ACC média=0.7824 ± 0.0513
  mean: AUC média=0.9036 ± 0.0433 | ACC média=0.8000 ± 0.0655
  median: AUC média=0.8943 ± 0.0410 | ACC média=0.7882 ± 0.0655
  max: AUC média=0.8657 ± 0.0952 | ACC média=0.7765 ± 0.0824
  min: AUC média=0.8879 ± 0.0307 | ACC média=0.8176 ± 0.0432
  l2_norm: AUC média=0.8364 ± 0.0831 | ACC média=0.7647 ± 0.0789
  energy: AUC média=0.8379 ± 0.0812 | ACC média=0.7588 ± 0.0600
  gem_p3: AUC média=0.8671 ± 0.0602 | ACC média=0.7412 ± 0.0706
  logsumexp: AUC média=0.9050 ± 0.0398 | ACC média=0.7882 ± 0.0570

# Teste de Friedman sobre AUCs (folds como blocos)
  estatística=16.4958, p-valor=0.0358

# Nemenyi

In [6]:

import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
from transformers import AutoImageProcessor, ViTModel

from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, roc_auc_score
from scipy import stats
import scipy.io as sio

# --------------------------
# General settings
# --------------------------
DIR_MELANOMA = r'/content/melanoma'
DIR_NAEVUS   = r'/content/naevus'
EXT          = ('.jpg', '.png', '.jpeg')  # adjust if needed

MODEL_NAME   = "google/vit-base-patch16-224-in21k"
BATCH_SIZE   = 8
SEED         = 42

NFOLDS       = 5   # number of folds per repetition
NREPEATS     = 1   # number of repetitions (use 1 to get exactly 5 rows per fold)
OUT_DIR      = "outputs_vit_lda"
AGG_DIR      = os.path.join(OUT_DIR, "aggregations")
METRICS_DIR  = os.path.join(OUT_DIR, "metrics")
# Create the output folders up front; existing folders are left as they are.
os.makedirs(AGG_DIR, exist_ok=True)
os.makedirs(METRICS_DIR, exist_ok=True)

# Seed the torch and NumPy global RNGs with the same value.
torch.manual_seed(SEED)
np.random.seed(SEED)

# --------------------------
# Utilidades: listar imagens e rótulos
# --------------------------
def list_images_from_dir(d, exts):
    """Return the sorted paths of the image files located directly in ``d``.

    Args:
        d: Directory to scan (not recursive).
        exts: A single extension or any iterable of extensions
            (e.g. ``('.jpg', '.png')``); matching is case-insensitive.

    Returns:
        Sorted list of ``os.path.join(d, name)`` for every regular file whose
        name ends with one of ``exts``.
    """
    # str.endswith accepts only a str or a tuple, so normalise lists/sets.
    suffixes = (exts,) if isinstance(exts, str) else tuple(exts)
    suffixes = tuple(s.lower() for s in suffixes)
    candidates = (
        os.path.join(d, name)
        for name in os.listdir(d)
        if name.lower().endswith(suffixes)
    )
    # Drop sub-directories that merely carry an image-like name; they would
    # otherwise fail later when opened as images.
    return sorted(p for p in candidates if os.path.isfile(p))

# Collect the image paths of each class.
paths_melanoma = list_images_from_dir(DIR_MELANOMA, EXT)
paths_naevus   = list_images_from_dir(DIR_NAEVUS, EXT)

# Melanoma paths come first, so labels are 1 (melanoma) followed by 0 (naevus).
X_paths = np.array(paths_melanoma + paths_naevus)
y       = np.array([1]*len(paths_melanoma) + [0]*len(paths_naevus))

print(f"# Imagens: {len(X_paths)} (melanoma={len(paths_melanoma)}, naevus={len(paths_naevus)})")

# --------------------------
# Load the ViT model and its image processor
# --------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
image_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
model = ViTModel.from_pretrained(MODEL_NAME).to(device)
model.eval()  # switch the module to evaluation mode

# --------------------------
# Extração de last_hidden_state
# --------------------------
def load_image_rgb(path):
    """Open the image at ``path`` and return it converted to RGB.

    ``Image.open`` keeps the underlying file open; using it as a context
    manager releases the handle right after ``convert`` has produced the
    returned copy, instead of leaving it to the garbage collector.
    """
    with Image.open(path) as img:
        return img.convert('RGB')

def extract_last_hidden_states(paths, batch_size=8):
    """Run the ViT over ``paths`` in batches and collect ``last_hidden_state``.

    Args:
        paths: Sequence of image paths.
        batch_size: Number of images sent through the model at once.

    Returns:
        NumPy array obtained by concatenating every batch's
        ``last_hidden_state`` (B, tokens, hidden) along axis 0.
    """
    chunks = []
    batch_starts = range(0, len(paths), batch_size)
    with torch.no_grad():  # feature extraction only, no gradients needed
        for start in batch_starts:
            images = [load_image_rgb(p) for p in paths[start:start + batch_size]]
            encoded = image_processor(images, return_tensors="pt").to(device)
            hidden = model(**encoded).last_hidden_state
            chunks.append(hidden.detach().cpu().numpy())
    return np.concatenate(chunks, axis=0)

print("Extraindo last_hidden_state...")
LH = extract_last_hidden_states(X_paths, batch_size=BATCH_SIZE)
N, T, D = LH.shape  # expected: (N, 197, 768) for ViT-Base
print(f"last_hidden_state shape = {LH.shape}")

# --------------------------
# Métodos de agregação
# --------------------------
def agg_cls(x):
    """Return the [CLS] token, i.e. row 0 of ``x``."""
    cls_token = x[0]
    return cls_token


def agg_mean(x):
    """Per-dimension mean over the patch tokens (row 0 is skipped)."""
    patches = x[1:]
    return np.mean(patches, axis=0)


def agg_median(x):
    """Per-dimension median over the patch tokens (row 0 is skipped)."""
    patches = x[1:]
    return np.median(patches, axis=0)


def agg_max(x):
    """Per-dimension maximum over the patch tokens (row 0 is skipped)."""
    patches = x[1:]
    return np.max(patches, axis=0)


def agg_min(x):
    """Per-dimension minimum over the patch tokens (row 0 is skipped)."""
    patches = x[1:]
    return np.min(patches, axis=0)


def agg_l2_norm(x):
    """Per-dimension L2 norm over the patch tokens (row 0 is skipped)."""
    sum_of_squares = np.sum(np.square(x[1:]), axis=0)
    return np.sqrt(sum_of_squares)


def agg_energy(x):
    """Per-dimension sum of squares over the patch tokens (row 0 is skipped)."""
    squared_patches = np.square(x[1:])
    return np.sum(squared_patches, axis=0)
def agg_gem(x, p=3.0):
    """GeM pooling over patch tokens: (mean(clip(x, 0)^p))^(1/p).

    Row 0 is skipped; negative values are clipped to 0 before the power.
    """
    non_negative = np.clip(x[1:], a_min=0.0, a_max=None)
    mean_of_powers = np.power(non_negative, p).mean(axis=0)
    return np.power(mean_of_powers, 1.0/p)
def agg_logsumexp(x):
    """LogSumExp per dimension over the patch tokens (row 0 is skipped).

    The per-dimension maximum is subtracted before exponentiating and added
    back afterwards for numerical stability.
    """
    patches = x[1:]
    peak = np.max(patches, axis=0, keepdims=True)
    shifted_total = np.sum(np.exp(patches - peak), axis=0, keepdims=True)
    stabilised = peak + np.log(shifted_total)
    return stabilised.ravel()

# Registry: aggregation name -> callable taking one sample's token matrix.
# The insertion order is reused later as the method order for the statistics.
AGG_FUNCS = {
    "cls": agg_cls,
    "mean": agg_mean,
    "median": agg_median,
    "max": agg_max,
    "min": agg_min,
    "l2_norm": agg_l2_norm,
    "energy": agg_energy,
    "gem_p3": lambda x: agg_gem(x, p=3.0),
    "logsumexp": agg_logsumexp,
}

def apply_aggregation_all(LH, agg_func):
    """Apply ``agg_func`` to every sample of ``LH`` and stack the results.

    Args:
        LH: Array whose first axis indexes samples.
        agg_func: Callable applied to each ``LH[i]``.

    Returns:
        ``np.stack`` of the per-sample outputs along a new axis 0.
    """
    per_sample = [agg_func(sample) for sample in LH]
    return np.stack(per_sample, axis=0)

print("Agregando features por método...")
# One aggregated feature matrix per method, keyed by method name.
agg_features = {name: apply_aggregation_all(LH, fn) for name, fn in AGG_FUNCS.items()}
# NOTE: this loop binds the module-level names `k` and `v`.
for k, v in agg_features.items():
    print(f"  {k}: {v.shape}")

# Save the aggregated features of each method as .mat and .csv (one row per sample)
print("Salvando features agregadas...")
for name, X in agg_features.items():
    # .mat
    sio.savemat(os.path.join(AGG_DIR, f"{name}_features.mat"), {"X": X, "y": y})
    # .csv (beware: may be large); the label is written as the first column
    df = pd.DataFrame(X)
    df.insert(0, "label", y)
    df.to_csv(os.path.join(AGG_DIR, f"{name}_features.csv"), index=False)

# --------------------------
# Gerar splits Repeated Stratified K-Fold (fixos para todos os métodos)
# --------------------------
def get_repeated_stratified_splits(y, n_splits=5, n_repeats=1, seed=42):
    """Materialise the (train, test) index pairs of a repeated stratified K-Fold.

    Args:
        y: Label vector used for stratification.
        n_splits: Folds per repetition.
        n_repeats: Number of repetitions.
        seed: ``random_state`` passed to the splitter.

    Returns:
        List with one ``(train_idx, test_idx)`` pair per split.
    """
    # Only the labels matter for stratification, so a dummy X is enough.
    placeholder_X = np.zeros_like(y)
    cv = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=seed)
    return [*cv.split(placeholder_X, y)]

# The same list of splits is reused for every aggregation method.
splits = get_repeated_stratified_splits(y, NFOLDS, NREPEATS, SEED)
TOTAL_SPLITS = len(splits)
print(f"Total de splits (folds x repetições): {TOTAL_SPLITS} ({NFOLDS} x {NREPEATS})")

# --------------------------
# Avaliação LDA por método usando os splits fixos
# --------------------------
def evaluate_lda_on_splits(X, y, splits, n_folds, n_repeats):
    """Evaluate an LDA classifier on a fixed list of train/test splits.

    Args:
        X: Feature matrix indexed by sample along axis 0.
        y: Label vector aligned with ``X``.
        splits: Iterable of ``(train_idx, test_idx)`` pairs.
        n_folds: Folds per repetition; used to derive the repeat/fold ids
            from the split position.
        n_repeats: Accepted for signature symmetry; not used in the body.

    Returns:
        Tuple ``(aucs, accs, rows)``: two NumPy arrays with one value per
        split, and a list of dicts with keys ``split_idx``, ``repeat``,
        ``fold``, ``auc`` and ``acc``.
    """
    split_aucs = []
    split_accs = []
    records = []
    for position, (train_idx, test_idx) in enumerate(splits):
        repeat_id, fold_id = divmod(position, n_folds)

        train_labels = y[train_idx]
        test_labels = y[test_idx]

        # Standardise per split: statistics come from the training part only.
        scaler = StandardScaler()
        train_feats = scaler.fit_transform(X[train_idx])
        test_feats = scaler.transform(X[test_idx])

        classifier = LinearDiscriminantAnalysis(solver='svd', shrinkage=None)
        classifier.fit(train_feats, train_labels)

        # Last predict_proba column is used as the score for the AUC.
        scores = classifier.predict_proba(test_feats)[:, -1]
        predictions = classifier.predict(test_feats)

        split_auc = roc_auc_score(test_labels, scores)
        split_acc = accuracy_score(test_labels, predictions)

        split_aucs.append(split_auc)
        split_accs.append(split_acc)
        records.append({
            "split_idx": position,
            "repeat": repeat_id,
            "fold": fold_id,
            "auc": split_auc,
            "acc": split_acc,
        })
    return np.array(split_aucs), np.array(split_accs), records

print("Avaliando LDA (Repeated Stratified K-Fold)…")
# Per-method split metrics plus a long-format list of rows for the CSV export.
results = {}
metrics_rows_all = []
for name, X in agg_features.items():
    aucs, accs, rows = evaluate_lda_on_splits(X, y, splits, NFOLDS, NREPEATS)
    results[name] = {"auc": aucs, "acc": accs}
    for r in rows:
        r["method"] = name
        metrics_rows_all.append(r)
    print(f"  {name}: AUC média={aucs.mean():.4f} ± {aucs.std():.4f} | ACC média={accs.mean():.4f} ± {accs.std():.4f}")

df_metrics = pd.DataFrame(metrics_rows_all)
df_metrics.to_csv(os.path.join(METRICS_DIR, "metrics_by_split.csv"), index=False)

# --------------------------
# "Wide" tables: per split and per fold (5 rows if NREPEATS=1)
# --------------------------
# Per split (each row = one split; useful when NREPEATS > 1)
df_auc_split = df_metrics.pivot(index="split_idx", columns="method", values="auc").sort_index()
df_acc_split = df_metrics.pivot(index="split_idx", columns="method", values="acc").sort_index()
df_split_wide = pd.concat({"auc": df_auc_split, "acc": df_acc_split}, axis=1)
df_split_wide.to_csv(os.path.join(METRICS_DIR, "table_by_split.csv"))

# Per fold (averaged over the repetitions -> 5 rows)
df_auc_fold = df_metrics.pivot_table(index="fold", columns="method", values="auc", aggfunc="mean").sort_index()
df_acc_fold = df_metrics.pivot_table(index="fold", columns="method", values="acc", aggfunc="mean").sort_index()
df_rank_fold = df_auc_fold.rank(axis=1, ascending=False, method="average")  # rank 1 = best (highest AUC)
best_method_per_fold = df_auc_fold.idxmax(axis=1)
best_auc_per_fold = df_auc_fold.max(axis=1)
df_fold_wide = pd.concat({"auc": df_auc_fold, "acc": df_acc_fold, "rank": df_rank_fold}, axis=1)
df_fold_wide["best_method_auc"] = best_method_per_fold
df_fold_wide["best_auc"] = best_auc_per_fold
df_fold_wide.to_csv(os.path.join(METRICS_DIR, "table_by_fold.csv"))

# --------------------------
# Friedman (global) on the AUCs (splits as blocks)
# --------------------------
methods = list(AGG_FUNCS.keys())
k = len(methods)

# Rebuild the per-method AUC matrix in the desired method order
auc_matrix = np.stack([df_auc_split[c] for c in methods], axis=0)  # (k, N_blocks)
N_blocks = auc_matrix.shape[1]

friedman_stat, friedman_p = stats.friedmanchisquare(*[auc_matrix[i] for i in range(k)])
print("\n# Friedman (global, AUC):")
print(f"  estatística={friedman_stat:.4f}, p-valor={friedman_p:.6f}")

# --------------------------
# Bonferroni-Dunn (controle = 'mean') sobre ranks médios (globais)
# --------------------------
def mean_ranks(matrix):
    """Rank the methods within every block and average the ranks per method.

    Args:
        matrix: Array-like of shape (k, n_blocks); one row per method, one
            column per block (split). The largest value in a column gets
            rank 1; ties share the average of the ranks they span.

    Returns:
        Tuple ``(avg_ranks, ranks)`` with shapes (k,) and (k, n_blocks).
    """
    # Work in float so that negation is safe for any input dtype and tied
    # ranks such as 1.5 are never truncated by an integer buffer.
    scores = np.asarray(matrix, dtype=float)
    # rankdata ranks ascending; negate so the best (largest) score gets rank 1.
    ranks = stats.rankdata(-scores, method='average', axis=0)
    return ranks.mean(axis=1), ranks  # (k,), (k, N_blocks)

avg_ranks, ranks_all = mean_ranks(auc_matrix)
# Every other method is compared against this control method.
control = "mean"
control_idx = methods.index(control)

# Standard error of the rank differences (Demsar 2006)
SE = np.sqrt(k*(k+1)/(6.0*N_blocks))

# Critical z with Bonferroni correction (two-sided) for k-1 comparisons
alpha = 0.05
z_crit = stats.norm.ppf(1 - alpha / (2*(k-1)))
CD = z_crit * SE

# Bonferroni-Dunn table: one row per non-control method
bd_rows = []
for i, m in enumerate(methods):
    if i == control_idx:
        continue
    diff = abs(avg_ranks[i] - avg_ranks[control_idx])
    z = diff / SE
    significant = z > z_crit
    bd_rows.append({
        "method": m,
        "avg_rank": avg_ranks[i],
        "Δrank_vs_mean": diff,
        "z": z,
        "z_crit": z_crit,
        "CD": CD,
        "significant_(p≈0.05, Bonferroni-Dunn)": bool(significant)
    })

# Largest rank gap to the control first; mean ranks sorted best (lowest) first.
df_bd = pd.DataFrame(bd_rows).sort_values("Δrank_vs_mean", ascending=False).reset_index(drop=True)
df_ranks = pd.DataFrame({"method": methods, "avg_rank": avg_ranks}).sort_values("avg_rank").reset_index(drop=True)

# Save the global summaries
df_ranks.to_csv(os.path.join(METRICS_DIR, "friedman_mean_ranks.csv"), index=False)
df_bd.to_csv(os.path.join(METRICS_DIR, "bonferroni_dunn_vs_mean.csv"), index=False)

print("\n# Bonferroni-Dunn (controle='mean') salvo em CSV.")
print(df_bd)

# --------------------------
# Also save the metrics as .mat (AUC/ACC matrices per method)
# --------------------------
acc_matrix = np.stack([df_acc_split[c] for c in methods], axis=0)
sio.savemat(os.path.join(METRICS_DIR, "metrics.mat"), {
    "methods": np.array(methods, dtype=object),
    "auc_matrix": auc_matrix,
    "acc_matrix": acc_matrix,
    "friedman_stat": friedman_stat,
    "friedman_p": friedman_p,
    "avg_ranks": avg_ranks,
    "control": control,
    "CD": CD,
    "z_crit": z_crit,
})


# Imagens: 170 (melanoma=70, naevus=100)


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


Extraindo last_hidden_state...
last_hidden_state shape = (170, 197, 768)
Agregando features por método...
  cls: (170, 768)
  mean: (170, 768)
  median: (170, 768)
  max: (170, 768)
  min: (170, 768)
  l2_norm: (170, 768)
  energy: (170, 768)
  gem_p3: (170, 768)
  logsumexp: (170, 768)
Salvando features agregadas...
Total de splits (folds x repetições): 5 (5 x 1)
Avaliando LDA (Repeated Stratified K-Fold)…
  cls: AUC média=0.8743 ± 0.0339 | ACC média=0.7824 ± 0.0513
  mean: AUC média=0.9036 ± 0.0433 | ACC média=0.8000 ± 0.0655
  median: AUC média=0.8943 ± 0.0410 | ACC média=0.7882 ± 0.0655
  max: AUC média=0.8657 ± 0.0952 | ACC média=0.7765 ± 0.0824
  min: AUC média=0.8879 ± 0.0307 | ACC média=0.8176 ± 0.0432
  l2_norm: AUC média=0.8364 ± 0.0831 | ACC média=0.7647 ± 0.0789
  energy: AUC média=0.8379 ± 0.0812 | ACC média=0.7588 ± 0.0600
  gem_p3: AUC média=0.8671 ± 0.0602 | ACC média=0.7412 ± 0.0706
  logsumexp: AUC média=0.9050 ± 0.0398 | ACC média=0.7882 ± 0.0570

# Friedman (global,