# Feature Extraction TensorFlow

In [None]:
# feature_extraction.py

import os
import sys
import time
import numpy as np
import pandas as pd
from PIL import Image

import tensorflow as tf
from tensorflow.keras import layers, models

from transformers import TFAutoModel, AutoFeatureExtractor

# Category mapping as defined in the paper: label code -> integer class index.
# The 23 entries cover the contiguous indices 0..22; 'OTHERCLASS' takes the last one.
MAP_CATEGORIES = {
    'A1': 0, 'L1': 1, 'P1': 2, 'G1': 3,   # Antrum
    'A2': 4, 'L2': 5, 'P2': 6, 'G2': 7,
    'A3': 8, 'L3': 9, 'P3': 10, 'G3': 11,
    'A4': 12, 'L4': 13, 'P4': 14, 'G4': 15,
    'A5': 16, 'L5': 17, 'P5': 18,
    'A6': 19, 'L6': 20, 'P6': 21,
    'OTHERCLASS': 22
}

def load_and_process_csv(official_split, label_column):
    """
    Load the official-split CSV and turn its label codes into integer class ids.

    Args:
        official_split: path of the CSV file; its first column is used as the index.
        label_column: name of the column holding the label codes (keys of
            MAP_CATEGORIES).

    Returns:
        A DataFrame with a fresh 0..n-1 index in which `label_column` has the
        nullable 'Int64' dtype. Rows whose label is missing or is not a key of
        MAP_CATEGORIES are dropped.

    Exits the process with status 1 when the CSV file does not exist.
    """
    if not os.path.exists(official_split):
        print("Arquivo de split oficial não encontrado.")
        sys.exit(1)
    df = pd.read_csv(official_split, index_col=0)
    # Series.map turns every value that is not a MAP_CATEGORIES key into NaN, so
    # unknown codes are dropped below instead of breaking the integer cast.
    # It also avoids the downcasting FutureWarning that Series.replace emits here.
    df[label_column] = df[label_column].map(MAP_CATEGORIES).astype('Int64')
    df = df.dropna(subset=[label_column]).reset_index(drop=True)
    return df

def preprocess_image(image_path, input_size):
    """
    Read the JPEG file at `image_path`, decode it with three channels and resize
    it to (input_size, input_size) using Lanczos-3 interpolation.

    Returns the resized image tensor.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    target_shape = [input_size, input_size]
    return tf.image.resize(decoded, target_shape, method=tf.image.ResizeMethod.LANCZOS3)

def build_dataset(df, data_dir, input_size, label_column, batch_size):
    """
    Build a batched, prefetched tf.data.Dataset of (image, label) pairs.

    The DataFrame must provide a 'filename' column (joined onto `data_dir`) and
    the label column named by `label_column`; labels are cast to int32.
    """
    def _load_and_preprocess(path, label):
        # Only the image is transformed; the label passes through untouched.
        return preprocess_image(path, input_size), label

    filepaths = df['filename'].apply(lambda name: os.path.join(data_dir, name)).values
    labels = df[label_column].values.astype(np.int32)

    pipeline = tf.data.Dataset.from_tensor_slices((filepaths, labels))
    pipeline = pipeline.map(_load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    pipeline = pipeline.batch(batch_size)
    return pipeline.prefetch(tf.data.AUTOTUNE)

def build_feature_extractor(model_name, input_size):
    """
    Build a Keras model that extracts fixed features with a frozen Hugging Face ViT.

    Args:
        model_name: Hugging Face model identifier passed to `from_pretrained`.
        input_size: side length of the square, channels-last RGB input images.

    Returns:
        A Keras Model mapping a (batch, input_size, input_size, 3) float32 image
        batch with pixel values in [0, 255] to the hidden state of the first
        ([CLS]) token.
    """
    feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
    vit_model = TFAutoModel.from_pretrained(model_name)
    vit_model.trainable = False  # Freeze the backbone

    def vit_preprocessing(x):
        # Scale pixels to [0, 1] ...
        x = tf.cast(x, tf.float32) / 255.0
        # ... then standardise with the statistics stored in the feature extractor.
        mean = tf.constant(feature_extractor.image_mean, shape=[1, 1, 1, 3], dtype=tf.float32)
        std = tf.constant(feature_extractor.image_std, shape=[1, 1, 1, 3], dtype=tf.float32)
        return (x - mean) / std

    inputs = layers.Input(shape=(input_size, input_size, 3), dtype=tf.float32)
    x = vit_preprocessing(inputs)

    def call_vit(x):
        # The Hugging Face TF ViT models take `pixel_values` as
        # (batch, channels, height, width); the pipeline produces channels-last
        # images, so the axes are swapped before the call.
        x = tf.transpose(x, perm=[0, 3, 1, 2])
        outputs = vit_model(pixel_values=x)
        return outputs.last_hidden_state[:, 0]  # [CLS] token vector

    features = layers.Lambda(call_vit, name="vit_features")(x)
    feature_extractor_model = models.Model(inputs=inputs, outputs=features)
    return feature_extractor_model

def extract_features(model, dataset):
    """
    Run every batch of `dataset` through `model` and collect the results.

    Args:
        model: object exposing `predict_on_batch(batch)` (e.g. a Keras Model).
        dataset: iterable of (images, labels) batches; each labels object must
            provide `.numpy()`.

    Returns:
        (features, labels): two NumPy arrays concatenated along axis 0, in
        dataset order.

    Raises:
        ValueError: if the dataset yields no batches.
    """
    features_list = []
    labels_list = []
    for batch_images, batch_labels in dataset:
        # predict_on_batch handles exactly one batch; calling predict() inside a
        # Python loop rebuilds its data-handling machinery (and prints a progress
        # bar) on every iteration.
        features_list.append(np.asarray(model.predict_on_batch(batch_images)))
        labels_list.append(batch_labels.numpy())
    if not features_list:
        raise ValueError("extract_features received an empty dataset.")
    features_np = np.concatenate(features_list, axis=0)
    labels_np = np.concatenate(labels_list, axis=0)
    return features_np, labels_np

def main():
    """
    Extract frozen ViT features for the 'Train' and 'Validation' rows of the
    official split and store features and labels as .npy files.
    """
    # Configuration
    model_name = "google/vit-base-patch16-224"  # Hugging Face ViT Base
    input_size = 224
    batch_size = 30
    label_column = "Complete agreement"

    # Paths - adjust as needed (check that they are correct when running on Colab)
    data_root = os.path.join("..", "..", "data")
    data_dir = os.path.join(data_root, "Labeled Images")
    official_split = os.path.join(data_root, "official_splits", "image_classification.csv")
    output_dir = os.path.join("..", "output", "vit_features")
    os.makedirs(output_dir, exist_ok=True)

    # Load the CSV and follow the official split stored in 'set_type'
    data_csv = load_and_process_csv(official_split, label_column)
    split_column = data_csv['set_type']
    train_df = data_csv[split_column == 'Train']
    test_df = data_csv[split_column == 'Validation']  # or 'Test', when available

    print("Total de imagens:", len(data_csv))
    print("Imagens de treino:", len(train_df))
    print("Imagens de teste:", len(test_df))

    # TensorFlow input pipelines
    train_ds = build_dataset(train_df, data_dir, input_size, label_column, batch_size)
    test_ds = build_dataset(test_df, data_dir, input_size, label_column, batch_size)

    # Frozen ViT feature extractor
    feature_extractor_model = build_feature_extractor(model_name, input_size)
    feature_extractor_model.summary()

    # Features and labels for both subsets
    train_features, train_labels = extract_features(feature_extractor_model, train_ds)
    test_features, test_labels = extract_features(feature_extractor_model, test_ds)

    # (file stem, caption printed with the shape, array) in reporting/saving order
    results = [
        ("train_features", "Shape das features de treino:", train_features),
        ("train_labels", "Shape dos rótulos de treino:", train_labels),
        ("test_features", "Shape das features de teste:", test_features),
        ("test_labels", "Shape dos rótulos de teste:", test_labels),
    ]
    for _, caption, array in results:
        print(caption, array.shape)

    # Persist the NumPy arrays
    for stem, _, array in results:
        np.save(os.path.join(output_dir, stem + ".npy"), array)

    print("Arquivos NumPy salvos em:", output_dir)


if __name__ == '__main__':
    main()


# EndoscopyDataset

In [None]:
# classification_pipeline.py

import os
import sys
import time
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from PIL import Image

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score, precision_score, recall_score

# Category mapping as defined in the paper: label code -> integer class index.
# The 23 entries cover the contiguous indices 0..22; 'OTHERCLASS' takes the last one.
MAP_CATEGORIES = {
    'A1': 0, 'L1': 1, 'P1': 2, 'G1': 3,    # Antrum
    'A2': 4, 'L2': 5, 'P2': 6, 'G2': 7,
    'A3': 8, 'L3': 9, 'P3': 10, 'G3': 11,
    'A4': 12, 'L4': 13, 'P4': 14, 'G4': 15,
    'A5': 16, 'L5': 17, 'P5': 18,
    'A6': 19, 'L6': 20, 'P6': 21,
    'OTHERCLASS': 22
}

# ------------------------------------------------------------------------------
# Dataset Module
# ------------------------------------------------------------------------------
class EndoscopyDataset(Dataset):
    """
    Map-style dataset yielding (image, label) pairs described by a DataFrame.

    Args:
        df: DataFrame with a 'filename' column and the label column; its index
            is reset so that items are addressed by position 0..len-1.
        data_dir: directory that the file names are relative to.
        transform: optional callable applied to the RGB PIL image.
        label_column: name of the column holding the integer class id
            (defaults to 'Complete agreement', the column used by this pipeline).
    """

    def __init__(self, df, data_dir, transform=None, label_column='Complete agreement'):
        self.df = df.reset_index(drop=True)
        self.data_dir = data_dir
        self.transform = transform
        self.label_column = label_column

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = os.path.join(self.data_dir, self.df.loc[idx, 'filename'])
        # The context manager releases the underlying file deterministically;
        # convert() returns an independent in-memory copy.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        # Return a plain Python int rather than a pandas/NumPy scalar so that
        # batching does not depend on the column's (nullable) dtype.
        label = int(self.df.loc[idx, self.label_column])
        return image, label

# ------------------------------------------------------------------------------
# Lightning Module for Classification
# ------------------------------------------------------------------------------
class ClassificationModule(pl.LightningModule):
    """
    LightningModule that trains a classifier with a class-weighted cross-entropy
    loss and reports macro F1 / precision / recall on the validation set.

    Validation predictions are accumulated by the module itself and reduced in
    `on_validation_epoch_end`, because the `validation_epoch_end(outputs)` hook
    was removed in Lightning 2.0.
    """

    def __init__(self, model, nb_classes, class_weights, lr):
        """
        model: pre-trained base model with a classification head (e.g. a ViT with a head)
        nb_classes: number of classes
        class_weights: tensor of per-class weights for the loss
        lr: initial learning rate
        """
        super().__init__()
        self.model = model
        self.nb_classes = nb_classes
        self.class_weights = class_weights
        self.lr = lr
        self.criterion = nn.CrossEntropyLoss(weight=self.class_weights)
        # Per-batch validation predictions/labels collected during one epoch.
        self._val_outputs = []

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        images, labels = batch
        logits = self(images)
        loss = self.criterion(logits, labels)
        self.log("train_loss", loss, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        images, labels = batch
        logits = self(images)
        loss = self.criterion(logits, labels)
        preds = torch.argmax(logits, dim=1)
        self.log("val_loss", loss, on_epoch=True, prog_bar=True)
        # Keep CPU copies so the epoch-level metrics do not pin accelerator memory.
        self._val_outputs.append(
            {"preds": preds.detach().cpu(), "labels": labels.detach().cpu()}
        )
        return {"val_loss": loss, "preds": preds, "labels": labels}

    def on_validation_epoch_end(self):
        """Compute and log macro-averaged metrics over the whole validation epoch."""
        if not self._val_outputs:
            return
        preds_np = torch.cat([x["preds"] for x in self._val_outputs]).numpy()
        labels_np = torch.cat([x["labels"] for x in self._val_outputs]).numpy()
        self._val_outputs.clear()  # start the next epoch from scratch
        # zero_division=0 yields the same value as the default but without a
        # warning for classes that never appear in the predictions.
        f1 = f1_score(labels_np, preds_np, average='macro', zero_division=0)
        precision = precision_score(labels_np, preds_np, average='macro', zero_division=0)
        recall = recall_score(labels_np, preds_np, average='macro', zero_division=0)
        self.log("val_f1_macro", f1, prog_bar=True)
        self.log("val_precision", precision)
        self.log("val_recall", recall)

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=self.lr)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.3)
        return [optimizer], [scheduler]

    def freeze_base(self):
        """Freeze every parameter except those of the classification head (`model.head`).

        Raises AttributeError when the model has no `head` attribute.
        """
        for param in self.model.parameters():
            param.requires_grad = False
        if hasattr(self.model, 'head'):
            for param in self.model.head.parameters():
                param.requires_grad = True
        else:
            # Adapt here if the head lives under another attribute name.
            raise AttributeError("Modelo não possui atributo 'head' para treinar.")

    def unfreeze_last_layers(self, n_layers):
        """Make the parameters of the last `n_layers` entries of `model.blocks` trainable.

        Raises AttributeError when the model has no `blocks` attribute.
        """
        if not hasattr(self.model, 'blocks'):
            raise AttributeError("Modelo não possui atributo 'blocks' para unfreeze.")
        if n_layers <= 0:
            # blocks[-0:] is the whole sequence, so "zero layers" would otherwise
            # unfreeze every block.
            return
        for block in self.model.blocks[-n_layers:]:
            for param in block.parameters():
                param.requires_grad = True

# ------------------------------------------------------------------------------
# Main Execution
# ------------------------------------------------------------------------------
def main():
    """
    Two-phase training of the ViT classifier on the official split.

    Phase 1 (warm-up) trains only the classification head; phase 2 unfreezes the
    last transformer blocks and fine-tunes with a lower learning rate, saving
    the checkpoint with the best validation macro F1.
    """
    # General parameters
    DATA_DIR = os.path.join("..", "..", "data", "Labeled Images")
    OFFICIAL_SPLIT = os.path.join("..", "..", "data", "official_splits", "image_classification.csv")
    OUTPUT_DIR = os.path.join("..", "output", "Complete agreement_40", "vit_base_patch16_224")
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    NB_CLASSES = 23
    LR_WARMUP = 0.001
    LR_FINETUNE = 0.0007
    NUM_EPOCHS_WARMUP = 10
    NUM_EPOCHS_FINETUNE = 100
    EARLY_STOPPING = 10
    UNFROZEN_LAYERS = 4  # Example: unfreeze the last 4 ViT blocks

    # Image transform: resize to 224x224 and normalise.
    transform = transforms.Compose([
        # torchvision expects an InterpolationMode here; PIL integer constants
        # are a deprecated way of selecting the filter.
        transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.LANCZOS),
        transforms.ToTensor(),
        # NOTE(review): these are not the ImageNet statistics; presumably they are
        # the dataset/paper values - confirm.
        transforms.Normalize([0.5990, 0.3664, 0.2769], [0.2847, 0.2190, 0.1772])
    ])

    # Load and process the CSV
    if not os.path.exists(OFFICIAL_SPLIT):
        print("CSV oficial não encontrado.")
        sys.exit(1)
    data_csv = pd.read_csv(OFFICIAL_SPLIT, index_col=0)
    data_csv['Complete agreement'] = data_csv['Complete agreement'].replace(MAP_CATEGORIES).astype('Int64')
    data_csv.dropna(subset=['Complete agreement'], inplace=True)
    data_csv.reset_index(inplace=True, drop=True)

    # Split by set_type (Train, Validation, Test)
    train_df = data_csv[data_csv['set_type'] == 'Train']
    valid_df = data_csv[data_csv['set_type'] == 'Validation']
    # 'Test' is a value of the set_type column, not a column name, so the check
    # must look at the values.
    is_test = data_csv['set_type'] == 'Test'
    test_df = data_csv[is_test] if is_test.any() else None

    print("Total de imagens:", len(data_csv))
    print("Imagens de treino:", len(train_df))
    print("Imagens de validação:", len(valid_df))
    if test_df is not None:
        print("Imagens de teste:", len(test_df))

    train_dataset = EndoscopyDataset(train_df, DATA_DIR, transform=transform)
    valid_dataset = EndoscopyDataset(valid_df, DATA_DIR, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=30, shuffle=True, num_workers=4, pin_memory=True)
    valid_loader = DataLoader(valid_dataset, batch_size=30, shuffle=False, num_workers=4, pin_memory=True)

    # Class weights. The loss needs exactly NB_CLASSES entries, so classes that
    # are absent from the training split keep a neutral weight of 1.0.
    class_labels = train_df['Complete agreement'].values.astype(np.int32)
    classes = np.unique(class_labels)
    balanced = compute_class_weight(class_weight='balanced', classes=classes, y=class_labels)
    weights = np.ones(NB_CLASSES, dtype=np.float64)
    weights[classes] = balanced
    class_weights_tensor = torch.Tensor(weights).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    print("Pesos das classes:", class_weights_tensor)

    # Initialise the ViT model through the project helpers
    # (initialize_model returns (model, CNN_family)).
    from finetuning_models import frozen_vit  # helper for handling ViT
    from initialize_models import initialize_model

    model_ft, CNN_family = initialize_model("vit_base_patch16_224", NB_CLASSES, pretrained=True, input_size=(224, 224))

    # `devices=0` / `accelerator=None` are not valid Trainer settings; run on one
    # GPU when available and on one CPU process otherwise.
    accelerator = "gpu" if torch.cuda.is_available() else "cpu"

    # Phase 1: warm-up - train only the head
    classification_module = ClassificationModule(model=model_ft, nb_classes=NB_CLASSES,
                                                 class_weights=class_weights_tensor,
                                                 lr=LR_WARMUP)
    classification_module.freeze_base()  # freeze the base, train only the head

    trainer_warmup = pl.Trainer(
        max_epochs=NUM_EPOCHS_WARMUP,
        devices=1,
        accelerator=accelerator,
        callbacks=[EarlyStopping(monitor="val_f1_macro", patience=EARLY_STOPPING, mode="max")]
    )
    trainer_warmup.fit(classification_module, train_loader, valid_loader)

    # Phase 2: fine-tuning - unfreeze the last blocks and train again with the
    # fine-tuning learning rate (read by configure_optimizers on the next fit).
    classification_module.lr = LR_FINETUNE
    classification_module.unfreeze_last_layers(UNFROZEN_LAYERS)

    # Keep the best model according to val_f1_macro
    checkpoint_callback = ModelCheckpoint(
        monitor="val_f1_macro",
        dirpath=OUTPUT_DIR,
        filename="best_model-val_f1_macro",
        save_top_k=1,
        mode="max"
    )
    trainer_finetune = pl.Trainer(
        max_epochs=NUM_EPOCHS_FINETUNE,
        devices=1,
        accelerator=accelerator,
        callbacks=[EarlyStopping(monitor="val_f1_macro", patience=EARLY_STOPPING, mode="max"), checkpoint_callback]
    )
    trainer_finetune.fit(classification_module, train_loader, valid_loader)

    # The final model can now be used for inference and final metrics.
    print("Treinamento finalizado. Melhor modelo salvo em:", checkpoint_callback.best_model_path)

if __name__ == '__main__':
    main()


# Endoscopy 2.0

### Ativar CUDA (GPU) e precisão mista, se houver GPU disponível

In [None]:
import tensorflow as tf
from tensorflow.keras.mixed_precision import set_global_policy

# A GPU counts as active when TensorFlow lists at least one physical GPU device.
is_cuda = bool(tf.config.list_physical_devices('GPU'))

# Mixed precision only when a GPU is present; plain float32 otherwise.
policy = tf.keras.mixed_precision.Policy('mixed_float16' if is_cuda else 'float32')
set_global_policy(policy)

print("GPU está ativa" if is_cuda else "GPU não está ativa")

GPU está ativa


### Baixar arquivos

In [None]:
from zipfile import ZipFile
import os,shutil



def extract_tar_gz_contents(input_dir: str, output_dir: str):
    """Extract every .tar.gz / .tgz archive found directly in `input_dir` into `output_dir`.

    `output_dir` is created when missing. A missing `input_dir` is treated as
    "nothing to extract" so that the call stays harmless on a fresh machine.
    Archives are processed in sorted name order; sub-directories of `input_dir`
    are not searched.
    """
    import tarfile  # local import: the notebook cell does not import it at the top

    os.makedirs(output_dir, exist_ok=True)
    if not os.path.isdir(input_dir):
        return

    for entry in sorted(os.listdir(input_dir)):
        archive_path = os.path.join(input_dir, entry)
        if not entry.endswith(('.tar.gz', '.tgz')) or not os.path.isfile(archive_path):
            continue
        with tarfile.open(archive_path, 'r:gz') as archive:
            if hasattr(tarfile, 'data_filter'):
                # Refuse members that would escape output_dir (absolute paths, '..').
                archive.extractall(path=output_dir, filter='data')
            else:
                # Older Python without extraction filters: only use trusted archives.
                archive.extractall(path=output_dir)
# Disabled clean-up of a previous ./bruto directory, kept for reference:
# os.path.exists("bruto") and shutil.rmtree("./bruto")

# Call the helper with ./vit_features as both the input and the output directory.
extract_tar_gz_contents('./vit_features', './vit_features')



In [None]:

# Open the "Labeled Images" archive stored on Google Drive (read-only) and
# unpack all of its members into ./bruto.
with ZipFile("/content/drive/My Drive/Mestrado 2024/Projetos/Datasets/Labeled Images.zip", 'r') as labeled_images_zip:
    labeled_images_zip.extractall(path="./bruto")

In [None]:
import os
import shutil

# Root directory whose nested files are flattened into it.
base = './bruto'

# Walk the tree and move every file found in a sub-folder up into `base`.
# NOTE: files that share a name end up at the same destination path; make sure
# the names are unique across sub-folders before running this.
for root, dirs, files in os.walk(base):
    if root == base:
        # Files already at the top level do not need to be moved onto themselves.
        continue

    for file in files:
        # os.walk only lists non-directories in `files`, so no extra check is
        # needed (a bare os.path.isdir(file) would be resolved against the
        # current working directory, not against `root`).
        shutil.move(os.path.join(root, file), os.path.join(base, file))

In [None]:
from typing import Union
from pathlib import Path



def clear_directory_folders(directory_path: Union[str, Path]) -> list:
    """Delete, irreversibly, every sub-folder (with its content) of
    `directory_path`, leaving the files located directly in it untouched.

    Returns the list of folder paths that could not be removed because of a
    PermissionError.
    """
    undeletable = []
    # Lazy on purpose: entries are inspected one at a time while deleting.
    subfolders = (entry for entry in Path(directory_path).iterdir() if entry.is_dir())
    for folder in subfolders:
        try:
            shutil.rmtree(folder)
        except PermissionError:
            undeletable.append(folder)
    return undeletable

clear_directory_folders(r'./bruto')

[]

# Funções

In [None]:
# feature_extraction_generic.py

import os
import sys
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from tensorflow.keras import layers, models
from transformers import TFAutoModel, AutoFeatureExtractor

# Class mapping: label code -> integer class index.
# The 23 entries cover the contiguous indices 0..22; 'OTHERCLASS' takes the last one.
MAP_CATEGORIES = {
    'A1': 0, 'L1': 1, 'P1': 2, 'G1': 3,
    'A2': 4, 'L2': 5, 'P2': 6, 'G2': 7,
    'A3': 8, 'L3': 9, 'P3': 10, 'G3': 11,
    'A4': 12, 'L4': 13, 'P4': 14, 'G4': 15,
    'A5': 16, 'L5': 17, 'P5': 18,
    'A6': 19, 'L6': 20, 'P6': 21,
    'OTHERCLASS': 22
}

def load_and_process_csv(official_split, label_column):
    """Load the official-split CSV and map its label codes to integer class ids.

    The first CSV column is used as the index. Rows whose label is missing or is
    not a key of MAP_CATEGORIES are dropped; the label column is returned with
    the nullable 'Int64' dtype and the index is renumbered from 0.

    Exits the process with status 1 when the file does not exist.
    """
    if not os.path.exists(official_split):
        print("Arquivo de split oficial não encontrado.")
        sys.exit(1)

    df = pd.read_csv(official_split, index_col=0)
    # Series.map turns every value that is not a MAP_CATEGORIES key into NaN, so
    # unknown codes are dropped below instead of breaking the integer cast.
    # It also avoids the downcasting FutureWarning that Series.replace emits here.
    df[label_column] = df[label_column].map(MAP_CATEGORIES).astype('Int64')
    df = df.dropna(subset=[label_column])
    return df.reset_index(drop=True)

def preprocess_image(image_path, input_size, resample_method):
    """Decode a JPEG file as a 3-channel image and resize it to a square of
    side `input_size` with the given tf.image resize method."""
    encoded = tf.io.read_file(image_path)
    pixels = tf.image.decode_jpeg(encoded, channels=3)
    square = [input_size, input_size]
    return tf.image.resize(pixels, square, method=resample_method)

def build_dataset(df, data_dir, input_size, label_column, batch_size, resample_method):
    """Build a batched, prefetched tf.data.Dataset of (image, label) pairs,
    resizing every image with the model-specific size and resize method."""
    def _load_preprocess(path, label):
        # Only the image is transformed; the label passes through untouched.
        return preprocess_image(path, input_size, resample_method), label

    filepaths = df['filename'].apply(lambda name: os.path.join(data_dir, name)).values
    labels = df[label_column].values.astype(np.int32)

    pipeline = tf.data.Dataset.from_tensor_slices((filepaths, labels))
    pipeline = pipeline.map(_load_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    pipeline = pipeline.batch(batch_size)
    return pipeline.prefetch(tf.data.AUTOTUNE)





class ViTFeatureExtractor(layers.Layer):
    """Keras layer wrapping a Hugging Face TF vision transformer.

    Takes channels-last images with pixel values in [0, 255], standardises them
    with the feature extractor's `image_mean` / `image_std`, and returns the
    hidden state of the first ([CLS]) token.

    Args:
        feature_extractor: object exposing `image_mean` and `image_std`.
        vit_model: model called as `vit_model(pixel_values=...)` whose output has
            `last_hidden_state` and whose `config` has `hidden_size`.
    """

    def __init__(self, feature_extractor, vit_model, **kwargs):
        super().__init__(**kwargs)
        self.feature_extractor = feature_extractor
        self.vit_model = vit_model
        self._hidden_size = vit_model.config.hidden_size

        # Normalisation parameters stored as non-trainable layer variables.
        self.mean = self.add_weight(
            name='mean',
            shape=(1, 1, 1, 3),
            initializer=tf.constant_initializer(feature_extractor.image_mean),
            trainable=False
        )
        self.std = self.add_weight(
            name='std',
            shape=(1, 1, 1, 3),
            initializer=tf.constant_initializer(feature_extractor.image_std),
            trainable=False
        )

    def call(self, inputs):
        # Normalise in float32. The explicit casts keep the arithmetic valid even
        # if a mixed-precision policy hands the variables over in another dtype.
        x = tf.cast(inputs, tf.float32)
        mean = tf.cast(self.mean, x.dtype)
        std = tf.cast(self.std, x.dtype)
        x = (x / 255.0 - mean) / std

        # The Hugging Face model is fed channels-first pixel values.
        x = tf.transpose(x, perm=[0, 3, 1, 2])

        outputs = self.vit_model(pixel_values=x)

        # Keep only the [CLS] token and pin its static shape.
        cls_token = outputs.last_hidden_state[:, 0]
        return tf.ensure_shape(cls_token, [None, self._hidden_size])

    def get_config(self):
        # Only constructor arguments belong here: `hidden_size` is derived from
        # `vit_model` and passing it back to __init__ would be rejected as an
        # unknown keyword argument.
        # NOTE: the two entries are live Python objects, so this config supports
        # in-memory re-creation only, not saving the layer to disk.
        config = super().get_config()
        config.update({
            'feature_extractor': self.feature_extractor,
            'vit_model': self.vit_model
        })
        return config

    @classmethod
    def from_config(cls, config):
        # Rebuild the layer from the dictionary produced by get_config.
        return cls(**config)

def build_feature_extractor(feature_extractor, vit_model, input_size):
    """Wrap `ViTFeatureExtractor` in a Keras Model that takes float32 images of
    shape (input_size, input_size, 3)."""
    image_input = layers.Input(shape=(input_size, input_size, 3), dtype=tf.float32)
    extraction_layer = ViTFeatureExtractor(feature_extractor, vit_model)
    return models.Model(inputs=image_input, outputs=extraction_layer(image_input))



def extract_features(model, dataset):
    """Run `dataset` through `model` once and return (features, labels).

    Args:
        model: object exposing `predict_on_batch(batch)` (e.g. a Keras Model).
        dataset: iterable of (images, labels) batches; each labels object must
            provide `.numpy()`.

    Returns:
        Two NumPy arrays concatenated along axis 0, in dataset order.

    Raises:
        ValueError: if the dataset yields no batches.
    """
    # A single pass collects predictions and labels together. Iterating the
    # dataset once for predict() and a second time for the labels would read,
    # decode and resize every image twice.
    feature_batches = []
    label_batches = []
    for batch_images, batch_labels in dataset:
        feature_batches.append(np.asarray(model.predict_on_batch(batch_images)))
        label_batches.append(batch_labels.numpy())
    if not feature_batches:
        raise ValueError("extract_features received an empty dataset.")
    features = np.concatenate(feature_batches, axis=0)
    labels = np.concatenate(label_batches, axis=0)
    return features, labels

In [None]:
def main():
    """Extract [CLS] features with several frozen Hugging Face vision
    transformers and save them, per model and per split, as .npy files under
    BASE_OUTPUT_DIR/<model name with '/' replaced by '_'>/.
    """
    # Settings
    MODEL_NAMES = [
        "google/vit-base-patch16-224",
        "google/vit-large-patch32-384",
        "facebook/deit-base-distilled-patch16-224",
        "facebook/deit-base-patch16-384",
        "facebook/vit-mae-base",
        "facebook/vit-mae-large"
    ]
    BATCH_SIZE = 64
    DATA_DIR = "./bruto"
    OFFICIAL_SPLIT = "/content/drive/My Drive/Mestrado 2024/Projetos/Datasets/official_splits/image_classification.csv"
    BASE_OUTPUT_DIR = "./vit_features"
    LABEL_COLUMN = "Complete agreement"
    os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)

    # Load the data and follow the official split
    df = load_and_process_csv(OFFICIAL_SPLIT, LABEL_COLUMN)
    splits = {
        "train": df[df['set_type'] == 'Train'],
        "validation": df[df['set_type'] == 'Validation'],
        "test": df[df['set_type'] == 'Test'],
    }
    # Dataset statistics: number of rows per split (value_counts() would print
    # one line per distinct row instead of a size).
    print(f"Train dataset size : {len(splits['train'])}")
    print(f"Validation dataset size : {len(splits['validation'])}")
    print(f"Test dataset size : {len(splits['test'])}")

    for model_name in MODEL_NAMES:
        print(f"\nProcessando modelo: {model_name}")

        # Load the model and its preprocessing configuration
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
        vit_model = TFAutoModel.from_pretrained(model_name)
        vit_model.trainable = False

        # The network must be fed images of the size it was built for. The
        # preprocessor's `size` is the resize target, which can be larger than
        # the model input when a centre crop follows, so the model config is
        # the reliable source.
        input_size = vit_model.config.image_size
        if isinstance(input_size, (tuple, list)):
            input_size = input_size[0]
        resample_method = tf.image.ResizeMethod.BILINEAR  # map per model if needed

        # Build the input pipelines
        datasets = {
            split: build_dataset(frame, DATA_DIR, input_size, LABEL_COLUMN, BATCH_SIZE, resample_method)
            for split, frame in splits.items()
        }

        # len() of a batched dataset is its number of batches
        print(f"Train dataset batches after build_dataset: {len(datasets['train'])}")
        print(f"Test dataset batches after build_dataset: {len(datasets['test'])}")

        # Build the feature extractor
        model = build_feature_extractor(feature_extractor, vit_model, input_size)
        model.build(input_shape=(None, input_size, input_size, 3))

        # Extra step kept for DeiT: force initialisation with a dummy batch
        if 'deit' in model_name.lower():
            dummy_input = tf.ones((1, input_size, input_size, 3))
            _ = model(dummy_input)

        # Save the results
        model_slug = model_name.replace("/", "_")
        output_dir = os.path.join(BASE_OUTPUT_DIR, model_slug)
        os.makedirs(output_dir, exist_ok=True)

        # Extract and store features and labels, one split at a time
        for split, dataset in datasets.items():
            features, labels = extract_features(model, dataset)
            np.save(os.path.join(output_dir, f"{split}_features.npy"), features)
            np.save(os.path.join(output_dir, f"{split}_labels.npy"), labels)


In [None]:
# Cell entry point: announce the start and run main().
if __name__ == '__main__':
    print("Iniciando...")
    main()

Iniciando...
Train dataset size : num patient  filename                                  FG1 (Team A)  FG2 (Team A)  G1 (Team B)  G2 (Team B)  Complete agreement  Triple agreement  FG agreement  G agreement  FG1-G1 agreement  FG1-G2 agreement  FG2-G1 agreement  FG2-G2 agreement  set_type
387          c5d311b4-7d13-45fb-965c-e18e392f99ff.jpg  G4            G4            G4           G4           15                  G4                G4            G4           G4                G4                G4                G4                Train       1
1            00770fe2-9158-468b-bc3b-4a1ff6fd2fea.jpg  OTHERCLASS    OTHERCLASS    OTHERCLASS   OTHERCLASS   22                  OTHERCLASS        OTHERCLASS    OTHERCLASS   OTHERCLASS        OTHERCLASS        OTHERCLASS        OTHERCLASS        Train       1
             2de64b53-a39e-4832-82ce-f7e9ac179ec9.jpg  P2            P2            P2           P2           6                   P2                P2            P2           P2               

  df[label_column] = df[label_column].replace(MAP_CATEGORIES).astype('Int64')


preprocessor_config.json:   0%|          | 0.00/217 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]



tf_model.h5:   0%|          | 0.00/1.32G [00:00<?, ?B/s]

Some layers from the model checkpoint at facebook/vit-mae-large were not used when initializing TFViTMAEModel: ['decoder']
- This IS expected if you are initializing TFViTMAEModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTMAEModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFViTMAEModel were initialized from the model checkpoint at facebook/vit-mae-large.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTMAEModel for predictions without further training.


Train dataset size after build_dataset: 59
Test dataset size after build_dataset: 13
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 322ms/step
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 605ms/step
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 631ms/step


In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Location of the pre-computed feature and label files.
# os.path.join discards everything that precedes an absolute component, so the
# former "..", "./output" prefixes never contributed to the path; the absolute
# directory is spelled out directly.
OUTPUT_DIR = "/content/drive/My Drive/Mestrado 2024/Projetos/Datasets/vit_features_gastroVision/google_vit-large-patch32-384"
TRAIN_FEATURES_PATH = os.path.join(OUTPUT_DIR, "train_features.npy")
TRAIN_LABELS_PATH   = os.path.join(OUTPUT_DIR, "train_labels.npy")
TEST_FEATURES_PATH  = os.path.join(OUTPUT_DIR, "test_features.npy")
TEST_LABELS_PATH    = os.path.join(OUTPUT_DIR, "test_labels.npy")

# Load the NumPy arrays
train_features = np.load(TRAIN_FEATURES_PATH)
train_labels   = np.load(TRAIN_LABELS_PATH)
test_features  = np.load(TEST_FEATURES_PATH)
test_labels    = np.load(TEST_LABELS_PATH)

print("Shape das features de treino:", train_features.shape)
print("Shape dos rótulos de treino:", train_labels.shape)
print("Shape das features de teste:", test_features.shape)
print("Shape dos rótulos de teste:", test_labels.shape)

# Linear-kernel SVM with balanced class weights
svm = SVC(kernel='linear', class_weight='balanced')

# Grid of C values to search
param_grid = {'C': [0.01, 0.1, 1, 10, 100]}

# 5-fold cross-validated grid search, selecting by macro F1
grid_search = GridSearchCV(svm, param_grid, scoring='f1_macro', cv=5, n_jobs=-1)
grid_search.fit(train_features, train_labels)
print("Melhores parâmetros encontrados:", grid_search.best_params_)

# Best SVM found by the search
best_svm = grid_search.best_estimator_

# Predict on the test set
predictions = best_svm.predict(test_features)

# Performance metrics
accuracy = accuracy_score(test_labels, predictions)
f1_macro = f1_score(test_labels, predictions, average='macro')

print("Acurácia no conjunto de teste:", accuracy)
print("F1 Macro no conjunto de teste:", f1_macro)

report = classification_report(test_labels, predictions, output_dict=True)
df_report = pd.DataFrame(report).transpose()
print("\nRelatório de Classificação:\n", df_report)

# Save the classification report as CSV; create the target directory first so
# the write does not fail on a fresh working directory.
os.makedirs('./output', exist_ok=True)
df_report.to_csv('./output/classification_report.csv', index=True)




In [None]:
793/64

12.390625