## Feature Extraction

In [3]:
import os
import random
from torch.utils.data import Subset
from torchvision import transforms
from modules import utils, globals
import torch
import torchvision
import numpy as np

In [4]:
# import kagglehub

# # # Download latest version
# path = kagglehub.dataset_download("birdy654/cifake-real-and-ai-generated-synthetic-images")

# print("Path to dataset files:", path)

### Carregando modelo pré-treinado

In [5]:
model = utils.load_model_from_file("resnet18_cifake_finetuned_float32.pth")
print("Modelo pré-treinado carregado.")
print(model)




Modelo 'resnet18_cifake_finetuned_float32.pth' carregado para avaliação de desempenho.
Modelo pré-treinado carregado.
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn

In [None]:
transform = transforms.Compose([
                                     transforms.Resize(224),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                          std=[0.229, 0.224, 0.225])
                  ])

train_dataset = torchvision.datasets.ImageFolder(
    root=os.path.join(globals.DATASET_PATH, 'train'),
    transform=transform
)
# Cria subsets para testar a lógica do modelo com um número menor do dataset (descomentar para usar)
subset_train_indices = list(range(globals.NUM_SAMPLES_TRAIN_DEBUGGER))

total_train_samples = len(train_dataset)
num_train_to_select = min(globals.NUM_SAMPLES_TRAIN_DEBUGGER, total_train_samples)
subset_train_indices = random.sample(range(total_train_samples), num_train_to_select)

train_dataset_debugger = Subset(train_dataset, subset_train_indices)

train_loader = torch.utils.data.DataLoader(
    train_dataset_debugger,
    batch_size=globals.BATCH_SIZE,
    shuffle=True,
    num_workers=0
)


### Removendo a última camada fc

A última camada da CNN pré-treinada é removida, porque utilizaremos esse modelo para extrair as carcterísticas que servirão de entrada para outros modelos como HDC e Wisard.

In [None]:
feature_extractor = torch.nn.Sequential(*list(model.children())[:-1])

### Extraindo as features e labels geradas pelo modelo

Nessa parte são percorridos os dados do train_loader e extraídas as características com um feature_extractor. Em seguida, todos os batches são concatenados em arrays únicos e salvos como arquivos .npy. Isso permite reutilizar os dados processados sem precisar extraí-los novamente a cada execução.

In [None]:
all_features = []
all_labels = []

with torch.no_grad():
    for k, batch in enumerate(train_loader):
        print(f'\r{k+1}/{len(train_loader)}', end='', flush=True) 

        dado, rotulo = batch
        dado = dado.to(globals.DEVICE)
        rotulo = rotulo.to(globals.DEVICE)
        features = feature_extractor(dado)

        features = features.view(features.size(0), -1)

        all_features.append(features.cpu())
        all_labels.append(rotulo)

features_array = np.concatenate(all_features, axis=0)
labels_array = np.concatenate(all_labels, axis=0)

print(f"\n{labels_array}")


np.save('features.npy', features_array)
np.save('labels.npy', labels_array)

print("Features salvas em 'features.npy' e labels em 'labels.npy")


125/125[0 1 0 1 1 0 0 1 1 0 1 0 1 0 0 1 0 1 1 1 0 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 1
 0 0 1 0 0 0 0 1 0 1 1 0 1 0 1 0 0 0 1 0 0 0 1 1 0 1 0 1 1 0 1 0 1 0 1 0 1
 1 0 1 0 1 0 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0
 0 0 0 1 1 1 1 1 0 0 0 1 0 1 1 0 0 0 1 0 0 1 0 1 0 0 1 0 1 0 1 1 0 0 0 1 0
 1 1 0 1 1 1 1 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 0 1 0 1 1 1 0 0 0 1 1 0
 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 1 1 0 1 0 1 1 1 0 1 0 1 0 1 1 0 0 0 0
 0 1 0 1 1 0 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 1 1 0 1 0 1 0 0 1 1 1 1 1 1 0 0
 0 0 0 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 0 0 1 0 0 0 1 0 0 1 0 1 0 0 0
 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 1 1 0 1
 0 0 0 1 0 1 0 1 0 1 0 1 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 1 1 1 1 0 0 1 1 0 0
 0 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 0 1 1 0 1 0 0 1 0 1 0 0 1 1 1 0 1 1 0 0 1
 0 0 0 0 0 1 0 0 0 1 1 0 0 1 0 1 1 1 1 1 0 0 1 1 1 1 0 1 0 1 1 0 0 0 1 1 0
 1 1 0 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1
 0 0 0 0 0 1 0 0 0