## CNN: Fine-Tunning com HDC

### Imports

In [1]:
from modules import utils, globals
import torch
from modules import encoders
from binhd.classifiers import BinHD
from modules.cifake import Cifake
import wisardpkg as wp
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.calibration import LabelEncoder
from modules.encoders import RecordEncoder

In [2]:
# import kagglehub

# # # Download latest version
# path = kagglehub.dataset_download("birdy654/cifake-real-and-ai-generated-synthetic-images")

# print("Path to dataset files:", path)

### Carregando o dataset pré-treinado

In [3]:
model = utils.load_model_from_file("resnet18_cifake_finetuned_float32.pth")
print("Modelo pré-treinado carregado.")
print(model)




Modelo 'resnet18_cifake_finetuned_float32.pth' carregado para avaliação de desempenho.
Modelo pré-treinado carregado.
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn

In [4]:
import os
import random
from torch.utils.data import Subset
from torchvision import models, transforms

import torchvision


transform = transforms.Compose([
                                     transforms.Resize(224),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                          std=[0.229, 0.224, 0.225])
                  ])

train_dataset = torchvision.datasets.ImageFolder(
    root=os.path.join(globals.DATASET_PATH, 'train'),
    transform=transform
)

test_dataset = torchvision.datasets.ImageFolder(
    root=os.path.join(globals.DATASET_PATH, 'test'),
    transform=transform
)

# Cria subsets para testar a lógica do modelo com um número menor do dataset (descomentar para usar)
subset_train_indices = list(range(globals.NUM_SAMPLES_TRAIN_DEBUGGER))
subset_test_indices = list(range(globals.NUM_SAMPLES_TEST_DEBUGGER))

total_train_samples = len(train_dataset)
num_train_to_select = min(globals.NUM_SAMPLES_TRAIN_DEBUGGER, total_train_samples)
subset_train_indices = random.sample(range(total_train_samples), num_train_to_select)

# Pega uma amostra aleatória de índices para o teste
total_test_samples = len(test_dataset)
num_test_to_select = min(globals.NUM_SAMPLES_TEST_DEBUGGER, total_test_samples)
subset_test_indices = random.sample(range(total_test_samples), num_test_to_select)

train_dataset_debugger = Subset(train_dataset, subset_train_indices)
test_dataset_debugger = Subset(test_dataset, subset_test_indices)

train_loader = torch.utils.data.DataLoader(
    train_dataset_debugger,
    batch_size=globals.BATCH_SIZE,
    shuffle=True,
    num_workers=0
)

test_loader = torch.utils.data.DataLoader(
    test_dataset_debugger,
    batch_size=globals.BATCH_SIZE,
    shuffle=False,
    num_workers=0
)
feature_extractor = torch.nn.Sequential(*list(model.children())[:-1])



In [5]:
all_features = []
all_labels = []

with torch.no_grad():
    for k, batch in enumerate(train_loader):
        print(f'\r{k+1}/{len(train_loader)}', end='', flush=True) 

        dado, rotulo = batch
        dado = dado.to(globals.DEVICE)
        rotulo = rotulo.to(globals.DEVICE)
        # Extrair features
        features = feature_extractor(dado)

        # Achatar (flatten) o tensor para (batch_size, num_features)
        features = features.view(features.size(0), -1)

        all_features.append(features.cpu())
        all_labels.append(rotulo)

print(all_features)
print(all_labels)

125/125[tensor([[0.5448, 0.0188, 2.0103,  ..., 0.1977, 0.2146, 0.8065],
        [0.1944, 0.1871, 0.3872,  ..., 0.9011, 0.4132, 1.0158],
        [0.4580, 0.3089, 2.6238,  ..., 1.2305, 0.3552, 1.8603],
        ...,
        [0.7531, 0.5428, 1.1822,  ..., 0.3329, 0.8339, 1.1802],
        [0.2124, 0.3787, 1.1635,  ..., 1.5608, 0.5434, 1.1616],
        [0.2695, 0.3442, 0.0702,  ..., 0.5647, 0.1584, 3.4848]]), tensor([[0.3956, 0.5014, 0.2577,  ..., 0.2808, 0.3647, 0.7328],
        [1.4389, 0.1017, 1.3864,  ..., 0.5428, 0.2667, 0.9597],
        [0.8829, 0.7686, 0.4811,  ..., 0.7473, 2.7996, 0.2421],
        ...,
        [0.9030, 1.0140, 0.8214,  ..., 0.1068, 2.7269, 0.1047],
        [0.9071, 0.5953, 0.8696,  ..., 1.5987, 1.1185, 0.5973],
        [0.2962, 0.8323, 1.5727,  ..., 0.9189, 0.4184, 0.6288]]), tensor([[0.4156, 0.3422, 0.6632,  ..., 1.5058, 0.2720, 1.5608],
        [1.7502, 0.1130, 1.7722,  ..., 1.2939, 1.4895, 0.7743],
        [0.4188, 0.7361, 1.5902,  ..., 0.1272, 0.5020, 0.7735],
  

In [6]:
features_array = np.concatenate(all_features, axis=0)
labels_array = np.concatenate(all_labels, axis=0)

print(labels_array)


np.save('features.npy', features_array)
np.save('labels.npy', labels_array)

print("Features salvas em 'features.npy' e labels em 'labels.npy")

[0 1 1 1 0 0 1 0 1 1 0 0 0 0 1 0 1 1 0 0 0 1 0 1 1 1 1 0 1 0 1 1 1 1 1 1 1
 1 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 0 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 1 0 0 1 0
 1 0 1 0 1 1 1 0 0 1 1 1 0 0 1 1 0 1 0 0 0 0 0 0 1 0 1 1 1 1 1 0 1 1 0 1 1
 1 0 1 1 1 1 1 0 0 0 0 1 0 1 1 0 1 1 0 1 1 0 0 0 1 1 1 1 0 0 1 0 0 0 0 1 1
 1 0 0 1 1 1 0 1 1 1 0 1 0 1 0 1 0 0 1 0 0 0 1 1 1 0 1 1 1 0 0 1 0 1 0 1 1
 0 1 1 0 0 0 0 0 0 0 1 1 0 0 0 1 1 0 1 0 1 0 1 1 1 0 0 1 0 0 0 1 1 0 0 0 1
 1 0 1 0 1 0 0 0 0 1 1 0 0 0 1 1 1 0 1 1 0 1 0 1 1 0 1 0 1 0 0 1 0 0 1 1 1
 1 0 1 0 0 1 1 1 1 0 1 0 1 1 0 1 0 0 1 1 0 0 0 0 0 1 1 1 0 1 1 1 1 0 0 0 1
 0 1 1 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 1 0 0 1 0 0 1 1 1 1 0 0 1 1 0 1 0
 1 0 0 0 0 0 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 1 0 0 0 0 1 1 0 0 1 0 0 0 1 1
 0 1 1 1 0 0 1 0 0 0 0 0 0 1 1 1 0 1 1 1 1 0 1 1 0 1 0 1 0 0 1 0 1 1 1 0 1
 0 0 1 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 1 0 1 0 1 0 0 1 0 1 1 0 1 1 0 1 1 0 1
 0 1 1 0 1 1 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 1 1 0 1 1 1 1 1 1 0 0 0
 1 1 0 0 1 1 0 1 1 0 0 1 

In [7]:
cifake = Cifake()
# Verifica as amostras e classes
print(f"Número de samples: {len(cifake.samples)}")
print(f"Classes encontradas: {cifake.features}")

Número de samples: 1000
Classes encontradas:        feat_0    feat_1    feat_2    feat_3    feat_4    feat_5    feat_6  \
0    0.544774  0.018806  2.010258  0.256591  0.062876  0.957191  0.587653   
1    0.194367  0.187050  0.387228  0.820511  1.306756  0.354742  1.281170   
2    0.457966  0.308896  2.623842  0.884696  0.872540  0.594843  1.210461   
3    0.284970  0.416701  0.461790  1.233918  0.684192  0.021006  0.493374   
4    0.989366  0.986020  0.166643  1.595618  0.974522  0.873319  0.038089   
..        ...       ...       ...       ...       ...       ...       ...   
995  0.586814  0.056999  0.574331  1.152458  1.450697  3.005274  0.051360   
996  0.178797  0.212316  0.702732  0.797693  0.789797  1.317514  0.420818   
997  2.132439  0.660115  0.751028  0.301110  0.776568  0.316860  0.526490   
998  0.507245  1.801676  1.681762  0.358656  1.130795  1.354531  0.483254   
999  1.151605  1.261966  0.240359  3.921093  2.435382  0.481169  0.063166   

       feat_7    feat_8    fea

In [8]:
min_val, max_val = cifake.get_min_max_values()
print(min_val, max_val)

0.0 9.409947395324707


In [None]:
dimension = 10000
num_levels = 500
low = min_val
high = max_val
oper = "bind"

In [10]:
X = cifake.features
print(X.shape)
print(X.dtypes.unique())

y = cifake.labels
le = LabelEncoder()
y_encoded = torch.tensor(le.fit_transform(y))


(1000, 512)
[dtype('float32')]


In [11]:
model = BinHD(dimension, cifake.num_classes)
print(X.dtypes)


feat_0      float32
feat_1      float32
feat_2      float32
feat_3      float32
feat_4      float32
             ...   
feat_507    float32
feat_508    float32
feat_509    float32
feat_510    float32
feat_511    float32
Length: 512, dtype: object


In [12]:
record_encoder = RecordEncoder(
            out_features=dimension,
            size=X.shape[1], 
            levels=num_levels,
            low=low,
            high=high
        )


In [19]:
y_encoded = torch.tensor(y_encoded).to(globals.DEVICE)

def run_encoders(X, device):
    # Garante que os rótulos estão no formato tensor e no dispositivo correto
    y_encoded_tensor = torch.tensor(y_encoded).to(device)

    # Lista para armazenar os vetores codificados por batch
    encoded_batches = []

    # Coloca o encoder no modo de avaliação e no dispositivo correto
    record_encoder.to(device)
    record_encoder.eval()

    # Número total de amostras
    num_samples = len(X)

    with torch.no_grad():
        for start_idx in range(0, num_samples, globals.BATCH_SIZE):
            end_idx = min(start_idx + globals.BATCH_SIZE, num_samples)

            # Seleciona um batch dos dados e converte para float32
            x_batch_np = X.iloc[start_idx:end_idx].values.astype(np.float32)

            # Converte para tensor e move para o dispositivo
            x_batch_tensor = torch.tensor(x_batch_np).to(device)

            # Codifica usando o encoder (mantendo a lógica original)
            encoded = record_encoder(x_batch_tensor)

            # Armazena o resultado no CPU
            encoded_batches.append(encoded.cpu())

            # Progresso
            print(f"\rProcessando amostras {start_idx} até {end_idx}", end='', flush=True)

    print("\nCodificação completa.")

    # Junta todos os batches em um único tensor
    encoded_all = torch.cat(encoded_batches, dim=0)

    return encoded_all



  y_encoded = torch.tensor(y_encoded).to(globals.DEVICE)


In [None]:
X_record_encoder = run_encoders(X, globals.DEVICE)
labels = torch.tensor(y).to(globals.DEVICE)

X_train, X_test, y_train, y_test = train_test_split(X_record_encoder, labels, test_size=0.3, random_state = 0)

print(X_train, y_train)

  y_encoded_tensor = torch.tensor(y_encoded).to(device)


Processando amostras 992 até 1000
Codificação completa.
BSCTensor([[0, 0, 0,  ..., 1, 1, 1],
           [0, 0, 0,  ..., 1, 1, 1],
           [0, 0, 0,  ..., 1, 1, 1],
           ...,
           [0, 0, 0,  ..., 1, 1, 1],
           [0, 0, 0,  ..., 1, 1, 1],
           [0, 0, 0,  ..., 1, 1, 1]], dtype=torch.int8) tensor([0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1,
        0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
        0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1,
        1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0,
        1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,
        0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0,
        1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0,
        1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0,
        0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 

In [15]:
with torch.no_grad():
    model.fit(X_train,y_train)
    predictions = model.predict(X_test.to(torch.int8))  
    acc = accuracy_score(predictions, y_test)
    print("BinHD Record Encoder: Accuracy = ", acc)


BinHD Record Encoder: Accuracy =  0.47
