In [1]:
import numpy as np
import matplotlib.pyplot as plt
import datetime
import time
import scipy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, ConcatDataset, Dataset


import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torchvision.transforms import ToTensor

import utils

from image_list import ImageList

In [2]:
# Attack (query) dataset: the images listed in txt_files/attack_dataset.txt,
# resolved relative to data/IMAGENET/.
# Every image is resized to 32x32, converted to a tensor, and normalized
# channel-wise with mean 0.5 / std 0.5.
# NOTE(review): return_filename=True presumably makes each sample carry its
# file path (the extraction loops unpack images, labels, paths) -- confirm in
# image_list.ImageList.
attack_dataset = ImageList(
    filename="txt_files/attack_dataset.txt",
    root="data/IMAGENET/",
    color=True,
    transform=transforms.Compose([
        transforms.Resize(size=(32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]),
    return_filename=True,
)

# Fixed order (shuffle=False) so that repeated extractions list the images in
# the same sequence.
attack_dataloader = DataLoader(dataset=attack_dataset, batch_size=64, shuffle=False)

In [3]:
def extract_model(model, dataloader, output_file, device="cuda" if torch.cuda.is_available() else "cpu", n_bacthes=None, n_batches=None):
    """
    Run inference with a PyTorch model and save each image path with its predicted class.

    One line is written per image, formatted as ``<path> <predicted_class>``.

    Args:
        model (torch.nn.Module): Trained PyTorch model.
        dataloader: Iterable yielding ``(images, labels, paths)`` batches; the
            labels are ignored.
        output_file (str): Path of the output text file (overwritten if it exists).
        device (str, optional): Inference device ("cuda" or "cpu"). Default: "cuda" if available.
        n_bacthes (int, optional): Legacy, misspelled name of the batch limit,
            kept so existing callers keep working.
        n_batches (int, optional): Maximum number of batches to process. Takes
            precedence over ``n_bacthes`` when both are given. If both are None,
            every batch is processed.
    """
    # Accept both spellings; the correctly spelled keyword wins.
    batch_limit = n_batches if n_batches is not None else n_bacthes

    model.to(device)
    model.eval()  # Inference mode
    total = 0
    with open(output_file, "w") as f, torch.no_grad():
        for images, _labels, paths in dataloader:
            # `>=` (same rule as extract_model_soft) so a zero or negative
            # limit processes nothing instead of never matching.
            if batch_limit is not None and total >= batch_limit:
                break
            total += 1
            images = images.to(device)
            outputs = model(images)

            _, preds = torch.max(outputs, 1)  # Index of the highest score per image
            for path, pred in zip(paths, preds.cpu().numpy()):
                f.write(f"{path} {pred}\n")  # Image path and predicted class

    print(f"Inferência concluída. Resultados salvos em {output_file}")

In [None]:
def extract_model_soft(model, dataloader, output_file, device="cuda" if torch.cuda.is_available() else "cpu", n_batches=None):
    """
    Write the softmax probability vector predicted for every image to a text file.

    Each output line is ``<path> <p_0> <p_1> ... <p_k>``, space separated.

    Args:
        model (torch.nn.Module): Trained PyTorch model.
        dataloader: Iterable yielding ``(images, labels, paths)`` batches; the
            labels are ignored.
        output_file (str): Path of the output text file (overwritten if it exists).
        device (str, optional): Inference device ("cuda" or "cpu"). Default: "cuda" if available.
        n_batches (int, optional): Maximum number of batches to process. If None, all are processed.
    """
    model.to(device)
    model.eval()  # Inference mode

    with open(output_file, "w") as sink, torch.no_grad():
        for batch_idx, (images, _labels, paths) in enumerate(dataloader):
            # batch_idx equals the number of batches already processed.
            if n_batches is not None and batch_idx >= n_batches:
                break

            logits = model(images.to(device))
            # Softmax over dim 1, converted to nested Python lists of floats.
            probabilities = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().tolist()

            sink.writelines(
                "{} {}\n".format(path, " ".join(str(value) for value in row))
                for path, row in zip(paths, probabilities)
            )

    print(f"Inference completed. Results saved in {output_file}")


In [None]:
# Build the two models from the checkpoints under ./pths/original/.
# NOTE(review): utils.create_resnet_model presumably constructs a ResNet and
# loads the weights stored at the given path -- confirm in utils.
model_mnist, model_svhn = (
    utils.create_resnet_model(checkpoint_path)
    for checkpoint_path in ('./pths/original/mnist32.pth', './pths/original/svhn32.pth')
)


In [8]:
# Hard labels: a single predicted class per image.
# The MNIST extraction is currently disabled:
# extract_model(model_mnist, attack_dataloader, "txt_files/extracted_mnist.txt")
extract_model(
    model=model_svhn,
    dataloader=attack_dataloader,
    output_file="txt_files/extracted_svhn.txt",
)


Inferência concluída. Resultados salvos em txt_files/extracted_svhn.txt


In [None]:
# Soft labels: the full softmax probability vector per image.
# extract_model_soft always writes soft labels and has no `save_softlabels`
# parameter, so passing that keyword raised a TypeError.
extract_model_soft(model_mnist, attack_dataloader, "txt_files/extracted_soft_mnist.txt")
extract_model_soft(model_svhn, attack_dataloader, "txt_files/extracted_soft_svhn.txt")

In [5]:
# Soft labels of the combined model.
# The fourth positional parameter of extract_model is `device`, so the former
# trailing `True` was bound to the device instead of selecting soft labels.
# Soft-label extraction is done by extract_model_soft.
model_combined = utils.create_resnet_model('./pths/combined32.pth')
extract_model_soft(model_combined, attack_dataloader, "data/extracted_soft_combined.txt")

Inference completed. Results saved in data/extracted_soft_combined.txt


In [7]:
# Hard labels of the combined model.
# The fourth positional parameter of extract_model is `device`; the former
# trailing `False` was bound to it, so it is dropped and the default device
# is used.
model_combined = utils.create_resnet_model('./pths/combined32.pth')
extract_model(model_combined, attack_dataloader, "data/extracted_combined.txt")

Inferência concluída. Resultados salvos em data/extracted_combined.txt


In [None]:
# Split the two hard-label files into two alternating subsets:
# rows at even positions of file 1 go to output 1, rows at odd positions of
# file 2 go to output 2, each capped at max_lines_per_output lines.
# NOTE(review): the subsets only cover different images if both inputs list
# the same images in the same order -- confirm against how they were produced.
file1_path = "extracted_mnist.txt"
file2_path = "extracted_svhn.txt"

output1_path = "extracted_mnist_500k.txt"
output2_path = "extracted_svhn_500k.txt"

max_lines_per_output = 500_000

with open(file1_path, "r") as f1, open(file2_path, "r") as f2, \
     open(output1_path, "w") as out1, open(output2_path, "w") as out2:

    count1 = count2 = 0
    # Number of row pairs consumed so far; its parity selects the output file.
    total_written = 0

    while count1 < max_lines_per_output or count2 < max_lines_per_output:
        # Both inputs advance on every step so their rows stay aligned; only
        # one of the two lines is kept and the other is deliberately skipped.
        line1 = f1.readline()
        line2 = f2.readline()
        if total_written % 2 == 0 and count1 < max_lines_per_output:
            if not line1:
                break  # file 1 exhausted
            out1.write(line1)
            count1 += 1
        elif count2 < max_lines_per_output:
            if not line2:
                break  # file 2 exhausted
            out2.write(line2)
            count2 += 1

        total_written += 1

# Report what was actually written; the loop may stop early when an input
# runs out of lines, in which case claiming full-size outputs would be wrong.
if count1 == max_lines_per_output and count2 == max_lines_per_output:
    print(f"✅ Arquivos gerados com {max_lines_per_output:,} linhas alternadas e diferentes cada!")
else:
    print(
        f"⚠️ Entrada esgotada antes do limite de {max_lines_per_output:,} linhas: "
        f"{count1:,} linhas em {output1_path}, {count2:,} linhas em {output2_path}."
    )

In [None]:
# increase_labels.py
# Rewrite a "<path> <label>" file with every integer label shifted up by 10.

input_file = "extracted_svhn_500k.txt"   # Replace with your actual input file name
output_file = "extracted_svhn_500k_20l.txt" # Replace with your desired output file name

with open(input_file, "r") as infile, open(output_file, "w") as outfile:
    outfile.writelines(
        f"{path} {int(label) + 10}\n"
        # Split on the last space only, so the label is always the final field.
        for path, label in (raw.strip().rsplit(" ", 1) for raw in infile)
    )


In [None]:
# add_zeros_to_labels.py

import sys

def add_zeros_to_labels(input_file, output_file, mode, n_zeros=10):
    """
    Pad every row of a whitespace-separated label file with a run of zeros.

    Each input row is read as ``<path> <v_1> ... <v_k>``. The output row keeps
    the path first and inserts ``n_zeros`` literal ``0`` fields either before
    or after the existing values. Fields are written separated by single
    spaces. Blank rows are skipped.

    Args:
        input_file (str): Path of the file to read.
        output_file (str): Path of the file to write (overwritten if it exists).
        mode (str): ``'after'`` puts the zeros right after the path, i.e.
            before the existing values. Any other value (conventionally
            ``'end'``) appends the zeros after the existing values.
        n_zeros (int, optional): Number of zero fields to add. Default: 10.
    """
    zeros = ['0'] * n_zeros  # identical for every row, so built once

    with open(input_file, "r") as infile, open(output_file, "w") as outfile:
        for line in infile:
            parts = line.split()
            if not parts:
                # Blank row (e.g. a trailing empty line): there is no path to keep.
                continue
            path = parts[0]
            numbers = parts[1:]

            if mode == 'after':
                new_line = [path] + zeros + numbers
            else:  # mode == 'end'
                new_line = [path] + numbers + zeros

            outfile.write(' '.join(new_line) + '\n')

In [None]:
# SVHN soft labels: ten zeros are inserted before the existing values.
add_zeros_to_labels(
    input_file='extracted_soft_svhn.txt',
    output_file='extracted_soft_svhn_20l.txt',
    mode='after',
)
# MNIST soft labels: ten zeros are appended after the existing values.
add_zeros_to_labels(
    input_file='extracted_soft_mnist.txt',
    output_file='extracted_soft_mnist_10l.txt',
    mode='end',
)