In [82]:
import numpy as np
import matplotlib.pyplot as plt

T=10

np.set_printoptions(threshold=np.inf)

In [83]:
class SVM_Auxiliary_Classifier:
    def __init__(self, C, k, n_features):
        self.C = C  # Regularization parameter
        self.k = k  # Number of classes
        self.omega = np.random.rand(k, n_features)  # Weights
        self.b = np.random.rand(k)  # Bias

    def fit(self, X, y):
        n_samples, n_features = X.shape
        L = np.zeros((n_samples, self.k))
        mask = np.zeros((n_samples, self.k))

        print("SVM Shapes", self.omega.T.shape, X[0].shape, self.b.shape, y.shape)

        # Compute the loss matrix L and the mask
        for i in range(n_samples):
            for j in range(self.k):
                if j != y[i]:
                    margin = self.omega[y[i], j] @ X[i] - self.omega[y[i]] @ X[i] + self.b[j] - self.b[y[i]]
                    L[i, j] = max(0, 1 - margin)
                    if L[i, j] > 0:
                        mask[i, j] = 1

        # print(X.shape, mask.shape, L.shape, y.shape)

        # Gradient update for weights and bias
        for j in range(self.k):
            if j in y:
                # Update rule for class j
                indices = np.where(y == j)[0]
                omega_grad = np.sum(X[indices][:, np.newaxis, :] * mask[indices][:, :, np.newaxis], axis=0)
                self.omega[j] -= omega_grad.mean(axis=0) - self.C * self.omega[j]

                # Update rule for bias j
                b_grad = mask[indices].sum(axis=0)
                self.b[j] -= b_grad.mean() - self.C * self.b[j]

        return mask, L

    def predict(self, X):
        # Make predictions by selecting the class with the highest decision function value
        decision_function = X @ self.omega.T + self.b
        return np.argmax(decision_function, axis=1)
    
# Dummy data for demonstration purposes

# np.random.seed(0)

# X_dummy = np.random.randn(100, 10)  # 100 samples, 10 features

# y_dummy = np.random.choice([1, -1], size=100)  # Binary classification problem

# # Create an instance of SVM_Auxiliary_Classifier with dummy parameters
# svm_aux = SVM_Auxiliary_Classifier(C=1, k=2, n_features=10)

# # Fit the classifier to the dummy data
# mask, L = svm_aux.fit(X_dummy, y_dummy)

# # Output the mask and loss matrix for verification
# mask, L


In [84]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import random

# Assuming the SVM_Auxiliary_Classifier is defined as above

def pgd_attack(image, epsilon, data_grad, num_steps=10, step_size=2/255):
    # Initialize the perturbed image as a copy of the original image
    perturbed_image = image.clone().detach()
    
    # Iteratively apply gradient ascent for the specified number of steps
    for _ in range(num_steps):
        # Collect the element-wise sign of the data gradient
        sign_data_grad = data_grad.sign()
        # Add a small step in the direction of the sign of the gradient
        perturbed_image += step_size * sign_data_grad
        # Clip the perturbed image to stay within the epsilon budget
        perturbed_image = torch.clamp(perturbed_image, image - epsilon, image + epsilon)
        # Clip the perturbed image to ensure it stays within the valid image range [0, 1]
        perturbed_image = torch.clamp(perturbed_image, 0, 1)
        
    return perturbed_image.detach().to("cpu").numpy()

# Functions for generating PGD attacks and computing loss L_margin
# These should be defined according to the problem specification
def generate_pgd_attacks(model, images, labels):
    # This function should generate and return adversarial examples using PGD
    perturbed_images = []
    images.requires_grad = True
    output = model(images)
    loss = criterion(output, labels)
    model.zero_grad()
    loss.backward()
    data_grad = images.grad.data
    for i in range(images.shape[0]):
        perturbed_images.append(pgd_attack(images[i], 8/255, data_grad[i]))
    
    return np.array(perturbed_images)

def compute_loss_lmargin(output, labels, mask, svm_classifier):
    # This function should compute and return the L_margin loss
    pass

# Load CIFAR-10 data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=625, shuffle=True, drop_last=True)

test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(test_dataset, batch_size=625, shuffle=False, drop_last=True)

# Initialize ResNet-18 network
resnet18 = models.resnet18(pretrained=False, num_classes=10)
feature_extractor = torch.nn.Sequential(*list(resnet18.children())[:-1])

device = torch.device("mps")

resnet18 = resnet18.to(device)

# Criterion and optimizer
criterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(resnet18.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = StepLR(optimizer, step_size=75, gamma=0.1)

print(resnet18.fc.in_features)

# Training process
svm_classifier = SVM_Auxiliary_Classifier(C=1, k=10, n_features=trainloader.batch_size * 2)
resnet18.train()
# print("ghello")

for epoch in range(T):
    for i, (inputs, labels) in enumerate(trainloader, 0):
        # Generate PGD attack examples
        inputs, labels = inputs.to(device), labels.to(device)

        # print(inputs.shape)

        adv_inputs = torch.tensor(generate_pgd_attacks(resnet18, inputs, labels))
        adv_inputs = adv_inputs.to(device)

        total_inputs = torch.cat((adv_inputs, inputs), 0)

        labels = torch.cat((torch.tensor([0] * labels.shape[0]).to(device), labels), 0)

        # print(adv_inputs.shape, inputs.shape)

        # print("PGD attack done", type(adv_inputs))

        adv_inputs = adv_inputs.to(device)

        # Get features before the last FC layer
        features = feature_extractor(total_inputs)
        print(features.shape)

        # Train SVM auxiliary classifier
        mask, L_margin = svm_classifier.fit(features.cpu().detach().squeeze().numpy(), labels.cpu().numpy())

        print(mask.shape)

        # Compute loss L_margin
        outputs = resnet18(total_inputs)
        # L_margin = compute_loss_lmargin(outputs, labels, mask, svm_classifier)

        labels = labels.to(device)
        outputs = outputs.to(device)
        # print("Shapes: ", labels.shape, outputs.shape, mask.shape, L_margin.shape)

        # Compute the masked cross-entropy loss
        loss = (criterion(outputs, labels) * torch.from_numpy(mask).float().to(device)).mean()

        loss = loss.to(device)
        L_margin = torch.tensor(L_margin).float().to(device)

        total_loss = (loss + L_margin).mean().clone().detach().requires_grad_(True)
        # total_loss = total_loss.requires_grad_(True)
        # print(total_loss, loss, L_margin.mean())
        total_loss.backward()

        optimizer.zero_grad()
        optimizer.step()

    print(f'Epoch {epoch+1}/{T} finished, Loss: {total_loss.item()}')

print('Finished Training. SVM auxiliary classifier will be removed for inference.')

resnet18.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = resnet18(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy: {100 * correct / total}%')

Files already downloaded and verified
Files already downloaded and verified




512
torch.Size([1250, 512, 1, 1])
SVM Shapes (1250, 10) (512,) (10,) (1250,)


ValueError: matmul: Input operand 0 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)