In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import DatasetFolder
import numpy as np
from math import ceil
from sklearn.model_selection import train_test_split
from torchsummary import summary
from torchmetrics.classification import AUROC, StatScores

  warn(


In [3]:
class BinaryTransform:
    def __init__(self, input_length):
        self.input_length = input_length

    def __call__(self, binary_data):
        binary_data = np.frombuffer(binary_data, dtype=np.uint8)
        
        l = len(binary_data)

        # Pad or truncate the binary data
        if l < self.input_length:
            padding = np.zeros(self.input_length - l, dtype=np.uint8)
            binary_data = np.concatenate((binary_data, padding))
        elif l > self.input_length:
            excess = ceil(l / self.input_length)
            padding = np.zeros(self.input_length * excess - l, dtype=np.uint8)
            binary_data = np.concatenate((binary_data, padding))
            binary_data = binary_data.reshape(len(binary_data)//excess, -1)
            binary_data = np.mean(binary_data, axis=1)
            
        # Scale the data to [0, 1]
        scaled_data = binary_data / 255.0
        tensor = torch.tensor(scaled_data, dtype=torch.float32)
        return tensor.unsqueeze(0)

In [4]:
# There are two versions of the assigment, so we created two versions, either of them works

# Assignment on Teams
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=(10,), stride=(1,))
        self.fc1 = nn.Linear(65496, 1)  # Adjust the input size based on your data size

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        x = x.view(-1, 65496)
        return F.sigmoid(self.fc1(x))

# Assignmanet on Moodle (Linear input size adjusted to meet expected output dimension)
# class ConvNet(nn.Module):
#     def __init__(self):
#         super(ConvNet, self).__init__()
#         self.conv1 = nn.Conv1d(1, 16, kernel_size=(10,), stride=(1,))
#         self.fc1 = nn.Linear(2*65488, 1)  # Adjust the input size based on your data size

#     def forward(self, x):
#         x = F.relu(self.conv1(x))
#         x = F.max_pool1d(x, kernel_size=4, stride=2, padding=0, dilation=1, ceil_mode=False)
#         x = x.view(-1, 2*65488)
#         return F.sigmoid(self.fc1(x))

In [5]:
# Set up device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
# Define data paths
train_data_path = "data/train"
test_data_path = "data/test"

In [7]:
# Define the input length and instantiate the transform
input_length = 16384
transform = BinaryTransform(input_length)

In [8]:
# Create dataset
train_dataset = DatasetFolder(root=train_data_path, loader=lambda x: open(x, 'rb').read(), extensions=('',), transform=transform)
test_dataset = DatasetFolder(root=test_data_path, loader=lambda x: open(x, 'rb').read(), extensions=('',), transform=transform)

In [9]:
# Split dataset into train and validation sets
indices = np.arange(len(train_dataset))
np.random.shuffle(indices)
train_indices, val_indices = train_test_split(indices, test_size=0.3, random_state=42)

In [10]:
train_loader = DataLoader(torch.utils.data.Subset(train_dataset, train_indices), batch_size=64, shuffle=True)
val_loader = DataLoader(torch.utils.data.Subset(train_dataset, val_indices), batch_size=64)
test_loader = DataLoader(test_dataset, batch_size=64)

In [11]:
# Instantiate model
model = ConvNet().to(device)

In [12]:
summary(model,(1,16384))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1            [-1, 16, 16375]             176
            Linear-2                    [-1, 1]          65,497
Total params: 65,673
Trainable params: 65,673
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.06
Forward/backward pass size (MB): 2.00
Params size (MB): 0.25
Estimated Total Size (MB): 2.31
----------------------------------------------------------------


In [13]:
# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

In [14]:
# Training loop
best_val_loss = float('inf')
patience = 4
counter = 0

In [15]:
for epoch in range(10):  # You can adjust the number of epochs
    model.train()
    train_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data).squeeze()
        loss = criterion(output, target.float())
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Validate
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(val_loader):
            data, target = data.to(device), target.to(device)
            output = model(data).squeeze()
            loss = criterion(output, target.float())
            val_loss += loss.item()
            total += target.size(0)
            pred_label = output > 0.5
            correct += pred_label.eq(target).sum().item()

    print(f'Epoch {epoch}, Train Loss: {train_loss/len(train_loader.dataset)}, Val Loss: {val_loss/len(val_loader.dataset)}, Val Acc: {100.*correct/total}')

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping")
            break

Epoch 0, Train Loss: 0.012068889158791918, Val Loss: 0.0035988709271123948, Val Acc: 93.82716049382717
Epoch 1, Train Loss: 0.003090372425077599, Val Loss: 0.0025605520008862414, Val Acc: 93.73219373219374
Epoch 2, Train Loss: 0.002068530737832158, Val Loss: 0.0018050057666707017, Val Acc: 96.77113010446344
Epoch 3, Train Loss: 0.0015388828515399992, Val Loss: 0.0015230983255142727, Val Acc: 96.9610636277303
Epoch 4, Train Loss: 0.0013180894213912445, Val Loss: 0.001265048386364581, Val Acc: 97.62583095916429
Epoch 5, Train Loss: 0.0009429900655670772, Val Loss: 0.001143952826319257, Val Acc: 97.91073124406458
Epoch 6, Train Loss: 0.0007922101400468133, Val Loss: 0.001160141950913644, Val Acc: 98.38556505223171
Epoch 7, Train Loss: 0.0006616503890265669, Val Loss: 0.0009018354124945906, Val Acc: 98.76543209876543
Epoch 8, Train Loss: 0.0005480432147550592, Val Loss: 0.0007136163194747846, Val Acc: 98.86039886039886
Epoch 9, Train Loss: 0.00042826019297249684, Val Loss: 0.00067328181248

In [16]:
torch.save(model.state_dict(), "model.pt")

In [17]:
saved_model = ConvNet().to(device)
saved_model.load_state_dict(torch.load("model.pt"))

<All keys matched successfully>

In [18]:
roc_metric = AUROC(task="binary")
stat_scores = StatScores(task="binary")

test_loss = 0
total = 0
correct = 0

with torch.no_grad():
    for batch_idx, (data, target) in enumerate(test_loader):
        data, target = data.to(device), target.to(device)
        output = saved_model(data).squeeze()
        loss = criterion(output, target.float())
        test_loss += loss.item()
        total += target.size(0)
        pred_label = output > 0.5
        correct += pred_label.eq(target).sum().item()
        roc_metric.update(pred_label.float(), target.float())
        stat_scores.update(pred_label.float(), target.float())

In [19]:
"Mean acc. on test set: ", correct / total

('Mean acc. on test set: ', 0.9897435897435898)

In [20]:
"ROC AUC: ", roc_metric.compute().item()

('ROC AUC: ', 0.9897435903549194)

In [21]:
# tp, fp, tn, fn = stat_scores.compute()
scores = stat_scores.compute()
tp = scores[0]
fp = scores[1]
tn = scores[2]
fn = scores[3]

In [22]:
"TPR: ", (tp / (tp + fn)).item()

('TPR: ', 0.9897435903549194)

In [23]:
"TNR: ", (tn / (tn + fp)).item()

('TNR: ', 0.9897435903549194)

In [24]:
"FPR: ", (fp / (fp + tn)).item()

('FPR: ', 0.010256410576403141)

In [25]:
"FNR: ", (fn / (fn + tp)).item()

('FNR: ', 0.010256410576403141)

# White-box PGD attack

In [26]:
eps = 8/255
alpha = 2/255
steps = 10
batch_size = 64

In [27]:
victim_path = "data/victim"

In [28]:
victim_dataset = DatasetFolder(root=train_data_path, loader=lambda x: open(x, 'rb').read(), extensions=('',), transform=transform)
victim_loader = DataLoader(victim_dataset, batch_size=batch_size, shuffle=True)

In [28]:
def pgd_attack(inputs,labels):
    adv_inputs = inputs.clone().detach()
    for _ in range(steps):
        adv_inputs.requires_grad = True
        outputs = model(adv_inputs)
        
        target_labels = torch.zeros(labels.size()).unsqueeze(1)
        
        cost = -criterion(outputs,target_labels)
        
        grad = torch.autograd.grad(
            cost, adv_inputs, retain_graph=False, create_graph=False
        )[0]
        
        adv_inputs = adv_inputs.detach() + alpha * grad.sign()
        delta = torch.clamp(adv_inputs - inputs, min=-eps, max=eps)
        adv_inputs = torch.clamp(inputs + delta, min=0, max=1).detach()
        
    return adv_inputs

In [None]:
pgd_correct = 0
model.eval()
for inputs, labels in victim_loader:    
    inputs, labels = inputs.to(device), labels.to(device)
    adv_inputs = pgd_attack(inputs,labels)
    outputs = model(adv_inputs)
    _,predicted = torch.max(outputs.data,1)
    pgd_correct += (predicted == labels).sum().item()
pgd_accuracy = pgd_correct / len(test_loader.dataset)
print(f"Attack accuracy: {pgd_accuracy:.4f}")