# NetCLR Fine-tuning

In this notebook, we fine-tune the pre-trained base model of NetCLR in a closed world scenario. 

We evaluate NetCLR using two datasets: AWF and Drift datasets. 

N defines the number of labeled samples that we use for fine-tuning.  

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import warnings
warnings.filterwarnings('ignore')
import numpy as np

from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import RandomSampler, SequentialSampler
import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
from torch.autograd import Variable
# from torchvision import datasets, transforms
import tqdm
import pickle
import argparse
from torch.cuda.amp import GradScaler, autocast

import random
import sys
import os
import collections

## GPU Allocation

In [2]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu", 0)
kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}
print (f'Device: {device}')

Device: cuda:0


## Parameters

In [3]:
batch_size = 32

## Loading the Fine-tuning Datasets

In [4]:
DATASET = 'AWF' # 'Drift'

if DATASET == 'AWF':    
    data_path = '/path/to/AWF/fine-tuning-data' # AWF-attack
    data = pickle.load(open(f'{data_path}', 'rb'))
    
elif DATASET == 'Drift':
    data_path = '/path/to/Drift/fine-tuning-data' # Drift90
    data = pickle.load(open(f'{data_path}', 'rb'))

x_train_total = data['x_train']
y_train_total = data['y_train']
x_test_sup = data['x_test_fast']
y_test_sup = data['y_test_fast']
x_test_inf = data ['x_test_slow']
y_test_inf = data['y_test_slow']

num_classes = len(np.unique(y_train_total))
print ("Number of classes:", num_classes)

Number of classes: 69


In [5]:
print (f'Data shapes: {x_train_total.shape}, {x_test_sup.shape}, {x_test_inf.shape}')

Data shapes: (6847, 5000), (3450, 5000), (3450, 5000)


In [6]:
# This function randomly samples N traces per website
def sample_traces(x, y, N):
    train_index = []
    
    for c in range(num_classes):
        idx = np.where(y == c)[0]
        idx = np.random.choice(idx, min(N, len(idx)), False)
        train_index.extend(idx)
        
    train_index = np.array(train_index)
    np.random.shuffle(train_index)
    
    x_train = x[train_index]
    y_train = y[train_index]
    
    return x_train, y_train

## Backbone Model

In [7]:
class DFNet(nn.Module):
    def __init__(self, out_dim):
        super(DFNet, self).__init__()
        kernel_size = 8
        channels = [1, 32, 64, 128, 256]
        conv_stride = 1
        pool_stride = 4
        pool_size = 8
        
        self.conv1 = nn.Conv1d(1, 32, kernel_size, stride = conv_stride)
        self.conv1_1 = nn.Conv1d(32, 32, kernel_size, stride = conv_stride)
        
        self.conv2 = nn.Conv1d(32, 64, kernel_size, stride = conv_stride)
        self.conv2_2 = nn.Conv1d(64, 64, kernel_size, stride = conv_stride)
       
        self.conv3 = nn.Conv1d(64, 128, kernel_size, stride = conv_stride)
        self.conv3_3 = nn.Conv1d(128, 128, kernel_size, stride = conv_stride)
       
        self.conv4 = nn.Conv1d(128, 256, kernel_size, stride = conv_stride)
        self.conv4_4 = nn.Conv1d(256, 256, kernel_size, stride = conv_stride)
       
        
        self.batch_norm1 = nn.BatchNorm1d(32)
        self.batch_norm2 = nn.BatchNorm1d(64)
        self.batch_norm3 = nn.BatchNorm1d(128)
        self.batch_norm4 = nn.BatchNorm1d(256)
        
        self.max_pool_1 = nn.MaxPool1d(kernel_size=pool_size, stride=pool_stride)
        self.max_pool_2 = nn.MaxPool1d(kernel_size=pool_size, stride=pool_stride)
        self.max_pool_3 = nn.MaxPool1d(kernel_size=pool_size, stride=pool_stride)
        self.max_pool_4 = nn.MaxPool1d(kernel_size=pool_size, stride=pool_stride)
        
        self.dropout1 = nn.Dropout(p=0.1)
        self.dropout2 = nn.Dropout(p=0.1)
        self.dropout3 = nn.Dropout(p=0.1)
        self.dropout4 = nn.Dropout(p=0.1)
        
        
#         self.projection = nn.Sequential(
#             nn.Linear(5120, 512),
#             nn.ReLU(),
#             nn.Dropout(p=0.7),
#             nn.Linear(512, 512),
#             nn.BatchNorm1d(512),
#             nn.ReLU(),
#             nn.Dropout(p=0.5)
#         )
        
        self.fc = nn.Linear(5120, out_dim)

        
    def weight_init(self):
        for n, m in self.named_modules():
            if isinstance(m, nn.Linear) or isinstance(m, nn.Conv1d):
#                 m.weight.data.xavier_uniform_()
                print (n)
                torch.nn.init.xavier_uniform(m.weight)
                m.bias.data.zero_()
            
        
    def forward(self, inp):
        x = inp
        # ==== first block ====
        x = F.pad(x, (3,4))
        x = F.elu((self.conv1(x)))
        x = F.pad(x, (3,4))
        x = F.elu(self.batch_norm1(self.conv1_1(x)))
#         x = F.elu(self.conv1_1(x))
        x = F.pad(x, (3, 4))
        x = self.max_pool_1(x)
        x = self.dropout1(x)
        
        # ==== second block ====
        x = F.pad(x, (3,4))
        x = F.relu((self.conv2(x)))
        x = F.pad(x, (3,4))
        x = F.relu(self.batch_norm2(self.conv2_2(x)))
#         x = F.relu(self.conv2_2(x))
        x = F.pad(x, (3,4))
        x = self.max_pool_2(x)
        x = self.dropout2(x)
        
        # ==== third block ====
        x = F.pad(x, (3,4))
        x = F.relu((self.conv3(x)))
        x = F.pad(x, (3,4))
        x = F.relu(self.batch_norm3(self.conv3_3(x)))
#         x = F.relu(self.conv3_3(x))
        x = F.pad(x, (3,4))
        x = self.max_pool_3(x)
        x = self.dropout3(x)
        
        # ==== fourth block ====
        x = F.pad(x, (3,4))
        x = F.relu((self.conv4(x)))
        x = F.pad(x, (3,4))
        x = F.relu(self.batch_norm4(self.conv4_4(x)))
#         x = F.relu(self.conv4_4(x))
        x = F.pad(x, (3,4))
        x = self.max_pool_4(x)
        x = self.dropout4(x)

                
        x = x.view(x.size(0), -1)
        
#         x = self.projection(x)

        x = self.fc(x)
                
        return x    
        

In [8]:
class DFsimCLR(nn.Module):
    def __init__(self, df, out_dim):
        super(DFsimCLR, self).__init__()
        
        self.backbone = df
        self.backbone.weight_init()
        dim_mlp = self.backbone.fc.in_features
        self.backbone.fc = nn.Sequential(
            nn.Linear(dim_mlp, dim_mlp),
            nn.BatchNorm1d(dim_mlp),
            nn.ReLU(),
            nn.Linear(dim_mlp, out_dim)
        )
        
    def forward(self, inp):
        out = self.backbone(inp)
        return out

## Data Loader

In [9]:
class Data(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)

## Loading the Pre-trained Model

In [10]:
def load_checkpoint():

    model = DFNet(out_dim=num_classes).to(device)

    checkpoint = torch.load('/path/to/pre-trained/model/')

    for k in list(checkpoint.keys()):
        if k.startswith('backbone.'):
            if k.startswith('backbone') and not k.startswith('backbone.fc'):
          # remove prefix
                checkpoint[k[len("backbone."):]] = checkpoint[k]
        del checkpoint[k]

    log = model.load_state_dict(checkpoint, strict=False)
    assert log.missing_keys == ['fc.weight', 'fc.bias']
    
    return model

## Initating Test Data Loaders

In [11]:
test_dataset_inf = Data(x_test_inf, y_test_inf)
test_loader_inf = DataLoader(test_dataset_inf, batch_size=batch_size, drop_last=True)

test_dataset_sup = Data(x_test_sup, y_test_sup)
test_loader_sup = DataLoader(test_dataset_sup, batch_size=batch_size, drop_last=True)

## Function for Train and Test

In [12]:
def train(model, device, train_loader, optimizer):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(data.size(0), 1, data.size(1)).float().to(device)
        target = target.to(device)
        
        optimizer.zero_grad()
        output = model(data)
        # print (output.size())
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx%100 == 0:
            print ("Loss: {:0.6f}".format(loss.item()))
    
def test(model, device, loader):
    model.eval()
    correct = 0
    temp = 0
    with torch.no_grad():
        for data, target in loader:
            data = data.view(data.size(0), 1, data.size(1)).float().to(device)
            target = target.to(device)
            
            output = model(data)
            output = torch.softmax(output, dim=1)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).float().sum().item()
    return correct / len(loader.dataset)

## Running for 5 Times

In [13]:
# N defines the number of labeled samples we use to perform fine-tuning
N = 5

In [14]:
accuracies_inf = []
accuracies_sup = []
for _ in range(5): 
    x_train, y_train = sample_traces(x_train_total, y_train_total, N)
    
    print ("Input size:", x_train.shape, y_train.shape)
    
    train_dataset = Data(x_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    
    model = load_checkpoint()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    
    
    best_acc_inf = 0
    best_acc_sup = 0
    for epoch in range(31):
        print ('Epoch: ', epoch)
        train(model, device, train_loader, optimizer)
        
        acc_inf = test(model, device, test_loader_inf)
        acc_sup = test(model, device, test_loader_sup)
        
        best_acc_inf = max(best_acc_inf, acc_inf)
        best_acc_sup = max(best_acc_sup, acc_sup)
        
        if epoch%10 == 0:
            print (f"Accuracy on inferior dataset: {acc_inf*100:.2f}")
            print (f"Accuracy on superior dataset: {acc_sup*100:.2f}")
                
    accuracies_inf.append(best_acc_inf)
    accuracies_sup.append(best_acc_sup)
    
    
    print ('------------------------------------------------')

Input size: (345, 5000) (345,)
Epoch:  0
Loss: 4.332883
Accuracy on inferior dataset: 13.48
Accuracy on superior dataset: 15.30
Epoch:  1
Loss: 4.049552
Epoch:  2
Loss: 3.552467
Epoch:  3
Loss: 2.826560
Epoch:  4
Loss: 2.153902
Epoch:  5
Loss: 1.454585
Epoch:  6
Loss: 0.888584
Epoch:  7
Loss: 0.569001
Epoch:  8
Loss: 0.448165
Epoch:  9
Loss: 0.256711
Epoch:  10
Loss: 0.234288
Accuracy on inferior dataset: 78.17
Accuracy on superior dataset: 90.70
Epoch:  11
Loss: 0.146979
Epoch:  12
Loss: 0.167850
Epoch:  13
Loss: 0.091809
Epoch:  14
Loss: 0.092474
Epoch:  15
Loss: 0.073701
Epoch:  16
Loss: 0.073582
Epoch:  17
Loss: 0.065222
Epoch:  18
Loss: 0.048629
Epoch:  19
Loss: 0.048955
Epoch:  20
Loss: 0.043468
Accuracy on inferior dataset: 80.81
Accuracy on superior dataset: 91.59
Epoch:  21
Loss: 0.039777
Epoch:  22
Loss: 0.031476
Epoch:  23
Loss: 0.040288
Epoch:  24
Loss: 0.034849
Epoch:  25
Loss: 0.029477
Epoch:  26
Loss: 0.035239
Epoch:  27
Loss: 0.021454
Epoch:  28
Loss: 0.026317
Epoch:  2

In [15]:
accuracies_inf = np.array(accuracies_inf)
accuracies_sup = np.array(accuracies_sup)

print (f"Test accuracy on inferior traces: avg -> {np.mean(accuracies_inf)*100:.1f}, std -> {np.std(accuracies_inf)*100:.1f}")
print (f"Test accuracy on Superior traces: avg -> {np.mean(accuracies_sup)*100:.1f}, std -> {np.std(accuracies_sup)*100:.1f}")

Test accuracy on inferior traces: avg -> 79.8, std -> 1.0
Test accuracy on Superior traces: avg -> 90.5, std -> 1.1
