# NetFM

In this notebook, we implement NetFM, a website fingerprinting (WF) attack based on semi-supervised learning.

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import warnings
warnings.filterwarnings('ignore')
import numpy as np

from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import RandomSampler, SequentialSampler
from torch.optim.lr_scheduler import LambdaLR
import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
from torch.autograd import Variable
import tqdm
import pickle
import argparse
import random
import math
import os

import bisect

## GPU Allocation

In [2]:
# Select device 0 of whichever backend is available and prepare the matching
# DataLoader keyword arguments.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda", 0)
    kwargs = {'num_workers': 0, 'pin_memory': True}
else:
    device = torch.device("cpu", 0)
    kwargs = {}
print (f'Device: {device}')

Device: cuda:0


## Parameters

N defines the number of labeled samples per website.

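mu controls how much unlabeled data is used relative to labeled data: N*(mu+1) traces per website are drawn for the unlabeled pool (the N labeled traces are part of that draw), and each unlabeled batch holds (mu+1)*batch_size traces.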

In [3]:
# Batch size of the labeled and test loaders; the unlabeled loader uses (mu + 1) * batch_size.
batch_size = 32
# Number of labeled traces drawn per class by x_u_split / x_u_split_mu.
N = 5
# Unlabeled multiplier: x_u_split_mu draws N * (mu + 1) traces per class for the unlabeled pool.
mu = 7
# Epoch count used when sizing the cosine learning-rate schedule.
# NOTE(review): the training loop defines its own `num_epochs`; confirm the two are meant to differ.
num_epoches = 100

# Directory expected to contain `awf1.npz`; placeholder value, set it before running.
data_path = '/path/to/AWF/data'

## Loading Data

In [4]:
# Relative train/test weights; split_awf_closed only uses the ratio r_train / (r_train + r_test) = 5/6.
r_train = 500.0 / 6.0
r_test = 100.0 / 6.0
# Number of classes (monitored websites) read from the dataset.
num_classes = 100
# Number of consecutive rows per class in awf1.npz (presumably instances per website — confirm against the data);
# stored as a float and cast to int inside split_awf_closed.
mon_instance = 2498.0


def split_awf_closed(r_train, r_test, nClass, mon_instance, dim, data_dir=None):
    """Load the monitored AWF traces and split them into train/test sets.

    The file ``awf1.npz`` is expected to hold a 2-D ``feature`` array in which
    the rows of class ``c`` occupy ``[c * mon_instance, (c + 1) * mon_instance)``.
    One random permutation of the per-class instance ids is drawn with
    ``np.random.shuffle`` and applied to every class, so all classes use the
    same instance ids for training and for testing.

    Args:
        r_train: Relative weight of the training portion.
        r_test: Relative weight of the test portion.
        nClass: Number of classes to read.
        mon_instance: Number of instances per class (int or float; cast to int).
        dim: Number of leading cells kept from every trace.
        data_dir: Directory containing ``awf1.npz``. Defaults to the
            module-level ``data_path``.

    Returns:
        ``(train_feature, train_label, test_feature, test_label)`` as integer
        NumPy arrays; features have shape ``(rows, dim)``.
    """
    if data_dir is None:
        data_dir = data_path

    # np.load on an .npz returns an NpzFile that keeps the archive open; the
    # context manager closes it as soon as the array has been read into memory.
    with np.load(f'{data_dir}/awf1.npz') as mon_data:
        mon_x = mon_data['feature']

    num_mtrain_instance = mon_instance * (r_train / (r_train + r_test))
    mon_instance = int(mon_instance)
    mon_random = np.arange(mon_instance)
    np.random.shuffle(mon_random)

    mon_train_ins = mon_random[:int(num_mtrain_instance)]
    mon_test_ins = mon_random[int(num_mtrain_instance):]
    n_train = len(mon_train_ins)
    n_test = len(mon_test_ins)
    print('mon_test_ins', n_test)

    train_feature = np.zeros((nClass * n_train, dim), dtype=int)
    test_feature = np.zeros((nClass * n_test, dim), dtype=int)
    print('test_feature', len(test_feature))
    train_label = np.zeros((nClass * n_train,), dtype=int)
    test_label = np.zeros((nClass * n_test,), dtype=int)

    print(n_train)
    print(n_test)
    print('Monitored training set partitioning...')
    print(nClass)
    print(n_train)
    # Copy one class at a time: this replaces the per-row Python loop while
    # keeping the temporary buffer limited to a single class worth of rows.
    for c in range(nClass):
        base = c * mon_instance
        lo = c * n_train
        train_label[lo:lo + n_train] = c
        train_feature[lo:lo + n_train] = mon_x[base + mon_train_ins, :dim]

    print('Monitored testing set partitioning...')
    for c in range(nClass):
        base = c * mon_instance
        lo = c * n_test
        test_label[lo:lo + n_test] = c
        test_feature[lo:lo + n_test] = mon_x[base + mon_test_ins, :dim]

    print('train_feature: ', len(train_feature))
    print('train_label: ', len(train_label))
    print('test_feature: ', len(test_feature))
    print('test_label: ', len(test_label))
    print('train_dim: ', len(train_feature[0]))
    print('test_dim: ', len(test_feature[0]))

    return train_feature, train_label, test_feature, test_label


# Build the train/test split, keeping the first 5000 cells of every trace.
x_train, y_train, x_test, y_test = split_awf_closed(r_train, r_test, num_classes, mon_instance, 5000)

mon_test_ins 417
test_feature 41700
2081
417
Monitored training set partitioning...
100
2081
Monitored testing set partitioning...
train_feature:  208100
train_label:  208100
test_feature:  41700
test_label:  41700
train_dim:  5000
test_dim:  5000


## Randomly Sample Indices for Labeled Data

In [5]:
def x_u_split():
    """Pick N labeled indices per class; every training index stays unlabeled.

    Reads the module-level ``y_train``, ``num_classes`` and ``N``.

    Returns:
        ``(labeled_index, unlabeled_index)``: a shuffled array with ``N``
        indices per class, and the array of all indices ``0 .. len(y_train)-1``.
    """
    labels = y_train
    all_indices = np.array(range(len(labels)))

    chosen = []
    for c in range(num_classes):
        class_members = np.where(labels == c)[0]
        chosen.extend(np.random.choice(class_members, N, False))

    labeled_index = np.array(chosen)
    np.random.shuffle(labeled_index)
    return labeled_index, all_indices


def x_u_split_mu(mu):
    """Draw N*(mu+1) indices per class; the first N of each draw are labeled.

    Reads the module-level ``y_train``, ``num_classes`` and ``N``. The labeled
    indices are a subset of the unlabeled ones.

    Args:
        mu: Unlabeled multiplier; ``N * (mu + 1)`` indices are drawn per class
            without replacement.

    Returns:
        ``(labeled_index, unlabeled_index)``, each independently shuffled.
    """
    labels = y_train
    draw_size = N * (mu + 1)

    labeled_parts, unlabeled_parts = [], []
    for c in range(num_classes):
        class_members = np.where(labels == c)[0]
        drawn = np.random.choice(class_members, draw_size, False)
        labeled_parts.extend(drawn[:N])
        unlabeled_parts.extend(drawn)

    labeled_index = np.array(labeled_parts)
    unlabeled_index = np.array(unlabeled_parts)
    np.random.shuffle(labeled_index)
    np.random.shuffle(unlabeled_index)
    return labeled_index, unlabeled_index


# Draw the labeled / unlabeled index sets used by the data loaders.
labeled_index, unlabeled_index = x_u_split_mu(mu)


## DF Model

In [6]:
class DFNet(nn.Module):
    """1-D convolutional classifier made of four conv blocks and an MLP head.

    Each block applies two width-8 convolutions (the second followed by batch
    normalisation), a max-pool of width 8 / stride 4 and dropout. Inputs are
    padded by (3, 4) before every convolution and pooling step, so the
    convolutions keep the sequence length and only the pooling shrinks it.

    The first linear layer expects 5120 flattened features, i.e. 256 channels
    x 20 positions, which is what a length-5000 input produces after the four
    pooling stages (5000 -> 1250 -> 313 -> 79 -> 20).

    Args:
        out_dim: Number of output logits.
    """

    def __init__(self, out_dim):
        super(DFNet, self).__init__()
        kernel_size = 8
        conv_stride = 1
        pool_stride = 4
        pool_size = 8

        self.conv1 = nn.Conv1d(1, 32, kernel_size, stride=conv_stride)
        self.conv1_1 = nn.Conv1d(32, 32, kernel_size, stride=conv_stride)

        self.conv2 = nn.Conv1d(32, 64, kernel_size, stride=conv_stride)
        self.conv2_2 = nn.Conv1d(64, 64, kernel_size, stride=conv_stride)

        self.conv3 = nn.Conv1d(64, 128, kernel_size, stride=conv_stride)
        self.conv3_3 = nn.Conv1d(128, 128, kernel_size, stride=conv_stride)

        self.conv4 = nn.Conv1d(128, 256, kernel_size, stride=conv_stride)
        self.conv4_4 = nn.Conv1d(256, 256, kernel_size, stride=conv_stride)

        self.batch_norm1 = nn.BatchNorm1d(32)
        self.batch_norm2 = nn.BatchNorm1d(64)
        self.batch_norm3 = nn.BatchNorm1d(128)
        self.batch_norm4 = nn.BatchNorm1d(256)

        self.max_pool_1 = nn.MaxPool1d(kernel_size=pool_size, stride=pool_stride)
        self.max_pool_2 = nn.MaxPool1d(kernel_size=pool_size, stride=pool_stride)
        self.max_pool_3 = nn.MaxPool1d(kernel_size=pool_size, stride=pool_stride)
        self.max_pool_4 = nn.MaxPool1d(kernel_size=pool_size, stride=pool_stride)

        self.dropout1 = nn.Dropout(p=0.1)
        self.dropout2 = nn.Dropout(p=0.1)
        self.dropout3 = nn.Dropout(p=0.1)
        self.dropout4 = nn.Dropout(p=0.1)

        self.projection = nn.Sequential(
            nn.Linear(5120, 512),
            nn.ReLU(),
            nn.Dropout(p=0.7),
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(p=0.5)
        )

        self.fc = nn.Linear(512, out_dim)

    def weight_init(self):
        """Xavier-uniform initialise every Linear/Conv1d weight and zero its bias."""
        for m in self.modules():
            if isinstance(m, (nn.Linear, nn.Conv1d)):
                # `xavier_uniform` (no trailing underscore) is deprecated; the
                # in-place `xavier_uniform_` is the supported spelling.
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, inp):
        """Return class logits for a batch shaped (batch, 1, length).

        With the fixed 5120-wide projection input, `length` must be one that
        flattens to 5120 features (5000 does).
        """
        x = inp
        # ==== first block (ELU activations) ====
        x = F.pad(x, (3, 4))
        x = F.elu(self.conv1(x))
        x = F.pad(x, (3, 4))
        x = F.elu(self.batch_norm1(self.conv1_1(x)))
        x = F.pad(x, (3, 4))
        x = self.max_pool_1(x)
        x = self.dropout1(x)

        # ==== second block ====
        x = F.pad(x, (3, 4))
        x = F.relu(self.conv2(x))
        x = F.pad(x, (3, 4))
        x = F.relu(self.batch_norm2(self.conv2_2(x)))
        x = F.pad(x, (3, 4))
        x = self.max_pool_2(x)
        x = self.dropout2(x)

        # ==== third block ====
        x = F.pad(x, (3, 4))
        x = F.relu(self.conv3(x))
        x = F.pad(x, (3, 4))
        x = F.relu(self.batch_norm3(self.conv3_3(x)))
        x = F.pad(x, (3, 4))
        x = self.max_pool_3(x)
        x = self.dropout3(x)

        # ==== fourth block ====
        x = F.pad(x, (3, 4))
        x = F.relu(self.conv4(x))
        x = F.pad(x, (3, 4))
        x = F.relu(self.batch_norm4(self.conv4_4(x)))
        x = F.pad(x, (3, 4))
        x = self.max_pool_4(x)
        x = self.dropout4(x)

        # Flatten (batch, channels, positions) into (batch, features).
        x = x.view(x.size(0), -1)

        x = self.projection(x)

        x = self.fc(x)

        return x
        

## NetAugment

In [7]:
def find_bursts(x):
    """Split a direction sequence into bursts of consecutive equal directions.

    Scanning stops at the first zero entry (treated as padding) or at the end
    of the sequence. Unlike the previous version, the burst that is still open
    when scanning stops is also returned instead of being silently dropped.

    Args:
        x: Sequence of +1 / -1 cell directions, optionally followed by zeros.

    Returns:
        List of ``(start, end, signed_size)`` tuples with ``end`` exclusive;
        ``signed_size`` is the sum of the directions in the burst. An empty
        sequence, or one that starts with zero, yields ``[]``.
    """
    if len(x) == 0 or x[0] == 0:
        return []

    direction = x[0]
    bursts = []
    start = 0
    temp_burst = x[0]
    end = len(x)
    for i in range(1, len(x)):
        if x[i] == 0.0:
            # Padding reached: the open burst ends here.
            end = i
            break

        elif x[i] == direction:
            temp_burst += x[i]

        else:
            bursts.append((start, i, temp_burst))
            start = i
            temp_burst = x[i]
            direction *= -1

    # Close the burst that was still being accumulated.
    bursts.append((start, end, temp_burst))
    return bursts

# Gather the sizes of all positive (outgoing) bursts found in 1000 training
# traces sampled without replacement.
outgoing_burst_sizes = []

x_random = x_train[np.random.choice(range(len(x_train)), size=1000, replace=False)]

for x in x_random:
    bursts = find_bursts(x)
    outgoing_burst_sizes.extend(size for _, _, size in bursts if size > 0)

### Empirical Distribution of Outgoing Bursts

In [8]:
# 77-bin histogram of the sampled outgoing burst sizes, normalised to a PDF,
# then accumulated into a CDF with a leading 0 so it lines up with the bin edges.
count, bins = np.histogram(outgoing_burst_sizes, bins=77)
PDF = count / count.sum()
OUTGOING_BURST_SIZE_CDF = np.concatenate(([0.0], np.cumsum(PDF)))

In [9]:
class Augmentor():
    """Burst-level strong augmentation of direction traces.

    A trace is a sequence of +1 / -1 cell directions padded with zeros. Each
    call to `augment` applies one randomly chosen burst manipulation (resize
    large incoming bursts, merge incoming bursts, or insert outgoing bursts
    into large incoming bursts), rebuilds a fixed-length trace and finally
    applies a small random shift.

    Relies on the module-level ``OUTGOING_BURST_SIZE_CDF`` when inserting
    outgoing bursts.
    """

    def __init__(self):
        # Incoming bursts with at least this many cells are treated as "large".
        self.large_burst_threshold = 10

        # changing the content
        self.upsample_rate = 1.0
        self.downsample_rate = 0.5

        # merging bursts
        self.num_bursts_to_merge = 5
        self.merge_burst_rate = 0.1

        # add outgoing bursts
        self.add_outgoing_burst_rate = 0.3
        self.outgoing_burst_sizes = list(range(73))

        # shift
        self.shift_param = 10

        # length of every trace produced by this augmentor
        self.trace_length = 5000

    def find_bursts(self, x):
        """Return ``(start, end, signed_size)`` for each burst in trace `x`.

        Scanning stops at the first zero (padding) or at the end of `x`; the
        burst that is open at that point is included in the result. An empty
        trace, or one starting with zero, yields ``[]``.
        """
        if len(x) == 0 or x[0] == 0:
            return []

        direction = x[0]
        bursts = []
        start = 0
        temp_burst = x[0]
        end = len(x)
        for i in range(1, len(x)):
            if x[i] == 0.0:
                end = i
                break

            elif x[i] == direction:
                temp_burst += x[i]

            else:
                bursts.append((start, i, temp_burst))
                start = i
                temp_burst = x[i]
                direction *= -1

        # Close the last burst instead of dropping it.
        bursts.append((start, end, temp_burst))
        return bursts

    # representing the change of website content
    def increase_incoming_bursts(self, burst_sizes):
        """Enlarge every large incoming burst by a random factor in [1, 1 + upsample_rate)."""
        out = []
        for size in burst_sizes:
            if size <= -self.large_burst_threshold:
                up_sample_rate = random.random() * self.upsample_rate
                out.append(int(size * (1 + up_sample_rate)))
            else:
                out.append(size)

        return out

    def decrease_incoming_bursts(self, burst_sizes):
        """Shrink every large incoming burst by a random factor in (1 - downsample_rate, 1]."""
        out = []
        for size in burst_sizes:
            if size <= -self.large_burst_threshold:
                down_sample_rate = random.random() * self.downsample_rate
                out.append(int(size * (1 - down_sample_rate)))
            else:
                out.append(size)

        return out

    def change_content(self, trace):
        """Resize the large incoming bursts of `trace`; returns burst sizes.

        Short traces (< 1000 cells) are enlarged, long ones (> 4000 cells) are
        shrunk, anything in between is enlarged or shrunk with equal chance.
        The length is the number of real cells, not ``len(trace)``: padded
        traces always have the same array length, which previously made the
        "short" and "in between" branches unreachable.
        """
        burst_sizes = [burst[2] for burst in self.find_bursts(trace)]
        num_cells = sum(abs(size) for size in burst_sizes)

        if num_cells < 1000:
            new_burst_sizes = self.increase_incoming_bursts(burst_sizes)

        elif num_cells > 4000:
            new_burst_sizes = self.decrease_incoming_bursts(burst_sizes)

        else:
            p = random.random()
            if p >= 0.5:
                new_burst_sizes = self.increase_incoming_bursts(burst_sizes)

            else:
                new_burst_sizes = self.decrease_incoming_bursts(burst_sizes)

        return new_burst_sizes

    def merge_incoming_bursts(self, burst_sizes):
        """Randomly merge runs of incoming bursts, dropping the outgoing bursts between them.

        The bursts covering the first 20 cells are copied untouched. The
        trailing bursts that are too close to the end to start a merge are
        copied untouched as well (they used to be discarded).
        """
        out = []

        # skipping first 20 cells
        i = 0
        num_cells = 0
        while i < len(burst_sizes) and num_cells < 20:
            num_cells += abs(burst_sizes[i])
            out.append(burst_sizes[i])
            i += 1

        while i < len(burst_sizes) - self.num_bursts_to_merge:
            prob = random.random()

            # ignore outgoing bursts
            if burst_sizes[i] > 0:
                out.append(burst_sizes[i])
                i += 1
                continue

            if prob < self.merge_burst_rate:
                num_merges = random.randint(2, self.num_bursts_to_merge)
                merged_size = 0

                # merging the incoming bursts
                while i < len(burst_sizes) and num_merges > 0:
                    if burst_sizes[i] < 0:
                        merged_size += burst_sizes[i]
                        num_merges -= 1
                    i += 1
                out.append(merged_size)

            else:
                out.append(burst_sizes[i])
                i += 1

        # Keep whatever is left after the merge window.
        out.extend(burst_sizes[i:])

        return out

    def add_outgoing_burst(self, burst_sizes):
        """Randomly split large incoming bursts by inserting an outgoing burst.

        The bursts covering the first 20 cells are copied untouched. The size
        of each inserted burst is sampled through the module-level
        ``OUTGOING_BURST_SIZE_CDF``.
        """
        out = []

        i = 0
        num_cells = 0
        while i < len(burst_sizes) and num_cells < 20:
            num_cells += abs(burst_sizes[i])
            out.append(burst_sizes[i])
            i += 1

        last_size_index = len(self.outgoing_burst_sizes) - 1
        for size in burst_sizes[i:]:
            if size > -10:
                out.append(size)
                continue

            prob = random.random()

            if prob < self.add_outgoing_burst_rate:
                outgoing_burst_prob = random.random()
                index = bisect.bisect_left(OUTGOING_BURST_SIZE_CDF, outgoing_burst_prob)
                # The CDF can have more entries than `outgoing_burst_sizes`;
                # clamp so that draws from the upper tail cannot raise IndexError.
                index = min(index, last_size_index)
                outgoing_burst_size = self.outgoing_burst_sizes[index]
                # Keep at least 3 cells on either side of the inserted burst.
                divide_place = random.randint(3, int(abs(size)) - 3)

                out += [-divide_place, outgoing_burst_size, -(abs(size) - divide_place)]

            else:
                out.append(size)

        return out

    def create_trace_from_burst_sizes(self, burst_sizes):
        """Expand signed burst sizes into a +1/-1 trace, zero-padded or cut to `trace_length`."""
        out = []

        for size in burst_sizes:
            val = 1 if size > 0 else -1

            out += [val] * (int(abs(size)))

        if len(out) < self.trace_length:
            out += [0] * (self.trace_length - len(out))

        return np.array(out)[:self.trace_length]

    def shift(self, x):
        """Circularly shift `x` by a random offset in [-shift_param, shift_param].

        `x` is extended with `shift_param` zeros and `shift_param` random +1/-1
        cells before rolling, and the result is cut back to `trace_length`.
        """
        pad = np.random.randint(0, 2, size=(self.shift_param, ))
        pad = 2 * pad - 1
        zpad = np.zeros_like(pad)

        shift_val = np.random.randint(-self.shift_param, self.shift_param + 1, 1)[0]
        shifted = np.concatenate((x, zpad, pad), axis=-1)
        shifted = np.roll(shifted, shift_val, axis=-1)
        shifted = shifted[:self.trace_length]

        return shifted

    def augment(self, trace):
        """Apply one randomly chosen burst augmentation followed by a random shift.

        `change_content` receives the trace itself because it extracts the
        bursts on its own; the other two methods receive the burst sizes.
        (Previously all three were fed burst sizes, so `change_content` ran
        burst detection on a list of sizes.)
        """
        choice = random.randint(0, 2)

        if choice == 0:
            augmented_sizes = self.change_content(trace)
        else:
            burst_sizes = [burst[2] for burst in self.find_bursts(trace)]
            if choice == 1:
                augmented_sizes = self.merge_incoming_bursts(burst_sizes)
            else:
                augmented_sizes = self.add_outgoing_burst(burst_sizes)

        augmented_trace = self.create_trace_from_burst_sizes(augmented_sizes)

        return self.shift(augmented_trace)

## Weak Augmentor: Random Direction Flips

For weak augmentation in NetFM, we just randomly flip the direction of Tor cells with 0.1 probability.

In [10]:
class WeakAugmentor():
    """Weak augmentation: flip the sign of each entry independently.

    Args:
        param: Flip probability; an entry is negated when its uniform draw in
            [0, 1) is not greater than `param`.
    """

    def __init__(self, param):
        self.param = param

    def augment(self, x):
        """Return a copy of `x` with randomly negated entries.

        The random mask follows the shape of `x` instead of being fixed to
        5000 entries, so traces of any length (or batches of traces) work.
        For a length-5000 trace the random draw is the same as before.
        """
        x = np.asarray(x)
        z = np.random.uniform(size=x.shape)

        noised = np.where(z > self.param, x, -x)

        return noised

## Data Loaders

In [11]:
class LabeledData(Dataset):
    """Labeled subset of the training data, served with weak augmentation.

    Args:
        x_train: Array of traces.
        y_train: Array of labels aligned with `x_train`.
        idx: Indices selecting the labeled samples.
        weak_augmentor: Object exposing ``augment(trace)``.
    """

    def __init__(self, x_train, y_train, idx, weak_augmentor):
        self.weak_augmentor = weak_augmentor
        self.x, self.y = x_train[idx], y_train[idx]

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        """Return ``(weakly augmented trace, label)`` for sample `index`."""
        augmented = self.weak_augmentor.augment(self.x[index])
        label = self.y[index]
        return augmented, label
    
class TestData(Dataset):
    """Plain (trace, label) dataset used for evaluation; no augmentation."""

    def __init__(self, x_test, y_test):
        self.x, self.y = x_test, y_test

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        """Return ``(trace, label)`` for sample `index`."""
        sample = self.x[index]
        label = self.y[index]
        return sample, label
    
class ValidData(Dataset):
    """Plain (trace, label) dataset for a validation split; no augmentation."""

    def __init__(self, x_valid, y_valid):
        self.x, self.y = x_valid, y_valid

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        """Return ``(trace, label)`` for sample `index`."""
        sample = self.x[index]
        label = self.y[index]
        return sample, label
    
class UnLabeledData(Dataset):
    """Unlabeled subset of the training data, served as a weak/strong pair.

    Args:
        x_train: Array of traces.
        idx: Indices selecting the unlabeled samples.
        weak_augmentor: Object exposing ``augment(trace)``.
        strong_augmentor: Object exposing ``augment(trace)``.
    """

    def __init__(self, x_train, idx, weak_augmentor, strong_augmentor):
        self.strong_augmentor = strong_augmentor
        self.weak_augmentor = weak_augmentor
        self.x = x_train[idx]

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        """Return ``(weak view, strong view)`` of sample `index`."""
        # The weak view is produced first, then the strong one.
        weak_view = self.weak_augmentor.augment(self.x[index])
        strong_view = self.strong_augmentor.augment(self.x[index])
        return weak_view, strong_view
        

In [12]:
# Weak view: per-cell direction flips; 0.1 is the value passed as WeakAugmentor's `param`.
weak_augmentor = WeakAugmentor(0.1)
# Strong view: burst-level augmentation with the defaults set in Augmentor.__init__.
strong_augmentor = Augmentor()

In [13]:
# Labeled samples, weakly augmented on access; incomplete final batches are dropped.
# NOTE(review): the `kwargs` dict built in the GPU cell is not passed to any loader here.
labeled_data = LabeledData(x_train, y_train, labeled_index, weak_augmentor)
labeled_loader = DataLoader(labeled_data, batch_size=batch_size, shuffle=True, drop_last=True)

# Unlabeled samples as (weak, strong) pairs; each batch is (mu + 1) times the labeled batch size.
unlabeled_data = UnLabeledData(x_train, unlabeled_index, weak_augmentor, strong_augmentor)
unlabeled_loader = DataLoader(unlabeled_data, batch_size=(mu+1)*batch_size, shuffle=True, drop_last=True)

# Test split, served unaugmented and in its stored order.
test_data = TestData(x_test, y_test)
test_loader = DataLoader(test_data, batch_size=batch_size)

## NetFM Train Function

In the NetFM train function, we generate pseudo labels from the weakly augmented traces. We then use those pseudo labels as the actual labels of the corresponding strongly augmented traces.

In [14]:
def train(model, device, labeled_loader, unlabeled_loader, optimizer, schedular):
    """Run one pass over `unlabeled_loader` with the FixMatch-style objective.

    For every unlabeled batch one labeled batch is drawn (the labeled iterator
    restarts when exhausted). Labeled, weak and strong inputs go through the
    model in a single forward pass. The loss is the labeled cross-entropy plus
    ``lambda_u`` times the cross-entropy of the strong-view logits against the
    pseudo labels of the weak view, counted only where the pseudo-label
    confidence reaches ``threshold``.

    Reads the module-level ``T`` (softmax temperature), ``threshold`` and
    ``lambda_u`` at call time.

    Args:
        model: Network mapping (batch, 1, length) inputs to logits.
        device: Device the batches are moved to.
        labeled_loader: Iterable of ``(x, target)`` batches, x shaped (batch, length).
        unlabeled_loader: Iterable of ``(x_weak, x_strong)`` batches of equal size.
        optimizer: Optimizer stepped once per unlabeled batch.
        schedular: LR scheduler stepped once per unlabeled batch.

    Returns:
        ``(Lx, Lu, loss)`` of the last batch as Python floats.

    Raises:
        ValueError: If `unlabeled_loader` yields no batches.
    """
    model.train()
    labeled_iterator = iter(labeled_loader)
    Lx = Lu = loss = None
    for x_w, x_s in unlabeled_loader:
        try:
            x_l, target = next(labeled_iterator)
        except StopIteration:
            labeled_iterator = iter(labeled_loader)
            x_l, target = next(labeled_iterator)

        x_w = x_w.view(x_w.size(0), 1, x_w.size(1)).float().to(device)
        x_s = x_s.view(x_s.size(0), 1, x_s.size(1)).float().to(device)
        x_l = x_l.view(x_l.size(0), 1, x_l.size(1)).float().to(device)

        target = target.to(device)

        # Split on the actual labeled batch size rather than the global
        # `batch_size`, so a labeled batch of any size is separated correctly.
        num_labeled = x_l.size(0)

        inputs = torch.cat((x_l, x_w, x_s))
        optimizer.zero_grad()
        logits = model(inputs)
        logits_x = logits[:num_labeled]

        logits_w, logits_s = logits[num_labeled:].chunk(2)

        del logits

        Lx = F.cross_entropy(logits_x, target)

        # Pseudo labels come from the weak view and carry no gradient.
        pseudo_label = torch.softmax(logits_w.detach()/T, dim=-1)
        max_probs, targets_u = torch.max(pseudo_label, dim=-1)
        mask = max_probs.ge(threshold).float()

        Lu = (F.cross_entropy(logits_s, targets_u, reduction='none')*mask).mean()

        loss = Lx + lambda_u*Lu

        loss.backward()
        optimizer.step()
        schedular.step()

    if loss is None:
        raise ValueError('unlabeled_loader yielded no batches')

    return (Lx.item(), Lu.item(), loss.item())

        
        
def test(model, device, loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in loader:
            data = data.view(data.size(0), 1, data.size(1)).float().to(device)
            target = target.to(device)
            output = model(data)
            output = torch.softmax(output, dim=1)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).float().sum().item()

    return correct / len(loader.dataset)

## Cosine Scheduler

Following the original FixMatch implementation, we attach a cosine learning-rate scheduler to the NetFM optimizer.

In [15]:
def get_cosine_schedule_with_warmup(optimizer,
                                    num_warmup_steps,
                                    num_training_steps,
                                    num_cycles=7./16.,
                                    last_epoch=-1):
    """Build a LambdaLR with linear warm-up followed by a cosine decay.

    The learning-rate multiplier rises linearly from 0 to 1 over
    `num_warmup_steps`, then follows ``cos(pi * num_cycles * progress)``
    (clipped at 0), where `progress` is the fraction of the remaining
    ``num_training_steps - num_warmup_steps`` steps already taken.

    Args:
        optimizer: Optimizer whose learning rate is scheduled.
        num_warmup_steps: Number of warm-up steps.
        num_training_steps: Total number of scheduler steps.
        num_cycles: Cosine cycles covered by the decay phase.
        last_epoch: Passed through to LambdaLR.

    Returns:
        The configured ``LambdaLR`` instance.
    """
    warmup_span = float(max(1, num_warmup_steps))
    decay_span = float(max(1, num_training_steps - num_warmup_steps))

    def _lr_lambda(current_step):
        if current_step >= num_warmup_steps:
            progress = float(current_step - num_warmup_steps) / decay_span
            return max(0., math.cos(math.pi * num_cycles * progress))
        return float(current_step) / warmup_span

    return LambdaLR(optimizer, _lr_lambda, last_epoch)

In [16]:
# Build the classifier, initialise its weights and create the SGD optimizer.
model = DFNet(out_dim=num_classes).to(device)
model.weight_init()
optimizer = optim.SGD(model.parameters(),lr = 0.01, momentum=0.9, nesterov=True)
# train() steps the scheduler once per unlabeled batch, so the schedule horizon
# is counted in unlabeled-loader batches. Dividing the dataset size by the
# labeled `batch_size` overstated it by a factor of (mu + 1), which left the
# learning rate almost flat for the whole run.
schedular = get_cosine_schedule_with_warmup(optimizer, 0, num_epoches * len(unlabeled_loader))

In [17]:
# Temperature dividing the weak-view logits before the softmax in train().
T = 1
# Minimum pseudo-label confidence for an unlabeled sample to contribute to Lu.
threshold = 0.95
# Weight of the unlabeled loss Lu in the total loss.
lambda_u = 1

In [18]:
# Number of passes over the unlabeled loader.
# NOTE(review): differs from `num_epoches` (100) used to size the LR schedule — confirm which is intended.
num_epochs = 101
# NOTE(review): never updated, because the evaluation call below is commented out.
best_acc = 0
for epoch in tqdm.tqdm(range(num_epochs)):
    # train() returns the losses of the last batch of the epoch.
    lx, lu, loss = train(model, device, labeled_loader, unlabeled_loader, optimizer, schedular)
    # acc = test(model, device, test_loader)
    print ('======== Epoch %d ========'%epoch)
    print ('Lx: ', lx, 'Lu: ', lu, 'loss: ', loss)
        

  0%|          | 0/101 [00:19<?, ?it/s]

KeyboardInterrupt

