# The notebook contains
### Code for _Multi-krum_ aggregation algorithm
### Evaluation of all of the attacks (Fang, LIE, and our SOTA AGR-tailored and AGR-agnstic) on Multi-krum

In [3]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [1]:
from __future__ import print_function
import argparse, os, sys, csv, shutil, time, random, operator, pickle, ast, math
import numpy as np
import pandas as pd
from torch.optim import Optimizer
import torch.nn.functional as F
import torch
import pickle
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data as data
import torch.multiprocessing as mp

sys.path.insert(0,'./../utils/')
from logger import *
from eval import *
from misc import *

from cifar10_normal_train import *
from cifar10_util import *
from adam import Adam
from sgd import SGD

## Get cifar10 data and split it in IID fashion among 50 clients

In [4]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets
data_loc='/mnt/nfs/work1/amir/vshejwalkar/cifar10_data/'
# load the train dataset

train_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

cifar10_train = datasets.CIFAR10(root=data_loc, train=True, download=True, transform=train_transform)

cifar10_test = datasets.CIFAR10(root=data_loc, train=False, download=True, transform=train_transform)

X=[]
Y=[]
for i in range(len(cifar10_train)):
    X.append(cifar10_train[i][0].numpy())
    Y.append(cifar10_train[i][1])

for i in range(len(cifar10_test)):
    X.append(cifar10_test[i][0].numpy())
    Y.append(cifar10_test[i][1])

X=np.array(X)
Y=np.array(Y)

print('total data len: ',len(X))

if not os.path.isfile('./cifar10_shuffle.pkl'):
    all_indices = np.arange(len(X))
    np.random.shuffle(all_indices)
    pickle.dump(all_indices,open('./cifar10_shuffle.pkl','wb'))
else:
    all_indices=pickle.load(open('./cifar10_shuffle.pkl','rb'))

X=X[all_indices]
Y=Y[all_indices]

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# data loading

nusers=50
user_tr_len=1000

total_tr_len=user_tr_len*nusers
val_len=5000
te_len=5000

print('total data len: ',len(X))

if not os.path.isfile('./cifar10_shuffle.pkl'):
    all_indices = np.arange(len(X))
    np.random.shuffle(all_indices)
    pickle.dump(all_indices,open('./cifar10_shuffle.pkl','wb'))
else:
    all_indices=pickle.load(open('./cifar10_shuffle.pkl','rb'))

total_tr_data=X[:total_tr_len]
total_tr_label=Y[:total_tr_len]

val_data=X[total_tr_len:(total_tr_len+val_len)]
val_label=Y[total_tr_len:(total_tr_len+val_len)]

te_data=X[(total_tr_len+val_len):(total_tr_len+val_len+te_len)]
te_label=Y[(total_tr_len+val_len):(total_tr_len+val_len+te_len)]

total_tr_data_tensor=torch.from_numpy(total_tr_data).type(torch.FloatTensor)
total_tr_label_tensor=torch.from_numpy(total_tr_label).type(torch.LongTensor)

val_data_tensor=torch.from_numpy(val_data).type(torch.FloatTensor)
val_label_tensor=torch.from_numpy(val_label).type(torch.LongTensor)

te_data_tensor=torch.from_numpy(te_data).type(torch.FloatTensor)
te_label_tensor=torch.from_numpy(te_label).type(torch.LongTensor)

print('total tr len %d | val len %d | test len %d'%(len(total_tr_data_tensor),len(val_data_tensor),len(te_data_tensor)))

#==============================================================================================================

user_tr_data_tensors=[]
user_tr_label_tensors=[]

for i in range(nusers):
    
    user_tr_data_tensor=torch.from_numpy(total_tr_data[user_tr_len*i:user_tr_len*(i+1)]).type(torch.FloatTensor)
    user_tr_label_tensor=torch.from_numpy(total_tr_label[user_tr_len*i:user_tr_len*(i+1)]).type(torch.LongTensor)

    user_tr_data_tensors.append(user_tr_data_tensor)
    user_tr_label_tensors.append(user_tr_label_tensor)
    print('user %d tr len %d'%(i,len(user_tr_data_tensor)))

total data len:  60000
total tr len 50000 | val len 5000 | test len 5000
user 0 tr len 1000
user 1 tr len 1000
user 2 tr len 1000
user 3 tr len 1000
user 4 tr len 1000
user 5 tr len 1000
user 6 tr len 1000
user 7 tr len 1000
user 8 tr len 1000
user 9 tr len 1000
user 10 tr len 1000
user 11 tr len 1000
user 12 tr len 1000
user 13 tr len 1000
user 14 tr len 1000
user 15 tr len 1000
user 16 tr len 1000
user 17 tr len 1000
user 18 tr len 1000
user 19 tr len 1000
user 20 tr len 1000
user 21 tr len 1000
user 22 tr len 1000
user 23 tr len 1000
user 24 tr len 1000
user 25 tr len 1000
user 26 tr len 1000
user 27 tr len 1000
user 28 tr len 1000
user 29 tr len 1000
user 30 tr len 1000
user 31 tr len 1000
user 32 tr len 1000
user 33 tr len 1000
user 34 tr len 1000
user 35 tr len 1000
user 36 tr len 1000
user 37 tr len 1000
user 38 tr len 1000
user 39 tr len 1000
user 40 tr len 1000
user 41 tr len 1000
user 42 tr len 1000
user 43 tr len 1000
user 44 tr len 1000
user 45 tr len 1000
user 46 tr len 10

## Code for Multi-krum aggregation

In [None]:
def multi_krum(all_updates, n_attackers, multi_k=False):

    candidates = []
    candidate_indices = []
    remaining_updates = all_updates
    all_indices = np.arange(len(all_updates))

    while len(remaining_updates) > 2 * n_attackers + 2:
        torch.cuda.empty_cache()
        distances = []
        for update in remaining_updates:
            distance = []
            for update_ in remaining_updates:
                distance.append(torch.norm((update - update_)) ** 2)
            distance = torch.Tensor(distance).float()
            distances = distance[None, :] if not len(distances) else torch.cat((distances, distance[None, :]), 0)

        distances = torch.sort(distances, dim=1)[0]
        scores = torch.sum(distances[:, :len(remaining_updates) - 2 - n_attackers], dim=1)
        indices = torch.argsort(scores)[:len(remaining_updates) - 2 - n_attackers]

        candidate_indices.append(all_indices[indices[0].cpu().numpy()])
        all_indices = np.delete(all_indices, indices[0].cpu().numpy())
        candidates = remaining_updates[indices[0]][None, :] if not len(candidates) else torch.cat((candidates, remaining_updates[indices[0]][None, :]), 0)
        remaining_updates = torch.cat((remaining_updates[:indices[0]], remaining_updates[indices[0] + 1:]), 0)
        if not multi_k:
            break
    # print(len(remaining_updates))

    aggregate = torch.mean(candidates, dim=0)

    return aggregate, np.array(candidate_indices)

## Code for Fang attack on Multi-krum

In [7]:
def compute_lambda_fang(all_updates, model_re, n_attackers):

    distances = []
    n_benign, d = all_updates.shape
    for update in all_updates:
        distance = torch.norm((all_updates - update), dim=1)
        distances = distance[None, :] if not len(distances) else torch.cat((distances, distance[None, :]), 0)

    distances[distances == 0] = 10000
    distances = torch.sort(distances, dim=1)[0]
    scores = torch.sum(distances[:, :n_benign - 2 - n_attackers], dim=1)
    min_score = torch.min(scores)
    term_1 = min_score / ((n_benign - n_attackers - 1) * torch.sqrt(torch.Tensor([d]))[0])
    max_wre_dist = torch.max(torch.norm((all_updates - model_re), dim=1)) / (torch.sqrt(torch.Tensor([d]))[0])

    return (term_1 + max_wre_dist)


def get_malicious_updates_fang(all_updates, model_re, deviation, n_attackers):

    lamda = compute_lambda_fang(all_updates, model_re, n_attackers)
    threshold = 1e-5

    mal_updates = []
    while lamda > threshold:
        mal_update = (- lamda * deviation)

        mal_updates = torch.stack([mal_update] * n_attackers)
        mal_updates = torch.cat((mal_updates, all_updates), 0)

        agg_grads, krum_candidate = multi_krum(mal_updates, n_attackers, multi_k=False)
        
        if krum_candidate < n_attackers:
            return mal_updates
        
        lamda *= 0.5

    if not len(mal_updates):
        print(lamda, threshold)
        mal_update = (model_re - lamda * deviation)
        
        mal_updates = torch.stack([mal_update] * n_attackers)
        mal_updates = torch.cat((mal_updates, all_updates), 0)

    return mal_updates

In [10]:
batch_size=250
resume=0
nepochs=1200
schedule=[1000]
nbatches = user_tr_len//batch_size

gamma=.5
opt = 'sgd'
fed_lr=0.5
criterion=nn.CrossEntropyLoss()
use_cuda = torch.cuda.is_available()

aggregation='mkrum'
multi_k = False
candidates = []

at_type='fang'
z=0
n_attackers=[10]

arch='alexnet'
chkpt='./'+aggregation

for n_attacker in n_attackers:
    epoch_num = 0
    best_global_acc = 0
    best_global_te_acc = 0

    torch.cuda.empty_cache()
    r=np.arange(user_tr_len)

    fed_model, _ = return_model(arch, 0.1, 0.9, parallel=False)
    optimizer_fed = SGD(fed_model.parameters(), lr=fed_lr)

    while epoch_num <= nepochs:
        user_grads=[]
        if not epoch_num and epoch_num%nbatches == 0:
            np.random.shuffle(r)
            for i in range(nusers):
                user_tr_data_tensors[i]=user_tr_data_tensors[i][r]
                user_tr_label_tensors[i]=user_tr_label_tensors[i][r]

        for i in range(n_attacker, nusers):

            inputs = user_tr_data_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]
            targets = user_tr_label_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]

            inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

            outputs = fed_model(inputs)
            loss = criterion(outputs, targets)
            fed_model.zero_grad()
            loss.backward(retain_graph=True)

            param_grad=[]
            for param in fed_model.parameters():
                param_grad=param.grad.data.view(-1) if not len(param_grad) else torch.cat((param_grad,param.grad.view(-1)))

            user_grads=param_grad[None, :] if len(user_grads)==0 else torch.cat((user_grads,param_grad[None,:]), 0)

        malicious_grads = user_grads

        if epoch_num in schedule:
            for param_group in optimizer_fed.param_groups:
                param_group['lr'] *= gamma
                print('New learnin rate ', param_group['lr'])

        if n_attacker > 0:
            if at_type == 'paf':
                malicious_grads=get_malicious_predictions_poison_all_far_sign(malicious_grads,nusers,n_attacker)
            elif at_type == 'lie':
                malicious_grads = get_malicious_updates_lie(malicious_grads, n_attacker, z, epoch_num)
            elif at_type == 'fang':
                agg_grads = torch.mean(malicious_grads, 0)
                deviation = torch.sign(agg_grads)
                malicious_grads = get_malicious_updates_fang(malicious_grads, agg_grads, deviation, n_attacker)
            elif at_type == 'our':
                agg_grads = torch.mean(malicious_grads, 0)
                malicious_grads = our_attack_krum(malicious_grads, agg_grads, n_attacker, compression, q_level, norm)

        if not epoch_num:
            print(malicious_grads.shape)
            
        if aggregation=='median':
            agg_grads=torch.median(malicious_grads,dim=0)[0]

        elif aggregation=='average':
            agg_grads=torch.mean(malicious_grads,dim=0)

        elif aggregation=='krum' or aggregation=='mkrum':

            multi_k = True if aggregation == 'mkrum' else False
            if epoch_num == 0:
                print('multi krum is ', multi_k)

            agg_grads, krum_candidate = multi_krum(malicious_grads, n_attacker, multi_k=multi_k)

        elif aggregation=='bulyan':
            agg_grads,bulyan_candidate=bulyan(malicious_grads, n_attacker)

            print(np.sum(bulyan_candidate<n_attacker))

            if n_attacker:
                if epoch_num > 0 and (epoch_num%50==0 or epoch_num == (nepochs-1)):
                    try:
                        print('number of malicious grads chosen are ', np.array(candidates).reshape(5, 10))
                    except:
                        print('number of malicious grads chosen are ', np.array(candidates))
                    candidates = []
                    candidates.append(np.sum(bulyan_candidate<n_attacker))
                else:
                    candidates.append(np.sum(bulyan_candidate<n_attacker))

        del user_grads

        start_idx=0

        optimizer_fed.zero_grad()

        model_grads=[]

        for i, param in enumerate(fed_model.parameters()):
            param_=agg_grads[start_idx:start_idx+len(param.data.view(-1))].reshape(param.data.shape)
            start_idx=start_idx+len(param.data.view(-1))
            param_=param_.cuda()
            model_grads.append(param_)

        optimizer_fed.step(model_grads)

        val_loss, val_acc = test(val_data_tensor,val_label_tensor,fed_model,criterion,use_cuda)
        te_loss, te_acc = test(te_data_tensor,te_label_tensor, fed_model, criterion, use_cuda)

        is_best = best_global_acc < val_acc

        best_global_acc = max(best_global_acc, val_acc)

        if is_best:
            best_global_te_acc = te_acc

        if epoch_num%1==0 or epoch_num==nepochs-1:
            print('%s: at %s n_at %d n_mal_sel %d e %d val loss %.4f val acc %.4f best val_acc %f te_acc %f'%(aggregation, at_type, n_attacker, np.sum(krum_candidate < n_attacker), epoch_num, val_loss, val_acc, best_global_acc,best_global_te_acc))

        epoch_num+=1

torch.Size([50, 2472266])
multi krum is  True
mkrum: at fang n_at 10 n_mal_sel 9 e 0 val loss 2.3026 val acc 9.9838 best val_acc 9.983766 te_acc 9.618506
mkrum: at fang n_at 10 n_mal_sel 10 e 1 val loss 2.3024 val acc 10.3084 best val_acc 10.308442 te_acc 9.882305
mkrum: at fang n_at 10 n_mal_sel 10 e 2 val loss 2.3022 val acc 10.7752 best val_acc 10.775162 te_acc 10.450487
mkrum: at fang n_at 10 n_mal_sel 10 e 3 val loss 2.3019 val acc 10.5519 best val_acc 10.775162 te_acc 10.450487
mkrum: at fang n_at 10 n_mal_sel 9 e 4 val loss 2.3017 val acc 11.7695 best val_acc 11.769481 te_acc 11.444805
mkrum: at fang n_at 10 n_mal_sel 10 e 5 val loss 2.3015 val acc 12.4594 best val_acc 12.459416 te_acc 12.134740
mkrum: at fang n_at 10 n_mal_sel 10 e 6 val loss 2.3013 val acc 13.2102 best val_acc 13.210227 te_acc 13.311688
mkrum: at fang n_at 10 n_mal_sel 10 e 7 val loss 2.3010 val acc 13.1088 best val_acc 13.210227 te_acc 13.311688
mkrum: at fang n_at 10 n_mal_sel 8 e 8 val loss 2.3007 val acc 1

mkrum: at fang n_at 10 n_mal_sel 10 e 73 val loss 2.1576 val acc 19.8255 best val_acc 24.675325 te_acc 24.452110
mkrum: at fang n_at 10 n_mal_sel 10 e 74 val loss 2.1515 val acc 19.8661 best val_acc 24.675325 te_acc 24.452110
mkrum: at fang n_at 10 n_mal_sel 10 e 75 val loss 2.1457 val acc 21.4692 best val_acc 24.675325 te_acc 24.452110
mkrum: at fang n_at 10 n_mal_sel 10 e 76 val loss 2.1443 val acc 19.2167 best val_acc 24.675325 te_acc 24.452110
mkrum: at fang n_at 10 n_mal_sel 10 e 77 val loss 2.1479 val acc 20.3937 best val_acc 24.675325 te_acc 24.452110
mkrum: at fang n_at 10 n_mal_sel 8 e 78 val loss 2.1449 val acc 17.9383 best val_acc 24.675325 te_acc 24.452110
mkrum: at fang n_at 10 n_mal_sel 6 e 79 val loss 2.1624 val acc 18.2427 best val_acc 24.675325 te_acc 24.452110
mkrum: at fang n_at 10 n_mal_sel 5 e 80 val loss 2.1635 val acc 17.6745 best val_acc 24.675325 te_acc 24.452110
mkrum: at fang n_at 10 n_mal_sel 4 e 81 val loss 2.2458 val acc 16.5179 best val_acc 24.675325 te_a

mkrum: at fang n_at 10 n_mal_sel 5 e 146 val loss 2.2232 val acc 15.2597 best val_acc 25.446429 te_acc 23.802760
mkrum: at fang n_at 10 n_mal_sel 4 e 147 val loss 2.1675 val acc 18.0804 best val_acc 25.446429 te_acc 23.802760
mkrum: at fang n_at 10 n_mal_sel 6 e 148 val loss 2.1218 val acc 18.1412 best val_acc 25.446429 te_acc 23.802760
mkrum: at fang n_at 10 n_mal_sel 6 e 149 val loss 2.1071 val acc 20.7995 best val_acc 25.446429 te_acc 23.802760
mkrum: at fang n_at 10 n_mal_sel 7 e 150 val loss 2.1072 val acc 20.7183 best val_acc 25.446429 te_acc 23.802760
mkrum: at fang n_at 10 n_mal_sel 5 e 151 val loss 2.1277 val acc 22.1591 best val_acc 25.446429 te_acc 23.802760
mkrum: at fang n_at 10 n_mal_sel 5 e 152 val loss 2.2448 val acc 11.9724 best val_acc 25.446429 te_acc 23.802760
mkrum: at fang n_at 10 n_mal_sel 2 e 153 val loss 2.0940 val acc 19.8458 best val_acc 25.446429 te_acc 23.802760
mkrum: at fang n_at 10 n_mal_sel 5 e 154 val loss 2.0397 val acc 25.1218 best val_acc 25.446429 

mkrum: at fang n_at 10 n_mal_sel 8 e 219 val loss 1.8986 val acc 32.1226 best val_acc 32.122565 te_acc 32.081981
mkrum: at fang n_at 10 n_mal_sel 8 e 220 val loss 1.9475 val acc 29.0179 best val_acc 32.122565 te_acc 32.081981
mkrum: at fang n_at 10 n_mal_sel 4 e 221 val loss 1.9669 val acc 30.6818 best val_acc 32.122565 te_acc 32.081981
mkrum: at fang n_at 10 n_mal_sel 4 e 222 val loss 2.1928 val acc 19.6226 best val_acc 32.122565 te_acc 32.081981
mkrum: at fang n_at 10 n_mal_sel 2 e 223 val loss 1.9749 val acc 27.8003 best val_acc 32.122565 te_acc 32.081981
mkrum: at fang n_at 10 n_mal_sel 7 e 224 val loss 1.8959 val acc 30.6818 best val_acc 32.122565 te_acc 32.081981
mkrum: at fang n_at 10 n_mal_sel 8 e 225 val loss 1.8584 val acc 32.0414 best val_acc 32.122565 te_acc 32.081981
mkrum: at fang n_at 10 n_mal_sel 10 e 226 val loss 1.8589 val acc 33.1981 best val_acc 33.198052 te_acc 33.035714
mkrum: at fang n_at 10 n_mal_sel 9 e 227 val loss 1.8621 val acc 31.5747 best val_acc 33.198052

mkrum: at fang n_at 10 n_mal_sel 3 e 292 val loss 2.1892 val acc 17.2687 best val_acc 33.928571 te_acc 33.969156
mkrum: at fang n_at 10 n_mal_sel 3 e 293 val loss 2.3057 val acc 10.5317 best val_acc 33.928571 te_acc 33.969156
mkrum: at fang n_at 10 n_mal_sel 3 e 294 val loss 2.2515 val acc 11.8709 best val_acc 33.928571 te_acc 33.969156
mkrum: at fang n_at 10 n_mal_sel 2 e 295 val loss 2.1421 val acc 19.3994 best val_acc 33.928571 te_acc 33.969156
mkrum: at fang n_at 10 n_mal_sel 3 e 296 val loss 2.0399 val acc 24.2492 best val_acc 33.928571 te_acc 33.969156
mkrum: at fang n_at 10 n_mal_sel 10 e 297 val loss 2.0281 val acc 24.0869 best val_acc 33.928571 te_acc 33.969156
mkrum: at fang n_at 10 n_mal_sel 10 e 298 val loss 2.0192 val acc 24.2898 best val_acc 33.928571 te_acc 33.969156
mkrum: at fang n_at 10 n_mal_sel 10 e 299 val loss 2.0123 val acc 24.0463 best val_acc 33.928571 te_acc 33.969156
mkrum: at fang n_at 10 n_mal_sel 10 e 300 val loss 2.0078 val acc 23.2955 best val_acc 33.928

mkrum: at fang n_at 10 n_mal_sel 4 e 365 val loss 2.1153 val acc 20.3937 best val_acc 34.375000 te_acc 34.212662
mkrum: at fang n_at 10 n_mal_sel 2 e 366 val loss 1.8437 val acc 29.2817 best val_acc 34.375000 te_acc 34.212662
mkrum: at fang n_at 10 n_mal_sel 8 e 367 val loss 1.7937 val acc 33.9489 best val_acc 34.375000 te_acc 34.212662
mkrum: at fang n_at 10 n_mal_sel 10 e 368 val loss 1.7795 val acc 35.1055 best val_acc 35.105519 te_acc 35.369318
mkrum: at fang n_at 10 n_mal_sel 10 e 369 val loss 1.8054 val acc 32.3255 best val_acc 35.105519 te_acc 35.369318
mkrum: at fang n_at 10 n_mal_sel 6 e 370 val loss 1.8872 val acc 31.2703 best val_acc 35.105519 te_acc 35.369318
mkrum: at fang n_at 10 n_mal_sel 4 e 371 val loss 2.0466 val acc 24.6347 best val_acc 35.105519 te_acc 35.369318
mkrum: at fang n_at 10 n_mal_sel 4 e 372 val loss 1.8706 val acc 30.4789 best val_acc 35.105519 te_acc 35.369318
mkrum: at fang n_at 10 n_mal_sel 5 e 373 val loss 1.7626 val acc 34.1924 best val_acc 35.10551

mkrum: at fang n_at 10 n_mal_sel 3 e 438 val loss 1.7504 val acc 33.5633 best val_acc 40.300325 te_acc 39.549513
mkrum: at fang n_at 10 n_mal_sel 5 e 439 val loss 1.8546 val acc 30.0122 best val_acc 40.300325 te_acc 39.549513
mkrum: at fang n_at 10 n_mal_sel 5 e 440 val loss 1.7290 val acc 36.1201 best val_acc 40.300325 te_acc 39.549513
mkrum: at fang n_at 10 n_mal_sel 5 e 441 val loss 1.9007 val acc 30.8847 best val_acc 40.300325 te_acc 39.549513
mkrum: at fang n_at 10 n_mal_sel 5 e 442 val loss 1.7120 val acc 35.3084 best val_acc 40.300325 te_acc 39.549513
mkrum: at fang n_at 10 n_mal_sel 7 e 443 val loss 1.6158 val acc 40.0771 best val_acc 40.300325 te_acc 39.549513
mkrum: at fang n_at 10 n_mal_sel 10 e 444 val loss 1.5823 val acc 41.7005 best val_acc 41.700487 te_acc 40.604708
mkrum: at fang n_at 10 n_mal_sel 10 e 445 val loss 1.5684 val acc 41.2135 best val_acc 41.700487 te_acc 40.604708
mkrum: at fang n_at 10 n_mal_sel 10 e 446 val loss 1.5657 val acc 42.0860 best val_acc 42.0860

mkrum: at fang n_at 10 n_mal_sel 5 e 511 val loss 1.6267 val acc 41.2744 best val_acc 44.744318 te_acc 44.338474
mkrum: at fang n_at 10 n_mal_sel 6 e 512 val loss 1.7636 val acc 36.0390 best val_acc 44.744318 te_acc 44.338474
mkrum: at fang n_at 10 n_mal_sel 6 e 513 val loss 1.6948 val acc 37.2159 best val_acc 44.744318 te_acc 44.338474
mkrum: at fang n_at 10 n_mal_sel 5 e 514 val loss 1.7629 val acc 36.3839 best val_acc 44.744318 te_acc 44.338474
mkrum: at fang n_at 10 n_mal_sel 6 e 515 val loss 1.6852 val acc 36.5869 best val_acc 44.744318 te_acc 44.338474
mkrum: at fang n_at 10 n_mal_sel 8 e 516 val loss 1.5395 val acc 43.5471 best val_acc 44.744318 te_acc 44.338474
mkrum: at fang n_at 10 n_mal_sel 9 e 517 val loss 1.5051 val acc 44.9675 best val_acc 44.967532 te_acc 44.947240
mkrum: at fang n_at 10 n_mal_sel 10 e 518 val loss 1.4882 val acc 45.6372 best val_acc 45.637175 te_acc 45.556006
mkrum: at fang n_at 10 n_mal_sel 10 e 519 val loss 1.4768 val acc 45.7792 best val_acc 45.77922

mkrum: at fang n_at 10 n_mal_sel 8 e 584 val loss 1.4897 val acc 45.1907 best val_acc 46.834416 te_acc 46.550325
mkrum: at fang n_at 10 n_mal_sel 9 e 585 val loss 1.4994 val acc 45.6575 best val_acc 46.834416 te_acc 46.550325
mkrum: at fang n_at 10 n_mal_sel 9 e 586 val loss 1.4587 val acc 46.0227 best val_acc 46.834416 te_acc 46.550325
mkrum: at fang n_at 10 n_mal_sel 10 e 587 val loss 1.4921 val acc 45.5560 best val_acc 46.834416 te_acc 46.550325
mkrum: at fang n_at 10 n_mal_sel 9 e 588 val loss 1.5325 val acc 42.9180 best val_acc 46.834416 te_acc 46.550325
mkrum: at fang n_at 10 n_mal_sel 8 e 589 val loss 1.5428 val acc 43.9529 best val_acc 46.834416 te_acc 46.550325
mkrum: at fang n_at 10 n_mal_sel 8 e 590 val loss 1.4792 val acc 44.6631 best val_acc 46.834416 te_acc 46.550325
mkrum: at fang n_at 10 n_mal_sel 9 e 591 val loss 1.4319 val acc 47.6461 best val_acc 47.646104 te_acc 47.605519
mkrum: at fang n_at 10 n_mal_sel 9 e 592 val loss 1.4247 val acc 47.7273 best val_acc 47.727273

mkrum: at fang n_at 10 n_mal_sel 6 e 657 val loss 1.5364 val acc 42.5933 best val_acc 50.020292 te_acc 50.284091
mkrum: at fang n_at 10 n_mal_sel 9 e 658 val loss 1.4818 val acc 46.6721 best val_acc 50.020292 te_acc 50.284091
mkrum: at fang n_at 10 n_mal_sel 9 e 659 val loss 1.4754 val acc 46.2459 best val_acc 50.020292 te_acc 50.284091
mkrum: at fang n_at 10 n_mal_sel 8 e 660 val loss 1.4704 val acc 46.7735 best val_acc 50.020292 te_acc 50.284091
mkrum: at fang n_at 10 n_mal_sel 8 e 661 val loss 1.4835 val acc 45.5357 best val_acc 50.020292 te_acc 50.284091
mkrum: at fang n_at 10 n_mal_sel 8 e 662 val loss 1.5230 val acc 43.7500 best val_acc 50.020292 te_acc 50.284091
mkrum: at fang n_at 10 n_mal_sel 8 e 663 val loss 1.4310 val acc 47.0982 best val_acc 50.020292 te_acc 50.284091
mkrum: at fang n_at 10 n_mal_sel 9 e 664 val loss 1.3710 val acc 49.8174 best val_acc 50.020292 te_acc 50.284091
mkrum: at fang n_at 10 n_mal_sel 10 e 665 val loss 1.3793 val acc 49.7971 best val_acc 50.020292

mkrum: at fang n_at 10 n_mal_sel 7 e 730 val loss 1.5220 val acc 43.9732 best val_acc 52.211851 te_acc 52.435065
mkrum: at fang n_at 10 n_mal_sel 8 e 731 val loss 1.4043 val acc 49.4724 best val_acc 52.211851 te_acc 52.435065
mkrum: at fang n_at 10 n_mal_sel 10 e 732 val loss 1.3535 val acc 50.5885 best val_acc 52.211851 te_acc 52.435065
mkrum: at fang n_at 10 n_mal_sel 9 e 733 val loss 1.3578 val acc 50.5276 best val_acc 52.211851 te_acc 52.435065
mkrum: at fang n_at 10 n_mal_sel 10 e 734 val loss 1.4037 val acc 49.4724 best val_acc 52.211851 te_acc 52.435065
mkrum: at fang n_at 10 n_mal_sel 9 e 735 val loss 1.4417 val acc 46.6315 best val_acc 52.211851 te_acc 52.435065
mkrum: at fang n_at 10 n_mal_sel 8 e 736 val loss 1.4709 val acc 46.6518 best val_acc 52.211851 te_acc 52.435065
mkrum: at fang n_at 10 n_mal_sel 9 e 737 val loss 1.4281 val acc 47.5041 best val_acc 52.211851 te_acc 52.435065
mkrum: at fang n_at 10 n_mal_sel 9 e 738 val loss 1.3564 val acc 51.2784 best val_acc 52.21185

mkrum: at fang n_at 10 n_mal_sel 10 e 803 val loss 1.3052 val acc 52.3133 best val_acc 53.064123 te_acc 52.942370
mkrum: at fang n_at 10 n_mal_sel 10 e 804 val loss 1.3140 val acc 52.7394 best val_acc 53.064123 te_acc 52.942370
mkrum: at fang n_at 10 n_mal_sel 10 e 805 val loss 1.3104 val acc 51.9886 best val_acc 53.064123 te_acc 52.942370
mkrum: at fang n_at 10 n_mal_sel 10 e 806 val loss 1.3348 val acc 51.7248 best val_acc 53.064123 te_acc 52.942370
mkrum: at fang n_at 10 n_mal_sel 9 e 807 val loss 1.3792 val acc 49.6144 best val_acc 53.064123 te_acc 52.942370
mkrum: at fang n_at 10 n_mal_sel 10 e 808 val loss 1.4393 val acc 47.6055 best val_acc 53.064123 te_acc 52.942370
mkrum: at fang n_at 10 n_mal_sel 7 e 809 val loss 1.5047 val acc 44.5008 best val_acc 53.064123 te_acc 52.942370
mkrum: at fang n_at 10 n_mal_sel 8 e 810 val loss 1.4478 val acc 48.4375 best val_acc 53.064123 te_acc 52.942370
mkrum: at fang n_at 10 n_mal_sel 8 e 811 val loss 1.3671 val acc 50.2435 best val_acc 53.06

mkrum: at fang n_at 10 n_mal_sel 10 e 883 val loss 1.3874 val acc 50.2841 best val_acc 54.586039 te_acc 54.403409
mkrum: at fang n_at 10 n_mal_sel 9 e 884 val loss 1.3516 val acc 51.5828 best val_acc 54.586039 te_acc 54.403409
mkrum: at fang n_at 10 n_mal_sel 9 e 885 val loss 1.3976 val acc 49.7768 best val_acc 54.586039 te_acc 54.403409
mkrum: at fang n_at 10 n_mal_sel 8 e 886 val loss 1.4825 val acc 44.5414 best val_acc 54.586039 te_acc 54.403409
mkrum: at fang n_at 10 n_mal_sel 9 e 887 val loss 1.3617 val acc 50.3044 best val_acc 54.586039 te_acc 54.403409
mkrum: at fang n_at 10 n_mal_sel 9 e 888 val loss 1.2881 val acc 52.6380 best val_acc 54.586039 te_acc 54.403409
mkrum: at fang n_at 10 n_mal_sel 9 e 889 val loss 1.2981 val acc 53.8555 best val_acc 54.586039 te_acc 54.403409
mkrum: at fang n_at 10 n_mal_sel 9 e 890 val loss 1.3164 val acc 52.4756 best val_acc 54.586039 te_acc 54.403409
mkrum: at fang n_at 10 n_mal_sel 10 e 891 val loss 1.3303 val acc 53.0844 best val_acc 54.58603

mkrum: at fang n_at 10 n_mal_sel 10 e 956 val loss 1.2895 val acc 54.7687 best val_acc 55.783279 te_acc 55.255682
mkrum: at fang n_at 10 n_mal_sel 9 e 957 val loss 1.2957 val acc 53.7135 best val_acc 55.783279 te_acc 55.255682
mkrum: at fang n_at 10 n_mal_sel 9 e 958 val loss 1.2924 val acc 55.2760 best val_acc 55.783279 te_acc 55.255682
mkrum: at fang n_at 10 n_mal_sel 10 e 959 val loss 1.3541 val acc 52.6989 best val_acc 55.783279 te_acc 55.255682
mkrum: at fang n_at 10 n_mal_sel 10 e 960 val loss 1.3562 val acc 53.2670 best val_acc 55.783279 te_acc 55.255682
mkrum: at fang n_at 10 n_mal_sel 9 e 961 val loss 1.4139 val acc 51.0146 best val_acc 55.783279 te_acc 55.255682
mkrum: at fang n_at 10 n_mal_sel 9 e 962 val loss 1.4284 val acc 50.3044 best val_acc 55.783279 te_acc 55.255682
mkrum: at fang n_at 10 n_mal_sel 9 e 963 val loss 1.4089 val acc 49.6550 best val_acc 55.783279 te_acc 55.255682
mkrum: at fang n_at 10 n_mal_sel 9 e 964 val loss 1.3528 val acc 51.5016 best val_acc 55.7832

mkrum: at fang n_at 10 n_mal_sel 10 e 1028 val loss 1.3130 val acc 56.8994 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1029 val loss 1.3003 val acc 56.2703 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1030 val loss 1.3285 val acc 56.4529 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1031 val loss 1.2981 val acc 56.3312 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1032 val loss 1.3354 val acc 56.6356 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1033 val loss 1.3098 val acc 56.0877 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1034 val loss 1.3561 val acc 55.8644 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1035 val loss 1.3340 val acc 54.8498 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1036 val loss 1.3827 val acc 55.3977 best 

mkrum: at fang n_at 10 n_mal_sel 10 e 1100 val loss 1.4059 val acc 53.0235 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1101 val loss 1.5678 val acc 52.8815 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1102 val loss 1.4094 val acc 52.4148 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1103 val loss 1.3288 val acc 56.0268 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1104 val loss 1.3992 val acc 55.2354 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1105 val loss 1.4895 val acc 52.2727 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1106 val loss 1.6966 val acc 49.7362 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 9 e 1107 val loss 1.5846 val acc 46.8141 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 9 e 1108 val loss 1.3693 val acc 52.2930 best va

mkrum: at fang n_at 10 n_mal_sel 10 e 1172 val loss 1.2847 val acc 56.8182 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1173 val loss 1.3120 val acc 56.6356 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1174 val loss 1.3482 val acc 56.7979 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1175 val loss 1.3286 val acc 56.5138 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1176 val loss 1.4254 val acc 56.4123 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1177 val loss 1.4774 val acc 51.7451 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 10 e 1178 val loss 1.8028 val acc 49.9594 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 8 e 1179 val loss 1.7768 val acc 43.9732 best val_acc 57.426948 te_acc 57.670455
mkrum: at fang n_at 10 n_mal_sel 9 e 1180 val loss 1.4699 val acc 49.8985 best va

## Code for LIE attack

In [8]:
def lie_attack(all_updates, z):
    avg = torch.mean(all_updates, dim=0)
    std = torch.std(all_updates, dim=0)
    return avg + z * std

In [10]:
batch_size=250
resume=0
nepochs=1200
schedule=[1000]
nbatches = user_tr_len//batch_size

gamma=.5
opt = 'sgd'
fed_lr=0.5
criterion=nn.CrossEntropyLoss()
use_cuda = torch.cuda.is_available()

aggregation='mkrum'
multi_k = False
candidates = []

at_type='LIE'
z_values={3:0.69847, 5:0.7054, 8:0.71904, 10:0.72575, 12:0.73891}
n_attackers=[10]

arch='alexnet'
chkpt='./'+aggregation

for n_attacker in n_attackers:
    epoch_num = 0
    best_global_acc = 0
    best_global_te_acc = 0

    torch.cuda.empty_cache()
    r=np.arange(user_tr_len)

    fed_model, _ = return_model(arch, 0.1, 0.9, parallel=False)
    optimizer_fed = SGD(fed_model.parameters(), lr=fed_lr)

    while epoch_num <= nepochs:
        user_grads=[]
        if not epoch_num and epoch_num%nbatches == 0:
            np.random.shuffle(r)
            for i in range(nusers):
                user_tr_data_tensors[i]=user_tr_data_tensors[i][r]
                user_tr_label_tensors[i]=user_tr_label_tensors[i][r]

        for i in range(n_attacker, nusers):

            inputs = user_tr_data_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]
            targets = user_tr_label_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]

            inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

            outputs = fed_model(inputs)
            loss = criterion(outputs, targets)
            fed_model.zero_grad()
            loss.backward(retain_graph=True)

            param_grad=[]
            for param in fed_model.parameters():
                param_grad=param.grad.data.view(-1) if not len(param_grad) else torch.cat((param_grad,param.grad.view(-1)))

            user_grads=param_grad[None, :] if len(user_grads)==0 else torch.cat((user_grads,param_grad[None,:]), 0)

        malicious_grads = user_grads

        if epoch_num in schedule:
            for param_group in optimizer_fed.param_groups:
                param_group['lr'] *= gamma
                print('New learnin rate ', param_group['lr'])

        if n_attacker > 0:
            if at_type == 'lie':
                mal_update = lie_attack(malicious_grads, z_values[n_attacker])
                malicious_grads = torch.cat((torch.stack([mal_update]*n_attacker), malicious_grads))
            elif at_type == 'fang':
                agg_grads = torch.mean(malicious_grads, 0)
                deviation = torch.sign(agg_grads)
                malicious_grads = get_malicious_updates_fang_trmean(malicious_grads, deviation, n_attacker, epoch_num)
            elif at_type == 'our-agr':
                agg_grads = torch.mean(malicious_grads, 0)
                malicious_grads = our_attack_krum(malicious_grads, agg_grads, n_attacker, compression=compression, q_level=q_level, norm=norm)

        if not epoch_num : 
            print(malicious_grads.shape)

        if aggregation=='median':
            agg_grads=torch.median(malicious_grads,dim=0)[0]

        elif aggregation=='average':
            agg_grads=torch.mean(malicious_grads,dim=0)

        elif aggregation=='trmean':
            agg_grads=tr_mean(malicious_grads, n_attacker)

        elif aggregation=='krum' or aggregation=='mkrum':
            multi_k = True if aggregation == 'mkrum' else False
            if epoch_num == 0: print('multi krum is ', multi_k)
            agg_grads, krum_candidate = multi_krum(malicious_grads, n_attacker, multi_k=multi_k)

        elif aggregation=='bulyan':
            agg_grads, krum_candidate=bulyan(malicious_grads, n_attacker)

        del user_grads

        start_idx=0

        optimizer_fed.zero_grad()

        model_grads=[]

        for i, param in enumerate(fed_model.parameters()):
            param_=agg_grads[start_idx:start_idx+len(param.data.view(-1))].reshape(param.data.shape)
            start_idx=start_idx+len(param.data.view(-1))
            param_=param_.cuda()
            model_grads.append(param_)

        optimizer_fed.step(model_grads)

        val_loss, val_acc = test(val_data_tensor,val_label_tensor,fed_model,criterion,use_cuda)
        te_loss, te_acc = test(te_data_tensor,te_label_tensor, fed_model, criterion, use_cuda)

        is_best = best_global_acc < val_acc

        best_global_acc = max(best_global_acc, val_acc)

        if is_best:
            best_global_te_acc = te_acc

        if epoch_num%10==0 or epoch_num==nepochs-1:
            print('%s: at %s n_at %d n_mal_sel %d e %d fed_model val loss %.4f val acc %.4f best val_acc %f te_acc %f'%(aggregation, at_type, n_attacker, np.sum(krum_candidate < n_attacker), epoch_num, val_loss, val_acc, best_global_acc,best_global_te_acc))

        if val_loss > 10:
            print('val loss %f too high'%val_loss)
            break

        epoch_num+=1

torch.Size([40, 2472266])
multi krum is  True


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1603729138878/work/torch/csrc/utils/python_arg_parser.cpp:882.)
  p.data.add_(-group['lr'], d_p)


mkrum: at LIE n_at 10 n_mal_sel 3 e 0 fed_model val loss 2.3021 val acc 10.5925 best val_acc 10.592532 te_acc 10.146104
mkrum: at LIE n_at 10 n_mal_sel 6 e 10 fed_model val loss 2.2930 val acc 12.2565 best val_acc 14.285714 te_acc 14.346591
mkrum: at LIE n_at 10 n_mal_sel 2 e 20 fed_model val loss 2.2692 val acc 13.5146 best val_acc 20.596591 te_acc 20.211039
mkrum: at LIE n_at 10 n_mal_sel 8 e 30 fed_model val loss 2.2304 val acc 12.1956 best val_acc 20.596591 te_acc 20.211039
mkrum: at LIE n_at 10 n_mal_sel 4 e 40 fed_model val loss 2.2415 val acc 16.3149 best val_acc 21.895292 te_acc 21.043019
mkrum: at LIE n_at 10 n_mal_sel 8 e 50 fed_model val loss 2.1699 val acc 20.0893 best val_acc 22.524351 te_acc 21.387987
mkrum: at LIE n_at 10 n_mal_sel 5 e 60 fed_model val loss 2.0669 val acc 23.1128 best val_acc 23.112825 te_acc 22.605519
mkrum: at LIE n_at 10 n_mal_sel 6 e 70 fed_model val loss 2.0266 val acc 23.7825 best val_acc 23.782468 te_acc 23.579545
mkrum: at LIE n_at 10 n_mal_sel 4

mkrum: at LIE n_at 10 n_mal_sel 4 e 680 fed_model val loss 1.5174 val acc 50.9537 best val_acc 55.905032 te_acc 55.661526
mkrum: at LIE n_at 10 n_mal_sel 6 e 690 fed_model val loss 1.4842 val acc 49.3912 best val_acc 56.493506 te_acc 55.803571
mkrum: at LIE n_at 10 n_mal_sel 4 e 700 fed_model val loss 1.2874 val acc 54.9919 best val_acc 56.493506 te_acc 55.803571
mkrum: at LIE n_at 10 n_mal_sel 5 e 710 fed_model val loss 1.3465 val acc 53.3279 best val_acc 56.493506 te_acc 55.803571
mkrum: at LIE n_at 10 n_mal_sel 4 e 720 fed_model val loss 1.3827 val acc 51.8060 best val_acc 56.493506 te_acc 55.803571
mkrum: at LIE n_at 10 n_mal_sel 6 e 730 fed_model val loss 1.3784 val acc 51.0552 best val_acc 56.757305 te_acc 57.224026
mkrum: at LIE n_at 10 n_mal_sel 5 e 740 fed_model val loss 1.7745 val acc 43.1209 best val_acc 56.757305 te_acc 57.224026
mkrum: at LIE n_at 10 n_mal_sel 6 e 750 fed_model val loss 1.4053 val acc 53.0032 best val_acc 56.757305 te_acc 57.224026
mkrum: at LIE n_at 10 n_

## Code for our AGR-tailored attack on Multi-krum

In [13]:
def our_attack_mkrum(all_updates, model_re, n_attackers, dev_type='unit_vec'):

    if dev_type == 'unit_vec':
        deviation = model_re / torch.norm(model_re)
    elif dev_type == 'sign':
        deviation = torch.sign(model_re)
    elif dev_type == 'std':
        deviation = torch.std(all_updates, 0)

    lamda = torch.Tensor([3.0]).cuda()

    threshold_diff = 1e-5
    lamda_fail = lamda
    lamda_succ = 0

    while torch.abs(lamda_succ - lamda) > threshold_diff:
        mal_update = (model_re - lamda * deviation)
        mal_updates = torch.stack([mal_update] * n_attackers)
        mal_updates = torch.cat((mal_updates, all_updates), 0)

        agg_grads, krum_candidate = multi_krum(mal_updates, n_attackers, multi_k=True)
        if np.sum(krum_candidate < n_attackers) == n_attackers:
            # print('successful lamda is ', lamda)
            lamda_succ = lamda
            lamda = lamda + lamda_fail / 2
        else:
            lamda = lamda - lamda_fail / 2

        lamda_fail = lamda_fail / 2

    mal_update = (model_re - lamda_succ * deviation)
    mal_updates = torch.stack([mal_update] * n_attackers)
    mal_updates = torch.cat((mal_updates, all_updates), 0)

    return mal_updates

In [14]:
batch_size=250
resume=0
nepochs=1200
schedule=[1000]
nbatches = user_tr_len//batch_size

gamma=.5
opt = 'sgd'
fed_lr=0.5
criterion=nn.CrossEntropyLoss()
use_cuda = torch.cuda.is_available()

aggregation='mkrum'
multi_k = False
candidates = []

at_type='our-agr'
dev_type ='std'
z=0
n_attackers=[10]

arch='alexnet'
chkpt='./'+aggregation


for n_attacker in n_attackers:
    epoch_num = 0
    best_global_acc = 0
    best_global_te_acc = 0

    torch.cuda.empty_cache()
    r=np.arange(user_tr_len)

    fed_model, _ = return_model(arch, 0.1, 0.9, parallel=False)
    optimizer_fed = SGD(fed_model.parameters(), lr=fed_lr)

    while epoch_num <= nepochs:
        user_grads=[]
        if not epoch_num and epoch_num%nbatches == 0:
            np.random.shuffle(r)
            for i in range(nusers):
                user_tr_data_tensors[i]=user_tr_data_tensors[i][r]
                user_tr_label_tensors[i]=user_tr_label_tensors[i][r]

        for i in range(n_attacker, nusers):

            inputs = user_tr_data_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]
            targets = user_tr_label_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]

            inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

            outputs = fed_model(inputs)
            loss = criterion(outputs, targets)
            fed_model.zero_grad()
            loss.backward(retain_graph=True)

            param_grad=[]
            for param in fed_model.parameters():
                param_grad=param.grad.data.view(-1) if not len(param_grad) else torch.cat((param_grad,param.grad.view(-1)))

            user_grads=param_grad[None, :] if len(user_grads)==0 else torch.cat((user_grads,param_grad[None,:]), 0)

        malicious_grads = user_grads

        if epoch_num in schedule:
            for param_group in optimizer_fed.param_groups:
                param_group['lr'] *= gamma
                print('New learnin rate ', param_group['lr'])

        if n_attacker > 0:
            if at_type == 'lie':
                malicious_grads = get_malicious_updates_lie(malicious_grads, n_attacker, z, epoch_num)
            elif at_type == 'fang':
                agg_grads = torch.mean(malicious_grads, 0)
                deviation = torch.sign(agg_grads)
                malicious_grads = get_malicious_updates_fang(malicious_grads, agg_grads, deviation, n_attacker)
            elif at_type == 'our-agr':
                agg_grads = torch.mean(malicious_grads, 0)
                malicious_grads = our_attack_mkrum(malicious_grads, agg_grads, n_attacker, dev_type=dev_type)

        if not malicious_grads.shape[0]==50: print('malicious grads shape ', malicious_grads.shape)
        if aggregation=='median':
            agg_grads=torch.median(malicious_grads,dim=0)[0]

        elif aggregation=='average':
            agg_grads=torch.mean(malicious_grads,dim=0)

        elif aggregation=='krum' or aggregation=='mkrum':

            multi_k = True if aggregation == 'mkrum' else False
            if epoch_num == 0: print('multi krum is ', multi_k)
            agg_grads, krum_candidate = multi_krum(malicious_grads, n_attacker, multi_k=multi_k)

        elif aggregation=='bulyan':
            agg_grads, krum_candidate=bulyan(malicious_grads, n_attacker)

        del user_grads

        start_idx=0

        optimizer_fed.zero_grad()

        model_grads=[]

        for i, param in enumerate(fed_model.parameters()):
            param_=agg_grads[start_idx:start_idx+len(param.data.view(-1))].reshape(param.data.shape)
            start_idx=start_idx+len(param.data.view(-1))
            param_=param_.cuda()
            model_grads.append(param_)

        optimizer_fed.step(model_grads)

        val_loss, val_acc = test(val_data_tensor,val_label_tensor,fed_model,criterion,use_cuda)
        te_loss, te_acc = test(te_data_tensor,te_label_tensor, fed_model, criterion, use_cuda)

        is_best = best_global_acc < val_acc

        best_global_acc = max(best_global_acc, val_acc)

        if is_best:
            best_global_te_acc = te_acc

        if epoch_num%1==0 or epoch_num==nepochs-1:
            print('%s: at %s n_at %d n_mal_sel %d e %d | val loss %.4f val acc %.4f best val_acc %f'%(aggregation, at_type, n_attacker, np.sum(krum_candidate < n_attacker), epoch_num, val_loss, val_acc, best_global_acc))
        
        if val_loss > 1000:
            print('val loss %f too high'%val_loss)
            break
            
        epoch_num+=1

multi krum is  True
mkrum: at our-agr n_at 10 n_mal_sel 10 e 0 | val loss 2.3026 val acc 10.3693 best val_acc 10.369318
mkrum: at our-agr n_at 10 n_mal_sel 10 e 1 | val loss 2.3021 val acc 10.3693 best val_acc 10.369318
mkrum: at our-agr n_at 10 n_mal_sel 10 e 2 | val loss 2.3017 val acc 10.9375 best val_acc 10.937500
mkrum: at our-agr n_at 10 n_mal_sel 10 e 3 | val loss 2.3012 val acc 13.9205 best val_acc 13.920455
mkrum: at our-agr n_at 10 n_mal_sel 10 e 4 | val loss 2.3008 val acc 9.8011 best val_acc 13.920455
mkrum: at our-agr n_at 10 n_mal_sel 10 e 5 | val loss 2.3002 val acc 10.5519 best val_acc 13.920455
mkrum: at our-agr n_at 10 n_mal_sel 10 e 6 | val loss 2.2996 val acc 11.3231 best val_acc 13.920455
mkrum: at our-agr n_at 10 n_mal_sel 10 e 7 | val loss 2.2990 val acc 9.7200 best val_acc 13.920455
mkrum: at our-agr n_at 10 n_mal_sel 10 e 8 | val loss 2.2982 val acc 9.6388 best val_acc 13.920455
mkrum: at our-agr n_at 10 n_mal_sel 10 e 9 | val loss 2.2973 val acc 9.6794 best va

mkrum: at our-agr n_at 10 n_mal_sel 10 e 82 | val loss 2.2386 val acc 14.9959 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 83 | val loss 2.1652 val acc 17.5122 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 84 | val loss 2.1300 val acc 16.9237 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 85 | val loss 2.6410 val acc 11.9521 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 86 | val loss 3.8455 val acc 9.9432 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 87 | val loss 2.3176 val acc 9.5576 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 88 | val loss 2.3137 val acc 9.3344 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 89 | val loss 2.3100 val acc 9.8214 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 90 | val loss 2.3068 val acc 9.8011 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 91 | val loss 2.3038 val acc 9.8214 best val_acc 21.306

mkrum: at our-agr n_at 10 n_mal_sel 10 e 163 | val loss 2.2003 val acc 18.8109 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 164 | val loss 2.3385 val acc 14.0219 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 165 | val loss 2.3020 val acc 10.6737 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 166 | val loss 2.2787 val acc 14.1843 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 167 | val loss 2.2668 val acc 14.8742 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 168 | val loss 2.2514 val acc 14.8742 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 169 | val loss 2.2820 val acc 15.5235 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 170 | val loss 2.3100 val acc 10.0852 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 171 | val loss 2.3024 val acc 10.5317 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 172 | val loss 2.2859 val acc 12.2971 bes

mkrum: at our-agr n_at 10 n_mal_sel 10 e 244 | val loss 2.2555 val acc 14.0828 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 245 | val loss 2.1967 val acc 18.0601 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 246 | val loss 2.1891 val acc 17.1063 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 247 | val loss 2.1985 val acc 16.4164 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 248 | val loss 2.1812 val acc 16.9237 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 249 | val loss 2.2180 val acc 17.2687 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 250 | val loss 2.1761 val acc 19.1558 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 251 | val loss 2.1885 val acc 16.1729 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 252 | val loss 2.1685 val acc 17.5731 best val_acc 21.306818
mkrum: at our-agr n_at 10 n_mal_sel 10 e 253 | val loss 2.2225 val acc 15.4830 bes

mkrum: at our-agr n_at 10 n_mal_sel 10 e 325 | val loss 2.1659 val acc 21.1039 best val_acc 24.066558
mkrum: at our-agr n_at 10 n_mal_sel 10 e 326 | val loss 2.0675 val acc 22.0170 best val_acc 24.066558
mkrum: at our-agr n_at 10 n_mal_sel 10 e 327 | val loss 2.0049 val acc 26.4610 best val_acc 26.461039
mkrum: at our-agr n_at 10 n_mal_sel 10 e 328 | val loss 2.0165 val acc 24.1071 best val_acc 26.461039
mkrum: at our-agr n_at 10 n_mal_sel 10 e 329 | val loss 2.6459 val acc 18.6891 best val_acc 26.461039
mkrum: at our-agr n_at 10 n_mal_sel 10 e 330 | val loss 2.2029 val acc 12.9261 best val_acc 26.461039
mkrum: at our-agr n_at 10 n_mal_sel 10 e 331 | val loss 2.1291 val acc 16.9440 best val_acc 26.461039
mkrum: at our-agr n_at 10 n_mal_sel 10 e 332 | val loss 2.2342 val acc 13.4131 best val_acc 26.461039
mkrum: at our-agr n_at 10 n_mal_sel 10 e 333 | val loss 2.0822 val acc 21.2865 best val_acc 26.461039
mkrum: at our-agr n_at 10 n_mal_sel 10 e 334 | val loss 2.0293 val acc 22.5852 bes

mkrum: at our-agr n_at 10 n_mal_sel 10 e 406 | val loss 2.0843 val acc 18.9732 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 407 | val loss 2.0596 val acc 24.8377 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 408 | val loss 2.0728 val acc 25.1623 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 409 | val loss 2.0463 val acc 24.4521 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 410 | val loss 2.1024 val acc 23.8231 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 411 | val loss 2.0139 val acc 26.9481 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 412 | val loss 2.0362 val acc 25.6494 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 413 | val loss 2.0539 val acc 25.5479 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 414 | val loss 2.1987 val acc 20.6981 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 415 | val loss 2.1054 val acc 20.0487 bes

mkrum: at our-agr n_at 10 n_mal_sel 10 e 487 | val loss 2.1209 val acc 15.7468 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 488 | val loss 2.4431 val acc 16.5990 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 489 | val loss 2.9001 val acc 11.7492 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 490 | val loss 2.3138 val acc 13.9610 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 491 | val loss 2.1497 val acc 18.9123 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 492 | val loss 2.2393 val acc 13.9002 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 493 | val loss 2.1143 val acc 20.1502 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 494 | val loss 2.0546 val acc 23.0722 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 495 | val loss 2.0179 val acc 22.7476 best val_acc 29.159903
mkrum: at our-agr n_at 10 n_mal_sel 10 e 496 | val loss 1.9778 val acc 23.8839 bes

## Code for our first AGR-agnostic attack called Min-max

In [15]:
'''
MIN-MAX attack
'''
def our_attack_dist(all_updates, model_re, n_attackers, dev_type='unit_vec'):

    if dev_type == 'unit_vec':
        deviation = model_re / torch.norm(model_re)  # unit vector, dir opp to good dir
    elif dev_type == 'sign':
        deviation = torch.sign(model_re)
    elif dev_type == 'std':
        deviation = torch.std(all_updates, 0)

    lamda = torch.Tensor([10.0]).float().cuda()
    # print(lamda)
    threshold_diff = 1e-5
    lamda_fail = lamda
    lamda_succ = 0
    
    distances = []
    for update in all_updates:
        distance = torch.norm((all_updates - update), dim=1) ** 2
        distances = distance[None, :] if not len(distances) else torch.cat((distances, distance[None, :]), 0)
    
    max_distance = torch.max(distances)
    del distances

    while torch.abs(lamda_succ - lamda) > threshold_diff:
        mal_update = (model_re - lamda * deviation)
        distance = torch.norm((all_updates - mal_update), dim=1) ** 2
        max_d = torch.max(distance)
        
        if max_d <= max_distance:
            # print('successful lamda is ', lamda)
            lamda_succ = lamda
            lamda = lamda + lamda_fail / 2
        else:
            lamda = lamda - lamda_fail / 2

        lamda_fail = lamda_fail / 2

    mal_update = (model_re - lamda_succ * deviation)
    
    return mal_update

In [16]:
batch_size=250
resume=0
nepochs=1200
schedule=[1000]
nbatches = user_tr_len//batch_size

gamma=.5
opt = 'sgd'
fed_lr=0.5
criterion=nn.CrossEntropyLoss()
use_cuda = torch.cuda.is_available()

aggregation='mkrum'
multi_k = False
candidates = []

at_type='min-max'
dev_type ='std'
z=0
n_attackers=[10]

arch='alexnet'
chkpt='./'+aggregation

for n_attacker in n_attackers:
    candidates = []

    epoch_num = 0
    best_global_acc = 0
    best_global_te_acc = 0

    fed_model, _ = return_model(arch, 0.1, 0.9, parallel=False)
    optimizer_fed = SGD(fed_model.parameters(), lr=fed_lr)

    torch.cuda.empty_cache()
    r=np.arange(user_tr_len)

    while epoch_num <= nepochs:
        user_grads=[]
        if not epoch_num and epoch_num%nbatches == 0:
            np.random.shuffle(r)
            for i in range(nusers):
                user_tr_data_tensors[i]=user_tr_data_tensors[i][r]
                user_tr_label_tensors[i]=user_tr_label_tensors[i][r]

        for i in range(n_attacker, nusers):

            inputs = user_tr_data_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]
            targets = user_tr_label_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]

            inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

            outputs = fed_model(inputs)
            loss = criterion(outputs, targets)
            fed_model.zero_grad()
            loss.backward(retain_graph=True)

            param_grad=[]
            for param in fed_model.parameters():
                param_grad=param.grad.data.view(-1) if not len(param_grad) else torch.cat((param_grad,param.grad.view(-1)))

            user_grads=param_grad[None, :] if len(user_grads)==0 else torch.cat((user_grads,param_grad[None,:]), 0)

        malicious_grads = user_grads

        if epoch_num in schedule:
            for param_group in optimizer_fed.param_groups:
                param_group['lr'] *= gamma
                print('New learnin rate ', param_group['lr'])

        if n_attacker > 0:
            if at_type == 'lie':
                malicious_grads = get_malicious_updates_lie(malicious_grads, n_attacker, z, epoch_num)
            elif at_type == 'fang':
                agg_grads = torch.mean(malicious_grads, 0)
                deviation = torch.sign(agg_grads)
                malicious_grads = get_malicious_updates_fang(malicious_grads, agg_grads, deviation, n_attacker)
            elif at_type == 'our-agr':
                agg_grads = torch.mean(malicious_grads, 0)
                mal_update = our_attack_median(malicious_grads, agg_grads, n_attacker, dev_type)
            elif at_type == 'min-max':
                agg_grads = torch.mean(malicious_grads, 0)
                mal_update = our_attack_dist(malicious_grads, agg_grads, n_attacker, dev_type)
            elif at_type == 'min-sum':
                agg_grads = torch.mean(malicious_grads, 0)
                mal_update = our_attack_score(malicious_grads, agg_grads, n_attacker, dev_type)

            mal_updates = torch.stack([mal_update] * n_attacker)
            malicious_grads = torch.cat((mal_updates, user_grads), 0)

        if epoch_num==0: print('malicious_grads shape ', malicious_grads.shape)

        if aggregation=='median':
            agg_grads=torch.median(malicious_grads,dim=0)[0]

        elif aggregation=='average':
            agg_grads=torch.mean(malicious_grads,dim=0)

        elif aggregation=='krum' or aggregation=='mkrum':
            multi_k = True if aggregation == 'mkrum' else False
            if epoch_num == 0: print('multi krum is ', multi_k)
            agg_grads, krum_candidate = multi_krum(malicious_grads, n_attacker, multi_k=multi_k)

        elif aggregation=='bulyan':
            agg_grads, krum_candidate=bulyan(malicious_grads, n_attacker)

        del user_grads

        start_idx=0

        optimizer_fed.zero_grad()

        model_grads=[]

        for i, param in enumerate(fed_model.parameters()):
            param_=agg_grads[start_idx:start_idx+len(param.data.view(-1))].reshape(param.data.shape)
            start_idx=start_idx+len(param.data.view(-1))
            param_=param_.cuda()
            model_grads.append(param_)

        optimizer_fed.step(model_grads)

        val_loss, val_acc = test(val_data_tensor,val_label_tensor,fed_model,criterion,use_cuda)
        te_loss, te_acc = test(te_data_tensor,te_label_tensor, fed_model, criterion, use_cuda)

        is_best = best_global_acc < val_acc

        best_global_acc = max(best_global_acc, val_acc)

        if is_best:
            best_global_te_acc = te_acc

        if epoch_num%1==0 or epoch_num==nepochs-1:
            print('%s: at %s n_at %d n_mal_sel %d e %d | val loss %.4f val acc %.4f best val_acc %f'%(aggregation, at_type, n_attacker, np.sum(krum_candidate < n_attacker), epoch_num, val_loss, val_acc, best_global_acc))

        if val_loss > 1000:
            print('val loss %f too high'%val_loss)
            break
            
        epoch_num+=1

malicious_grads shape  torch.Size([50, 2472266])
multi krum is  True
mkrum: at min-max n_at 10 n_mal_sel 5 e 0 | val loss 2.3030 val acc 10.0446 best val_acc 10.044643
mkrum: at min-max n_at 10 n_mal_sel 3 e 1 | val loss 2.3024 val acc 10.0446 best val_acc 10.044643
mkrum: at min-max n_at 10 n_mal_sel 5 e 2 | val loss 2.3018 val acc 10.0446 best val_acc 10.044643
mkrum: at min-max n_at 10 n_mal_sel 4 e 3 | val loss 2.3012 val acc 10.0446 best val_acc 10.044643
mkrum: at min-max n_at 10 n_mal_sel 5 e 4 | val loss 2.3004 val acc 10.0446 best val_acc 10.044643
mkrum: at min-max n_at 10 n_mal_sel 4 e 5 | val loss 2.2996 val acc 10.0446 best val_acc 10.044643
mkrum: at min-max n_at 10 n_mal_sel 5 e 6 | val loss 2.2987 val acc 10.0446 best val_acc 10.044643
mkrum: at min-max n_at 10 n_mal_sel 4 e 7 | val loss 2.2977 val acc 10.0446 best val_acc 10.044643
mkrum: at min-max n_at 10 n_mal_sel 5 e 8 | val loss 2.2963 val acc 10.0446 best val_acc 10.044643
mkrum: at min-max n_at 10 n_mal_sel 4 e 

mkrum: at min-max n_at 10 n_mal_sel 4 e 82 | val loss 2.1223 val acc 15.6859 best val_acc 23.518669
mkrum: at min-max n_at 10 n_mal_sel 5 e 83 | val loss 2.0772 val acc 20.7792 best val_acc 23.518669
mkrum: at min-max n_at 10 n_mal_sel 4 e 84 | val loss 2.0738 val acc 20.4140 best val_acc 23.518669
mkrum: at min-max n_at 10 n_mal_sel 4 e 85 | val loss 2.2561 val acc 15.5235 best val_acc 23.518669
mkrum: at min-max n_at 10 n_mal_sel 5 e 86 | val loss 2.1991 val acc 17.2281 best val_acc 23.518669
mkrum: at min-max n_at 10 n_mal_sel 6 e 87 | val loss 2.1936 val acc 17.1875 best val_acc 23.518669
mkrum: at min-max n_at 10 n_mal_sel 5 e 88 | val loss 2.1416 val acc 16.8628 best val_acc 23.518669
mkrum: at min-max n_at 10 n_mal_sel 5 e 89 | val loss 2.1392 val acc 18.5877 best val_acc 23.518669
mkrum: at min-max n_at 10 n_mal_sel 6 e 90 | val loss 2.0774 val acc 18.8109 best val_acc 23.518669
mkrum: at min-max n_at 10 n_mal_sel 6 e 91 | val loss 2.1268 val acc 19.7849 best val_acc 23.518669


mkrum: at min-max n_at 10 n_mal_sel 5 e 164 | val loss 2.0975 val acc 22.8693 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 7 e 165 | val loss 2.1521 val acc 18.2427 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 4 e 166 | val loss 2.1089 val acc 21.6924 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 5 e 167 | val loss 2.1293 val acc 20.3734 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 4 e 168 | val loss 2.1028 val acc 21.9765 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 4 e 169 | val loss 2.2068 val acc 15.7062 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 3 e 170 | val loss 2.1872 val acc 17.5122 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 2 e 171 | val loss 2.1819 val acc 17.3701 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 4 e 172 | val loss 2.0725 val acc 21.6924 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 5 e 173 | val loss 2.0348 val acc 22.1794 best val_acc 

mkrum: at min-max n_at 10 n_mal_sel 5 e 246 | val loss 2.3604 val acc 9.9635 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 5 e 247 | val loss 2.3605 val acc 9.3141 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 3 e 248 | val loss 2.3481 val acc 10.0852 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 5 e 249 | val loss 2.3425 val acc 9.2330 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 5 e 250 | val loss 2.3367 val acc 9.8823 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 5 e 251 | val loss 2.3302 val acc 10.0852 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 3 e 252 | val loss 2.3236 val acc 7.6907 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 5 e 253 | val loss 2.3151 val acc 8.5227 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 6 e 254 | val loss 2.3056 val acc 10.0041 best val_acc 26.684253
mkrum: at min-max n_at 10 n_mal_sel 4 e 255 | val loss 2.2850 val acc 11.5666 best val_acc 26.684

mkrum: at min-max n_at 10 n_mal_sel 4 e 328 | val loss 2.1301 val acc 18.1209 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 329 | val loss 2.0813 val acc 20.1705 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 330 | val loss 2.1338 val acc 17.8166 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 6 e 331 | val loss 2.1029 val acc 21.6721 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 6 e 332 | val loss 2.2317 val acc 14.4886 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 333 | val loss 2.0873 val acc 20.4140 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 6 e 334 | val loss 2.0157 val acc 25.0000 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 6 e 335 | val loss 1.9944 val acc 25.0000 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 6 e 336 | val loss 1.9929 val acc 27.2321 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 337 | val loss 2.0765 val acc 19.8052 best val_acc 

mkrum: at min-max n_at 10 n_mal_sel 6 e 410 | val loss 2.1761 val acc 16.6599 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 3 e 411 | val loss 2.1978 val acc 15.5032 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 412 | val loss 2.1498 val acc 15.0162 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 413 | val loss 2.1376 val acc 17.8774 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 414 | val loss 2.1307 val acc 19.5211 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 415 | val loss 2.1423 val acc 17.4310 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 416 | val loss 2.1062 val acc 19.9878 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 417 | val loss 2.1734 val acc 17.5933 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 2 e 418 | val loss 2.7348 val acc 16.4164 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 6 e 419 | val loss 2.3645 val acc 9.9026 best val_acc 2

mkrum: at min-max n_at 10 n_mal_sel 3 e 492 | val loss 2.2119 val acc 16.4570 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 493 | val loss 2.1847 val acc 16.0917 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 3 e 494 | val loss 2.1599 val acc 19.0950 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 495 | val loss 2.1494 val acc 20.9010 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 496 | val loss 2.1113 val acc 21.6112 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 497 | val loss 2.1219 val acc 17.7354 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 498 | val loss 2.1326 val acc 23.3969 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 499 | val loss 2.2617 val acc 13.8799 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 500 | val loss 2.1339 val acc 19.3182 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 6 e 501 | val loss 2.1193 val acc 21.6924 best val_acc 

mkrum: at min-max n_at 10 n_mal_sel 5 e 574 | val loss 2.3208 val acc 9.9026 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 575 | val loss 2.3129 val acc 9.2330 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 576 | val loss 2.3005 val acc 10.3084 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 7 e 577 | val loss 2.2793 val acc 9.3953 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 7 e 578 | val loss 2.2568 val acc 10.6737 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 579 | val loss 2.4089 val acc 11.5666 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 580 | val loss 2.3214 val acc 15.2597 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 581 | val loss 2.3090 val acc 9.6794 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 582 | val loss 2.3039 val acc 8.9286 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 4 e 583 | val loss 2.2988 val acc 10.0649 best val_acc 27.96

mkrum: at min-max n_at 10 n_mal_sel 5 e 656 | val loss 1.9851 val acc 24.5333 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 7 e 657 | val loss 1.9667 val acc 26.6031 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 6 e 658 | val loss 1.9972 val acc 23.0519 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 659 | val loss 2.0874 val acc 23.8839 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 660 | val loss 2.0696 val acc 19.5820 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 6 e 661 | val loss 2.0102 val acc 22.4635 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 6 e 662 | val loss 2.1271 val acc 18.1818 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 7 e 663 | val loss 1.9482 val acc 23.4172 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 664 | val loss 1.8850 val acc 26.3393 best val_acc 27.962662
mkrum: at min-max n_at 10 n_mal_sel 5 e 665 | val loss 1.8869 val acc 27.2524 best val_acc 

mkrum: at min-max n_at 10 n_mal_sel 4 e 738 | val loss 2.2851 val acc 12.9667 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 739 | val loss 2.6293 val acc 10.9375 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 740 | val loss 2.1970 val acc 16.7208 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 6 e 741 | val loss 2.0344 val acc 19.9269 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 742 | val loss 2.0268 val acc 20.2110 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 4 e 743 | val loss 2.0388 val acc 21.0227 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 744 | val loss 2.2429 val acc 15.8888 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 745 | val loss 2.0388 val acc 19.8661 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 6 e 746 | val loss 1.9721 val acc 22.8287 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 747 | val loss 1.9454 val acc 22.0170 best val_acc 

mkrum: at min-max n_at 10 n_mal_sel 6 e 820 | val loss 2.0222 val acc 23.2346 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 821 | val loss 1.9494 val acc 25.9740 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 6 e 822 | val loss 1.9800 val acc 23.2143 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 6 e 823 | val loss 1.9383 val acc 24.8985 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 6 e 824 | val loss 1.9738 val acc 23.8839 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 4 e 825 | val loss 1.9875 val acc 24.7159 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 7 e 826 | val loss 2.1192 val acc 19.8864 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 827 | val loss 2.2159 val acc 21.6112 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 828 | val loss 2.5121 val acc 13.3523 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 829 | val loss 2.2037 val acc 16.4976 best val_acc 

mkrum: at min-max n_at 10 n_mal_sel 4 e 902 | val loss 2.3027 val acc 9.7808 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 3 e 903 | val loss 2.3026 val acc 9.7808 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 3 e 904 | val loss 2.3026 val acc 9.7808 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 3 e 905 | val loss 2.3027 val acc 9.6388 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 4 e 906 | val loss 2.3027 val acc 9.7808 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 3 e 907 | val loss 2.3026 val acc 9.7808 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 3 e 908 | val loss 2.3027 val acc 9.7808 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 3 e 909 | val loss 2.3027 val acc 9.7808 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 4 e 910 | val loss 2.3027 val acc 9.7808 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 3 e 911 | val loss 2.3026 val acc 9.7808 best val_acc 29.058442


mkrum: at min-max n_at 10 n_mal_sel 4 e 984 | val loss 2.3024 val acc 9.8823 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 3 e 985 | val loss 2.3023 val acc 9.8823 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 4 e 986 | val loss 2.3023 val acc 9.8620 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 4 e 987 | val loss 2.3023 val acc 9.8823 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 988 | val loss 2.3022 val acc 9.9026 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 4 e 989 | val loss 2.3022 val acc 9.9229 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 990 | val loss 2.3022 val acc 9.9229 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 4 e 991 | val loss 2.3021 val acc 9.9838 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 6 e 992 | val loss 2.3021 val acc 9.9838 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 6 e 993 | val loss 2.3021 val acc 10.0446 best val_acc 29.058442

mkrum: at min-max n_at 10 n_mal_sel 4 e 1065 | val loss 2.3676 val acc 6.9399 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 1066 | val loss 2.3129 val acc 9.9838 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 7 e 1067 | val loss 2.3144 val acc 9.3750 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 1068 | val loss 2.3183 val acc 9.8620 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 6 e 1069 | val loss 2.3064 val acc 12.7638 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 1070 | val loss 2.3739 val acc 7.8531 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 6 e 1071 | val loss 2.3181 val acc 9.8011 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 1072 | val loss 2.3152 val acc 9.9229 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 6 e 1073 | val loss 2.3127 val acc 10.0649 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 4 e 1074 | val loss 2.3087 val acc 12.2971 best val_a

mkrum: at min-max n_at 10 n_mal_sel 6 e 1146 | val loss 2.2819 val acc 14.4481 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 1147 | val loss 2.4479 val acc 9.8620 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 5 e 1148 | val loss 2.3108 val acc 9.9432 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 3 e 1149 | val loss 2.3093 val acc 9.9432 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 4 e 1150 | val loss 2.3080 val acc 9.9432 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 3 e 1151 | val loss 2.3070 val acc 9.9432 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 4 e 1152 | val loss 2.3061 val acc 9.9432 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 3 e 1153 | val loss 2.3054 val acc 9.9432 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 4 e 1154 | val loss 2.3046 val acc 9.9432 best val_acc 29.058442
mkrum: at min-max n_at 10 n_mal_sel 3 e 1155 | val loss 2.3040 val acc 9.9635 best val_acc

## Code for our second AGR-agnostic attack called Min-Sum

In [17]:
'''
MIN-SUM attack
'''

def our_attack_score(all_updates, model_re, n_attackers, dev_type='unit_vec'):

    if dev_type == 'unit_vec':
        deviation = model_re / torch.norm(model_re)  # unit vector, dir opp to good dir
    elif dev_type == 'sign':
        deviation = torch.sign(model_re)
    elif dev_type == 'std':
        deviation = torch.std(all_updates, 0)
    
    lamda = torch.Tensor([10.0]).float().cuda()
    # print(lamda)
    threshold_diff = 1e-5
    lamda_fail = lamda
    lamda_succ = 0
    
    distances = []
    for update in all_updates:
        distance = torch.norm((all_updates - update), dim=1) ** 2
        distances = distance[None, :] if not len(distances) else torch.cat((distances, distance[None, :]), 0)
    
    scores = torch.sum(distances, dim=1)
    min_score = torch.min(scores)
    del distances

    while torch.abs(lamda_succ - lamda) > threshold_diff:
        mal_update = (model_re - lamda * deviation)
        distance = torch.norm((all_updates - mal_update), dim=1) ** 2
        score = torch.sum(distance)
        
        if score <= min_score:
            # print('successful lamda is ', lamda)
            lamda_succ = lamda
            lamda = lamda + lamda_fail / 2
        else:
            lamda = lamda - lamda_fail / 2

        lamda_fail = lamda_fail / 2

    # print(lamda_succ)
    mal_update = (model_re - lamda_succ * deviation)
    
    return mal_update
    

In [18]:
batch_size=250
resume=0
nepochs=1200
schedule=[1000]
nbatches = user_tr_len//batch_size

gamma=.5
opt = 'sgd'
fed_lr=0.5
criterion=nn.CrossEntropyLoss()
use_cuda = torch.cuda.is_available()

aggregation='mkrum'
multi_k = False
candidates = []

at_type='min-sum'
dev_type ='std'
z=0
n_attackers=[10]

arch='alexnet'
chkpt='./'+aggregation

for n_attacker in n_attackers:
    candidates = []

    epoch_num = 0
    best_global_acc = 0
    best_global_te_acc = 0

    fed_model, _ = return_model(arch, 0.1, 0.9, parallel=False)
    optimizer_fed = SGD(fed_model.parameters(), lr=fed_lr)

    torch.cuda.empty_cache()
    r=np.arange(user_tr_len)

    while epoch_num <= nepochs:
        user_grads=[]
        if not epoch_num and epoch_num%nbatches == 0:
            np.random.shuffle(r)
            for i in range(nusers):
                user_tr_data_tensors[i]=user_tr_data_tensors[i][r]
                user_tr_label_tensors[i]=user_tr_label_tensors[i][r]

        for i in range(n_attacker, nusers):

            inputs = user_tr_data_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]
            targets = user_tr_label_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]

            inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

            outputs = fed_model(inputs)
            loss = criterion(outputs, targets)
            fed_model.zero_grad()
            loss.backward(retain_graph=True)

            param_grad=[]
            for param in fed_model.parameters():
                param_grad=param.grad.data.view(-1) if not len(param_grad) else torch.cat((param_grad,param.grad.view(-1)))

            user_grads=param_grad[None, :] if len(user_grads)==0 else torch.cat((user_grads,param_grad[None,:]), 0)

        malicious_grads = user_grads

        if epoch_num in schedule:
            for param_group in optimizer_fed.param_groups:
                param_group['lr'] *= gamma
                print('New learnin rate ', param_group['lr'])

        if n_attacker > 0:
            if at_type == 'lie':
                malicious_grads = get_malicious_updates_lie(malicious_grads, n_attacker, z, epoch_num)
            elif at_type == 'fang':
                agg_grads = torch.mean(malicious_grads, 0)
                deviation = torch.sign(agg_grads)
                malicious_grads = get_malicious_updates_fang(malicious_grads, agg_grads, deviation, n_attacker)
            elif at_type == 'our-agr':
                agg_grads = torch.mean(malicious_grads, 0)
                mal_update = our_attack_median(malicious_grads, agg_grads, n_attacker, dev_type)
            elif at_type == 'min-max':
                agg_grads = torch.mean(malicious_grads, 0)
                mal_update = our_attack_dist(malicious_grads, agg_grads, n_attacker, dev_type)
            elif at_type == 'min-sum':
                agg_grads = torch.mean(malicious_grads, 0)
                mal_update = our_attack_score(malicious_grads, agg_grads, n_attacker, dev_type)

            mal_updates = torch.stack([mal_update] * n_attacker)
            malicious_grads = torch.cat((mal_updates, user_grads), 0)

        if epoch_num==0: print('malicious_grads shape ', malicious_grads.shape)

        if aggregation=='median':
            agg_grads=torch.median(malicious_grads,dim=0)[0]

        elif aggregation=='average':
            agg_grads=torch.mean(malicious_grads,dim=0)

        elif aggregation=='krum' or aggregation=='mkrum':
            multi_k = True if aggregation == 'mkrum' else False
            if epoch_num == 0: print('multi krum is ', multi_k)
            agg_grads, krum_candidate = multi_krum(malicious_grads, n_attacker, multi_k=multi_k)

        elif aggregation=='bulyan':
            agg_grads, krum_candidate=bulyan(malicious_grads, n_attacker)

        del user_grads

        start_idx=0

        optimizer_fed.zero_grad()

        model_grads=[]

        for i, param in enumerate(fed_model.parameters()):
            param_=agg_grads[start_idx:start_idx+len(param.data.view(-1))].reshape(param.data.shape)
            start_idx=start_idx+len(param.data.view(-1))
            param_=param_.cuda()
            model_grads.append(param_)

        optimizer_fed.step(model_grads)

        val_loss, val_acc = test(val_data_tensor,val_label_tensor,fed_model,criterion,use_cuda)
        te_loss, te_acc = test(te_data_tensor,te_label_tensor, fed_model, criterion, use_cuda)

        is_best = best_global_acc < val_acc

        best_global_acc = max(best_global_acc, val_acc)

        if is_best:
            best_global_te_acc = te_acc

        if epoch_num%1==0 or epoch_num==nepochs-1:
            print('%s: at %s n_at %d n_mal_sel %d e %d | val loss %.4f val acc %.4f best val_acc %f'%(aggregation, at_type, n_attacker, np.sum(krum_candidate < n_attacker), epoch_num, val_loss, val_acc, best_global_acc))

        if val_loss > 1000:
            print('val loss %f too high'%val_loss)
            break
            
        epoch_num+=1

malicious_grads shape  torch.Size([50, 2472266])
multi krum is  True
mkrum: at min-sum n_at 10 n_mal_sel 10 e 0 | val loss 2.3025 val acc 10.1461 best val_acc 10.146104
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1 | val loss 2.3021 val acc 10.0852 best val_acc 10.146104
mkrum: at min-sum n_at 10 n_mal_sel 10 e 2 | val loss 2.3016 val acc 10.0446 best val_acc 10.146104
mkrum: at min-sum n_at 10 n_mal_sel 10 e 3 | val loss 2.3011 val acc 10.0446 best val_acc 10.146104
mkrum: at min-sum n_at 10 n_mal_sel 10 e 4 | val loss 2.3005 val acc 10.1055 best val_acc 10.146104
mkrum: at min-sum n_at 10 n_mal_sel 10 e 5 | val loss 2.2998 val acc 10.1258 best val_acc 10.146104
mkrum: at min-sum n_at 10 n_mal_sel 10 e 6 | val loss 2.2991 val acc 10.0446 best val_acc 10.146104
mkrum: at min-sum n_at 10 n_mal_sel 10 e 7 | val loss 2.2982 val acc 10.0446 best val_acc 10.146104
mkrum: at min-sum n_at 10 n_mal_sel 10 e 8 | val loss 2.2971 val acc 10.6737 best val_acc 10.673701
mkrum: at min-sum n_at 10 n_mal

mkrum: at min-sum n_at 10 n_mal_sel 10 e 81 | val loss 2.2916 val acc 13.2711 best val_acc 24.228896
mkrum: at min-sum n_at 10 n_mal_sel 10 e 82 | val loss 2.1371 val acc 22.5852 best val_acc 24.228896
mkrum: at min-sum n_at 10 n_mal_sel 10 e 83 | val loss 2.0877 val acc 21.4894 best val_acc 24.228896
mkrum: at min-sum n_at 10 n_mal_sel 10 e 84 | val loss 2.0406 val acc 25.9943 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 85 | val loss 2.0998 val acc 20.2516 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 86 | val loss 2.6131 val acc 13.9610 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 87 | val loss 2.7887 val acc 9.7200 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 88 | val loss 2.2987 val acc 11.7898 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 89 | val loss 2.2904 val acc 14.6510 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 90 | val loss 2.2840 val acc 14.8539 best val_acc 2

mkrum: at min-sum n_at 10 n_mal_sel 10 e 162 | val loss 2.2970 val acc 10.2070 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 163 | val loss 2.2902 val acc 10.2881 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 164 | val loss 2.2814 val acc 10.5722 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 165 | val loss 2.2694 val acc 9.9026 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 166 | val loss 2.2535 val acc 11.4651 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 167 | val loss 2.2310 val acc 14.6307 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 168 | val loss 2.2003 val acc 16.5787 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 169 | val loss 2.1689 val acc 18.3239 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 170 | val loss 2.1684 val acc 17.4919 best val_acc 25.994318
mkrum: at min-sum n_at 10 n_mal_sel 10 e 171 | val loss 2.3856 val acc 15.7670 best

mkrum: at min-sum n_at 10 n_mal_sel 10 e 243 | val loss 1.9798 val acc 22.8896 best val_acc 26.684253
mkrum: at min-sum n_at 10 n_mal_sel 10 e 244 | val loss 2.0215 val acc 24.9594 best val_acc 26.684253
mkrum: at min-sum n_at 10 n_mal_sel 10 e 245 | val loss 2.0821 val acc 21.2865 best val_acc 26.684253
mkrum: at min-sum n_at 10 n_mal_sel 10 e 246 | val loss 2.1967 val acc 20.3328 best val_acc 26.684253
mkrum: at min-sum n_at 10 n_mal_sel 10 e 247 | val loss 2.0693 val acc 23.2143 best val_acc 26.684253
mkrum: at min-sum n_at 10 n_mal_sel 10 e 248 | val loss 1.9994 val acc 22.9099 best val_acc 26.684253
mkrum: at min-sum n_at 10 n_mal_sel 10 e 249 | val loss 1.9237 val acc 25.1218 best val_acc 26.684253
mkrum: at min-sum n_at 10 n_mal_sel 10 e 250 | val loss 1.9400 val acc 25.8117 best val_acc 26.684253
mkrum: at min-sum n_at 10 n_mal_sel 10 e 251 | val loss 2.1131 val acc 23.1128 best val_acc 26.684253
mkrum: at min-sum n_at 10 n_mal_sel 10 e 252 | val loss 2.0855 val acc 19.1153 bes

mkrum: at min-sum n_at 10 n_mal_sel 10 e 324 | val loss 2.3513 val acc 20.5154 best val_acc 30.722403
mkrum: at min-sum n_at 10 n_mal_sel 10 e 325 | val loss 2.2781 val acc 15.1989 best val_acc 30.722403
mkrum: at min-sum n_at 10 n_mal_sel 10 e 326 | val loss 2.1132 val acc 22.6664 best val_acc 30.722403
mkrum: at min-sum n_at 10 n_mal_sel 10 e 327 | val loss 2.1557 val acc 20.9821 best val_acc 30.722403
mkrum: at min-sum n_at 10 n_mal_sel 10 e 328 | val loss 2.1963 val acc 17.2484 best val_acc 30.722403
mkrum: at min-sum n_at 10 n_mal_sel 10 e 329 | val loss 2.0986 val acc 17.2890 best val_acc 30.722403
mkrum: at min-sum n_at 10 n_mal_sel 10 e 330 | val loss 2.0307 val acc 17.8774 best val_acc 30.722403
mkrum: at min-sum n_at 10 n_mal_sel 10 e 331 | val loss 2.0320 val acc 17.1672 best val_acc 30.722403
mkrum: at min-sum n_at 10 n_mal_sel 10 e 332 | val loss 2.8194 val acc 10.7143 best val_acc 30.722403
mkrum: at min-sum n_at 10 n_mal_sel 10 e 333 | val loss 2.5379 val acc 9.9635 best

mkrum: at min-sum n_at 10 n_mal_sel 10 e 405 | val loss 2.0481 val acc 23.4172 best val_acc 33.441558
mkrum: at min-sum n_at 10 n_mal_sel 10 e 406 | val loss 1.9719 val acc 26.2378 best val_acc 33.441558
mkrum: at min-sum n_at 10 n_mal_sel 10 e 407 | val loss 1.9582 val acc 25.3044 best val_acc 33.441558
mkrum: at min-sum n_at 10 n_mal_sel 10 e 408 | val loss 2.0875 val acc 19.4805 best val_acc 33.441558
mkrum: at min-sum n_at 10 n_mal_sel 10 e 409 | val loss 1.9418 val acc 31.9602 best val_acc 33.441558
mkrum: at min-sum n_at 10 n_mal_sel 10 e 410 | val loss 1.8864 val acc 26.6640 best val_acc 33.441558
mkrum: at min-sum n_at 10 n_mal_sel 10 e 411 | val loss 1.8510 val acc 31.0471 best val_acc 33.441558
mkrum: at min-sum n_at 10 n_mal_sel 10 e 412 | val loss 1.8908 val acc 27.6177 best val_acc 33.441558
mkrum: at min-sum n_at 10 n_mal_sel 10 e 413 | val loss 2.0254 val acc 26.9075 best val_acc 33.441558
mkrum: at min-sum n_at 10 n_mal_sel 10 e 414 | val loss 2.0500 val acc 24.4115 bes

mkrum: at min-sum n_at 10 n_mal_sel 10 e 486 | val loss 1.9942 val acc 19.6023 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 487 | val loss 2.1648 val acc 15.0365 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 488 | val loss 2.2908 val acc 18.5877 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 489 | val loss 2.3022 val acc 9.8417 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 490 | val loss 2.2166 val acc 13.3929 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 491 | val loss 2.1150 val acc 19.4196 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 492 | val loss 2.0672 val acc 19.9067 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 493 | val loss 2.0416 val acc 20.0284 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 494 | val loss 2.0299 val acc 20.1299 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 495 | val loss 2.0522 val acc 21.0430 best

mkrum: at min-sum n_at 10 n_mal_sel 10 e 567 | val loss 1.8706 val acc 26.3190 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 568 | val loss 1.8382 val acc 32.9140 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 569 | val loss 1.9336 val acc 27.5162 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 570 | val loss 1.9676 val acc 25.4261 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 571 | val loss 2.0190 val acc 27.8003 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 572 | val loss 1.9116 val acc 33.0357 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 573 | val loss 1.8402 val acc 31.7167 best val_acc 33.745942
mkrum: at min-sum n_at 10 n_mal_sel 10 e 574 | val loss 1.8341 val acc 33.9286 best val_acc 33.928571
mkrum: at min-sum n_at 10 n_mal_sel 10 e 575 | val loss 1.8081 val acc 31.7979 best val_acc 33.928571
mkrum: at min-sum n_at 10 n_mal_sel 10 e 576 | val loss 2.0589 val acc 23.0519 bes

mkrum: at min-sum n_at 10 n_mal_sel 10 e 648 | val loss 1.8320 val acc 29.9107 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 649 | val loss 1.9483 val acc 23.3766 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 650 | val loss 1.7795 val acc 32.5487 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 651 | val loss 1.7745 val acc 32.7922 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 652 | val loss 1.9201 val acc 28.6120 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 653 | val loss 2.0328 val acc 26.4407 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 654 | val loss 1.9941 val acc 25.8320 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 655 | val loss 2.0689 val acc 27.0495 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 656 | val loss 2.1487 val acc 21.6112 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 657 | val loss 2.2766 val acc 17.5122 bes

mkrum: at min-sum n_at 10 n_mal_sel 10 e 729 | val loss 2.0762 val acc 26.7451 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 730 | val loss 2.0312 val acc 25.9334 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 731 | val loss 1.9700 val acc 26.4610 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 732 | val loss 1.9548 val acc 29.4034 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 733 | val loss 1.8957 val acc 27.6583 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 734 | val loss 1.8167 val acc 32.7313 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 735 | val loss 1.7922 val acc 31.3718 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 736 | val loss 1.7286 val acc 34.5170 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 737 | val loss 1.8008 val acc 29.5455 best val_acc 35.633117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 738 | val loss 1.8985 val acc 29.5049 bes

mkrum: at min-sum n_at 10 n_mal_sel 10 e 810 | val loss 1.7010 val acc 38.7581 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 811 | val loss 1.9804 val acc 28.9773 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 812 | val loss 2.4430 val acc 19.1964 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 813 | val loss 2.3490 val acc 19.8458 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 814 | val loss 2.2118 val acc 21.1242 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 815 | val loss 1.8611 val acc 30.8644 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 816 | val loss 1.8736 val acc 31.5544 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 817 | val loss 1.9047 val acc 28.9164 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 818 | val loss 1.9690 val acc 28.9773 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 819 | val loss 1.9245 val acc 26.2378 bes

mkrum: at min-sum n_at 10 n_mal_sel 10 e 891 | val loss 1.9209 val acc 27.4959 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 892 | val loss 1.9380 val acc 26.1567 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 893 | val loss 1.8503 val acc 32.2443 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 894 | val loss 1.7154 val acc 37.9667 best val_acc 38.758117
mkrum: at min-sum n_at 10 n_mal_sel 10 e 895 | val loss 1.6175 val acc 41.1526 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 896 | val loss 1.6690 val acc 39.5495 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 897 | val loss 2.1988 val acc 27.6177 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 898 | val loss 2.4976 val acc 19.8661 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 899 | val loss 4.0732 val acc 9.6388 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 900 | val loss 2.3521 val acc 14.5495 best

mkrum: at min-sum n_at 10 n_mal_sel 10 e 972 | val loss 2.7861 val acc 11.6883 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 973 | val loss 2.8059 val acc 10.5722 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 974 | val loss 2.1164 val acc 16.0308 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 975 | val loss 2.0189 val acc 22.8490 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 976 | val loss 1.9619 val acc 20.9010 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 977 | val loss 1.9163 val acc 24.1883 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 978 | val loss 1.8895 val acc 24.1274 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 979 | val loss 2.0312 val acc 21.9156 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 980 | val loss 2.5979 val acc 15.7873 best val_acc 41.152597
mkrum: at min-sum n_at 10 n_mal_sel 10 e 981 | val loss 2.6518 val acc 15.1380 bes

mkrum: at min-sum n_at 10 n_mal_sel 10 e 1052 | val loss 1.7765 val acc 34.4156 best val_acc 42.593344
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1053 | val loss 1.6147 val acc 38.3726 best val_acc 42.593344
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1054 | val loss 1.5393 val acc 42.9180 best val_acc 42.918019
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1055 | val loss 1.5310 val acc 42.7151 best val_acc 42.918019
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1056 | val loss 1.5619 val acc 41.7005 best val_acc 42.918019
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1057 | val loss 1.7354 val acc 36.2216 best val_acc 42.918019
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1058 | val loss 1.9919 val acc 30.6412 best val_acc 42.918019
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1059 | val loss 1.9751 val acc 27.4351 best val_acc 42.918019
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1060 | val loss 1.9127 val acc 29.5252 best val_acc 42.918019
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1061 | val loss 2.0051 val acc 2

mkrum: at min-sum n_at 10 n_mal_sel 10 e 1132 | val loss 1.7021 val acc 37.7841 best val_acc 44.602273
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1133 | val loss 1.6524 val acc 40.2192 best val_acc 44.602273
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1134 | val loss 1.5668 val acc 42.7557 best val_acc 44.602273
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1135 | val loss 1.5321 val acc 43.2427 best val_acc 44.602273
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1136 | val loss 1.6204 val acc 39.4075 best val_acc 44.602273
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1137 | val loss 1.9804 val acc 33.5024 best val_acc 44.602273
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1138 | val loss 1.9094 val acc 28.5308 best val_acc 44.602273
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1139 | val loss 1.8704 val acc 30.2963 best val_acc 44.602273
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1140 | val loss 1.7655 val acc 35.0649 best val_acc 44.602273
mkrum: at min-sum n_at 10 n_mal_sel 10 e 1141 | val loss 1.7010 val acc 3