In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.utils import save_image
from torchsummary import summary
from matplotlib import pyplot as plt

#from pushover import notify
from random import randint

from IPython.display import Image
from IPython.core.display import Image, display
import dataloader as dl
import model as m
import networks
from networks import LeNet, ClassificationNet
from testers import attack_test
from resnet import ResNet
import gmm as gmm
import parameters as p
import helper
import misc

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt


%load_ext autoreload
%autoreload 2

In [None]:
# Make CUDA device index 1 the current device for this process; the bare `.cuda()`
# calls in later cells therefore allocate on that GPU. Requires at least two GPUs.
torch.cuda.set_device(1)

In [None]:
# NOTE(review): `bs` is not passed to get_dataloaders below; in this notebook it is
# only used to scale the loss value printed by the training loops. Confirm it matches
# the batch size the loaders actually use.
bs = 256
# Loaders come from the project helper `misc`; what "Lenet" selects is defined there.
train_loader,test_loader,loader_list = misc.get_dataloaders("Lenet")

In [None]:
# Pull a single batch from the first loader and write it to disk as a sanity check.
fixed_x, _ = next(iter(loader_list[0]))
save_image(fixed_x, 'real_image.png')

# Use an explicit display(): a bare `Image(...)` expression is rendered only when it is
# the last expression of a cell, and this cell continues with function definitions.
display(Image('real_image.png'))


def in_top_k(targets, preds, k):
    """Tell whether each target index is among the k SMALLEST entries of its row.

    Despite the name, selection uses ``largest=False``, i.e. it is meant for
    distance-like scores where lower is better.

    Args:
        targets: tensor of class indices; for a 1-D tensor of length N the result
            has one boolean per row of ``preds``.
        preds: score tensor whose last dimension is searched for the k lowest values.
        k: how many of the lowest-scoring indices to consider.

    Returns:
        Boolean tensor produced by comparing ``targets.unsqueeze(1)`` against the
        selected indices and reducing with ``any`` over dimension 1.
    """
    _, lowest_idx = torch.topk(preds, k, largest=False)
    hits = targets.unsqueeze(1).eq(lowest_idx)
    return hits.any(dim=1)


def cross_corr(centers):
    """Penalty built from the off-diagonal products of the flattened centers.

    The centers are reshaped into rows of length ``10 * centers.size(1)`` (the 10 is
    hard-coded), the Gram matrix of the resulting columns is formed, and the norm of
    its strictly upper triangle is scaled by ``2 / matrix_size``.

    With exactly 10 centers the reshape yields a single row, so the matrix is the
    outer product of the flattened centers with themselves.

    Args:
        centers: 2-D tensor; its element count must be a multiple of
            ``10 * centers.size(1)`` for the reshape to succeed.

    Returns:
        Scalar tensor holding the penalty.
    """
    flat = centers.view(-1, 10 * centers.size(1))
    gram = flat.t() @ flat
    upper = torch.triu(gram, diagonal=1)
    magnitude = torch.norm(upper)
    return 2 * magnitude / gram.size(0)

In [None]:
class Proximity(nn.Module):
    """Margin-based proximity loss over learnable class centers.

    For every sample the squared Euclidean distance to each center is computed.
    ``d_y`` is the distance to the sample's own class center and ``d_c`` is the
    distance to the closest center of a *different* class. The per-sample loss is
    ``relu((d_y - d_c) / (d_y + d_c) + margin)``.

    Args:
        num_classes: number of class centers (at least 2, since the two nearest
            centers are inspected).
        feat_dim: dimensionality of the features and of each center.
        use_gpu: when True the centers are created on the current CUDA device.
        margin: constant added to the normalized distance gap before the ReLU.
    """

    def __init__(self, num_classes=100, feat_dim=1024, use_gpu=True, margin = 0.0 ):
        super(Proximity, self).__init__()
        self.num_classes = num_classes
        self.feat_dim = feat_dim
        self.use_gpu = use_gpu
        self.margin = margin

        initial_centers = torch.randn(self.num_classes, self.feat_dim)
        if self.use_gpu:
            initial_centers = initial_centers.cuda()
        self.centers = nn.Parameter(initial_centers)

        # Kept as an attribute for existing callers, but it now reports where the
        # centers were actually created instead of a hard-coded "cuda:1".
        self.device = self.centers.device

    def forward(self, x , labels):
        """Compute the mean proximity loss and the nearest-center prediction.

        Args:
            x: feature tensor of shape ``(batch, feat_dim)``.
            labels: integer class indices of shape ``(batch,)``.

        Returns:
            Tuple ``(mean_loss, predictions)`` where ``predictions`` is the index of
            the nearest center for every sample.
        """
        batch_size = x.size(0)
        labels = labels.long()

        # Squared distances via ||x||^2 + ||c||^2 - 2 x.c^T, shape (batch, num_classes).
        sq_norms = torch.pow(x, 2).sum(dim=1, keepdim=True) + \
                   torch.pow(self.centers, 2).sum(dim=1).unsqueeze(0)
        # Keyword form of addmm: the positional (beta, alpha, mat1, mat2) order is deprecated.
        distmat = torch.addmm(sq_norms, x, self.centers.t(), beta=1, alpha=-2)

        # Distance of each sample to the center of its own class.
        rows = torch.arange(batch_size, device=distmat.device)
        d_y = distmat[rows, labels]

        # Two nearest centers per sample, closest first.
        values, indices = torch.topk(distmat, 2, dim=1, largest=False, sorted=True)
        d_1 = values[:, 0]
        d_2 = values[:, 1]

        # Per-sample check: is the nearest center the sample's own class?
        # If so the closest *other* center is the runner-up, otherwise it is the nearest one.
        own_is_nearest = indices[:, 0].eq(labels)
        d_c = torch.where(own_is_nearest, d_2, d_1)

        loss = F.relu((d_y - d_c) / (d_y + d_c) + self.margin)
        mean_loss = loss.mean()
        return mean_loss, torch.argmin(distmat, dim=1)
        

In [None]:
# Size of dimension 1 of the sample batch (the channel axis, assuming an NCHW batch — TODO confirm).
image_channels = fixed_x.size(1)

In [None]:
# Import the module under a separate alias: the instance below is bound to the name
# `gmm`, which would otherwise overwrite the module and break this cell on a re-run.
import gmm as gmm_module

embedding_net = LeNet()
model = ClassificationNet(embedding_net, n_classes=p.n_classes).cuda()
# NOTE(review): this reads `p.num_classes` while the classifier above reads
# `p.n_classes` — confirm both exist in parameters.py and agree.
gmm = gmm_module.GaussianMixturePrior(p.num_classes, network_weights=list(model.embedding_net.layers.parameters()), pi_zero=0.99).cuda()

# One proximity criterion per representation width returned by the model.
criterion_prox_256 = Proximity(num_classes=10, feat_dim=256, use_gpu=True,margin=0.75)
criterion_prox_1024 = Proximity(num_classes=10, feat_dim=1024, use_gpu=True, margin=0.75)

In [None]:
# Classification loss on the model's class scores.
criterion = nn.CrossEntropyLoss()

# Network optimizers: one for the cross-entropy stage, one (higher learning rate)
# for the proximity stage. Both update every model parameter as a single group.
# A variant of `optimizer_post` with extra parameter groups for gmm.means /
# gmm.gammas / gmm.rhos (learning rates p.lr_mu / p.lr_gamma / p.lr_rho, base
# p.lr_post) was tried here and is currently disabled.
optimizer_pre = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-7)
optimizer_post = torch.optim.Adam(model.parameters(), lr=5e-3, weight_decay=1e-7)

# Plain SGD for the learnable class centers of each proximity criterion.
optimizer_prox_256 = torch.optim.SGD(criterion_prox_256.parameters(), lr=0.01)
optimizer_prox_1024 = torch.optim.SGD(criterion_prox_1024.parameters(), lr=0.01)

In [None]:
# Start from empty output directories: delete whatever an earlier run left behind,
# then recreate the three folders. Destructive — previous results are removed.
!rm -rfr reconstructed
!rm -rfr softmaxreconstructed
!rm -rfr figs
!mkdir reconstructed
!mkdir softmaxreconstructed
!mkdir figs

In [None]:
# Number of epochs for the cross-entropy training loop.
epochs_0 = 50
# Number of epochs for the proximity-loss training loop that follows it.
epochs_1 = 60

In [None]:
import time
import pandas as pd
import matplotlib.patheffects as PathEffects
%matplotlib inline
import seaborn as sns
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Global seaborn appearance for the figures produced below.
sns.set_style('darkgrid')
sns.set_palette('muted')
sns.set_context("notebook", font_scale=1.5,
                rc={"lines.linewidth": 2.5})
# Random seed handed to TSNE in t_sne_gen.
RS = 123

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA


# One fixed colour per class index, used by plot_embeddings.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
              '#9467bd', '#8c564b', '#e377c2', '#7f7f7f',
              '#bcbd22', '#17becf']

# Legend entries for the ten classes, listed in class-index order.
mnist_classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

def t_sne_gen(data):
    """Embed ``data`` with t-SNE and return the projected points.

    Args:
        data: object exposing ``.numpy()`` (e.g. a CPU tensor); the resulting array
            is what gets fitted.

    Returns:
        The array returned by ``TSNE(random_state=RS).fit_transform``.
    """
    # A PCA projection was tried as an alternative here; t-SNE is the active path.
    projector = TSNE(random_state=RS)
    return projector.fit_transform(data.numpy())


def fashion_scatter(x, colors,name,folder):
    """Scatter-plot 2-D points coloured by class and save the figure as a PNG.

    Args:
        x: array whose first two columns are the plotted coordinates.
        colors: NumPy array of class labels, one per row of ``x``. Labels are used
            directly as palette indices and are looked up as ``0..num_classes-1``,
            so they are expected to be consecutive integers starting at 0.
        name: figure title, also used as the file name stem.
        folder: path prefix; it is concatenated with ``name`` as-is, so it must
            already end with a path separator.

    Returns:
        Tuple ``(figure, axes, scatter_collection, text_labels)``.
    """
    # choose a color palette with seaborn.
    num_classes = len(np.unique(colors))
    palette = np.array(sns.color_palette("hls", num_classes))

    # create a scatter plot.
    f = plt.figure(figsize=(8, 8))
    ax = plt.subplot(aspect='equal')
    # Builtin `int` replaces the `np.int` alias, which no longer exists in current NumPy.
    sc = ax.scatter(x[:,0], x[:,1], lw=0, s=40, c=palette[colors.astype(int)])
    plt.title(name)
    plt.xlim(-25, 25)
    plt.ylim(-25, 25)
    ax.axis('off')
    ax.axis('tight')

    # add the labels for each digit corresponding to the label
    txts = []

    for i in range(num_classes):

        # Position of each label at median of data points.

        xtext, ytext = np.median(x[colors == i, :], axis=0)
        txt = ax.text(xtext, ytext, str(i), fontsize=24)
        # White outline keeps the class number readable on top of the points.
        txt.set_path_effects([
            PathEffects.Stroke(linewidth=5, foreground="w"),
            PathEffects.Normal()])
        txts.append(txt)

    plt.savefig(folder+name+'.png')

    return f, ax, sc, txts


def plot_embeddings(embeddings, targets, xlim=None, ylim=None):
    """Draw a 3-D scatter of the first three embedding columns, one colour per class.

    Args:
        embeddings: array indexed as ``embeddings[rows, column]``; columns 0, 1 and 2
            are plotted.
        targets: array of class labels compared against ``0..9``.
        xlim: optional ``(low, high)`` pair applied to the x axis.
        ylim: optional ``(low, high)`` pair applied to the y axis.
    """
    figure = plt.figure(figsize=(10, 10))
    axes3d = figure.add_subplot(111, projection='3d')

    for class_id in range(10):
        members = np.where(targets == class_id)[0]
        xs, ys, zs = (embeddings[members, axis] for axis in range(3))
        axes3d.scatter(xs, ys, zs, alpha=0.5, color=colors[class_id])

    if xlim:
        plt.xlim(xlim[0], xlim[1])
    if ylim:
        plt.ylim(ylim[0], ylim[1])

    plt.legend(mnist_classes)

def extract_embeddings(dataloader, model, pretrain):
    """Run the model over a loader and collect both embeddings plus the labels.

    The model is switched to eval mode (and left there) and gradients are disabled
    for the whole pass. Every image batch is moved to the GPU with ``.cuda()``.

    Args:
        dataloader: iterable of ``(images, target)`` batches exposing ``.dataset``.
        model: object providing ``eval()`` and ``get_embedding(images, pretrain)``,
            the latter returning two tensors.
        pretrain: flag forwarded unchanged to ``model.get_embedding``.

    Returns:
        Tuple ``(embeddings_1, embeddings_2, labels)`` of NumPy arrays; the two
        embedding arrays have ``networks.vis_size`` columns and one row per sample.
    """
    with torch.no_grad():
        model.eval()
        total = len(dataloader.dataset)
        embeddings_1 = np.zeros((total, networks.vis_size))
        embeddings_2 = np.zeros((total, networks.vis_size))
        labels = np.zeros(total)

        start = 0
        for images, target in dataloader:
            stop = start + len(images)
            first, second = model.get_embedding(images.cuda(), pretrain)
            # Fill the rows that belong to this batch.
            embeddings_1[start:stop] = first.cpu()
            embeddings_2[start:stop] = second.cpu()
            labels[start:stop] = target.numpy()
            start = stop
    return embeddings_1, embeddings_2, labels





In [None]:
import copy

# Stage 1: train the classifier with cross-entropy only.
for epoch in range(epochs_0):
    # Metrics are reset at the start of every epoch so each printed figure
    # describes this epoch alone rather than a running total over all epochs.
    correct = 0
    num_example = 0
    test_loss_bce = 0
    test_correct = 0
    test_num_example = 0

    model.train()
    for idx, (images, target) in enumerate(train_loader):
        images, target = images.cuda(), target.cuda()
        out, rep_1, rep_2 = model(images, test=False)
        loss_bce = criterion(out, target)
        # Only cross-entropy is active in this stage; the proximity terms are
        # introduced by the second training loop.
        loss = loss_bce

        preds = out.data.max(1, keepdim=True)[1]
        correct += preds.eq(target.data.view_as(preds)).sum()
        num_example += len(target)

        optimizer_pre.zero_grad()
        loss.backward()
        optimizer_pre.step()

        if idx % 500 == 0:
            # NOTE(review): `loss` is already a batch mean, so dividing by `bs`
            # under-reports it; kept so the log stays comparable with earlier runs.
            to_print = "Epoch[{}/{}] Loss: {:.3f}  Accuracy:  {}".format(epoch+1,epochs_0, loss.item()/bs, int(correct)/num_example)
            print(to_print)

    model.eval()

    with torch.no_grad():
        for images, target in test_loader:
            images, target = images.cuda(), target.cuda()
            out, rep_1, rep_2 = model(images, test=False)
            loss_bce = criterion(out, target)
            preds = out.data.max(1, keepdim=True)[1]
            test_correct += preds.eq(target.data.view_as(preds)).sum()
            test_num_example += len(target)
            # The criterion returns a batch mean; weight it by the batch size so the
            # division below yields the exact per-sample mean.
            test_loss_bce += loss_bce.item() * len(target)

    # max(..., 1) guards against an empty test loader.
    test_loss_bce /= max(test_num_example, 1)
    print( "test_Loss: {:.3f} Test accuracy: {}".format( test_loss_bce, int(test_correct)/max(test_num_example, 1)))

    # Every tenth epoch: visualise both embeddings and run the attack evaluation.
    if epoch %10==0:
        val_embeddings_1, val_embeddings_2, val_labels_baseline = extract_embeddings(test_loader, model,False)
        plot_embeddings(val_embeddings_1, val_labels_baseline)
        plot_embeddings(val_embeddings_2, val_labels_baseline)
        attack_test(model, test_loader, nn.CrossEntropyLoss() )

In [None]:
import copy
# Snapshot of the embedding-network weights before the proximity stage starts.
pre_wts= copy.deepcopy(list(model.embedding_net.layers.parameters()))

# Stage 2: train with the two proximity losses plus the center-decorrelation term.
for epoch in range(epochs_1):
    # Metrics are reset at the start of every epoch so each printed figure
    # describes this epoch alone rather than a running total over all epochs.
    correct = 0
    num_example = 0
    test_loss_bce = 0
    test_correct = 0
    test_num_example = 0

    model.train()
    for idx, (images, target) in enumerate(train_loader):
        images, target = images.cuda(), target.cuda()
        out, rep_1, rep_2 = model(images,test=False)
        loss_prox_1024, _ = criterion_prox_1024(rep_1, target)
        # Predictions in this stage are the nearest 256-d class center.
        loss_prox_256, preds = criterion_prox_256(rep_2, target)
        loss = loss_prox_256 + loss_prox_1024 + 0.1 * cross_corr(criterion_prox_256.centers)

        correct += preds.eq(target.data.view_as(preds)).sum()
        num_example += len(target)

        optimizer_post.zero_grad()
        optimizer_prox_1024.zero_grad()
        optimizer_prox_256.zero_grad()
        loss.backward()

        # One step per optimizer: the network, then each set of class centers.
        # Each center optimizer must be stepped exactly once, otherwise the 1024-d
        # centers stay at their random initialisation and the 256-d ones move twice.
        optimizer_post.step()
        optimizer_prox_256.step()
        optimizer_prox_1024.step()

        if idx % 500 == 0:
            # NOTE(review): `loss` is already a batch mean, so dividing by `bs`
            # under-reports it; kept so the log stays comparable with earlier runs.
            to_print = "Epoch[{}/{}] Loss: {:.3f}  Accuracy:  {}".format(epoch+1,epochs_1, loss.item()/bs, int(correct)/num_example)
            print(to_print)

    model.eval()

    with torch.no_grad():
        for images, target in test_loader:
            images, target = images.cuda(), target.cuda()
            out, rep_1, rep_2 = model(images, test=False)
            loss_bce = criterion(out, target)
            loss_prox_256, preds = criterion_prox_256(rep_2, target)
            test_correct += preds.eq(target.data.view_as(preds)).sum()
            test_num_example += len(target)
            # The criterion returns a batch mean; weight it by the batch size so the
            # division below yields the exact per-sample mean.
            test_loss_bce += loss_bce.item() * len(target)

    # max(..., 1) guards against an empty test loader.
    test_loss_bce /= max(test_num_example, 1)
    print( "test_Loss: {:.3f} Test accuracy: {}".format( test_loss_bce, int(test_correct)/max(test_num_example, 1)))

    # Every tenth epoch: visualise both embeddings and run the attack evaluation.
    if epoch %10==0:
        val_embeddings_1, val_embeddings_2, val_labels_baseline = extract_embeddings(test_loader, model,True)
        plot_embeddings(val_embeddings_1, val_labels_baseline)
        plot_embeddings(val_embeddings_2, val_labels_baseline)
        attack_test(model, test_loader, nn.CrossEntropyLoss() )

In [1]:

import torch
import loss_fn

In [2]:
# Toy batch: 32 random feature vectors of width 64.
x = torch.randn(32,64)

In [3]:
# Random integer labels in [0, 10), one per row of the toy batch.
y = torch.randint(0,10,(32,))

In [4]:
# Ten random class centers with the same width (64) as the toy features.
centers = torch.randn(10,64)

In [5]:
# Squared Euclidean distance from every sample to every center, built as
# ||x||^2 + ||c||^2 - 2 x.c^T; the result has shape (32, 10).
distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(32,10) + torch.pow(centers, 2).sum(dim=1, keepdim=True).expand(10, 32).t()
# beta/alpha are passed by keyword: the positional (beta, alpha, mat1, mat2) order is deprecated.
distmat.addmm_(x, centers.t(), beta=1, alpha=-2)

	addmm_(Number beta, Number alpha, Tensor mat1, Tensor mat2)
Consider using one of the following signatures instead:
	addmm_(Tensor mat1, Tensor mat2, *, Number beta, Number alpha) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.)
  distmat.addmm_(1, -2, x, centers.t())


tensor([[123.4102, 143.3020, 156.8284, 110.1828, 126.2942, 132.7550, 111.2098,
         124.4898, 103.6460, 116.6430],
        [121.6430, 131.7764, 132.5055, 103.9737, 125.6361, 124.4557, 135.0880,
         104.8350, 122.7714, 162.5762],
        [119.0500, 113.2463, 109.8876,  85.4301, 104.2258, 135.8229, 119.5961,
         104.2679,  97.2643, 110.8201],
        [120.0528, 116.0704, 113.9804,  82.1713, 127.1796, 119.0071, 139.8723,
         121.6481,  98.2589, 154.8297],
        [118.7614, 130.8418, 138.5307,  89.2935, 144.2542, 141.4307, 114.3689,
         125.0864, 101.4656, 175.1440],
        [120.8643, 110.4047, 132.6652,  94.4043, 129.6920, 122.6486, 137.9121,
         111.5099, 126.2253, 155.8465],
        [118.5902, 129.5552, 128.4425, 101.3906, 113.5115, 102.6424, 128.7205,
         104.5829, 108.3017, 150.3145],
        [113.0652, 135.6908, 140.5896, 121.8736, 137.1870, 148.4487, 177.1949,
         113.3142, 143.8784, 162.3724],
        [141.4132, 145.1192, 159.5695,  93.8768,

In [6]:
# Two smallest distances per row (ascending) and the indices of the matching centers.
values, indices = distmat.topk(2, dim=1, largest=False, sorted=True)
d_1, d_2 = values[:, 0], values[:, 1]

In [7]:
# Index of the nearest center for every sample, as a NumPy array.
a=indices[:,0].numpy()

In [8]:
# Index of the second-nearest center for every sample, as a NumPy array.
b=indices[:,1].numpy()

In [9]:
# Pair up (nearest, second-nearest) center indices sample by sample.
l = list(zip(a, b))


In [10]:
# Distinct (nearest, second-nearest) pairs that occur in the toy batch.
s = set(l)

In [11]:
s

{(0, 3),
 (0, 7),
 (0, 8),
 (3, 1),
 (3, 2),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7),
 (3, 8),
 (5, 3),
 (5, 8),
 (7, 0),
 (8, 3)}

In [27]:
x[y==9].shape

torch.Size([1, 64])

In [2]:
import pandas as pd
import joblib

In [3]:
# Load the saved hyper-parameter study and report its best trial.
# NOTE: joblib.load unpickles the file — only open result files you produced yourself.
study = joblib.load("./results/fmnist_optuna_bce.pkl")
best_trial = study.best_trial

print("Best trial until now:")
print(" Value: ", best_trial.value)
print(" Params: ")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

Best trial until now:
 Value:  94.30999755859375
 Params: 
    post_epoch: 80
    lr: 0.0005492907019496496
    weight: 4.118563821233173e-05
    lr_64: 0.0005125059487844361
    lr_512: 0.00018328013361443643
    margin_64: 0.8733892182380124
    margin_512: 0.1392219583376752
    sl_64: 0.23824426328723722
    sl_512: 0.0442688718927673
    lamda_64: 0.040272312116240425
    lamda_512: 0.007846737960864792
