In [1]:
#!/usr/bin/env python
# coding: utf-8

import itertools

import torch
import torch.nn as nn
import torchvision
import torchvision.utils as vutils
import torchvision.transforms as transforms
import torchvision.models as models
from torch import optim
from torch.utils.data import DataLoader

from models import GeneratorA2B
from models import GeneratorB2A
from models import DiscriminatorA

from utils import train_al
from utils import visualize_fake_C, visualize_p
from utils import weights_init_normal
from utils import weights_init
from utils import LambdaLR

# import different loss functions for GAN B
from geomloss import SamplesLoss

import os
# Ensure the 'mnist_models' directory exists; exist_ok=True makes re-running this cell a no-op.
# NOTE(review): nothing visible in this notebook writes to it — confirm it is still needed.
os.makedirs('mnist_models', exist_ok=True)
import numpy as np

# ---- data and label configuration ----
# device        = torch.device("cuda" if cuda else "cpu")

# Image pipeline: convert to a tensor, then normalise the single channel
# with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# MNIST train split first, then the test split; both share the same transform
# and are stored under ./datasets.
trainset_A, testset_A = (
    torchvision.datasets.MNIST(root="./datasets", train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

# Digits that take part in the analysis vs. digits held out (none by default).
present_label = [*range(10)]
missing_label = []
all_label = [*present_label, *missing_label]

# Disabled experiment code kept for reference (it relies on an undefined `batch_size`):
# classes       = trainset_A.classes
# idxs          = torch.where(torch.Tensor([x in present_label for x in trainset_A.targets]))[0] 
# idxs_         = torch.where(torch.Tensor([x in all_label for x in testset_A.targets]))[0]
# train_data    = torch.utils.data.Subset(trainset_A, idxs)
# test_data     = torch.utils.data.Subset(testset_A, idxs_)

# train_loader  = DataLoader(train_data, batch_size=batch_size, shuffle=True)
# test_loader   = DataLoader(test_data, batch_size=batch_size, shuffle=False)


In [3]:
from sklearn.neighbors import KernelDensity
from sklearn.manifold import TSNE
# Fit one 2-D density model per present class.
# kdes[k] is the KernelDensity fitted on the t-SNE embedding of the training
# images whose label is present_label[k].
# NOTE(review): every class gets its own independent t-SNE fit here, and the test
# cell fits yet another one. Embeddings from separate t-SNE fits presumably do not
# share a coordinate system, so scoring test points with these KDEs deserves a
# second look — confirm this is intended.
train_targets = torch.as_tensor(trainset_A.targets)  # accepts a tensor or a plain list
kdes = []
for label in present_label:
    # Select by the actual label value rather than by its position in
    # present_label, so holding out a class cannot silently fit the wrong digits.
    idxs = torch.where(train_targets == label)[0]
    x_subset = trainset_A.data[idxs].view(-1, 28*28)
    tsne = TSNE(random_state = 42, n_components=2,verbose=0, perplexity=40, n_iter=300).fit_transform(x_subset)
    kde = KernelDensity(kernel='gaussian', bandwidth=1)
    kde.fit(tsne)
    kdes.append(kde)
    print(label)

0
1
2
3
4
5
6
7
8
9


In [12]:
# Embed the whole test set in 2-D and evaluate every fitted class density on it.
test_x = testset_A.data.view(-1, 28*28)
test_tsne = TSNE(random_state = 42, n_components=2,verbose=0, perplexity=40, n_iter=300).fit_transform(test_x)
# dens_classes[r, j] is the density of test sample j under kdes[r]
# (one row per present class, one column per test sample).
dens_classes = np.zeros((len(present_label), len(testset_A.targets)))
# Only present labels have a fitted KDE and a row in dens_classes. Iterating over
# all_label would index past the end of both as soon as missing_label is non-empty.
# NOTE(review): assumes kdes[r] belongs to present_label[r]; this holds when
# present_label is 0..K-1 in order, as currently configured.
for row, lab in enumerate(present_label):
    kde = kdes[row]
    # score_samples returns log-densities; exponentiate to get densities
    # (these are densities, not p-values).
    log_p = kde.score_samples(test_tsne)
    p = np.exp(log_p)
    dens_classes[row, :] = p
    ## logger
    print('Finished Label {}'.format(lab))

Finished Label 0
Finished Label 1
Finished Label 2
Finished Label 3
Finished Label 4
Finished Label 5
Finished Label 6
Finished Label 7
Finished Label 8
Finished Label 9


In [29]:
# Result collectors: each evaluation cell below appends one per-class tensor to each list.
cover_accs, avg_sizes = [], []

In [30]:
# Deterministic prediction sets: for every test sample keep the smallest group of
# highest-density classes whose share of the total density exceeds target_coverage,
# then record per true class whether the sample was covered and how large the set was.
target_coverage = 0.95  # the "1 - alpha" level
cover = torch.zeros(len(all_label))  # per true class: samples counted as covered
size = torch.zeros(len(all_label))   # per true class: summed prediction-set sizes
count = torch.zeros(len(all_label))  # per true class: number of test samples
for i in range(len(testset_A.targets)):
    dens = dens_classes[:,i]
    lab = testset_A.targets[i].item()
    ## rank the classes by density, highest first; a single argsort yields both the
    ## ordering and the ordered values, and the builtin `sorted` is no longer rebound
    order = np.argsort(-dens)
    dens_desc = dens[order]
    if dens_desc[0] == 0:
        # every class density is zero -> empty prediction set (contributes size 0)
        p_set = np.array([])
    else:
        ## first rank at which the normalised cumulative density exceeds the target
        idx = np.argmax(np.cumsum(dens_desc) / np.sum(dens_desc) > target_coverage)
        # NOTE(review): p_set holds row indices of dens_classes; they are compared to
        # labels below, which is only valid while row r corresponds to label r.
        p_set = order[:idx + 1]
        size[lab] += len(p_set)
    if lab in missing_label:
        # a held-out class counts as covered only when the set is empty
        if len(p_set) == 0:
            cover[lab] += 1
    elif lab in p_set:
        cover[lab] += 1
    count[lab] += 1

# Per-class averages; a class with no test samples yields NaN (0 / 0).
cover_acc = torch.div(cover, count)
avg_size = torch.div(size, count)
cover_accs.append(cover_acc)
avg_sizes.append(avg_size)

In [31]:
# Randomized prediction sets: same ranking as the deterministic variant, but the
# class that crosses target_coverage is included only with probability gamma, so
# the expected density share of the set equals the target.
target_coverage = 0.95  # the "1 - alpha" level
# Seeded generator so the randomized inclusion (and the reported numbers) can be
# reproduced; numbers recorded from earlier unseeded runs will not match exactly.
rng = np.random.default_rng(42)
cover = torch.zeros(len(all_label))  # per true class: samples counted as covered
size = torch.zeros(len(all_label))   # per true class: summed prediction-set sizes
count = torch.zeros(len(all_label))  # per true class: number of test samples
for i in range(len(testset_A.targets)):
    dens = dens_classes[:,i]
    lab = testset_A.targets[i].item()
    ## rank the classes by density, highest first (the builtin `sorted` is not rebound)
    order = np.argsort(-dens)
    dens_desc = dens[order]
    if dens_desc[0] == 0:
        # every class density is zero -> empty prediction set; checked before
        # normalising so that 0 / 0 is never evaluated
        p_set = np.array([])
    else:
        dens_norm = dens_desc / np.sum(dens_desc)
        dens_norm_cumsum = np.cumsum(dens_norm)
        ## first rank at which the normalised cumulative density exceeds the target
        idx = np.argmax(dens_norm_cumsum > target_coverage)
        # density share already collected by the classes ranked before idx
        mass_before = dens_norm_cumsum[idx - 1] if idx != 0 else 0.0
        # probability of including the boundary class
        gamma = (target_coverage - mass_before) / dens_norm[idx]
        if rng.random() < gamma:
            p_set = order[:idx + 1]
        else:
            p_set = order[:idx]
        # NOTE(review): p_set holds row indices of dens_classes; they are compared to
        # labels below, which is only valid while row r corresponds to label r.
        size[lab] += len(p_set)
    if lab in missing_label:
        # a held-out class counts as covered only when the set is empty
        if len(p_set) == 0:
            cover[lab] += 1
    elif lab in p_set:
        cover[lab] += 1
    count[lab] += 1

# Per-class averages; a class with no test samples yields NaN (0 / 0).
cover_acc = torch.div(cover, count)
avg_size = torch.div(size, count)
cover_accs.append(cover_acc)
avg_sizes.append(avg_size)

In [32]:
# Per-class coverage tensors, one per evaluation cell that appended to the list
# (the deterministic sets first, then the randomized sets, when the cells run in order).
cover_accs

[tensor([1.0000, 1.0000, 0.8052, 1.0000, 1.0000, 0.9854, 0.5929, 1.0000, 0.9938,
         0.9960]),
 tensor([0.9990, 0.9427, 0.6783, 0.9653, 1.0000, 0.8632, 0.4405, 0.9874, 0.9918,
         0.9921])]

In [33]:
# Per-class average prediction-set sizes, in the same order as cover_accs
# (the deterministic sets first, then the randomized sets, when the cells run in order).
avg_sizes

[tensor([9.1020, 7.9216, 9.7345, 9.9861, 9.7128, 9.9798, 7.7881, 9.2821, 9.9846,
         9.8652]),
 tensor([8.5541, 7.5084, 9.1037, 9.2515, 9.1324, 9.3240, 7.3361, 8.7519, 9.3542,
         9.1903])]