In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler, random_split
import matplotlib.pyplot as plt
import collections
import random
import numpy as np
from PIL import Image
from toolbox import *
# Device used for every model and batch in this notebook.
# The second GPU ("cuda:1") remains the preferred choice; when it does not
# exist we fall back to the first GPU, then to the CPU, so the notebook still
# runs on machines with fewer devices instead of failing at the first `.to()`.
if torch.cuda.device_count() > 1:
    cuda_device = torch.device("cuda:1")
elif torch.cuda.is_available():
    cuda_device = torch.device("cuda:0")
else:
    cuda_device = torch.device("cpu")
from scipy.spatial.distance import pdist, squareform 
from model import *
from IPython.display import display


In [None]:
# Load the MNIST train and test splits through the project helpers; each
# helper call is unpacked into a (dataset, loader) pair.
kwargs = dict(dataset=datasets.MNIST)
trainset, trainloader = load_trainset(**kwargs)
testset, testloader = load_testset(**kwargs)

In [None]:
# Hold out 2000 random training images for validation; everything else forms
# the pool the active-learning loop draws from.
val_size = 2000
pool_size = len(trainset) - val_size
val_set, train_set = random_split(trainset, [val_size, pool_size])

In [None]:
# Loader over the unlabeled pool split (shuffled, loaded in the main process).
trainloader = DataLoader(
    train_set,
    batch_size=100,
    shuffle=True,
    num_workers=0,
)
# Loader over the held-out validation split (fixed order, 10 worker processes).
val_loader = DataLoader(
    val_set,
    batch_size=100,
    shuffle=False,
    num_workers=10,
)

In [None]:
# --- Active-learning hyper-parameters --------------------------------------
acquisition_size = 20          # samples acquired per active-learning step
sub_sample_pool_size = 2000    # random candidates scored per acquisition

# --- Data bookkeeping -------------------------------------------------------
# Indices (into `trainset`) that belong to the pool split.
available_item = train_set.indices
# Labeled set, created empty here.
labeled_trainset = L_set_MNIST([], [], transform_train)

# --- Two networks, each with its own Adam optimizer --------------------------
net = Net().to(cuda_device)
net2 = Net().to(cuda_device)
optimizer = optim.Adam(params=net.parameters(), lr=0.0008)
optimizer2 = optim.Adam(params=net2.parameters(), lr=0.0008)

# --- Result tracking ---------------------------------------------------------
best_net_model = None
best_model_accuracy = 0
test_result = []

In [None]:
# Transform applied to every candidate image before it is scored: a random
# resized crop back to 28x28, conversion to a tensor, then normalization with
# the constants below.
dual_transform_train = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=28, scale=(0.8, 1.0)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

In [None]:
# ---------------------------------------------------------------------------
# Active-learning loop with two networks.
#
# Each step after the first acquires `acquisition_size` new labels, one at a
# time: a random candidate pool is scored by how much the outputs of `net`
# and `net2` differ, and the candidate with the largest difference is moved
# from the unlabeled pool into `labeled_trainset`. Both networks are then
# trained on the enlarged labeled set.
# ---------------------------------------------------------------------------


def _dual_disagreement(out_a, out_b):
    """Return the pdist distance between two normalized output vectors as a float."""
    stacked = torch.cat((normalizer(out_a).unsqueeze(0), normalizer(out_b).unsqueeze(0)), dim=0)
    return float(pdist(stacked)[0])


# Indices (into `trainset`) that are still unlabeled. np.array copies the data
# and accepts both a tensor and a plain Python list of indices.
train_index_list = np.array(train_set.indices)

# Seed the labeled set with the first `acquisition_size` pool images.
sample_indexes = available_item[:acquisition_size]
for items in sample_indexes:
    labeled_trainset.update(trainset.data[items], trainset.targets[items])
train_index_list = np.setdiff1d(train_index_list, np.asarray(sample_indexes))

# Performs Active Learning for 50 steps. Step 0 only trains on the seed set:
# re-adding the seed images there (as was done before) duplicated them in the
# labeled set.
for step in range(50):
    if step >= 1:
        for iteration in range(acquisition_size):
            # Select a randomized sub-sample pool of candidates.
            np.random.shuffle(train_index_list)
            pool_indices = train_index_list[:sub_sample_pool_size]
            if len(pool_indices) == 0:
                break  # unlabeled pool exhausted, nothing left to acquire
            sample = trainset.data[pool_indices]
            # Sized by the actual pool so a short pool cannot be zero-padded.
            sample_transformed = torch.zeros([len(pool_indices), 1, 28, 28])
            for i in range(len(sample)):
                sample_transformed[i] = dual_transform_train(Image.fromarray(sample[i].numpy(), mode='L'))

            with torch.no_grad():
                # NOTE(review): train mode is enabled before scoring, presumably
                # so stochastic layers make the repeated passes differ — confirm
                # against model.Net.
                net.train()
                net2.train()
                batch = sample_transformed.to(cuda_device)
                # Forward order (net, net2, net, net2) is kept; results are
                # moved to the CPU once instead of once per sample.
                net_o0 = net(batch).cpu()
                net2_o0 = net2(batch).cpu()
                net_o0_copy = net(batch).cpu()
                net2_o0_copy = net2(batch).cpu()

            # Start below any real distance so a candidate is picked even when
            # all distances are zero.
            largest_distance = -np.inf
            largest_index = None
            for samples in range(len(net_o0)):
                distance_val = (
                    _dual_disagreement(net_o0[samples], net2_o0[samples])
                    + _dual_disagreement(net_o0_copy[samples], net2_o0_copy[samples])
                )
                if distance_val > largest_distance:
                    largest_distance = distance_val
                    largest_index = train_index_list[samples]

            if largest_index is None:
                # Only reachable when no distance was comparable (e.g. NaN).
                continue
            train_index_list = np.setdiff1d(train_index_list, largest_index)
            labeled_trainset.update(trainset.data[largest_index], trainset.targets[largest_index])

    # Train on the newly acquired data.
    trainloader = torch.utils.data.DataLoader(labeled_trainset, batch_size=100, shuffle=True, num_workers=2)
    for i in range(30):
        train(1, net, optimizer, trainloader, cuda_device)
        train(1, net2, optimizer2, trainloader, cuda_device)
        # Only `net` is validated; the tracked best model is updated from the result.
        best_model_accuracy, best_net_model = test_val(net, best_net_model, val_loader, best_model_accuracy, cuda_device)
    net = load_network(net, best_net_model)
    test_result.append(test_test(step, net, testloader, cuda_device))

In [None]:
# Persist the collected validation and test metrics as .npy files in the
# current working directory.
# NOTE(review): neither `result_list` nor `test_result_list` is assigned in any
# visible cell of this notebook; they can only resolve if one of the star
# imports (`toolbox`, `model`) provides them — TODO confirm. The test metrics
# appended by the active-learning loop are stored in `test_result`.
np.save("MNist_dual_val.npy", np.array(result_list))
np.save("MNist_dual_test.npy", np.array(test_result_list))    

In [None]:
def plot_dataset(data, save_id):
    """Draw a bar chart of how often each class label occurs and save it as EPS.

    Args:
        data: iterable of hashable class labels, one entry per instance.
        save_id: identifier appended to the output file name (converted with
            ``str``).

    Side effects:
        Writes ``Plot/distribution_<save_id>.eps`` (creating the ``Plot``
        directory when it is missing) and then shows the current figure.
    """
    import os  # local import: only needed to create the output directory

    counts = collections.Counter(data)
    plt.bar(list(counts.keys()), list(counts.values()), 0.8)
    plt.ylabel('Number of instances per class')
    plt.grid(True)
    plt.xlabel('Class id')
    plt.ylim(top = 200)
    num = str(save_id)
    os.makedirs('Plot', exist_ok=True)
    # Save before showing: the previous bare `plt.show` was never called, and
    # showing first can leave an empty figure for savefig on some backends.
    plt.savefig('Plot/distribution_' + num + '.eps', bbox_inches='tight')
    plt.show()

In [None]:
# Collect the class label of every labeled sample by calling `.item()` on each
# entry of `labeled_trainset.labelset`.
item_per_class = [label.item() for label in labeled_trainset.labelset]

In [None]:
# Plot the class distribution of the acquired labeled set. `item_per_class`
# is the per-sample label list built in the preceding cell; the previously
# used `temp_list1` is not defined anywhere in this notebook.
plot_dataset(item_per_class, "Mnist_dist_dual")

In [None]:
# Per-class test accuracy for the ten classes, computed as
# class_count[1][c] / class_count[0][c].
# NOTE(review): presumably class_count[0] holds per-class totals and
# class_count[1] per-class correct counts — confirm in toolbox.test_acc_per_class.
class_count = test_acc_per_class(net, testloader, cuda_device)
class_accuracy = [class_count[1][i] / class_count[0][i] for i in range(10)]

In [None]:
# Bar chart of the per-class test accuracy, with the y-axis limited to the
# [0.8, 1] range, saved as an EPS file.
class_id = list(range(10))
plt.bar(class_id, class_accuracy)
plt.xlabel('Class id')
plt.ylabel('Test Accuracy')
plt.grid(True)
plt.ylim(bottom=0.8, top=1)
plt.savefig('Plot/Mnist_dist_acc_dual.eps')