In [1]:
import numpy as np
import pandas as pd
import torch
import torchvision
import matplotlib.pyplot as plt

In [2]:
print(torch.__version__)

1.3.1


In [3]:
import torchvision.datasets as datasets
from torchvision import transforms

In [4]:
#Obtain mnist dataset
mnist_train = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), target_transform=None, download=False)
mnist_test = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), target_transform=None, download=False)

In [5]:
#Dataloader of MNIST
dl_train = torch.utils.data.DataLoader(mnist_train)
dl_test = torch.utils.data.DataLoader(mnist_test)

#y_train = (dl_train.dataset.targets).to(dtype=torch.long)
#y_test = (dl_test.dataset.targets).to(dtype=torch.long)

#tensor_train = (dl_train.dataset.data).to(dtype=torch.float32)
#tensor_test = (dl_test.dataset.data).to(dtype=torch.float32)

#x_train = tensor_train.reshape(tensor_train.size(0),-1)
#x_train = x_train/255
#x_test = tensor_test.reshape(tensor_test.size(0),-1)
#x_test = x_test/255


In [40]:
#Uncertainty Sample Selection Criterias

#Least confidence criteria
#Return: indices of k selected samples, indices+prediction+lc value of k selected samples
#prediction: batch_size x class_size
def least_confidence(prediction,k):
    lc_value, lc_class = np.max(prediction, axis=1), np.argmax(prediction, axis=1)
    indices = list(range(len(prediction)))
    lc_i = np.column_stack((indices, lc_class, lc_value))
    # sort lc_i in ascending order
    lc_i = lc_i[lc_i[:, -1].argsort()]
    return lc_i[:k, 0].astype(np.int32), lc_i[:k]

#Margin sampling criteria
#Return: indices of k selected samples, indices+prediction+lc value of k selected samples
def margin_sampling(prediction, k):
     # Sort pred_prob to get j1 and j2
    size = len(prediction)
    margin = np.diff(np.abs(np.sort(prediction, axis=1)[:, ::-1][:, :2]))
    pred_class = np.argmax(prediction, axis=1)
    ms_i = np.column_stack((list(range(size)), pred_class, margin))

    # sort ms_i in ascending order according to margin
    ms_i = ms_i[ms_i[:, 2].argsort()]
    ms_i = ms_i[::-1]

    # the smaller the margin  means the classifier is more
    # uncertain about the sample
    return ms_i[:k, 0].astype(np.int32), ms_i[:k]

#Entropy criteria
#Return: indices of k selected samples, indices+prediction+lc value of k selected samples
def entropy(prediction, k):
    size = len(prediction)
    entropy_ = - np.nansum(prediction * np.log(prediction), axis=1)
    pred_class = np.argmax(prediction, axis=1)
    en_i = np.column_stack((list(range(size)), pred_class, entropy_))

    # Sort en_i in descending order
    en_i = en_i[(-1 * en_i[:, 2]).argsort()]
    return en_i[:k, 0].astype(np.int32), en_i[:k]


In [44]:
#Sampling Selection
def get_high_confidence_samples(pred_prob: np.ndarray,delta: float):
    en_, eni = entropy(prediction=pred_prob, k=len(pred_prob))
    hcs = eni[eni[:, 2] < delta]
    return hcs[:, 0].astype(np.int32), hcs[:, 1].astype(np.int32)


def get_uncertain_samples(pred_prob, k,criteria):
    if criteria == 'lc':
        return least_confidence(prediction=pred_prob, k=k)
    elif criteria == 'ms':
        return margin_sampling(prediction=pred_prob, k=k)
    elif criteria == 'en':
        return entropy(prediction=pred_prob, k=k)
    else:
        assert False

In [19]:
#Random prob to test above criterias
prob_dist = np.random.rand(50,5)
norm = np.reciprocal(np.reshape(np.sum(prob_dist,axis = 1),(50,1)))
norm = np.column_stack((norm,norm,norm,norm,norm))
prob_dist = np.multiply(prob_dist,norm)
print(prob_dist)

[[0.24500691 0.07347462 0.18415803 0.38929432 0.10806613]
 [0.32171023 0.30209066 0.10971362 0.15663874 0.10984675]
 [0.10261614 0.36607897 0.23751959 0.15135717 0.14242813]
 [0.11703409 0.25449621 0.1832989  0.22733727 0.21783353]
 [0.19570744 0.17986119 0.03029136 0.36621777 0.22792225]
 [0.09717249 0.25241655 0.22446338 0.36633715 0.05961043]
 [0.16395701 0.24382957 0.02912987 0.2731846  0.28989896]
 [0.06470066 0.47639205 0.24711201 0.12640636 0.08538892]
 [0.02096216 0.2980948  0.33747217 0.22399674 0.11947414]
 [0.161696   0.00629839 0.43690593 0.32226396 0.07283572]
 [0.12462174 0.34582474 0.19506928 0.22834826 0.10613598]
 [0.30540454 0.17916349 0.22071668 0.02395428 0.27076102]
 [0.16348177 0.24160319 0.22250624 0.24562583 0.12678297]
 [0.12811512 0.33490139 0.27550046 0.03736756 0.22411547]
 [0.00135463 0.34137036 0.37848586 0.0414964  0.23729276]
 [0.24565155 0.04505783 0.37391404 0.16640395 0.16897263]
 [0.12704753 0.36623161 0.1825556  0.08085007 0.24331518]
 [0.10877765 0

In [55]:
#Entropy test
ic_i, ic = entropy(prob_dist,5)
print(ic)

[[29.          2.          1.60763283]
 [12.          3.          1.58033155]
 [ 3.          1.          1.57907698]
 [44.          0.          1.57224293]
 [39.          1.          1.57024432]]


In [56]:
#Least Confidence test
ic_i, ic = least_confidence(prob_dist,5)
print(ic)

[[29.          2.          0.22014602]
 [44.          0.          0.2438765 ]
 [12.          3.          0.24562583]
 [ 3.          1.          0.25449621]
 [39.          1.          0.25683602]]


In [41]:
#Margin Sampling test
ic_i, ic = margin_sampling(prob_dist,5)
print(ic)

[[ 2.80000000e+01  3.00000000e+00 -5.33295342e-04]
 [ 3.70000000e+01  4.00000000e+00 -5.48794433e-04]
 [ 3.90000000e+01  1.00000000e+00 -2.38748067e-03]
 [ 3.50000000e+01  4.00000000e+00 -2.45739685e-03]
 [ 4.90000000e+01  4.00000000e+00 -3.61409148e-03]]


In [62]:
#Get high confidence samples test
en, en_i = get_high_confidence_samples(prob_dist,1.27)
print(en)
print(en_i)

[32  9 14 21 40]
[4 2 2 1 0]


In [None]:
from torch.utils.data.sampler import SubsetRandomSampler
random_seed = 123
validation_split = 0.1  
batch_size = 16
dataset_size = len(mnist_train)

indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))

train_indices, val_indices = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

dl_train = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, sampler=train_sampler)
dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, sampler=valid_sampler)

model = torchvision.models.resnet(pretrained=True, progress=True)
model.fc = torch.nn.Linear(in_features=512,out_features=10,bias=True)
criterion = torch.nn.CrossEntropyLoss()

In [77]:
for idx,(img,label) in enumerate(dl_train):
    optimizer.zero_grad()
    prob = model(img)
    loss = criterion(prob,label)
    loss.backward()
    optimizer.step()
    
    #print some bookkeeping

NameError: name 'optimizer' is not defined

In [None]:
def ceal_learning_algorithm(du: Dataset,
                            dl: Dataset,
                           k: int = 1000,
                           delta_0: float = 0.005,
                           dr: float = 0.00033,
                           t: int = 1,
                           epochs: int = 10,
                           criteria = str = 'lc',
                           max_iter:int = 45):
    