In [None]:
from matplotlib import pyplot as plt
import numpy as np
from time import time
import random
import os

In [None]:
import wandb

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torcheval.metrics import MulticlassPrecision
from torcheval.metrics.classification import MulticlassRecall
from torcheval.metrics import MulticlassF1Score

In [None]:
# Number of classes in the dataset.
label_num = 10
# Toggle for maximum-separation training.
ms = True
# With maximum separation the model output has one dimension fewer than
# there are classes, so shrink the output size accordingly.
if ms:
    label_num -= 1

In [None]:
# Show the effective output size (after the maximum-separation adjustment, if any).
print(label_num)
# Last expression of the cell: displays whether a CUDA device is usable.
torch.cuda.is_available()

In [None]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Start a Weights & Biases run for this experiment.
project_name = "SVHN"
run_name = "Maximum-Separation-Training"
wandb.init(
    project=project_name,
    name=run_name,
    settings=wandb.Settings(start_method='fork'),
)

# Test-set metrics are plotted against "custom_step" instead of the
# default W&B step axis.
wandb.define_metric("custom_step")
for metric_name in ("test/accuracy", "test_precision", "test_recall", "test_f1"):
    wandb.define_metric(metric_name, step_metric="custom_step")
# wandb.config.update(args)


In [None]:
# Alternative datasets, kept for quick switching (uncomment one pair).
# CIFAR-10:
#train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transforms.ToTensor())
#test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor())

# CIFAR-100:
#train_set = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transforms.ToTensor())
#test_set = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transforms.ToTensor())

# SVHN (active choice): both splits share the same root, download flag
# and ToTensor() transform.
svhn_options = dict(root='./data', download=True, transform=transforms.ToTensor())
train_set = torchvision.datasets.SVHN(split='train', **svhn_options)
test_set = torchvision.datasets.SVHN(split='test', **svhn_options)

In [None]:
import torch.nn as nn

class SimpleNet(nn.Module):
    """Two-layer fully connected classifier for inputs of 32*32*3 values."""

    def __init__(self, num_classes=label_num):
        super().__init__()
        # Hidden layer: 32*32*3 flattened inputs -> 100 units.
        self.fc1 = nn.Linear(32*32*3, 100)
        # Output layer: 100 hidden units -> num_classes scores.
        self.fc2 = nn.Linear(100, num_classes)

    def forward(self, x):
        """Flatten ``x``, apply fc1 followed by ReLU, and return the fc2 output."""
        flat = x.view(-1, 32*32*3)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)

In [None]:
"""resnet in pytorch

[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.

    Deep Residual Learning for Image Recognition
    https://arxiv.org/abs/1512.03385v1
"""

import torch
import torch.nn as nn

class BasicBlock(nn.Module):
    """Two-convolution residual block used by ResNet-18 and ResNet-34.

    ``expansion`` is the ratio between the channels the block emits and
    its ``out_channels`` argument. It is 1 here and 4 in ``BottleNeck``,
    which lets ``ResNet`` size its layers for either block type.
    """

    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        emitted_channels = out_channels * BasicBlock.expansion

        # Main path: 3x3 conv (carries the stride) -> BN -> ReLU -> 3x3 conv -> BN.
        main_path = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, emitted_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(emitted_channels),
        ]
        self.residual_function = nn.Sequential(*main_path)

        # The skip connection is the identity unless the main path changes
        # the spatial size or channel count; then a strided 1x1 convolution
        # projects the input to the matching shape.
        if stride != 1 or in_channels != emitted_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, emitted_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(emitted_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ReLU(main_path(x) + shortcut(x))."""
        summed = self.residual_function(x) + self.shortcut(x)
        return nn.functional.relu(summed, inplace=True)

class BottleNeck(nn.Module):
    """Three-convolution residual block for ResNets with 50+ layers.

    The block emits ``out_channels * expansion`` channels (expansion = 4).
    """

    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        emitted_channels = out_channels * BottleNeck.expansion

        # Main path: 1x1 conv -> 3x3 conv (carries the stride) -> 1x1 conv
        # widening to emitted_channels, each followed by BN, with ReLU
        # after the first two.
        main_path = [
            nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, stride=stride, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, emitted_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(emitted_channels),
        ]
        self.residual_function = nn.Sequential(*main_path)

        # Identity skip unless the shape changes; otherwise project the
        # input with a strided 1x1 convolution.
        if stride != 1 or in_channels != emitted_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, emitted_channels, stride=stride, kernel_size=1, bias=False),
                nn.BatchNorm2d(emitted_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ReLU(main_path(x) + shortcut(x))."""
        summed = self.residual_function(x) + self.shortcut(x)
        return nn.functional.relu(summed, inplace=True)

class ResNet(nn.Module):
    """ResNet with a stride-1 3x3 stem, four residual stages and a linear head.

    Args:
        block: residual block class. It must expose an ``expansion``
            attribute and be constructible as
            ``block(in_channels, out_channels, stride)``.
        num_block: sequence of four ints, the number of blocks per stage.
        num_classes: output size of the final linear layer.
    """

    def __init__(self, block, num_block, num_classes=100):
        super().__init__()

        # Running channel count fed to the next block; _make_layer updates it.
        self.in_channels = 64

        stem = [
            nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        ]
        self.conv1 = nn.Sequential(*stem)

        # The input size differs from the original paper, so the first
        # stage keeps stride 1; the remaining stages halve the resolution.
        depth2, depth3, depth4, depth5 = num_block[0], num_block[1], num_block[2], num_block[3]
        self.conv2_x = self._make_layer(block, 64, depth2, 1)
        self.conv3_x = self._make_layer(block, 128, depth3, 2)
        self.conv4_x = self._make_layer(block, 256, depth4, 2)
        self.conv5_x = self._make_layer(block, 512, depth5, 2)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        """Build one stage, i.e. a sequence of residual blocks.

        Args:
            block: block type, basic block or bottleneck block
            out_channels: output depth channel number of this stage
            num_blocks: how many blocks the stage contains
            stride: stride of the first block; all later blocks use 1

        Return:
            an ``nn.Sequential`` holding the stage's blocks
        """
        per_block_strides = [stride, *([1] * (num_blocks - 1))]
        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.in_channels, out_channels, block_stride))
            # The next block consumes what this one emits.
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Run stem and stages, global-average-pool, flatten, apply ``fc``."""
        features = x
        for stage in (self.conv1, self.conv2_x, self.conv3_x, self.conv4_x, self.conv5_x):
            features = stage(features)
        pooled = self.avg_pool(features)
        flattened = pooled.view(pooled.size(0), -1)
        return self.fc(flattened)

def resnet18(dims=100):
    """Return a ResNet-18 (BasicBlock stages of depth 2, 2, 2, 2).

    Args:
        dims: output size of the final linear layer. Defaults to 100,
            which is what ``ResNet`` used when this factory took no
            arguments, so existing ``resnet18()`` calls are unaffected.
    """
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=dims)

def resnet34(dims):
    """Build a ResNet-34: BasicBlock stages of depth 3, 4, 6, 3.

    ``dims`` is the output size of the final linear layer.
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths, num_classes=dims)

def resnet50(dims):
    """Build a ResNet-50: BottleNeck stages of depth 3, 4, 6, 3.

    ``dims`` is the output size of the final linear layer.
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(BottleNeck, stage_depths, num_classes=dims)

def resnet101(dims):
    """Build a ResNet-101: BottleNeck stages of depth 3, 4, 23, 3.

    ``dims`` is the output size of the final linear layer.
    """
    stage_depths = [3, 4, 23, 3]
    return ResNet(BottleNeck, stage_depths, num_classes=dims)

def resnet152(dims=100):
    """Return a ResNet-152 (BottleNeck stages of depth 3, 8, 36, 3).

    Args:
        dims: output size of the final linear layer. Defaults to 100,
            which is what ``ResNet`` used when this factory took no
            arguments, so existing ``resnet152()`` calls are unaffected.
    """
    return ResNet(BottleNeck, [3, 8, 36, 3], num_classes=dims)

In [None]:
# Build the ResNet-34 with a label_num-wide output layer and place it on
# the selected device.
model = resnet34(label_num).to(device)

In [None]:
import sys
sys.setrecursionlimit(10000) #for nr_prototypes>=1000
import numpy as np
from scipy.spatial.distance import *

In [None]:
def create_prototypes(nr_prototypes):
    """Return ``nr_prototypes`` unit-norm, mutually equidistant vectors.

    The prototypes are the columns of ``V(nr_prototypes - 1)``, returned
    one per row, so each prototype has ``nr_prototypes - 1`` components.

    Args:
        nr_prototypes: number of prototypes to build. Must be at least 2:
            a single prototype would require ``V(0)``, which is undefined.

    Returns:
        float32 array of shape ``(nr_prototypes, nr_prototypes - 1)``.

    Raises:
        AssertionError: if ``nr_prototypes < 2`` or if the result fails
            the shape, unit-norm or equidistance sanity checks.
    """
    assert nr_prototypes > 1, "nr_prototypes must be at least 2"
    prototypes = V(nr_prototypes - 1).T
    assert prototypes.shape == (nr_prototypes, nr_prototypes - 1)
    # Every prototype lies on the unit hypersphere.
    assert np.all(np.abs(np.sum(np.power(prototypes, 2), axis=1) - 1) <= 1e-6)
    # All pairwise distances (diagonal excluded) are equal up to tolerance.
    distances = cdist(prototypes, prototypes)
    assert distances[~np.eye(*distances.shape, dtype=bool)].std() <= 1e-3
    return prototypes.astype(np.float32)

def V(order):
    """Return the ``(order, order + 1)`` matrix of maximally separated vectors.

    The ``order + 1`` columns are unit vectors in ``order`` dimensions.
    The matrix is built bottom-up from ``V(1) = [[1, -1]]`` using

        V(k) = [ 1    -1/k ... -1/k             ]
               [ 0    sqrt(1 - 1/k**2) * V(k-1) ]

    Building iteratively instead of recursively keeps the call depth
    constant, so large orders do not depend on the interpreter's
    recursion limit.

    Args:
        order: dimensionality of the vectors; an integer >= 1.

    Returns:
        float64 array of shape ``(order, order + 1)``.

    Raises:
        ValueError: if ``order`` is smaller than 1.
    """
    if order < 1:
        raise ValueError(f"order must be >= 1, got {order}")

    # Float base case so every order returns the same dtype.
    result = np.array([[1.0, -1.0]])
    for k in range(2, order + 1):
        first_col = np.zeros((k, 1))
        first_col[0] = 1
        top_row = -1 / k * np.ones((1, k))
        lower = np.sqrt(1 - 1 / (k**2)) * result
        result = np.concatenate((first_col, np.concatenate((top_row, lower), axis=0)), axis=1)
    return result

In [None]:
# Load the data into PyTorch DataLoader
# train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=False)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Create prototypes for maximum separation: one prototype per class.
# label_num was already reduced by one when ms is enabled, so add it back
# instead of hard-coding the class count.
num_prototypes = label_num + 1 if ms else label_num
prototypes = create_prototypes(num_prototypes)
prototypes = torch.from_numpy(prototypes).float()
# Scale the prototypes down by a factor of 10.
prototypes *= 0.1
dims = prototypes.shape[1]
# Transpose to (dims, num_prototypes) so that `outputs @ prototypes`
# yields one score per class.
prototypes = prototypes.t()
prototypes = prototypes.to(device)
test_prototypes = prototypes

In [None]:
def training(train_loader, label_num, ms, num_epochs=50):
    """Train the global ``model`` on ``train_loader`` and evaluate it each epoch.

    Uses the module-level ``model``, ``optimizer``, ``criterion``,
    ``prototypes``, ``test_loader`` and ``device``. After every epoch the
    model is evaluated on ``test_loader`` and a summary line is printed.

    Args:
        train_loader: DataLoader yielding ``(inputs, labels)`` batches.
        label_num: size of the model's output layer (already reduced by
            one when maximum separation is enabled).
        ms: when truthy, model outputs are multiplied with ``prototypes``
            to obtain per-class scores before the loss and the argmax.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        Tuple ``(train_acc, val_acc, precision, recall, f1)`` of the last
        epoch. The first two are floats; the last three are the values
        returned by the torcheval metrics' ``compute()``.

    Raises:
        ValueError: if ``num_epochs < 1`` or ``train_loader`` has no batches.
    """
    if num_epochs < 1:
        raise ValueError("num_epochs must be at least 1")
    if len(train_loader) == 0:
        raise ValueError("train_loader yields no batches")

    # Number of classes the metrics must track: the output layer has one
    # dimension fewer than there are classes when ms is enabled.
    num_classes = label_num + 1 if ms else label_num
    # Move the metric state to the device once; reset() keeps it there.
    metric_p = MulticlassPrecision(average="macro", num_classes=num_classes).to(device)
    metric_r = MulticlassRecall(average="macro", num_classes=num_classes).to(device)
    metric_f1 = MulticlassF1Score(average="macro", num_classes=num_classes).to(device)
    metrics = (metric_p, metric_r, metric_f1)

    n_steps_per_epoch = len(train_loader.dataset) / train_loader.batch_size

    # Loop through the number of epochs
    for epoch in range(num_epochs):
        train_loss = 0.0
        train_acc = 0.0
        val_loss = 0.0
        val_acc = 0.0
        for metric in metrics:
            metric.reset()

        # set model to train mode
        model.train()
        # iterate over the training data
        for batch_index, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            # Maximum separation: project the embedding onto the prototypes
            if ms:
                outputs = torch.mm(outputs, prototypes)
            # compute the loss
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # increment the running loss and number of correct predictions
            train_loss += loss.item()
            train_acc += (outputs.argmax(1) == labels).sum().item()

        # calculate the average training loss and accuracy
        train_loss /= len(train_loader)
        train_acc /= len(train_loader.dataset)

        # Logged once per epoch; the loss value is that of the last batch.
        wandb.log({"train/batchwise_loss": loss.item(),
        "train/lr": optimizer.param_groups[0]["lr"],
        "train/weight_decay": optimizer.param_groups[0]["weight_decay"],
        "train/epoch": ((batch_index)/ n_steps_per_epoch + epoch),
        })

        # set the model to evaluation mode
        model.eval()
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                # Maximum separation
                if ms:
                    outputs = torch.mm(outputs, prototypes)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                predictions = outputs.argmax(1)
                val_acc += (predictions == labels).sum().item()

                # update precision, recall and F1-score
                for metric in metrics:
                    metric.update(predictions, labels)

        # calculate the average validation loss and accuracy
        val_loss /= len(test_loader)
        val_acc /= len(test_loader.dataset)

        # calculate the precision, recall and F1-score
        precision = metric_p.compute()
        recall = metric_r.compute()
        f1 = metric_f1.compute()

        print(f'Epoch {epoch+1}/{num_epochs}, train loss: {train_loss:.4f}, train acc: {train_acc:.4f}, val loss: {val_loss:.4f}, val acc: {val_acc:.4f}')

    return train_acc, val_acc, precision, recall, f1

In [None]:
# Diverse mini-Batch Active Learning input
import random

unlabeled_dataset = train_set
# Number of samples in the initial labelled set and added per round.
batch_size = 1000
# Number of active-learning rounds.
n_iter = 10
#use maximum separation
#use logits

# Draw the initial labelled set: batch_size distinct indices into train_set.
# The index range follows the dataset size instead of a hard-coded count.
randomlist = random.sample(range(len(train_set)), batch_size)
run1_randomlist = randomlist

In [None]:
# Create DataLoader from trainset
trainset_loader = torch.utils.data.DataLoader(train_set, shuffle = False, batch_size=64, num_workers=2)
print(len(trainset_loader.dataset))

# Labelled subset: the randomly drawn initial indices.
# NOTE(review): shuffle=False means training batches always arrive in the
# same order — confirm this is intended.
train_sampler = torch.utils.data.Subset(trainset_loader.dataset, randomlist)
train_loader = torch.utils.data.DataLoader(train_sampler, shuffle = False, batch_size=64, num_workers=2)
print(len(train_loader.dataset))

# Select the still unlabeled data and create a DataLoader.
# A set makes each membership test O(1) instead of scanning randomlist.
labelled_indices = set(randomlist)
all_data_list = list(range(len(train_set)))
unlabeled_data_list = [i for i in all_data_list if i not in labelled_indices]
print(len(unlabeled_data_list))
unlabeled_sampler = torch.utils.data.Subset(trainset_loader.dataset, unlabeled_data_list)
unlabeled_loader = torch.utils.data.DataLoader(unlabeled_sampler, shuffle = False, batch_size=64, num_workers=2)
print(len(unlabeled_loader))

In [None]:
## Repeat until budget is exhausted

acc_train_list = []
acc_val_list = []
precision_list = []
recall_list = []
f1_list = []


for n in range(n_iter):

    # Train classifier on all the examples selected so far
    train_acc, val_acc, precision, recall, f1 = training(train_loader, label_num, ms)

    wandb.log({"custom_step": n+1,
               "test/accuracy": val_acc,
                   "test_precision":precision,
                   "test_recall":recall,
                   "test_f1": f1})

    # Record the metrics of this training round
    acc_train_list.append(train_acc)
    acc_val_list.append(val_acc)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)

    # Score every unlabeled datapoint by its margin: the gap between the
    # highest and second-highest softmax probability. Inference only, so
    # run in eval mode without building the autograd graph.
    margin_chunks = []
    model.eval()
    with torch.no_grad():
        for inputs, _ in unlabeled_loader:
            inputs = inputs.to(device)
            outputs = model(inputs)

            # Add maximum separation
            # NOTE(review): the softmax is taken over the raw model output;
            # the prototype projection below is disabled — confirm intended.
            #if ms == True:
              #outputs = torch.mm(outputs, prototypes)

            # use softmax to get a probability distribution for each datapoint in batch
            probs = torch.nn.functional.softmax(outputs, dim=1)
            # sort probabilities
            prob, _ = torch.sort(probs, descending=True)
            margin_chunks.append((prob[:, 0] - prob[:, 1]).cpu())

    if not margin_chunks:
        # Unlabeled pool is exhausted; nothing left to select.
        break

    # Map position in the unlabeled pool -> margin. The loader does not
    # shuffle, so enumeration order equals dataset order regardless of
    # the batch size.
    uncert_outputs = dict(enumerate(torch.cat(margin_chunks).tolist()))

    # A small margin means the model is torn between two classes, so the
    # most informative datapoints are those with the SMALLEST margin:
    # rank ascending and take the first batch_size.
    ranked = sorted(uncert_outputs, key=uncert_outputs.get)
    k_dataset = ranked[:batch_size]

    # Select all other unlabeled datapoints
    remain_set = ranked[batch_size:]

    # Obtain label of new k datapoints and add them to the training data
    k_dataset = torch.utils.data.Subset(unlabeled_loader.dataset, k_dataset)
    k_dataset =  torch.utils.data.ConcatDataset([train_loader.dataset, k_dataset])
    train_loader = torch.utils.data.DataLoader(k_dataset, batch_size=64, shuffle=False, num_workers=2)

    # Remove new k datapoints from unlabeled dataset
    unlabeled_loader = torch.utils.data.Subset(unlabeled_loader.dataset, remain_set)
    unlabeled_loader = torch.utils.data.DataLoader(unlabeled_loader, batch_size=64, shuffle=False, num_workers=2)
