# Preface

The places that require configuration for your experiment are marked with comments written in capital letters.

# Setup

**Experiment-Specific Imports**

In [3]:
from distil.utils.models.resnet import ResNet18                                 # IMPORT YOUR MODEL HERE

## Main Imports

In [4]:
import pandas as pd 
import numpy as np
import copy
from torch.utils.data import Dataset, DataLoader, Subset, ConcatDataset
import torch.nn.functional as F
from torch import nn
from torchvision import transforms
from torchvision import datasets
from PIL import Image
import torch
import torch.optim as optim
from torch.autograd import Variable
import sys
sys.path.append('../')
import matplotlib.pyplot as plt
import time
import math
import random
import os
import pickle
from tqdm import tqdm
import torch.utils.data as data
import torchvision.transforms as transforms

from numpy.linalg import cond
from numpy.linalg import inv
from numpy.linalg import norm
from scipy import sparse as sp
from scipy.linalg import lstsq
from scipy.linalg import solve
from scipy.optimize import nnls

from distil.active_learning_strategies.badge import BADGE
from distil.active_learning_strategies.glister import GLISTER
from distil.active_learning_strategies.margin_sampling import MarginSampling
from distil.active_learning_strategies.entropy_sampling import EntropySampling
from distil.active_learning_strategies.random_sampling import RandomSampling
from distil.active_learning_strategies.gradmatch_active import GradMatchActive
from distil.active_learning_strategies.fass import FASS
from distil.active_learning_strategies.adversarial_bim import AdversarialBIM
from distil.active_learning_strategies.adversarial_deepfool import AdversarialDeepFool
from distil.active_learning_strategies.core_set import CoreSet
from distil.active_learning_strategies.least_confidence_sampling import LeastConfidenceSampling
from distil.active_learning_strategies.margin_sampling import MarginSampling
from distil.active_learning_strategies.bayesian_active_learning_disagreement_dropout import BALDDropout
from distil.utils.train_helper import data_train
from distil.utils.utils import LabeledToUnlabeledDataset

import warnings
warnings.filterwarnings("ignore")

## Checkpointing and Logs

In [5]:
class Checkpoint:
    """Snapshot of an active-learning run that can be pickled to disk and restored.

    A checkpoint stores the accuracy list, the indices moved out of the unlabeled
    pool, the model's state dict and the name of the experiment it belongs to.
    Every save writes two identical copies; a checkpoint is only trusted on load
    when both copies can be read and compare equal, so a save that was
    interrupted part-way is ignored.
    """

    def __init__(self, acc_list=None, indices=None, state_dict=None, experiment_name=None, path=None):
        """Create a checkpoint from values, or restore the newest one stored at `path`.

        Parameters
        ----------
        acc_list, indices, state_dict :
            Values to store. All are required when `path` is None.
        experiment_name :
            Name used to tell checkpoints of different experiments apart. Always required.
        path :
            Directory to restore from. When given, the other values are ignored and
            the fields are populated by `load_checkpoint` (they are None when no
            usable checkpoint exists).

        Raises
        ------
        ValueError
            If a required value is None.
        """

        # If a path is supplied, load a checkpoint from there.
        if path is not None:

            if experiment_name is None:
                raise ValueError("Checkpoint contains None value for experiment_name")

            self.load_checkpoint(path, experiment_name)
            return

        # Validate in a fixed order so the first missing field is the one reported.
        required_fields = (
            ("acc_list", acc_list),
            ("indices", indices),
            ("state_dict", state_dict),
            ("experiment_name", experiment_name),
        )
        for field_name, field_value in required_fields:
            if field_value is None:
                raise ValueError(F"Checkpoint contains None value for {field_name}")

        self.acc_list = acc_list
        self.indices = indices
        self.state_dict = state_dict
        self.experiment_name = experiment_name

    def __eq__(self, other):
        """Two checkpoints are equal when accuracy list, indices and experiment name match.

        The state dict is deliberately not part of the comparison.
        """

        # Let Python fall back to its default handling for foreign types
        # instead of failing with an AttributeError.
        if not isinstance(other, Checkpoint):
            return NotImplemented

        return (
            self.acc_list == other.acc_list
            and self.indices == other.indices
            and self.experiment_name == other.experiment_name
        )

    def save_checkpoint(self, path):
        """Pickle this checkpoint twice into the directory `path` (created if needed)."""

        # Get current time to use in file timestamp
        timestamp = time.time_ns()

        # Create the path supplied
        os.makedirs(path, exist_ok=True)

        # The file names share the timestamp and differ only in the trailing copy
        # number ("1" / "2"); load_checkpoint relies on that to pair them up.
        for copy_number in ("1", "2"):
            with open(os.path.join(path, F"c{timestamp}{copy_number}"), 'wb') as save_file:
                pickle.dump(self, save_file)

    @staticmethod
    def _read_checkpoint_file(file_path):
        """Unpickle one checkpoint file; return None if it is unreadable or not a checkpoint."""

        # NOTE(security): pickle can execute arbitrary code while loading. Only
        # point this at checkpoint directories written by this notebook.
        try:
            with open(file_path, 'rb') as load_file:
                loaded = pickle.load(load_file)
        except (pickle.UnpicklingError, EOFError, AttributeError, ImportError, IndexError):
            # A truncated or corrupt file is what an interrupted save leaves behind.
            return None

        expected_attributes = ("acc_list", "indices", "state_dict", "experiment_name")
        if not all(hasattr(loaded, attribute) for attribute in expected_attributes):
            return None

        return loaded

    def load_checkpoint(self, path, experiment_name):
        """Populate this object from the newest intact checkpoint of `experiment_name`.

        Files in `path` are examined from the largest name to the smallest (names
        embed the save timestamp). Two neighbouring files whose names differ only in
        the last character form the two copies of one save. The first pair that is
        readable, belongs to `experiment_name` and compares equal is used. If the
        directory does not exist or holds no such pair, `acc_list`, `indices` and
        `state_dict` are set to None.
        """

        # Start from the "nothing to restore" state; overwritten on success.
        self.acc_list = None
        self.indices = None
        self.state_dict = None
        self.experiment_name = experiment_name

        # A missing directory simply means nothing has been saved yet.
        if not os.path.isdir(path):
            return

        # Obtain all files present at the path, most recent first
        file_names = sorted(
            (f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))),
            reverse=True,
        )

        position = 0
        while position + 1 < len(file_names):

            first_file = file_names[position]
            second_file = file_names[position + 1]

            # Timestamps match if the removal of the "1" or "2" results in equal names.
            # If they do not, the first file has no partner: slide on by one.
            if first_file[:-1] != second_file[:-1]:
                position += 1
                continue

            # Both copies of this save are consumed regardless of the outcome.
            position += 2

            checkpoint = self._read_checkpoint_file(os.path.join(path, first_file))
            checkpoint_copy = self._read_checkpoint_file(os.path.join(path, second_file))

            # Skip saves that did not finish writing both copies correctly.
            if checkpoint is None or checkpoint_copy is None:
                continue

            # Do not use this checkpoint if it is not the experiment we need to restore
            if checkpoint.experiment_name != experiment_name:
                continue

            # Equal copies mean the save operation completed.
            if checkpoint == checkpoint_copy:
                self.acc_list = checkpoint.acc_list
                self.indices = checkpoint.indices
                self.state_dict = checkpoint.state_dict
                return

    def get_saved_values(self):
        """Return the tuple `(acc_list, indices, state_dict)`."""

        return (self.acc_list, self.indices, self.state_dict)

def delete_checkpoints(checkpoint_directory, experiment_name):
    """Delete every checkpoint file in `checkpoint_directory` that belongs to `experiment_name`.

    Each regular file in the directory is unpickled and removed when the loaded
    object's `experiment_name` attribute equals `experiment_name`. Files that
    cannot be unpickled, or whose content has no `experiment_name`, are left
    untouched. A directory that does not exist is treated as already clean.
    """

    # Nothing was ever saved here, so there is nothing to delete.
    if not os.path.isdir(checkpoint_directory):
        return

    for file_name in os.listdir(checkpoint_directory):

        # Get file location
        file_path = os.path.join(checkpoint_directory, file_name)

        if not os.path.isfile(file_path):
            continue

        # NOTE(security): pickle can execute arbitrary code while loading. Only
        # use this on checkpoint directories written by this notebook.
        try:
            with open(file_path, "rb") as load_file:
                stored = pickle.load(load_file)
        except (pickle.UnpicklingError, EOFError, AttributeError, ImportError, IndexError):
            # Truncated / foreign file: it cannot be attributed to any experiment.
            continue

        # Delete this file only if the experiment name matched. The file handle is
        # already closed here, which matters on platforms that lock open files.
        if hasattr(stored, "experiment_name") and stored.experiment_name == experiment_name:
            os.remove(file_path)

#Logs
def write_logs(logs, save_directory, rd):
    """Append the log entries of one round to `run_.txt` inside `save_directory`.

    Parameters
    ----------
    logs : dict
        Values to record. The entry under the key 'Training' is iterated and
        written one item per line; every other entry is written as
        ``key - value``.
    save_directory : str
        Directory holding the log file. It is not created here. It may be given
        with or without a trailing path separator.
    rd :
        Round identifier written in the section header.
    """
    # os.path.join keeps the file inside the directory even when the caller
    # omits the trailing separator.
    file_path = os.path.join(save_directory, 'run_.txt')
    separator = '---------------------\n'

    with open(file_path, 'a') as f:
        f.write(separator)
        f.write('Round ' + str(rd) + '\n')
        f.write(separator)
        for key, val in logs.items():
            if key == 'Training':
                f.write(str(key) + '\n')
                for epoch in val:
                    f.write(str(epoch) + '\n')
            else:
                f.write(str(key) + ' - ' + str(val) + '\n')

## AL Loop

In [6]:
def train_one(full_train_dataset, initial_train_indices, test_dataset, net, n_rounds, budget, args, nclasses, strategy, save_directory, checkpoint_directory, experiment_name):
    """Run (or resume) one active-learning experiment and return the test accuracy per round.

    The full training set is split into a labeled seed set and an unlabeled pool.
    In every round the strategy selects `budget` pool points, these are moved to
    the labeled set, the model is retrained on the enlarged set, the test accuracy
    is logged, and a checkpoint is written. If a checkpoint for `experiment_name`
    already exists in `checkpoint_directory`, the run continues after the last
    checkpointed round instead of starting over.

    Parameters
    ----------
    full_train_dataset :
        Dataset holding all training points. Its `transform` attribute is swapped
        temporarily for the test transform while points are being selected.
    initial_train_indices :
        Indices into `full_train_dataset` that form the labeled seed set.
    test_dataset :
        Dataset used to measure accuracy; must expose a `transform` attribute.
    net :
        Model to train. It is moved to the GPU when CUDA is available.
    n_rounds : int
        Number of selection rounds.
    budget : int
        Number of points requested from the strategy in each round.
    args : dict
        Passed to `data_train`; 'batch_size' and 'device' (and 'lr' for GLISTER)
        are also forwarded to the strategy.
    nclasses : int
        Number of classes, forwarded to the strategy.
    strategy : str
        One of "random", "entropy", "margin", "least_confidence", "badge",
        "coreset", "fass", "glister", "adversarial_bim", "adversarial_deepfool",
        "bald". A non-string value is used directly as the strategy object.
    save_directory : str
        Directory passed to `write_logs`.
    checkpoint_directory : str
        Directory where checkpoints are read from and written to.
    experiment_name : str
        Name identifying this experiment's checkpoints.

    Returns
    -------
    numpy.ndarray
        Array of length ``n_rounds + 1``; entry 0 is the accuracy before any
        selection round and entry ``rd`` the accuracy after round ``rd``.

    Raises
    ------
    ValueError
        If `strategy` is a string that names no known strategy.
    """

    # Split the full training dataset into an initial training dataset and an unlabeled dataset
    train_dataset = Subset(full_train_dataset, initial_train_indices)
    initial_unlabeled_indices = list(set(range(len(full_train_dataset))) - set(initial_train_indices))
    unlabeled_dataset = Subset(full_train_dataset, initial_unlabeled_indices)

    # Size of the seed set, recorded before any selected points are appended. Kept
    # local so resuming never depends on a notebook-level variable of the same name.
    initial_seed_size = len(train_dataset)

    # Set up the AL strategy
    if isinstance(strategy, str):

        strategy_classes = {
            "random": RandomSampling,
            "entropy": EntropySampling,
            "margin": MarginSampling,
            "least_confidence": LeastConfidenceSampling,
            "badge": BADGE,
            "coreset": CoreSet,
            "fass": FASS,
            "glister": GLISTER,
            "adversarial_bim": AdversarialBIM,
            "adversarial_deepfool": AdversarialDeepFool,
            "bald": BALDDropout,
        }

        # Fail immediately with a clear message rather than much later with an
        # AttributeError on a plain string.
        if strategy not in strategy_classes:
            raise ValueError(F"Unknown strategy {strategy!r}; expected one of {sorted(strategy_classes)}")

        pool_for_strategy = LabeledToUnlabeledDataset(unlabeled_dataset)

        if strategy == "glister":
            # GLISTER additionally receives the learning rate and its own keyword arguments.
            strategy_args = {'batch_size' : args['batch_size'], 'lr': args['lr'], 'device':args['device']}
            strategy = GLISTER(train_dataset, pool_for_strategy, net, nclasses, strategy_args, typeOf='rand', lam=0.1)
        else:
            strategy_args = {'batch_size' : args['batch_size'], 'device':args['device']}
            strategy = strategy_classes[strategy](train_dataset, pool_for_strategy, net, nclasses, strategy_args)

    # Define acc initially
    acc = np.zeros(n_rounds+1)

    initial_unlabeled_size = len(unlabeled_dataset)

    initial_round = 1

    # index_map[i] is the position, in the ORIGINAL unlabeled pool, of the i-th
    # point of the current (shrinking) unlabeled pool.
    index_map = np.arange(initial_unlabeled_size)

    # Make sure the checkpoint directory exists before it is inspected.
    os.makedirs(checkpoint_directory, exist_ok=True)

    # Attempt to load a checkpoint. If one exists, then the experiment crashed.
    training_checkpoint = Checkpoint(experiment_name=experiment_name, path=checkpoint_directory)
    rec_acc, rec_indices, rec_state_dict = training_checkpoint.get_saved_values()

    # Check if there are values to recover
    if rec_acc is not None:

        # Restore the accuracy list (bounded, in case n_rounds differs from the saved run)
        restored_count = min(len(rec_acc), len(acc))
        acc[:restored_count] = rec_acc[:restored_count]

        # Restore the indices list and shift those unlabeled points to the labeled set.
        index_map = np.delete(index_map, rec_indices)

        restored_unlabeled_points = Subset(unlabeled_dataset, rec_indices)
        train_dataset = ConcatDataset([train_dataset, restored_unlabeled_points])

        remaining_unlabeled_indices = list(set(range(len(unlabeled_dataset))) - set(rec_indices))
        unlabeled_dataset = Subset(unlabeled_dataset, remaining_unlabeled_indices)

        # Restore the model
        net.load_state_dict(rec_state_dict)

        # Fix the initial round: every completed round added `budget` points to the seed set
        initial_round = (len(train_dataset) - initial_seed_size) // budget + 1

        # Ensure loaded model is moved to GPU
        if torch.cuda.is_available():
            net = net.cuda()

        strategy.update_model(net)
        strategy.update_data(train_dataset, LabeledToUnlabeledDataset(unlabeled_dataset))

        dt = data_train(train_dataset, net, args)

    else:

        if torch.cuda.is_available():
            net = net.cuda()

        dt = data_train(train_dataset, net, args)

        acc[0] = dt.get_acc_on_set(test_dataset)
        print('Initial Testing accuracy:', round(acc[0]*100, 2), flush=True)

        logs = {}
        logs['Training Points'] = len(train_dataset)
        logs['Test Accuracy'] =  str(round(acc[0]*100, 2))
        write_logs(logs, save_directory, 0)

        #Updating the trained model in strategy class
        strategy.update_model(net)

    # Record the training transform and test transform for disabling purposes
    train_transform = full_train_dataset.transform
    test_transform = test_dataset.transform

    ##User Controlled Loop
    for rd in range(initial_round, n_rounds+1):
        print('-------------------------------------------------')
        print('Round', rd)
        print('-------------------------------------------------')

        sel_time = time.time()
        full_train_dataset.transform = test_transform # Disable any augmentation while selecting points
        try:
            idx = strategy.select(budget)
        finally:
            # Re-enable any augmentation done during training, even if selection fails,
            # so the caller's dataset is never left with the test transform.
            full_train_dataset.transform = train_transform
        sel_time = time.time() - sel_time
        print("Selection Time:", sel_time)

        # Move the selected points from the unlabeled pool to the labeled set
        selected_unlabeled_points = Subset(unlabeled_dataset, idx)
        train_dataset = ConcatDataset([train_dataset, selected_unlabeled_points])

        remaining_unlabeled_indices = list(set(range(len(unlabeled_dataset))) - set(idx))
        unlabeled_dataset = Subset(unlabeled_dataset, remaining_unlabeled_indices)

        # Update the index map
        index_map = np.delete(index_map, idx, axis = 0)

        print('Number of training points -', len(train_dataset))

        # Start training
        strategy.update_data(train_dataset, LabeledToUnlabeledDataset(unlabeled_dataset))
        dt.update_data(train_dataset)
        t1 = time.time()
        clf, train_logs = dt.train(None)
        t2 = time.time()
        acc[rd] = dt.get_acc_on_set(test_dataset)
        logs = {}
        logs['Training Points'] = len(train_dataset)
        logs['Test Accuracy'] =  str(round(acc[rd]*100, 2))
        logs['Selection Time'] = str(sel_time)
        logs['Trainining Time'] = str(t2 - t1)
        logs['Training'] = train_logs
        write_logs(logs, save_directory, rd)
        strategy.update_model(clf)
        print('Testing accuracy:', round(acc[rd]*100, 2), flush=True)

        # Create a checkpoint. The used indices are the original pool positions
        # that are no longer present in index_map.
        used_indices = np.arange(initial_unlabeled_size)
        used_indices = np.delete(used_indices, index_map).tolist()

        round_checkpoint = Checkpoint(acc.tolist(), used_indices, clf.state_dict(), experiment_name=experiment_name)
        round_checkpoint.save_checkpoint(checkpoint_directory)

    print('Training Completed')
    return acc

# MNIST

## Parameter Definitions

Parameters related to the specific experiment are placed here. You should examine each and modify them as needed.

In [7]:
data_set_name = "MNIST" # DSET NAME HERE
dataset_root_path = '../downloaded_data/'
net = ResNet18() # MODEL HERE

# MODIFY AS NECESSARY
logs_directory = 'distil_checkpoints/logs/'
checkpoint_directory = 'distil_checkpoints/check/'

experiment_name = "MNIST DISTIL IMPLEMENTATION 4"

# File that the initial model's state dict is saved to (or loaded from when
# load_model is True in the initial training cell).
model_directory = 'distil_checkpoints/model/MNIST4'

initial_seed_size = 50 # INIT SEED SIZE HERE
training_size_cap = 1250 # TRAIN SIZE CAP HERE

budget = 50 # BUDGET HERE

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# CHANGE ARGS AS NECESSARY
args = {
    'n_epoch': 300,
    'lr': float(0.01),
    'batch_size': 32,
    'max_accuracy': float(0.99),
    'islogs': True,
    'isreset': False,
    'isverbose': True,
    'device': device,
    'optimizer': 'adam',
}

# Number of selection rounds needed to grow the labeled set from the seed size
# to the training size cap under the per-round budget constraint
n_rounds = (training_size_cap - initial_seed_size) // budget

## Initial Loading and Training

You may either train a new initial model or load a previously saved one (see the `load_model` flag below). If this notebook is being executed in Colab, consider whether you need the gdown line.

In [8]:
# Record a monotonic start timestamp (presumably used later to report the total
# run time - confirm against the rest of the notebook).
start_time = time.monotonic()

In [9]:
# # Mount drive containing possible saved model and define file path.
# colab_model_storage_mount = "/content/gdrive"
# drive.mount(colab_model_storage_mount)

# Retrieve the model from a download link and save it to the drive
# os.makedirs(logs_directory, exist_ok = True)
# os.makedirs(checkpoint_directory, exist_ok = True)
# os.makedirs(model_directory, exist_ok = True)
# model_directory = F"{model_directory}/{data_set_name}"
#!/content/gdown.pl/gdown.pl "INSERT SHARABLE LINK HERE" "INSERT DOWNLOAD LOCATION HERE (ideally, same as model_directory)" # MAY NOT NEED THIS LINE IF NOT CLONING MODEL FROM COLAB

# Load the dataset
if data_set_name == "MNIST":

    image_dim=28

    # Steps shared by the training and test pipelines: convert to a tensor,
    # normalise, then repeat the single channel three times.
    shared_tensor_steps = [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
        transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
    ]

    # Training adds a padded random crop as augmentation; testing only resizes.
    train_transform = transforms.Compose([transforms.RandomCrop(image_dim, padding=4)] + shared_tensor_steps)
    test_transform = transforms.Compose([transforms.Resize((image_dim, image_dim))] + shared_tensor_steps)

    full_train_dataset = datasets.MNIST(dataset_root_path, download=True, train=True, transform=train_transform, target_transform=torch.tensor)
    test_dataset = datasets.MNIST(dataset_root_path, download=True, train=False, transform=test_transform, target_transform=torch.tensor)

    nclasses = 10 # NUM CLASSES HERE

else:
    # Without this, an unsupported name would leave full_train_dataset, test_dataset
    # and nclasses undefined and only fail with a NameError in a later cell.
    raise ValueError(F"Unsupported data_set_name {data_set_name!r}; add a loading branch for it in this cell")


In [10]:
args['nclasses'] = nclasses

# Shape of a single transformed training sample
dim = full_train_dataset[0][0].shape

# Seed the random number generator for reproducibility and create the initial seed set
np.random.seed(42)
initial_train_indices = np.random.choice(len(full_train_dataset), replace=False, size=initial_seed_size)

# COMMENT OUT ONE OR THE OTHER IF YOU WANT TO TRAIN A NEW INITIAL MODEL
load_model = False
# load_model = True

if load_model:
    # Load onto the CPU first so a state dict saved from a GPU run can also be
    # restored on a machine without CUDA; load_state_dict copies the values into
    # the model's existing parameters.
    net.load_state_dict(torch.load(model_directory, map_location='cpu'))
    initial_model = net
else:
    # Create the folder that will hold the saved model BEFORE training, so a
    # missing folder cannot make torch.save fail after the training has finished.
    model_parent_directory = os.path.dirname(model_directory)
    if model_parent_directory:
        os.makedirs(model_parent_directory, exist_ok=True)

    dt = data_train(Subset(full_train_dataset, initial_train_indices), net, args)
    initial_model, _ = dt.train(None)
    torch.save(initial_model.state_dict(), model_directory)

print("Training for", n_rounds, "rounds with budget", budget, "on unlabeled set size", training_size_cap)

Training..
50 training accuracy: 0.74
Epoch: 88 Training accuracy: 1.0
Training for 24 rounds with budget 50 on unlabeled set size 1250


In [11]:
# Display the number of classes set by the dataset loading cell
nclasses

10

## Random Sampling

In [12]:
# Run the active-learning loop with the random sampling strategy. Logs go to a
# per-dataset, per-strategy folder and checkpoints are keyed by experiment and strategy.
strategy = "random"
strat_logs = F"{logs_directory}{data_set_name}/{strategy}/"
os.makedirs(strat_logs, exist_ok=True)
train_one(
    full_train_dataset=full_train_dataset,
    initial_train_indices=initial_train_indices,
    test_dataset=test_dataset,
    net=copy.deepcopy(initial_model),  # every strategy starts from its own copy of the initial model
    n_rounds=n_rounds,
    budget=budget,
    args=args,
    nclasses=nclasses,
    strategy=strategy,
    save_directory=strat_logs,
    checkpoint_directory=checkpoint_directory,
    experiment_name=F"{experiment_name}_{strategy}",
)

Initial Testing accuracy: 48.01
-------------------------------------------------
Round 1
-------------------------------------------------
Selection Time: 0.0007522106170654297
Number of training points - 100
Training..
50 training accuracy: 0.9
100 training accuracy: 0.93
Epoch: 101 Training accuracy: 1.0
Testing accuracy: 84.42
-------------------------------------------------
Round 2
-------------------------------------------------
Selection Time: 0.0007653236389160156
Number of training points - 150
Training..
Epoch: 11 Training accuracy: 1.0
Testing accuracy: 90.42
-------------------------------------------------
Round 3
-------------------------------------------------
Selection Time: 0.00101470947265625
Number of training points - 200
Training..
Epoch: 28 Training accuracy: 0.995
Testing accuracy: 91.2
-------------------------------------------------
Round 4
-------------------------------------------------
Selection Time: 0.0009143352508544922
Number of training points - 25

array([0.4801, 0.8442, 0.9042, 0.912 , 0.9263, 0.9075, 0.9296, 0.9212,
       0.9459, 0.9455, 0.9566, 0.9571, 0.9574, 0.947 , 0.973 , 0.965 ,
       0.9669, 0.9721, 0.9635, 0.9673, 0.9686, 0.9694, 0.9699, 0.9772,
       0.9653])

## Entropy

In [13]:
# Run the active-learning loop with the entropy sampling strategy. Logs go to a
# per-dataset, per-strategy folder and checkpoints are keyed by experiment and strategy.
strategy = "entropy"
strat_logs = F"{logs_directory}{data_set_name}/{strategy}/"
os.makedirs(strat_logs, exist_ok=True)
train_one(
    full_train_dataset=full_train_dataset,
    initial_train_indices=initial_train_indices,
    test_dataset=test_dataset,
    net=copy.deepcopy(initial_model),  # every strategy starts from its own copy of the initial model
    n_rounds=n_rounds,
    budget=budget,
    args=args,
    nclasses=nclasses,
    strategy=strategy,
    save_directory=strat_logs,
    checkpoint_directory=checkpoint_directory,
    experiment_name=F"{experiment_name}_{strategy}",
)

Initial Testing accuracy: 48.01
-------------------------------------------------
Round 1
-------------------------------------------------
Selection Time: 22.68111228942871
Number of training points - 100
Training..
50 training accuracy: 0.91
Epoch: 58 Training accuracy: 1.0
Testing accuracy: 76.53
-------------------------------------------------
Round 2
-------------------------------------------------
Selection Time: 22.767117261886597
Number of training points - 150
Training..
Epoch: 27 Training accuracy: 0.993
Testing accuracy: 85.14
-------------------------------------------------
Round 3
-------------------------------------------------
Selection Time: 21.32067370414734
Number of training points - 200
Training..
Epoch: 26 Training accuracy: 0.99
Testing accuracy: 90.64
-------------------------------------------------
Round 4
-------------------------------------------------
Selection Time: 22.91009521484375
Number of training points - 250
Training..
Epoch: 27 Training accurac

array([0.4801, 0.7653, 0.8514, 0.9064, 0.9012, 0.9514, 0.946 , 0.9637,
       0.9748, 0.9737, 0.981 , 0.981 , 0.9806, 0.9769, 0.987 , 0.9853,
       0.9847, 0.9883, 0.9902, 0.9896, 0.9924, 0.9906, 0.9908, 0.9891,
       0.9922])

## GLISTER

In [14]:
# Run the active-learning loop with the GLISTER strategy. Logs go to a
# per-dataset, per-strategy folder and checkpoints are keyed by experiment and strategy.
strategy = "glister"
strat_logs = F"{logs_directory}{data_set_name}/{strategy}/"
os.makedirs(strat_logs, exist_ok=True)
train_one(
    full_train_dataset=full_train_dataset,
    initial_train_indices=initial_train_indices,
    test_dataset=test_dataset,
    net=copy.deepcopy(initial_model),  # every strategy starts from its own copy of the initial model
    n_rounds=n_rounds,
    budget=budget,
    args=args,
    nclasses=nclasses,
    strategy=strategy,
    save_directory=strat_logs,
    checkpoint_directory=checkpoint_directory,
    experiment_name=F"{experiment_name}_{strategy}",
)

Initial Testing accuracy: 48.01
-------------------------------------------------
Round 1
-------------------------------------------------
Selection Time: 77.3955557346344
Number of training points - 100
Training..
50 training accuracy: 0.78
Epoch: 98 Training accuracy: 1.0
Testing accuracy: 71.1
-------------------------------------------------
Round 2
-------------------------------------------------
Selection Time: 72.95324802398682
Number of training points - 150
Training..
Epoch: 28 Training accuracy: 0.993
Testing accuracy: 86.09
-------------------------------------------------
Round 3
-------------------------------------------------
Selection Time: 72.91101479530334
Number of training points - 200
Training..
Epoch: 27 Training accuracy: 0.995
Testing accuracy: 89.2
-------------------------------------------------
Round 4
-------------------------------------------------
Selection Time: 73.02025389671326
Number of training points - 250
Training..
Epoch: 9 Training accuracy: 0

array([0.4801, 0.711 , 0.8609, 0.892 , 0.9141, 0.9335, 0.9075, 0.9387,
       0.9607, 0.9672, 0.9705, 0.9707, 0.9748, 0.9841, 0.9853, 0.9865,
       0.9852, 0.9886, 0.9839, 0.9883, 0.9831, 0.9902, 0.9886, 0.9903,
       0.9906])

## FASS

In [15]:
# Run the active-learning loop with the FASS strategy. Logs go to a
# per-dataset, per-strategy folder and checkpoints are keyed by experiment and strategy.
strategy = "fass"
strat_logs = logs_directory+F'{data_set_name}/{strategy}/'
os.makedirs(strat_logs, exist_ok = True)
# Pass nclasses (set by the dataset loading cell) instead of a literal 10 so this
# run stays consistent with the other strategies when the dataset is changed.
train_one(full_train_dataset, initial_train_indices, test_dataset, copy.deepcopy(initial_model), n_rounds, budget, args, nclasses, strategy, strat_logs, checkpoint_directory, F"{experiment_name}_{strategy}")

Initial Testing accuracy: 48.01
-------------------------------------------------
Round 1
-------------------------------------------------
Selection Time: 22.80303406715393
Number of training points - 100
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

50 training accuracy: 0.92
Epoch: 94 Training accuracy: 0.99
Testing accuracy: 72.41
-------------------------------------------------
Round 2
-------------------------------------------------
Selection Time: 22.40721035003662
Number of training points - 150
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 14 Training accuracy: 1.0
Testing accuracy: 88.47
-------------------------------------------------
Round 3
-------------------------------------------------
Selection Time: 22.941866397857666
Number of training points - 200
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 48 Training accuracy: 0.99
Testing accuracy: 92.07
-------------------------------------------------
Round 4
-------------------------------------------------
Selection Time: 22.908734798431396
Number of training points - 250
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 15 Training accuracy: 0.996
Testing accuracy: 94.7
-------------------------------------------------
Round 5
-------------------------------------------------
Selection Time: 22.232004642486572
Number of training points - 300
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 11 Training accuracy: 0.99
Testing accuracy: 95.74
-------------------------------------------------
Round 6
-------------------------------------------------
Selection Time: 22.877830028533936
Number of training points - 350
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 23 Training accuracy: 0.991
Testing accuracy: 95.88
-------------------------------------------------
Round 7
-------------------------------------------------


[||||||||||||||||||||]100% [Iteration 50 of 50]

Selection Time: 22.92329716682434
Number of training points - 400
Training..
Epoch: 34 Training accuracy: 0.99
Testing accuracy: 97.87
-------------------------------------------------
Round 8
-------------------------------------------------
Selection Time: 22.426288843154907
Number of training points - 450
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 16 Training accuracy: 0.991
Testing accuracy: 98.06
-------------------------------------------------
Round 9
-------------------------------------------------
Selection Time: 22.77938151359558
Number of training points - 500
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 19 Training accuracy: 0.992
Testing accuracy: 98.24
-------------------------------------------------
Round 10
-------------------------------------------------
Selection Time: 22.502869129180908
Number of training points - 550
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 20 Training accuracy: 0.996
Testing accuracy: 98.54
-------------------------------------------------
Round 11
-------------------------------------------------
Selection Time: 22.085646152496338
Number of training points - 600
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 11 Training accuracy: 0.993
Testing accuracy: 98.08
-------------------------------------------------
Round 12
-------------------------------------------------
Selection Time: 22.88173484802246
Number of training points - 650
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 11 Training accuracy: 0.995
Testing accuracy: 98.23
-------------------------------------------------
Round 13
-------------------------------------------------
Selection Time: 22.232613563537598
Number of training points - 700
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 14 Training accuracy: 0.994
Testing accuracy: 98.69
-------------------------------------------------
Round 14
-------------------------------------------------
Selection Time: 22.85296082496643
Number of training points - 750
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 21 Training accuracy: 0.997
Testing accuracy: 99.05
-------------------------------------------------
Round 15
-------------------------------------------------
Selection Time: 22.13641905784607
Number of training points - 800
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 16 Training accuracy: 0.991
Testing accuracy: 98.95
-------------------------------------------------
Round 16
-------------------------------------------------
Selection Time: 20.530940532684326
Number of training points - 850
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 31 Training accuracy: 0.996
Testing accuracy: 98.5
-------------------------------------------------
Round 17
-------------------------------------------------
Selection Time: 22.868223667144775
Number of training points - 900
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 6 Training accuracy: 0.991
Testing accuracy: 99.2
-------------------------------------------------
Round 18
-------------------------------------------------
Selection Time: 22.79943871498108
Number of training points - 950
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 10 Training accuracy: 0.993
Testing accuracy: 98.95
-------------------------------------------------
Round 19
-------------------------------------------------


[||||||||||||||||||||]100% [Iteration 50 of 50]

Selection Time: 18.921785354614258
Number of training points - 1000
Training..
Epoch: 15 Training accuracy: 0.99
Testing accuracy: 99.09
-------------------------------------------------
Round 20
-------------------------------------------------
Selection Time: 22.811479330062866
Number of training points - 1050
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 8 Training accuracy: 0.993
Testing accuracy: 99.03
-------------------------------------------------
Round 21
-------------------------------------------------
Selection Time: 20.66963505744934
Number of training points - 1100
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 8 Training accuracy: 0.994
Testing accuracy: 99.1
-------------------------------------------------
Round 22
-------------------------------------------------


[||||||||||||||||||||]100% [Iteration 50 of 50]

Selection Time: 20.73637366294861
Number of training points - 1150
Training..
Epoch: 8 Training accuracy: 0.993
Testing accuracy: 99.25
-------------------------------------------------
Round 23
-------------------------------------------------
Selection Time: 20.999318838119507
Number of training points - 1200
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 13 Training accuracy: 0.99
Testing accuracy: 99.33
-------------------------------------------------
Round 24
-------------------------------------------------
Selection Time: 18.458323001861572
Number of training points - 1250
Training..


[||||||||||||||||||||]100% [Iteration 50 of 50]

Epoch: 15 Training accuracy: 0.993
Testing accuracy: 99.21
Training Completed


array([0.4801, 0.7241, 0.8847, 0.9207, 0.947 , 0.9574, 0.9588, 0.9787,
       0.9806, 0.9824, 0.9854, 0.9808, 0.9823, 0.9869, 0.9905, 0.9895,
       0.985 , 0.992 , 0.9895, 0.9909, 0.9903, 0.991 , 0.9925, 0.9933,
       0.9921])

## BADGE

In [16]:
# Run the active-learning experiment with the BADGE strategy.
strategy = "badge"

# Per-strategy log directory: <logs_directory><data_set_name>/<strategy>/
strat_logs = logs_directory + f"{data_set_name}/{strategy}/"
os.makedirs(strat_logs, exist_ok=True)

# A deep copy of initial_model is passed, so initial_model itself is not the
# object handed to train_one. The call stays the last expression so its return
# value (an array whose entries match the logged testing accuracies / 100) is
# displayed as the cell output.
train_one(
    full_train_dataset,
    initial_train_indices,
    test_dataset,
    copy.deepcopy(initial_model),
    n_rounds,
    budget,
    args,
    nclasses,
    strategy,
    strat_logs,
    checkpoint_directory,
    f"{experiment_name}_{strategy}",
)

Initial Testing accuracy: 48.01
-------------------------------------------------
Round 1
-------------------------------------------------
Selection Time: 36.72698473930359
Number of training points - 100
Training..
50 training accuracy: 0.91
Epoch: 63 Training accuracy: 0.99
Testing accuracy: 79.03
-------------------------------------------------
Round 2
-------------------------------------------------
Selection Time: 32.39088034629822
Number of training points - 150
Training..
Epoch: 19 Training accuracy: 1.0
Testing accuracy: 87.02
-------------------------------------------------
Round 3
-------------------------------------------------
Selection Time: 32.40498423576355
Number of training points - 200
Training..
Epoch: 32 Training accuracy: 0.99
Testing accuracy: 92.33
-------------------------------------------------
Round 4
-------------------------------------------------
Selection Time: 32.49461555480957
Number of training points - 250
Training..
Epoch: 17 Training accuracy:

array([0.4801, 0.7903, 0.8702, 0.9233, 0.9551, 0.9625, 0.97  , 0.969 ,
       0.9797, 0.9773, 0.9843, 0.9859, 0.9848, 0.9851, 0.9863, 0.9886,
       0.9899, 0.9905, 0.9905, 0.9922, 0.9911, 0.9936, 0.9938, 0.9921,
       0.9935])

## CoreSet

In [17]:
# Run the active-learning experiment with the CoreSet strategy.
strategy = "coreset"

# Per-strategy log directory: <logs_directory><data_set_name>/<strategy>/
strat_logs = logs_directory + f"{data_set_name}/{strategy}/"
os.makedirs(strat_logs, exist_ok=True)

# A deep copy of initial_model is passed, so initial_model itself is not the
# object handed to train_one. The call stays the last expression so its return
# value (an array whose entries match the logged testing accuracies / 100) is
# displayed as the cell output.
train_one(
    full_train_dataset,
    initial_train_indices,
    test_dataset,
    copy.deepcopy(initial_model),
    n_rounds,
    budget,
    args,
    nclasses,
    strategy,
    strat_logs,
    checkpoint_directory,
    f"{experiment_name}_{strategy}",
)

Initial Testing accuracy: 48.01
-------------------------------------------------
Round 1
-------------------------------------------------
Selection Time: 13.748229265213013
Number of training points - 100
Training..
50 training accuracy: 0.9
Epoch: 60 Training accuracy: 0.99
Testing accuracy: 85.09
-------------------------------------------------
Round 2
-------------------------------------------------
Selection Time: 13.747932434082031
Number of training points - 150
Training..
Epoch: 24 Training accuracy: 0.993
Testing accuracy: 88.68
-------------------------------------------------
Round 3
-------------------------------------------------
Selection Time: 13.782348155975342
Number of training points - 200
Training..
Epoch: 10 Training accuracy: 0.99
Testing accuracy: 82.51
-------------------------------------------------
Round 4
-------------------------------------------------
Selection Time: 13.821179866790771
Number of training points - 250
Training..
Epoch: 34 Training accu

array([0.4801, 0.8509, 0.8868, 0.8251, 0.9111, 0.9544, 0.9549, 0.9559,
       0.9689, 0.9697, 0.9766, 0.9748, 0.9733, 0.9825, 0.9822, 0.9815,
       0.9859, 0.9865, 0.9831, 0.9856, 0.9798, 0.9852, 0.9856, 0.9761,
       0.9845])

## Least Confidence

In [18]:
# Run the active-learning experiment with the least-confidence strategy.
strategy = "least_confidence"

# Per-strategy log directory: <logs_directory><data_set_name>/<strategy>/
strat_logs = logs_directory + f"{data_set_name}/{strategy}/"
os.makedirs(strat_logs, exist_ok=True)

# A deep copy of initial_model is passed, so initial_model itself is not the
# object handed to train_one. The call stays the last expression so its return
# value (an array whose entries match the logged testing accuracies / 100) is
# displayed as the cell output.
train_one(
    full_train_dataset,
    initial_train_indices,
    test_dataset,
    copy.deepcopy(initial_model),
    n_rounds,
    budget,
    args,
    nclasses,
    strategy,
    strat_logs,
    checkpoint_directory,
    f"{experiment_name}_{strategy}",
)

Initial Testing accuracy: 48.01
-------------------------------------------------
Round 1
-------------------------------------------------
Selection Time: 23.25375533103943
Number of training points - 100
Training..
50 training accuracy: 0.9
Epoch: 72 Training accuracy: 0.99
Testing accuracy: 57.55
-------------------------------------------------
Round 2
-------------------------------------------------
Selection Time: 23.06514883041382
Number of training points - 150
Training..
Epoch: 35 Training accuracy: 0.993
Testing accuracy: 89.98
-------------------------------------------------
Round 3
-------------------------------------------------
Selection Time: 23.014482498168945
Number of training points - 200
Training..
Epoch: 42 Training accuracy: 0.99
Testing accuracy: 86.61
-------------------------------------------------
Round 4
-------------------------------------------------
Selection Time: 22.961421251296997
Number of training points - 250
Training..
Epoch: 14 Training accura

array([0.4801, 0.5755, 0.8998, 0.8661, 0.9084, 0.9631, 0.95  , 0.9771,
       0.9801, 0.9784, 0.9832, 0.9874, 0.9881, 0.99  , 0.9868, 0.9906,
       0.9912, 0.9905, 0.9921, 0.9901, 0.9904, 0.9927, 0.9916, 0.9917,
       0.9938])

## Margin

In [19]:
# Run the active-learning experiment with the margin strategy.
strategy = "margin"

# Per-strategy log directory: <logs_directory><data_set_name>/<strategy>/
strat_logs = logs_directory + f"{data_set_name}/{strategy}/"
os.makedirs(strat_logs, exist_ok=True)

# A deep copy of initial_model is passed, so initial_model itself is not the
# object handed to train_one. The call stays the last expression so its return
# value (an array whose entries match the logged testing accuracies / 100) is
# displayed as the cell output.
train_one(
    full_train_dataset,
    initial_train_indices,
    test_dataset,
    copy.deepcopy(initial_model),
    n_rounds,
    budget,
    args,
    nclasses,
    strategy,
    strat_logs,
    checkpoint_directory,
    f"{experiment_name}_{strategy}",
)

Initial Testing accuracy: 48.01
-------------------------------------------------
Round 1
-------------------------------------------------
Selection Time: 23.16196632385254
Number of training points - 100
Training..
50 training accuracy: 0.94
Epoch: 51 Training accuracy: 0.99
Testing accuracy: 87.72
-------------------------------------------------
Round 2
-------------------------------------------------
Selection Time: 23.13338041305542
Number of training points - 150
Training..
Epoch: 30 Training accuracy: 1.0
Testing accuracy: 88.45
-------------------------------------------------
Round 3
-------------------------------------------------
Selection Time: 22.73502540588379
Number of training points - 200
Training..
Epoch: 40 Training accuracy: 0.99
Testing accuracy: 94.0
-------------------------------------------------
Round 4
-------------------------------------------------
Selection Time: 22.577221870422363
Number of training points - 250
Training..
Epoch: 19 Training accuracy:

array([0.4801, 0.8772, 0.8845, 0.94  , 0.9353, 0.9506, 0.961 , 0.9724,
       0.9788, 0.9788, 0.9801, 0.9849, 0.9871, 0.9811, 0.9873, 0.9885,
       0.9905, 0.9885, 0.9868, 0.9908, 0.9892, 0.9883, 0.9905, 0.9911,
       0.9927])

In [20]:
# Record the end timestamp once all strategy runs above have finished.
# time.monotonic() returns seconds from a clock that cannot go backwards, so
# differences between two readings are safe to use as elapsed durations.
end_time = time.monotonic()

In [21]:
# Elapsed time in hours: difference of the two timestamps (seconds) / 3600.
# NOTE(review): assumes start_time was also taken with time.monotonic() in an
# earlier cell — confirm, since mixing clocks would make this meaningless.
(end_time - start_time) / 3600

2.0116025433813856