In [1]:
import os


# Resolve the project root (the parent of the directory the kernel started in) and
# make it the working directory, so the relative paths used below resolve against it.
# The guard keeps the cell idempotent: without it, every re-run would take the parent
# of the *current* working directory again and climb one more level up the tree.
if "HOME_DIRECTORY" not in globals():
    HOME_DIRECTORY = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

os.chdir(HOME_DIRECTORY)


In [2]:
# --- Process environment -------------------------------------------------------
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # sync ids with nvidia-smi
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["MKL_SERVICE_FORCE_INTEL"] = "1"

# --- Experiment identity -------------------------------------------------------
# When running more than one experiment on the same machine, give each experiment
# its own port.
port = 10002
sampling_fn = "uncertainty"

lSet_partition = 1
base_seed = 1
num_GPU = 1

# --- Training hyper-parameters -------------------------------------------------
dataset = "CIFAR10"
clf_lr = 5e-4
clf_wt_decay = 5e-4
init_partition = 10  # presumably a percentage of the dataset -- TODO confirm
step_partition = 10  # presumably a percentage of the dataset -- TODO confirm
swa_lr = 5e-4
swa_freq = 50
swa_epochs = 2
optim_type = "adam"
clf_epochs = 5  # original note: 100
num_classes = 10

tr_bs = 64
ts_bs = 200
log_iter = 40

# --- Data arguments ------------------------------------------------------------
# Every path is derived from `dataset` and `lSet_partition`, so changing either
# setting above cannot leave a stale hard-coded dataset name behind.
data_root = f"./data/{dataset}"
partition_dir = f"{data_root}/partition_{lSet_partition}"

train_dir = f"{data_root}/train-{dataset}/"
test_dir = f"{data_root}/test-{dataset}/"
lSetPath = f"{partition_dir}/lSet_{dataset}.npy"
uSetPath = f"{partition_dir}/uSet_{dataset}.npy"
valSetPath = f"{partition_dir}/valSet_{dataset}.npy"
out_dir = f"./results/best_automl_results/lSet_{lSet_partition}/start_{base_seed}/"

# --- Model ---------------------------------------------------------------------
model_style = "vgg_style"
model_type = "vgg"
model_depth = 16

model_cfg_file = f"configs/{dataset}/{model_style}/{model_type}/R-18_4gpu.yaml"



In [3]:
# Run active learning and check whether the active set is reproduced using the uncertainty method.
# Precisely, we compare the active set drawn after running the command below with the active set that was used to
# report the performance in the main paper.


!python3 $HOME_DIRECTORY/tools/train_al.py --n_GPU $num_GPU \
--port $port --out_dir $out_dir --dataset $dataset --seed_id $base_seed \
--model_type $model_type --model_depth $model_depth --train_batch_size $tr_bs --test_batch_size $ts_bs \
--lr $clf_lr --wt_decay $clf_wt_decay --clf_epochs $clf_epochs --num_classes $num_classes --eval_period 1 \
--checkpoint_period 1 --cfg configs/$dataset/$model_style/$model_type/R-18_4gpu.yaml \
--lSetPath $lSetPath --uSetPath $uSetPath --valSetPath $valSetPath \
--train_dir $train_dir --test_dir $test_dir \
--init_partition $init_partition --al_mode --sampling_fn $sampling_fn \
--step_partition $step_partition --al_max_iter 2 --budget_size 5000 \
--optim $optim_type --log_iter $log_iter


== al_model_phase: True ==
Directory_specific: vanilla
Dataset is augmented
--------------------------------------
Preprocess Operations Selected ==>  [RandomHorizontalFlip(p=0.5), ToTensor()]
--------------------------------------
Files already downloaded and verified
Dataset is augmented
--------------------------------------
Preprocess Operations Selected ==>  [ToTensor()]
--------------------------------------
Files already downloaded and verified
Dataset is augmented
--------------------------------------
Preprocess Operations Selected ==>  [ToTensor()]
--------------------------------------
Files already downloaded and verified
~~ Constructing al_args for 20.0 with alStart: True
best_model_path chosen: ./results/best_automl_results/lSet_1/start_1/CIFAR10/10.0/vgg_depth_16/vanilla/checkpoints/vlBest_acc_70.47999824523926_model_epoch_0144.pyth
al_args: ['ACTIVE_LEARNING.LSET_PATH', './data/CIFAR10/partition_1/lSet_CIFAR10.npy', 'ACTIVE_LEARNING.USET_PATH', './data/CIFAR10/partition

[logging.py:  75]: json_stats: {"_type": "Val_epoch", "epoch": "1/5", "min_top1_err": 83.820000000000, "top1_err": 83.820000000000}
[train_al.py: 529]: Successfully logged numpy arrays!!
~~~ isPruning Flag:  True
~~~ isEvalEpoch:  True
[logging.py:  75]: json_stats: {"_type": "train_iter", "epoch": "2/5", "iter": "40/157", "loss": 2.040542125702, "lr": 0.000500000000, "top1_err": 82.812500000000}
[logging.py:  75]: json_stats: {"_type": "train_iter", "epoch": "2/5", "iter": "80/157", "loss": 1.986630797386, "lr": 0.000500000000, "top1_err": 80.468750000000}
[logging.py:  75]: json_stats: {"_type": "train_iter", "epoch": "2/5", "iter": "120/157", "loss": 1.963099718094, "lr": 0.000500000000, "top1_err": 80.468750000000}
[logging.py:  75]: json_stats: {"_type": "train_epoch", "epoch": "2/5", "loss": 1.990457831192, "lr": 0.000500000000, "top1_err": 80.580000000000}
[logging.py:  75]: json_stats: {"_type": "Val_epoch", "epoch": "2/5", "min_top1_err": 80.520000000000, "top1_err": 80.520000

In [4]:
# The AL sets drawn by the run above are saved at:
#   $out_dir/CIFAR10/20.0/1/vgg_depth_16/vanilla/uncertainty
# --> According to the code, the following files are saved there:
# 1. activeSet.txt    --> contains the indices of the active set.
# 2. actualScores.txt --> contains the uncertainty scores for the unlabeled set
#    (uncertainty == maximum confidence among the probabilities of all classes).


# To check reproducibility, compare the old active set with the newly drawn one.
# Both paths are anchored at HOME_DIRECTORY instead of a user-specific absolute path.
# NOTE(review): this assumes HOME_DIRECTORY is the project root that holds both the
# `iclr_results` and `results` directories -- confirm for your checkout.

old_as_fpath = os.path.join(
    HOME_DIRECTORY,
    "iclr_results/best_automl_results/lSet_1/start_1/CIFAR10/20.0/uncertainty/vgg_depth_16/vanilla/activeSet.txt",
)
# out_dir starts with "./", so normalise the joined path to drop the "." segment.
new_as_fpath = os.path.normpath(
    os.path.join(
        HOME_DIRECTORY,
        out_dir,
        "CIFAR10/20.0/1/vgg_depth_16/vanilla/uncertainty/activeSet.txt",
    )
)



In [5]:
def read_text_file(fpath):
    """
    Read a text file into a list of its lines.

    Args:
    fpath (str, required): Path to the text file.

    Returns:
    List: One string per line, in file order. The trailing new line character is
    removed from each line, if it exists.

    Raises:
    AssertionError: If fpath does not point to an existing file.
    """
    assert os.path.isfile(fpath), f"Expected {os.path.basename(fpath)} file to exist here!"
    # The context manager closes the handle even when reading fails part-way,
    # which the previous manual open()/close() pair did not guarantee.
    with open(fpath, "r") as fp:
        return [line.rstrip("\n") for line in fp]
             
# Load the previously reported and the freshly drawn active sets, one entry per line.
old_active_set = read_text_file(fpath=old_as_fpath)
new_active_set = read_text_file(fpath=new_as_fpath)

In [6]:
# Report how many entries each active set holds before comparing their contents.
old_count, new_count = len(old_active_set), len(new_active_set)
print("#Points in old active set: ", old_count)
print("#Points in new active set: ", new_count)

#Points in old active set:  5000
#Points in new active set:  5000


In [7]:
# Check whether the two active sets are the same: they are identical when the number
# of entries they share equals the length of each of them.

intersection_set = set(old_active_set) & set(new_active_set)
print("Len intersection set: ", len(intersection_set))

Len intersection set:  5000


In [None]:
# The length of the intersection set matches the length of both active sets, so the two active sets are the same.
# Hence, this notebook confirms that the active set is reproducible.