## Active learning

<img src="./Perception_4_ActiveLearning_7_OnlineLearning.png" alt="Drawing" style="width: 800px;"/>

In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import numpy as np
import cv2
import matplotlib.pyplot as plt
from utils import visualize, pickle_load, pickle_save
import torch
import numpy as np
from tqdm import tqdm
import segmentation_models_pytorch as smp
# segmentation models wrapper
from segmodel import SegModel, model_selection_function
# anomaly detection functions
from anomaly_detection import sample_selection_function
# datasets wrapper
from dataset import CamVid, BDD100K

### Load data

In [2]:
# # CamVid directories
# DATA_DIR = './data/CamVid/'

# x_train_dir = os.path.join(DATA_DIR, 'train')
# y_train_dir = os.path.join(DATA_DIR, 'trainannot')

# x_valid_dir = os.path.join(DATA_DIR, 'val')
# y_valid_dir = os.path.join(DATA_DIR, 'valannot')

# x_test_dir = os.path.join(DATA_DIR, 'test')
# y_test_dir = os.path.join(DATA_DIR, 'testannot')

# BDD100K directories
DATA_DIR = '/home/ruslan/datasets/bdd100k/seg/'
x_train_dir = os.path.join(DATA_DIR, 'images/train')
y_train_dir = os.path.join(DATA_DIR, 'labels/train')

x_valid_dir = os.path.join(DATA_DIR, 'images/val')
y_valid_dir = os.path.join(DATA_DIR, 'labels/val')

x_test_dir = os.path.join(DATA_DIR, 'images/test')

In [3]:
# all data paths
X_train_paths = np.array([os.path.join(x_train_dir, image_name) for image_name in os.listdir(x_train_dir)])[:300]
y_train_paths = np.array([os.path.join(y_train_dir, image_name) for image_name in os.listdir(y_train_dir)])[:300]

X_valid_paths = np.array([os.path.join(x_valid_dir, image_name) for image_name in os.listdir(x_valid_dir)])[:100]
y_valid_paths = np.array([os.path.join(y_valid_dir, image_name) for image_name in os.listdir(y_valid_dir)])

X_test_paths = np.array([os.path.join(x_test_dir, image_name) for image_name in os.listdir(x_test_dir)])[:100]
#y_test_paths = np.array([os.path.join(y_test_dir, image_name) for image_name in os.listdir(y_test_dir)])

### Active Learning experiment

- X_train, y_train: is used partially to train a model
- X_valid, y_valid: is used fully for validation
- X_test, y_test: is used as an unlabelled set to detect anomalies and add labels to train set

#### Main AL experiment

In [4]:
def al_experiment(model_str,
                  samples_selection_fn,
                  k,
                  visualize_most_uncertain=False,
                  verbose_train=False,
                  random_seed=0):
    # define model from its name
    model = model_selection_function(model_str)
    model.epochs = MODEL_TRAIN_EPOCHS
    # define samples selection function from its name
    samples_selection_fn = sample_selection_function(samples_selection_str)
    
    # select k random samples from initial dataset and treat it as initially labelled data
    X = np.copy(X_train_paths)
    y = np.copy(y_train_paths)
    np.random.seed(random_seed)
    initial_selection = np.random.choice(len(X), INITIAL_N_TRAIN_IMAGES, replace=False) # k
    X_train_paths_part = X[initial_selection]
    y_train_paths_part = y[initial_selection]

    X_test = np.delete(X, initial_selection)
    y_test = np.delete(X, initial_selection)

    IoUs = [0.]
    N_train_samples = [len(X_train_paths_part)]

    # main loop
    while len(X_train_paths_part) <= MAX_QUEERY_IMAGES:
        # train model
        print('Labelled set size: ', len(X_train_paths_part))
        print('Unlabelled set size: ', len(X_test))
        print(f'\nTraining a model for {MODEL_TRAIN_EPOCHS} epochs...')
        model.train(X_train_paths_part,
                    y_train_paths_part,
                    X_valid_paths,
                    y_valid_paths,
                    Dataset=BDD100K,
                    verbose=verbose_train)

        # remeber results
        print(f'IoU so far: {model.max_iou_score}')
        IoUs.append(model.max_iou_score)
        N_train_samples.append(len(X_train_paths_part))
        
        if len(X_test) < k:
            print('\nNo more images in Unlabelled set')
            break
            
        selected_images_indexes = samples_selection_fn(X_test, k, model)

        # Add labels for uncertain images to train data
        #print('Labelled set before: ', len(X_train_paths_part))
        X_train_paths_part = np.concatenate([X_train_paths_part, X_test[selected_images_indexes]])
        y_train_paths_part = np.concatenate([y_train_paths_part, y_test[selected_images_indexes]])
        #print('Labelled set after: ', len(X_train_paths_part))

        # Visualization
        if visualize_most_uncertain:
            print('Visualizing most uncertain results so far:')
            for i in selected_images_indexes[:1]:
                img_path = X_test[i]
                image = cv2.imread(img_path)[...,(2,1,0)]
                gt_mask = cv2.imread(y_test_paths[i])
                pr_mask = model.predict([img_path])
                mask_np = pr_mask.squeeze().cpu().numpy().round()

                visualize(image=image, car_mask=mask_np[0,...], road_mask=mask_np[1,...])

        # Remove labelled data from validation set
        #print('Unlabelled set before: ', len(X_test))
        X_test = np.delete(X_test, selected_images_indexes)
        y_test = np.delete(y_test, selected_images_indexes)
        #print('Unlabelled set after: ', len(X_test))
        
    print(f'Max IoU score: {np.max(IoUs)}')
    print('----------------------------------------\n')
    return IoUs, N_train_samples

In [5]:
MAX_QUEERY_IMAGES = 120 # 220 # maximum number of images to train on during AL loop
MODEL_TRAIN_EPOCHS = 1 # 5 # number of epochs to train a model during one AL cicle
INITIAL_N_TRAIN_IMAGES = 60 # 20, initial number of accessible labelled images
NUM_UNCERTAIN_IMAGES = [10]#, 20]#, 40, 60] # k: number of uncertain images to label at each AL cicle
SAMPLES_SELECTIONS = ['Margin', 'Random', 'Entropy']
MODELS = ['Unet']#, 'Linknet', 'FPN', 'PSPNet']

In [6]:
name = ''
for model in MODELS:
    name += model + '_'
name += 'Nsamples_'+str(MAX_QUEERY_IMAGES)
name += '_epochs_'+str(MODEL_TRAIN_EPOCHS)
name += '_N0_'+str(INITIAL_N_TRAIN_IMAGES)
name += '_Ks_'
for k in NUM_UNCERTAIN_IMAGES:
    name += str(k) + '_'
for fn in SAMPLES_SELECTIONS:
    name += fn + '_'
RESULTS_FNAME = name+'.pkl'
print(RESULTS_FNAME)

Unet_Nsamples_120_epochs_1_N0_60_Ks_10_Margin_Random_Entropy_.pkl


In [None]:
results = {}

# choose model
for model_str in MODELS:
    print(f'\nModel name: {model_str}')
    print('------------------------------------')
    results[model_str] = {}
    
    # choose samples selection function
    for samples_selection_str in SAMPLES_SELECTIONS:
        print(f'\nSamples selection function: {samples_selection_str}')
        print('------------------------------------')
        results[model_str][samples_selection_str] = {}
        
        # choose number of samples to select for labelling from inference results
        for k in NUM_UNCERTAIN_IMAGES:
            print(f'\nNumber of samples to label on one iteration, k={k}')
            print('------------------------------------')
            results[model_str][samples_selection_str][str(k)] = {}
            
            IoUs, N_train_samples = al_experiment(model_str, samples_selection_str, k, verbose_train=True)
            
            results[model_str][samples_selection_str][str(k)]['IoUs'] = IoUs
            results[model_str][samples_selection_str][str(k)]['N_train_samples'] = N_train_samples
            
pickle_save(RESULTS_FNAME, results)


Model name: Unet
------------------------------------

Samples selection function: Margin
------------------------------------

Number of samples to label on one iteration, k=10
------------------------------------
Labelled set size:  60
Unlabelled set size:  240

Training a model for 1 epochs...

Epoch: 0
train: 100%|██████████| 8/8 [00:15<00:00,  1.88s/it, dice_loss - 0.9565, iou_score - 5.149e-05]
valid: 100%|██████████| 100/100 [00:14<00:00,  6.72it/s, dice_loss - 0.9605, iou_score - 0.0002759]
Model saved!


  0%|          | 0/240 [00:00<?, ?it/s]

IoU so far: 0.00027594741409756046
Inference on unlabelled data...


100%|██████████| 240/240 [00:18<00:00, 12.73it/s]

Min margin: 5.10,             Mean margin: 8.05,             Max margin: 9.84
Labelled set size:  70
Unlabelled set size:  230

Training a model for 1 epochs...

Epoch: 0
train:   0%|          | 0/9 [00:00<?, ?it/s]




train: 100%|██████████| 9/9 [00:17<00:00,  1.95s/it, dice_loss - 0.9468, iou_score - 0.0001634]
valid: 100%|██████████| 100/100 [00:15<00:00,  6.61it/s, dice_loss - 0.9491, iou_score - 0.0002746]
Model saved!


  0%|          | 0/230 [00:00<?, ?it/s]

IoU so far: 0.00027458306670131896
Inference on unlabelled data...


100%|██████████| 230/230 [00:18<00:00, 12.62it/s]

Min margin: 6.98,             Mean margin: 9.08,             Max margin: 9.59
Labelled set size:  80
Unlabelled set size:  220

Training a model for 1 epochs...

Epoch: 0
train:   0%|          | 0/10 [00:00<?, ?it/s]




train: 100%|██████████| 10/10 [00:19<00:00,  1.96s/it, dice_loss - 0.9345, iou_score - 0.0005683]
valid: 100%|██████████| 100/100 [00:14<00:00,  6.69it/s, dice_loss - 0.9337, iou_score - 0.003491]
Model saved!


  0%|          | 0/220 [00:00<?, ?it/s]

IoU so far: 0.0034905777251435463
Inference on unlabelled data...


100%|██████████| 220/220 [00:18<00:00, 11.69it/s]

Min margin: 4.25,             Mean margin: 8.32,             Max margin: 9.50
Labelled set size:  90
Unlabelled set size:  210

Training a model for 1 epochs...

Epoch: 0
train:   0%|          | 0/12 [00:00<?, ?it/s]




train: 100%|██████████| 12/12 [00:21<00:00,  1.83s/it, dice_loss - 0.9172, iou_score - 0.006352]
valid: 100%|██████████| 100/100 [00:14<00:00,  6.68it/s, dice_loss - 0.8996, iou_score - 0.04252]
Model saved!


  0%|          | 0/210 [00:00<?, ?it/s]

IoU so far: 0.042519463011093585
Inference on unlabelled data...


100%|██████████| 210/210 [00:16<00:00, 12.80it/s]

Min margin: 3.55,             Mean margin: 7.92,             Max margin: 9.44
Labelled set size:  100
Unlabelled set size:  200

Training a model for 1 epochs...

Epoch: 0
train:   0%|          | 0/13 [00:00<?, ?it/s]




train: 100%|██████████| 13/13 [00:23<00:00,  1.83s/it, dice_loss - 0.8905, iou_score - 0.04939]
valid: 100%|██████████| 100/100 [00:14<00:00,  6.67it/s, dice_loss - 0.9055, iou_score - 0.04181]
Model saved!


  0%|          | 0/200 [00:00<?, ?it/s]

IoU so far: 0.04181337986207926
Inference on unlabelled data...


100%|██████████| 200/200 [00:15<00:00, 12.97it/s]

Min margin: 6.07,             Mean margin: 8.58,             Max margin: 9.57
Labelled set size:  110
Unlabelled set size:  190

Training a model for 1 epochs...

Epoch: 0
train:   0%|          | 0/14 [00:00<?, ?it/s]




train: 100%|██████████| 14/14 [00:24<00:00,  1.79s/it, dice_loss - 0.8715, iou_score - 0.09207]
valid: 100%|██████████| 100/100 [00:15<00:00,  6.36it/s, dice_loss - 0.8883, iou_score - 0.06712]
Model saved!


  0%|          | 0/190 [00:00<?, ?it/s]

IoU so far: 0.06712398571004416
Inference on unlabelled data...


100%|██████████| 190/190 [00:14<00:00, 13.46it/s]

Min margin: 3.06,             Mean margin: 7.21,             Max margin: 9.53
Labelled set size:  120
Unlabelled set size:  180

Training a model for 1 epochs...

Epoch: 0
train:   0%|          | 0/15 [00:00<?, ?it/s]




train: 100%|██████████| 15/15 [00:24<00:00,  1.66s/it, dice_loss - 0.8262, iou_score - 0.1652]
valid: 100%|██████████| 100/100 [00:15<00:00,  6.64it/s, dice_loss - 0.8996, iou_score - 0.0552]
Model saved!


  0%|          | 0/180 [00:00<?, ?it/s]

IoU so far: 0.05520060291341972
Inference on unlabelled data...


100%|██████████| 180/180 [00:13<00:00, 13.26it/s]


Min margin: 3.55,             Mean margin: 8.00,             Max margin: 9.67
Max IoU score: 0.06712398571004416
----------------------------------------


Samples selection function: Random
------------------------------------

Number of samples to label on one iteration, k=10
------------------------------------
Labelled set size:  60
Unlabelled set size:  240

Training a model for 1 epochs...

Epoch: 0
train: 100%|██████████| 8/8 [00:14<00:00,  1.87s/it, dice_loss - 0.9559, iou_score - 0.0004753]
valid: 100%|██████████| 100/100 [00:14<00:00,  6.68it/s, dice_loss - 0.9517, iou_score - 4.439e-13]
Model saved!
IoU so far: 4.4387243094403884e-13
Labelled set size:  70
Unlabelled set size:  230

Training a model for 1 epochs...

Epoch: 0
train: 100%|██████████| 9/9 [00:17<00:00,  1.89s/it, dice_loss - 0.9488, iou_score - 0.002451] 
valid: 100%|██████████| 100/100 [00:14<00:00,  6.96it/s, dice_loss - 0.9242, iou_score - 0.01838]
Model saved!
IoU so far: 0.01838080269108045
Labelled set si

## Results

In [None]:
results = pickle_load(RESULTS_FNAME)
# results = pickle_load('Unet_epochs_2_N0_80_Ks_10_20_Margin_Random_Entropy_.pkl')

plt.figure(figsize=(8,8))

# choose model
for model_str in MODELS:    
    # choose samples selection function
    for samples_selection_str in SAMPLES_SELECTIONS:        
        # choose number of samples to select for labelling from inference results
        for k in NUM_UNCERTAIN_IMAGES:

            ious = results[model_str][samples_selection_str][str(k)]['IoUs']
            n_train = results[model_str][samples_selection_str][str(k)]['N_train_samples']

            plt.plot(np.array(n_train[1:]), ious[1:], label=model_str+'_'+samples_selection_str+'_k='+str(k))
        
plt.grid()
plt.title('Active Learning Results', fontsize=18)
plt.xlabel('N images / full train set size', fontsize=16)
plt.ylabel('IoU', fontsize=16)
plt.legend();