# Code  
---
This code is taken from: https://github.com/rafikg/CEAL
## Import all the required stuff

In [1]:
# imported from run_ceal/ceal_learning_algorithm.py
from model import AlexNet
from utils import Normalize, RandomCrop, SquarifyImage, \
    ToTensor, GameImageDataset
from utils import get_uncertain_samples, get_high_confidence_samples, \
    update_threshold
from torch.utils.data import DataLoader
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler

import numpy as np
import torch
import logging


# others
from sklearn.metrics import precision_recall_fscore_support as prfs

# Route INFO-and-above records to the root handler using a compact
# "LEVEL:logger-name: message" layout.
logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)s:%(name)s: %(message)s",
)

# Module-level logger shared by the cells below.
logger = logging.getLogger(__name__)

In [2]:
# Keep the original preference for the second GPU (index 1), but fall back to
# the first GPU or to the CPU so the notebook still runs on hosts that do not
# have two CUDA devices.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

## CEAL Algorithm

In [3]:
def ceal_learning_algorithm(du: DataLoader,
                            dl: DataLoader,
                            dtest: DataLoader,
                            k: int = 20,
                            delta_0: float = 0.005,
                            dr: float = 0.00033,
                            t: int = 1,
                            epochs: int = 10,
                            criteria: str = 'cl',
                            max_iter: int = 15,
                            n_classes: int = 4) -> None:
    """
    Algorithm 1: learning algorithm of CEAL.

    The notation follows the CEAL paper. The function trains a model on
    `dl`, then repeatedly moves the `k` most uncertain samples of `du` into
    `dl`, adds pseudo-labelled high-confidence samples to `dl`, fine-tunes,
    and evaluates on `dtest`. The trained model is finally written to
    ``saved_model/alexnet.pth``.

    Parameters
    ----------
    du: DataLoader
        Unlabeled samples. Its ``sampler.indices`` list is shrunk in place.
    dl : DataLoader
        Labeled samples. Its ``sampler.indices`` list is extended in place
        and ``dataset.labels`` is overwritten with one-hot pseudo labels for
        the high-confidence samples.
    dtest : DataLoader
        Test data, only used for evaluation.
    k: int, (default = 20)
        Number of uncertain samples selected per iteration.
    delta_0: float
        High-confidence sample selection threshold.
    dr: float
        Threshold decay.
    t: int
        Fine-tuning interval (fine-tune when ``iteration % t == 0``).
    epochs: int
        Number of epochs for the initial training and each fine-tuning.
    criteria: str
        Uncertainty criterion forwarded to `get_uncertain_samples`.
    max_iter: int
        Maximum iteration number.
    n_classes: int, (default = 4)
        Number of classes; sizes the model head, the one-hot pseudo labels
        and the averaging of the per-class metrics.

    Returns
    -------
    None
        Results are reported through logging/printing and the saved model.
    """
    # Local import: only needed to create the checkpoint directory.
    import os

    logger.info('Initial configuration: len(du): {}, len(dl): {} '.format(
        len(du.sampler.indices),
        len(dl.sampler.indices)))

    # Create the model
    model = AlexNet(n_classes=n_classes, device=device)

    # Initialize the model
    logger.info('Intialize training the model on `dl` and test on `dtest`')

    model.train(epochs=epochs, train_loader=dl, valid_loader=None)

    # Evaluate model on dtest
    p, r, f = model.evaluate(test_loader=dtest)

    # p, r, f are summed and divided by the class count, i.e. reported as
    # an unweighted mean -- presumably one value per class; verify against
    # `model.evaluate`.
    print('====> Initial precision: {} '.format(sum(p)/n_classes))
    print('====> Initial recall: {} '.format(sum(r)/n_classes))
    print('====> Initial f-score: {} '.format(sum(f)/n_classes))

    for iteration in range(max_iter):

        logger.info('Iteration: {}: run prediction on unlabeled data '
                    '`du` '.format(iteration))

        # NOTE(review): the index mapping below assumes row i of `pred_prob`
        # corresponds to du.sampler.indices[i] -- TODO confirm; a sampler
        # that shuffles on every pass would break this correspondence.
        pred_prob = model.predict(test_loader=du)

        # get k uncertain samples
        uncert_samp_idx, _ = get_uncertain_samples(pred_prob=pred_prob, k=k,
                                                   criteria=criteria)

        # get original indices
        uncert_samp_idx = [du.sampler.indices[idx] for idx in uncert_samp_idx]

        # add the uncertain samples selected from `du` to the labeled samples
        #  set `dl`
        dl.sampler.indices.extend(uncert_samp_idx)

        logger.info(
            'Update size of `dl`  and `du` by adding uncertain {} samples'
            ' in `dl`'
            ' len(dl): {}, len(du) {}'.
            format(len(uncert_samp_idx), len(dl.sampler.indices),
                   len(du.sampler.indices)))

        # get high confidence samples `dh`
        hcs_idx, hcs_labels = get_high_confidence_samples(pred_prob=pred_prob,
                                                          delta=delta_0)

        # Map to the original indices and drop the samples already selected
        # as uncertain. Index and pseudo label are filtered *together* so
        # that each remaining sample keeps its own label (filtering only the
        # indices would shift the labels onto the wrong samples).
        uncertain_set = set(uncert_samp_idx)
        hcs_pairs = []
        for position, pseudo_label in zip(hcs_idx, hcs_labels):
            original_idx = du.sampler.indices[position]
            if original_idx not in uncertain_set:
                hcs_pairs.append((original_idx, pseudo_label))
        hcs_idx = [original_idx for original_idx, _label in hcs_pairs]

        # add high confidence samples to the labeled set 'dl'

        # (1) update the indices
        # NOTE(review): these samples are never removed from `dl` (or from
        # `du`) afterwards, so they may be re-added in later iterations and
        # their stored labels stay overwritten -- confirm this is intended.
        dl.sampler.indices.extend(hcs_idx)
        # (2) update the original labels with the one-hot pseudo labels.
        for original_idx, pseudo_label in hcs_pairs:
            one_hot = [0] * n_classes
            one_hot[pseudo_label] = 1
            dl.dataset.labels[original_idx] = one_hot
        logger.info(
            'Update size of `dl`  and `du` by adding {} hcs samples in `dl`'
            ' len(dl): {}, len(du) {}'.
            format(len(hcs_idx), len(dl.sampler.indices),
                   len(du.sampler.indices)))

        if iteration % t == 0:
            logger.info('Iteration: {} fine-tune the model on dh U dl'.
                        format(iteration))
            model.train(epochs=epochs, train_loader=dl)

            # update delta_0
            # NOTE(review): the already-updated threshold is fed back in
            # together with the growing iteration counter -- verify against
            # `update_threshold` that the decay is not applied twice.
            delta_0 = update_threshold(delta=delta_0, dr=dr, t=iteration)

        # remove the uncertain samples from the original `du`
        logger.info('remove {} uncertain samples from du'.
                    format(len(uncert_samp_idx)))
        for val in uncert_samp_idx:
            du.sampler.indices.remove(val)

        p, r, f = model.evaluate(test_loader=dtest)

        print(
            "Iteration: {}, len(dl): {}, len(du): {},"
            " len(dh) {}".format(
                iteration, len(dl.sampler.indices),
                len(du.sampler.indices), len(hcs_idx)))

        print("Precision:", sum(p)/n_classes)
        print("Recall:", sum(r)/n_classes)
        print("F1:", sum(f)/n_classes)

    print("Saving model...")
    save_path = 'saved_model/alexnet.pth'
    # torch.save does not create missing parent directories.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    torch.save(model, save_path)
    print("Model saved as: saved_model/alexnet.pth")

In [4]:
def _image_pipeline():
    """Return a fresh preprocessing pipeline: squarify, random 224 crop,
    normalize, convert to tensor."""
    return transforms.Compose(
        [SquarifyImage(),
         RandomCrop(224),
         Normalize(),
         ToTensor()])


# Both datasets use the same preprocessing steps, each with its own
# pipeline instance.
dataset_train = GameImageDataset(root_dir="data/train",
                                 transform=_image_pipeline())

dataset_test = GameImageDataset(root_dir="data/test",
                                transform=_image_pipeline())

# Split configuration. The "validation" share is used below as the initial
# labeled pool `dl`; the remaining samples form the unlabeled pool `du`.
random_seed = 123
validation_split = 0.1  # 10%
shuffling_dataset = True
batch_size = 16
dataset_size = len(dataset_train)

split = int(np.floor(validation_split * dataset_size))
indices = list(range(dataset_size))

if shuffling_dataset:
    # Seed the global NumPy RNG so the shuffle is repeatable.
    np.random.seed(random_seed)
    np.random.shuffle(indices)

val_indices = indices[:split]
train_indices = indices[split:]

# Samplers restricted to each index subset.
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

# du: unlabeled pool (90%), dl: initial labeled pool (10%), dtest: test set.
du = DataLoader(dataset_train, batch_size=batch_size,
                sampler=train_sampler, num_workers=1)
dl = DataLoader(dataset_train, batch_size=batch_size,
                sampler=valid_sampler, num_workers=1)
dtest = DataLoader(dataset_test, batch_size=batch_size, num_workers=1)

ceal_learning_algorithm(du=du, dl=dl, dtest=dtest, k=60, max_iter=5, epochs=30)

INFO:__main__: Initial configuration: len(du): 314, len(dl): 34 
INFO:model.alexnet: The code is running on cuda:1 
INFO:__main__: Intialize training the model on `dl` and test on `dtest`


====> Epoch: 0 Average loss: 0.0707
====> Epoch: 1 Average loss: 0.0521
====> Epoch: 2 Average loss: 0.0459
====> Epoch: 3 Average loss: 0.0501
====> Epoch: 4 Average loss: 0.0332
====> Epoch: 5 Average loss: 0.0406
====> Epoch: 6 Average loss: 0.0265
====> Epoch: 7 Average loss: 0.0259
====> Epoch: 8 Average loss: 0.0249
====> Epoch: 9 Average loss: 0.0250
====> Epoch: 10 Average loss: 0.0189
====> Epoch: 11 Average loss: 0.0132
====> Epoch: 12 Average loss: 0.0124
====> Epoch: 13 Average loss: 0.0135
====> Epoch: 14 Average loss: 0.0105
====> Epoch: 15 Average loss: 0.0147
====> Epoch: 16 Average loss: 0.0179
====> Epoch: 17 Average loss: 0.0133
====> Epoch: 18 Average loss: 0.0112
====> Epoch: 19 Average loss: 0.0106
====> Epoch: 20 Average loss: 0.0118
====> Epoch: 21 Average loss: 0.0101
====> Epoch: 22 Average loss: 0.0123
====> Epoch: 23 Average loss: 0.0102
====> Epoch: 24 Average loss: 0.0096
====> Epoch: 25 Average loss: 0.0084
====> Epoch: 26 Average loss: 0.0101
====> Epoch

INFO:__main__: Iteration: 0: run prediction on unlabeled data `du` 


====> Initial precision: 0.7089039432789431 
====> Initial recall: 0.7070942945942946 
====> Initial f-score: 0.6880104379257193 


INFO:__main__: Update size of `dl`  and `du` by adding uncertain 60 samples in `dl` len(dl): 94, len(du) 314
INFO:__main__: Update size of `dl`  and `du` by adding 0 hcs samples in `dl` len(dl): 94, len(du) 314
INFO:__main__: Iteration: 0 fine-tune the model on dh U dl


====> Epoch: 0 Average loss: 0.0332
====> Epoch: 1 Average loss: 0.0326
====> Epoch: 2 Average loss: 0.0299
====> Epoch: 3 Average loss: 0.0253
====> Epoch: 4 Average loss: 0.0227
====> Epoch: 5 Average loss: 0.0222
====> Epoch: 6 Average loss: 0.0218
====> Epoch: 7 Average loss: 0.0206
====> Epoch: 8 Average loss: 0.0175
====> Epoch: 9 Average loss: 0.0166
====> Epoch: 10 Average loss: 0.0154
====> Epoch: 11 Average loss: 0.0174
====> Epoch: 12 Average loss: 0.0175
====> Epoch: 13 Average loss: 0.0139
====> Epoch: 14 Average loss: 0.0161
====> Epoch: 15 Average loss: 0.0142
====> Epoch: 16 Average loss: 0.0141
====> Epoch: 17 Average loss: 0.0130
====> Epoch: 18 Average loss: 0.0132
====> Epoch: 19 Average loss: 0.0138
====> Epoch: 20 Average loss: 0.0137
====> Epoch: 21 Average loss: 0.0130
====> Epoch: 22 Average loss: 0.0112
====> Epoch: 23 Average loss: 0.0120
====> Epoch: 24 Average loss: 0.0121
====> Epoch: 25 Average loss: 0.0119
====> Epoch: 26 Average loss: 0.0117
====> Epoch

INFO:__main__: remove 60 uncertain samples from du


====> Epoch: 29 Average loss: 0.0120


INFO:__main__: Iteration: 1: run prediction on unlabeled data `du` 


Iteration: 0, len(dl): 94, len(du): 254, len(dh) 0
Precision: 0.6942364637677138
Recall: 0.6926780395530395
F1: 0.6779382829492924


INFO:__main__: Update size of `dl`  and `du` by adding uncertain 60 samples in `dl` len(dl): 154, len(du) 254
INFO:__main__: Update size of `dl`  and `du` by adding 0 hcs samples in `dl` len(dl): 154, len(du) 254
INFO:__main__: Iteration: 1 fine-tune the model on dh U dl


====> Epoch: 0 Average loss: 0.0212
====> Epoch: 1 Average loss: 0.0191
====> Epoch: 2 Average loss: 0.0172
====> Epoch: 3 Average loss: 0.0179
====> Epoch: 4 Average loss: 0.0177
====> Epoch: 5 Average loss: 0.0174
====> Epoch: 6 Average loss: 0.0171
====> Epoch: 7 Average loss: 0.0147
====> Epoch: 8 Average loss: 0.0155
====> Epoch: 9 Average loss: 0.0152
====> Epoch: 10 Average loss: 0.0134
====> Epoch: 11 Average loss: 0.0152
====> Epoch: 12 Average loss: 0.0148
====> Epoch: 13 Average loss: 0.0139
====> Epoch: 14 Average loss: 0.0133
====> Epoch: 15 Average loss: 0.0132
====> Epoch: 16 Average loss: 0.0128
====> Epoch: 17 Average loss: 0.0128
====> Epoch: 18 Average loss: 0.0117
====> Epoch: 19 Average loss: 0.0112
====> Epoch: 20 Average loss: 0.0124
====> Epoch: 21 Average loss: 0.0124
====> Epoch: 22 Average loss: 0.0134
====> Epoch: 23 Average loss: 0.0133
====> Epoch: 24 Average loss: 0.0126
====> Epoch: 25 Average loss: 0.0119
====> Epoch: 26 Average loss: 0.0126
====> Epoch

INFO:__main__: remove 60 uncertain samples from du


====> Epoch: 29 Average loss: 0.0118


INFO:__main__: Iteration: 2: run prediction on unlabeled data `du` 


Iteration: 1, len(dl): 154, len(du): 194, len(dh) 0
Precision: 0.731528511997262
Recall: 0.7073545667295666
F1: 0.696806800071029


INFO:__main__: Update size of `dl`  and `du` by adding uncertain 60 samples in `dl` len(dl): 214, len(du) 194
INFO:__main__: Update size of `dl`  and `du` by adding 0 hcs samples in `dl` len(dl): 214, len(du) 194
INFO:__main__: Iteration: 2 fine-tune the model on dh U dl


====> Epoch: 0 Average loss: 0.0234
====> Epoch: 1 Average loss: 0.0199
====> Epoch: 2 Average loss: 0.0187
====> Epoch: 3 Average loss: 0.0180
====> Epoch: 4 Average loss: 0.0178
====> Epoch: 5 Average loss: 0.0171
====> Epoch: 6 Average loss: 0.0186
====> Epoch: 7 Average loss: 0.0180
====> Epoch: 8 Average loss: 0.0170
====> Epoch: 9 Average loss: 0.0172
====> Epoch: 10 Average loss: 0.0163
====> Epoch: 11 Average loss: 0.0160
====> Epoch: 12 Average loss: 0.0148
====> Epoch: 13 Average loss: 0.0156
====> Epoch: 14 Average loss: 0.0170
====> Epoch: 15 Average loss: 0.0132
====> Epoch: 16 Average loss: 0.0142
====> Epoch: 17 Average loss: 0.0137
====> Epoch: 18 Average loss: 0.0146
====> Epoch: 19 Average loss: 0.0131
====> Epoch: 20 Average loss: 0.0140
====> Epoch: 21 Average loss: 0.0141
====> Epoch: 22 Average loss: 0.0141
====> Epoch: 23 Average loss: 0.0126
====> Epoch: 24 Average loss: 0.0137
====> Epoch: 25 Average loss: 0.0117
====> Epoch: 26 Average loss: 0.0132
====> Epoch

INFO:__main__: remove 60 uncertain samples from du


====> Epoch: 29 Average loss: 0.0123


INFO:__main__: Iteration: 3: run prediction on unlabeled data `du` 


Iteration: 2, len(dl): 214, len(du): 134, len(dh) 0
Precision: 0.7505345349095349
Recall: 0.7110879571817073
F1: 0.7068258839923232


INFO:__main__: Update size of `dl`  and `du` by adding uncertain 60 samples in `dl` len(dl): 274, len(du) 134
INFO:__main__: Update size of `dl`  and `du` by adding 0 hcs samples in `dl` len(dl): 274, len(du) 134
INFO:__main__: Iteration: 3 fine-tune the model on dh U dl


====> Epoch: 0 Average loss: 0.0187
====> Epoch: 1 Average loss: 0.0182
====> Epoch: 2 Average loss: 0.0194
====> Epoch: 3 Average loss: 0.0201
====> Epoch: 4 Average loss: 0.0170
====> Epoch: 5 Average loss: 0.0176
====> Epoch: 6 Average loss: 0.0149
====> Epoch: 7 Average loss: 0.0168
====> Epoch: 8 Average loss: 0.0176
====> Epoch: 9 Average loss: 0.0156
====> Epoch: 10 Average loss: 0.0174
====> Epoch: 11 Average loss: 0.0152
====> Epoch: 12 Average loss: 0.0148
====> Epoch: 13 Average loss: 0.0162
====> Epoch: 14 Average loss: 0.0157
====> Epoch: 15 Average loss: 0.0143
====> Epoch: 16 Average loss: 0.0167
====> Epoch: 17 Average loss: 0.0140
====> Epoch: 18 Average loss: 0.0138
====> Epoch: 19 Average loss: 0.0120
====> Epoch: 20 Average loss: 0.0128
====> Epoch: 21 Average loss: 0.0146
====> Epoch: 22 Average loss: 0.0135
====> Epoch: 23 Average loss: 0.0122
====> Epoch: 24 Average loss: 0.0141
====> Epoch: 25 Average loss: 0.0133
====> Epoch: 26 Average loss: 0.0139
====> Epoch

INFO:__main__: remove 60 uncertain samples from du


====> Epoch: 29 Average loss: 0.0138


INFO:__main__: Iteration: 4: run prediction on unlabeled data `du` 


Iteration: 3, len(dl): 274, len(du): 74, len(dh) 0
Precision: 0.6859481375106375
Recall: 0.6854937597125097
F1: 0.6669745944034061


INFO:__main__: Update size of `dl`  and `du` by adding uncertain 60 samples in `dl` len(dl): 334, len(du) 74
INFO:__main__: Update size of `dl`  and `du` by adding 0 hcs samples in `dl` len(dl): 334, len(du) 74
INFO:__main__: Iteration: 4 fine-tune the model on dh U dl


====> Epoch: 0 Average loss: 0.0169
====> Epoch: 1 Average loss: 0.0159
====> Epoch: 2 Average loss: 0.0166
====> Epoch: 3 Average loss: 0.0143
====> Epoch: 4 Average loss: 0.0153
====> Epoch: 5 Average loss: 0.0149
====> Epoch: 6 Average loss: 0.0138
====> Epoch: 7 Average loss: 0.0145
====> Epoch: 8 Average loss: 0.0136
====> Epoch: 9 Average loss: 0.0131
====> Epoch: 10 Average loss: 0.0124
====> Epoch: 11 Average loss: 0.0140
====> Epoch: 12 Average loss: 0.0129
====> Epoch: 13 Average loss: 0.0113
====> Epoch: 14 Average loss: 0.0128
====> Epoch: 15 Average loss: 0.0129
====> Epoch: 16 Average loss: 0.0138
====> Epoch: 17 Average loss: 0.0140
====> Epoch: 18 Average loss: 0.0124
====> Epoch: 19 Average loss: 0.0134
====> Epoch: 20 Average loss: 0.0126
====> Epoch: 21 Average loss: 0.0137
====> Epoch: 22 Average loss: 0.0120
====> Epoch: 23 Average loss: 0.0110
====> Epoch: 24 Average loss: 0.0120
====> Epoch: 25 Average loss: 0.0131
====> Epoch: 26 Average loss: 0.0134
====> Epoch

INFO:__main__: remove 60 uncertain samples from du


====> Epoch: 29 Average loss: 0.0121
Iteration: 4, len(dl): 334, len(du): 14, len(dh) 0
Precision: 0.7328382554945055
Recall: 0.7235808173308174
F1: 0.7097713661123549
Saving model...
Model saved as: saved_model/alexnet.pth


# Extra processing
---
#### The cells below have been converted to markdown cells so that they are skipped when you run all cells. Convert them back to code cells if you need to run them.

### Extracting the data zip file

import zipfile

# Unpack the annotated data archive; extractall() without a target path
# writes into the current working directory.
with zipfile.ZipFile('data/annotated_r2.zip', mode='r') as archive:
    archive.extractall()

### Move files to train and test directory

import os

# List data/ in sorted order: os.listdir returns entries in arbitrary order,
# so sorting makes the train/test assignment reproducible.
dataitems = sorted(os.listdir('data'))

# Keep only non-directory entries, and leave the source archive
# (data/annotated_r2.zip) where it is instead of moving it into the dataset.
finalitems = [item for item in dataitems
              if not os.path.isdir('data/' + item)
              and not item.lower().endswith('.zip')]

# os.rename fails if the destination directory does not exist.
os.makedirs('data/test', exist_ok=True)
os.makedirs('data/train', exist_ok=True)

for idx, item in enumerate(finalitems):
    # this will assign 20% images(every 5th image) to the test directory
    if idx % 5 == 0:
        os.rename('data/' + item, 'data/test/' + item)
    else:
        os.rename('data/' + item, 'data/train/' + item)

### Create small train-test set for debugging

import os
from shutil import copyfile

# Sorted listings make the selected debug subset reproducible
# (os.listdir returns entries in arbitrary order).
trainitems = sorted(os.listdir('data/train'))
testitems = sorted(os.listdir('data/test'))

# copyfile does not create missing destination directories.
os.makedirs('data/debug_train', exist_ok=True)
os.makedirs('data/debug_test', exist_ok=True)

for idx, item in enumerate(trainitems):
    if os.path.isdir('data/train/' + item):
        continue

    # this will assign 20% images(every 5th image) to the debug directory
    if idx % 5 == 0:
        copyfile('data/train/' + item, 'data/debug_train/' + item)

for idx, item in enumerate(testitems):
    # Check the entry inside data/test (the original checked data/train,
    # so sub-directories of data/test were not skipped).
    if os.path.isdir('data/test/' + item):
        continue

    # this will assign 20% images(every 5th image) to the debug directory
    if idx % 5 == 0:
        copyfile('data/test/' + item, 'data/debug_test/' + item)