# Semi-Supervised Learning testing

Import and get data

In [1]:
import import_ipynb
import visdom
import ipynb.fs.full.Datasets as Datasets
from Datasets import get_dataset, HyperX, HyperX_unlabeled
import utils
import numpy as np
import sklearn.svm
import seaborn as sns
import importlib

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import LambdaLR
import torch.optim as optim
from torch.nn import init
import torch.utils.data as data
from torchsummary import summary

import math
import os
import datetime
from sklearn.externals import joblib
from tqdm import tqdm

vis = visdom.Visdom()

Setting up a new session...


importing Jupyter notebook from Datasets.ipynb


In [2]:
Datasets = importlib.reload(Datasets)

In [3]:
salinas_img, salinas_gt, salinas_label_values, salinas_ignored_labels, salinas_rgb_bands, salinas_palette = get_dataset("Salinas")

Specify bands and classes while generating color palette

In [4]:
N_CLASSES = len(salinas_label_values)
N_BANDS = salinas_img.shape[-1]

if salinas_palette is None:
    # Generate color palette
    salinas_palette = {0: (0, 0, 0)}
    for k, color in enumerate(sns.color_palette("hls", len(salinas_label_values) - 1)):
        salinas_palette[k + 1] = tuple(np.asarray(255 * np.array(color), dtype='uint8'))
invert_palette = {v: k for k, v in salinas_palette.items()}

def convert_to_color(x):
    return utils.convert_to_color_(x, palette=salinas_palette)
def convert_from_color(x):
    return utils.convert_from_color_(x, palette=invert_palette)

Define sample percantage and sampling of data from the original dataset

In [5]:
SAMPLE_PERCENTAGE = 0.3
SAMPLING_MODE = 'disjoint' #random, fixed, disjoint

train_gt, test_gt = utils.sample_gt(salinas_gt, SAMPLE_PERCENTAGE, mode=SAMPLING_MODE)
print("{} samples selected (over {})".format(np.count_nonzero(train_gt), np.count_nonzero(salinas_gt)))

utils.display_predictions(convert_to_color(train_gt), vis, caption="Train ground truth")
utils.display_predictions(convert_to_color(test_gt), vis, caption="Test ground truth")

14925 samples selected (over 54129)


Define a training function that uses "FixMatch" style training with labeled and unlabeled datasets. Configure the original training function from "test" to suit the pytorch implementation of FixMatch here: https://github.com/kekmodel/FixMatch-pytorch/blob/master/train.py

In [14]:
def train(net, optimizer, criterion, labeled_data_loader, unlabeled_data_loader, epoch, threshold, scheduler=None,
          display_iter=100, device=torch.device('cpu'), display=None,
          val_loader=None):
    """
    Training loop to optimize a network for several epochs and a specified loss
    Args:
        net: a PyTorch model
        optimizer: a PyTorch optimizer
        labeled_data_loader: a PyTorch dataset loader for the labeled dataset
        unlabeled_data_loader: a PyTorch dataset loader for the weakly and strongly augmented, unlabeled dataset
        epoch: int specifying the number of training epochs
        threshold: probability thresold for pseudo labels acceptance
        criterion: a PyTorch-compatible loss function, e.g. nn.CrossEntropyLoss
        device (optional): torch device to use (defaults to CPU)
        display_iter (optional): number of iterations before refreshing the
        display (False/None to switch off).
        scheduler (optional): PyTorch scheduler
        val_loader (optional): validation dataset
        supervision (optional): 'full' or 'semi'
    """

    if criterion is None:
        raise Exception("Missing criterion. You must specify a loss function.")

    net.to(device)

    save_epoch = epoch // 20 if epoch > 20 else 1


    losses = np.zeros(1000000)
    mean_losses = np.zeros(100000000)
    iter_ = 1
    loss_win, val_win = None, None
    val_accuracies = []

    for e in tqdm(range(1, epoch + 1), desc="Training the network"):
        # Set the network to training mode
        net.train()
        avg_loss = 0.
        
        train_loader = zip(labeled_data_loader, unlabeled_data_loader)

        # Run the training loop for one epoch
        for batch_idx, (data_x, data_u) in tqdm(enumerate(train_loader), total=len(labeled_data_loader)):
            # Load the data into the GPU if required
            inputs_x, targets_x = data_x
            inputs_u_w, inputs_u_s = data_u
            
            batch_size = inputs_x.shape[0]
            
            inputs = torch.cat((inputs_x, inputs_u_w, inputs_u_s)).to(device)
            targets_x = targets_x.to(device)
            logits = net(inputs)
            logits_x = logits[:batch_size]
            logits_u_w, logits_u_s = logits[batch_size:].chunk(2)
            del logits

            Lx = F.cross_entropy(logits_x, targets_x, reduction='mean')

            pseudo_label = torch.softmax(logits_u_w.detach_(), dim=-1)
            max_probs, targets_u = torch.max(pseudo_label, dim=-1)
            mask = max_probs.ge(threshold).float()

            Lu = (F.cross_entropy(logits_u_s, targets_u,
                              reduction='none') * mask).mean()

            loss = Lx + 1 * Lu
    
            
            optimizer.zero_grad()
            
            loss.backward()
            optimizer.step()

            avg_loss += loss.item()
            losses[iter_] = loss.item()
            mean_losses[iter_] = np.mean(losses[max(0, iter_ - 100):iter_ + 1])

            if display_iter and iter_ % display_iter == 0:
                string = 'Train (epoch {}/{}) [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
                string = string.format(e, epoch, batch_idx * len(data_x), len(data_x) * len(labeled_data_loader),
                                       100. * batch_idx / len(labeled_data_loader), mean_losses[iter_])
                update = None if loss_win is None else 'append'
                loss_win = display.line(
                    X=np.arange(iter_ - display_iter, iter_),
                    Y=mean_losses[iter_ - display_iter:iter_],
                    win=loss_win,
                    update=update,
                    opts={'title': "Training loss",
                          'xlabel': "Iterations",
                          'ylabel': "Loss"
                         }
                )
                tqdm.write(string)

                if len(val_accuracies) > 0:
                    val_win = display.line(Y=np.array(val_accuracies),
                                           X=np.arange(len(val_accuracies)),
                                           win=val_win,
                                           opts={'title': "Validation accuracy",
                                                 'xlabel': "Epochs",
                                                 'ylabel': "Accuracy"
                                                })
            iter_ += 1
            del(data_x, data_u, loss)

        # Update the scheduler
        avg_loss /= len(labeled_data_loader)
        if val_loader is not None:
            val_acc = val(net, val_loader, device=device, supervision='full')
            val_accuracies.append(val_acc)
            metric = -val_acc
        else:
            metric = avg_loss

        if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(metric)
        elif scheduler is not None:
            scheduler.step()

        # Save the weights
        if e % save_epoch == 0:
            save_model(net, utils.camel_to_snake(str(net.__class__.__name__)), 
                       labeled_data_loader.dataset.name, epoch=e, metric=abs(metric))


Test, validation and saving models should be the same

In [15]:
def save_model(model, model_name, dataset_name, **kwargs):
     model_dir = './checkpoints/' + model_name + "/" + dataset_name + "/"
     if not os.path.isdir(model_dir):
         os.makedirs(model_dir, exist_ok=True)
     if isinstance(model, torch.nn.Module):
         filename = str(datetime.datetime.now()) + "_epoch{epoch}_{metric:.2f}".format(**kwargs)
         tqdm.write("Saving neural network weights in {}".format(filename))
         torch.save(model.state_dict(), model_dir + filename + '.pth')
     else:
         filename = str(datetime.datetime.now())
         tqdm.write("Saving model params in {}".format(filename))
         joblib.dump(model, model_dir + filename + '.pkl')


def test(net, img, hyperparams):
    """
    Test a model on a specific image
    """
    net.eval()
    patch_size = hyperparams['patch_size']
    center_pixel = hyperparams['center_pixel']
    batch_size, device = hyperparams['batch_size'], hyperparams['device']
    n_classes = hyperparams['n_classes']

    kwargs = {'step': hyperparams['test_stride'], 'window_size': (patch_size, patch_size)}
    probs = np.zeros(img.shape[:2] + (n_classes,))

    iterations = utils.count_sliding_window(img, **kwargs) // batch_size
    for batch in tqdm(utils.grouper(batch_size, utils.sliding_window(img, **kwargs)),
                      total=(iterations), desc="Inference on the image"):
        with torch.no_grad():
            if patch_size == 1:
                data = [b[0][0, 0] for b in batch]
                data = np.copy(data)
                data = torch.from_numpy(data)
            else:
                data = [b[0] for b in batch]
                data = np.copy(data)
                data = data.transpose(0, 3, 1, 2)
                data = torch.from_numpy(data)
                data = data.unsqueeze(1)

            indices = [b[1:] for b in batch]
            data = data.to(device)
            output = net(data)
            if isinstance(output, tuple):
                output = output[0]
            output = output.to('cpu')

            if patch_size == 1 or center_pixel:
                output = output.numpy()
            else:
                output = np.transpose(output.numpy(), (0, 2, 3, 1))
            for (x, y, w, h), out in zip(indices, output):
                if center_pixel:
                    probs[x + w // 2, y + h // 2] += out
                else:
                    probs[x:x + w, y:y + h] += out
    return probs

def val(net, data_loader, device='cpu', supervision='full'):
    # TODO : fix me using metrics()
    accuracy, total = 0., 0.
    ignored_labels = data_loader.dataset.ignored_labels
    for batch_idx, (data, target) in enumerate(data_loader):
        with torch.no_grad():
            # Load the data into the GPU if required
            data, target = data.to(device), target.to(device)
            if supervision == 'full':
                output = net(data)
            elif supervision == 'semi':
                outs = net(data)
                output, rec = outs
            _, output = torch.max(output, dim=1)
            for out, pred in zip(output.view(-1), target.view(-1)):
                if out.item() in ignored_labels:
                    continue
                else:
                    accuracy += out.item() == pred.item()
                    total += 1
    return accuracy / total

Model

In [16]:
class HamidaEtAl(nn.Module):
    """
    3-D Deep Learning Approach for Remote Sensing Image Classification
    Amina Ben Hamida, Alexandre Benoit, Patrick Lambert, Chokri Ben Amar
    IEEE TGRS, 2018
    https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8344565
    """
    @staticmethod
    def weight_init(m):
        if isinstance(m, nn.Linear) or isinstance(m, nn.Conv3d):
            init.kaiming_normal_(m.weight)
            init.zeros_(m.bias)

    def __init__(self, input_channels, n_classes, patch_size=5, dilation=1):
        super(HamidaEtAl, self).__init__()
        # The first layer is a (3,3,3) kernel sized Conv characterized
        # by a stride equal to 1 and number of neurons equal to 20
        self.patch_size = patch_size
        self.input_channels = input_channels
        dilation = (dilation, 1, 1)

        if patch_size == 3:
            self.conv1 = nn.Conv3d(
                1, 20, (3, 3, 3), stride=(1, 1, 1), dilation=dilation, padding=1)
        else:
            self.conv1 = nn.Conv3d(
                1, 20, (3, 3, 3), stride=(1, 1, 1), dilation=dilation, padding=0)
        # Next pooling is applied using a layer identical to the previous one
        # with the difference of a 1D kernel size (1,1,3) and a larger stride
        # equal to 2 in order to reduce the spectral dimension
        self.pool1 = nn.Conv3d(
            20, 20, (3, 1, 1), dilation=dilation, stride=(2, 1, 1), padding=(1, 0, 0))
        # Then, a duplicate of the first and second layers is created with
        # 35 hidden neurons per layer.
        self.conv2 = nn.Conv3d(
            20, 35, (3, 3, 3), dilation=dilation, stride=(1, 1, 1), padding=(1, 0, 0))
        self.pool2 = nn.Conv3d(
            35, 35, (3, 1, 1), dilation=dilation, stride=(2, 1, 1), padding=(1, 0, 0))
        # Finally, the 1D spatial dimension is progressively reduced
        # thanks to the use of two Conv layers, 35 neurons each,
        # with respective kernel sizes of (1,1,3) and (1,1,2) and strides
        # respectively equal to (1,1,1) and (1,1,2)
        self.conv3 = nn.Conv3d(
            35, 35, (3, 1, 1), dilation=dilation, stride=(1, 1, 1), padding=(1, 0, 0))
        self.conv4 = nn.Conv3d(
            35, 35, (2, 1, 1), dilation=dilation, stride=(2, 1, 1), padding=(1, 0, 0))

        self.dropout = nn.Dropout(p=0.5)

        self.features_size = self._get_final_flattened_size()
        # The architecture ends with a fully connected layer where the number
        # of neurons is equal to the number of input classes.
        self.fc = nn.Linear(self.features_size, n_classes)

        self.apply(self.weight_init)

    def _get_final_flattened_size(self):
        with torch.no_grad():
            x = torch.zeros((1, 1, self.input_channels,
                             self.patch_size, self.patch_size))
            x = self.pool1(self.conv1(x))
            x = self.pool2(self.conv2(x))
            x = self.conv3(x)
            x = self.conv4(x)
            _, t, c, w, h = x.size()
        return t * c * w * h

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = x.view(-1, self.features_size)
        x = self.dropout(x)
        x = self.fc(x)
        return x

Define a cosine training scheme with warm-up. Shown to be good, taken from the same resource as above.

In [17]:
def get_cosine_schedule_with_warmup(optimizer,
                                    num_warmup_steps,
                                    num_training_steps,
                                    num_cycles=7./16.,
                                    last_epoch=-1):
    def _lr_lambda(current_step):
        if current_step < num_warmup_steps:
            return float(current_step) / float(max(1, num_warmup_steps))
        no_progress = float(current_step - num_warmup_steps) / \
            float(max(1, num_training_steps - num_warmup_steps))
        return max(0., math.cos(math.pi * num_cycles * no_progress))

    return LambdaLR(optimizer, _lr_lambda, last_epoch)

Initiate weights and hyperparams

In [6]:
device = torch.device('cpu')

hyperparams = {'patch_size' : 1, 'ignored_labels' : salinas_ignored_labels, 'flip_augmentation' : False, 
              'radiation_augmentation' : False, 'mixture_augmentation' : False, 'center_pixel' : True, 
              'supervision' : 'full', 'batch_size' : 100, 'epochs' : 100, 'dataset' : 'Salinas', 
              'n_classes' : N_CLASSES, 'test_stride' : 1, 'scheduler' : None, 'weights' : None,
              'device' : device, 'n_bands' : N_BANDS, 'warmup' : 0, 'threshold' : 0.95}

weights = torch.ones(N_CLASSES)
weights[torch.LongTensor(salinas_ignored_labels)] = 0
weights = weights.to(device)

In [7]:
weights

tensor([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [19]:
hyperparams['patch_size'] = 5
hyperparams['center_pixel'] = True
hyperparams['epochs'] = 10
hyperparams['warmup'] = 1
hyperparams['batch_size'] = 64

hyperparams['flip_augmentation'] = True

model = HamidaEtAl(hyperparams['n_bands'], hyperparams['n_classes'], 
                   patch_size=hyperparams['patch_size'])
lr =  0.03
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True)
loss = nn.CrossEntropyLoss(weight=weights)

In [20]:
prev_train_gt = train_gt

train_gt, val_gt = utils.sample_gt(prev_train_gt, 0.95, mode=SAMPLING_MODE)
print("{} samples selected (over {})".format(np.count_nonzero(train_gt), np.count_nonzero(prev_train_gt)))

val_dataset = HyperX(salinas_img, val_gt, **hyperparams)
val_loader = data.DataLoader(val_dataset,
                             batch_size=hyperparams['batch_size'])

samples = np.count_nonzero(train_gt)
unlabeled_portion = 7

train_labeled_gt, train_unlabeled_gt = utils.sample_gt(train_gt, 1/(unlabeled_portion + 1), mode=SAMPLING_MODE)
print("{} samples selected (over {})".format(np.count_nonzero(train_labeled_gt), np.count_nonzero(train_gt)))
print("{} samples selected (over {})".format(np.count_nonzero(train_unlabeled_gt), np.count_nonzero(train_gt)))

train_labeled_dataset = HyperX(salinas_img, train_labeled_gt, **hyperparams)
train_labeled_loader = data.DataLoader(train_labeled_dataset, batch_size=hyperparams['batch_size'],
                               shuffle=True, drop_last=True)


train_unlabeled_dataset = HyperX_unlabeled(salinas_img, train_unlabeled_gt, **hyperparams)
train_unlabeled_loader = data.DataLoader(train_unlabeled_dataset, 
                                         batch_size=hyperparams['batch_size']*unlabeled_portion,
                                         shuffle=True, drop_last=True)


amount_labeled = samples//(unlabeled_portion + 1)

iterations = amount_labeled // hyperparams['batch_size']
total_steps = iterations * hyperparams['epochs']
hyperparams['scheduler'] = get_cosine_schedule_with_warmup(optimizer, 
                                                           hyperparams['warmup']*iterations, total_steps)

6235 samples selected (over 11870)
1609 samples selected (over 6235)
4626 samples selected (over 6235)


In [None]:
output = next(iter(train_labeled_loader))
out = model(output[0])
out_w, out_s = (out).chunk(2)
ps_lb = torch.softmax(out_w.detach_(), dim=-1)
probs, target = torch.max(ps_lb, dim=-1)

In [None]:
model.eval()
with torch.no_grad():
    out_eval = model(output[0])
    out_eval.numpy()

In [7]:
part_1, part_2 = utils.sample_gt(salinas_gt, 0.1, mode='better_disjoint')
utils.display_predictions(convert_to_color(part_1), vis, caption="Part 1 ground truth")
utils.display_predictions(convert_to_color(part_2), vis, caption="Part 2 ground truth")
part_3, part_4 = utils.sample_gt(part_1, 0.1, mode='better_disjoint')
utils.display_predictions(convert_to_color(part_3), vis, caption="Part 3 ground truth")
utils.display_predictions(convert_to_color(part_4), vis, caption="Part 4 ground truth")

In [None]:
np.unique(part_4)

In [None]:
utils.display_predictions(convert_to_color(train_labeled_gt), vis, caption="Labeled train ground truth")
utils.display_predictions(convert_to_color(train_unlabeled_gt), vis, caption="Unlabeled train ground truth")
utils.display_predictions(convert_to_color(val_gt), vis, caption="Validation ground truth")

In [8]:
CLASS_BALANCING = True

if CLASS_BALANCING:
    weights_balance = utils.compute_imf_weights(train_gt, hyperparams['n_classes'], salinas_ignored_labels)
    hyperparams['weights'] = torch.from_numpy(weights_balance)

In [9]:
hyperparams['weights']

tensor([0.0000, 1.1592, 0.6362, 1.1571, 1.7005, 0.8805, 0.5990, 0.6537, 0.2072,
        0.3814, 0.7082, 2.1271, 1.2069, 2.4884, 2.1700, 0.3205, 1.2588],
       dtype=torch.float64)

In [22]:
CHECKPOINT = None #checkpoint to load weights from, string from where to load model

print(hyperparams)
print("Network :")
with torch.no_grad():
    for input, _ in train_labeled_loader:
        break
    summary(model.to(hyperparams['device']), input.size()[1:])
    # We would like to use device=hyperparams['device'] altough we have
    # to wait for torchsummary to be fixed first.

if CHECKPOINT is not None:
    model.load_state_dict(torch.load(CHECKPOINT))

try:
    train(model, optimizer, loss, train_labeled_loader, train_unlabeled_loader, hyperparams['epochs'], 
          scheduler=hyperparams['scheduler'], device=hyperparams['device'], threshold=hyperparams['threshold'],
          val_loader=val_loader, display=vis)
except KeyboardInterrupt:
    # Allow the user to stop the training
    pass

probabilities = test(model, salinas_img, hyperparams)
prediction = np.argmax(probabilities, axis=-1)

Training the network:   0%|          | 0/10 [00:00<?, ?it/s]
  0%|          | 0/22 [00:00<?, ?it/s][A

{'patch_size': 5, 'ignored_labels': [0], 'flip_augmentation': True, 'radiation_augmentation': False, 'mixture_augmentation': False, 'center_pixel': True, 'supervision': 'full', 'batch_size': 64, 'epochs': 10, 'dataset': 'Salinas', 'n_classes': 17, 'test_stride': 1, 'scheduler': <torch.optim.lr_scheduler.LambdaLR object at 0x7faa4fa47a10>, 'weights': tensor([0.0000, 0.7640, 0.8997, 0.8341, 2.4965, 1.3377, 0.8501, 0.9530, 0.3093,
        0.5086, 1.0519, 1.5413, 1.7126, 3.6927, 3.2824, 0.4758, 1.8658],
       dtype=torch.float64), 'device': device(type='cpu'), 'n_bands': 204, 'warmup': 1, 'threshold': 0.95}
Network :
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv3d-1        [-1, 20, 202, 3, 3]             560
            Conv3d-2        [-1, 20, 101, 3, 3]           1,220
            Conv3d-3        [-1, 35, 101, 1, 1]          18,935
            Conv3d-4         [-1, 35, 51, 1, 1]          


  5%|▍         | 1/22 [00:10<03:43, 10.65s/it][A
  9%|▉         | 2/22 [00:20<03:30, 10.53s/it][A
 14%|█▎        | 3/22 [00:30<03:16, 10.36s/it][A
 18%|█▊        | 4/22 [00:40<03:02, 10.12s/it][A
 23%|██▎       | 5/22 [00:51<02:55, 10.33s/it][A
 27%|██▋       | 6/22 [01:00<02:41, 10.06s/it][A
 32%|███▏      | 7/22 [01:10<02:30, 10.00s/it][A
 36%|███▋      | 8/22 [01:20<02:20, 10.02s/it][A
 41%|████      | 9/22 [01:30<02:09,  9.93s/it][A
 45%|████▌     | 10/22 [01:40<02:00, 10.01s/it][A
Training the network:  10%|█         | 1/10 [01:47<16:09, 107.68s/it]
  0%|          | 0/22 [00:00<?, ?it/s][A

Saving neural network weights in 2020-06-03 11:32:09.692558_epoch1_0.07



  5%|▍         | 1/22 [00:10<03:37, 10.38s/it][A
  9%|▉         | 2/22 [00:20<03:27, 10.39s/it][A
 14%|█▎        | 3/22 [00:31<03:16, 10.35s/it][A
 18%|█▊        | 4/22 [00:41<03:06, 10.38s/it][A
 23%|██▎       | 5/22 [00:52<02:57, 10.46s/it][A
 27%|██▋       | 6/22 [01:01<02:43, 10.24s/it][A
 32%|███▏      | 7/22 [01:11<02:32, 10.16s/it][A
 36%|███▋      | 8/22 [01:21<02:18,  9.92s/it][A
 41%|████      | 9/22 [01:30<02:07,  9.79s/it][A
 45%|████▌     | 10/22 [01:40<02:00, 10.08s/it][A
Training the network:  20%|██        | 2/10 [03:36<14:23, 107.95s/it]
  0%|          | 0/22 [00:00<?, ?it/s][A

Saving neural network weights in 2020-06-03 11:33:58.277503_epoch2_0.09



  5%|▍         | 1/22 [00:09<03:29,  9.96s/it][A
  9%|▉         | 2/22 [00:20<03:22, 10.11s/it][A
 14%|█▎        | 3/22 [00:30<03:12, 10.12s/it][A
 18%|█▊        | 4/22 [00:40<03:00, 10.04s/it][A
 23%|██▎       | 5/22 [00:50<02:48,  9.91s/it][A
 27%|██▋       | 6/22 [00:59<02:37,  9.85s/it][A
 32%|███▏      | 7/22 [01:09<02:26,  9.77s/it][A
 36%|███▋      | 8/22 [01:19<02:17,  9.81s/it][A
 41%|████      | 9/22 [01:29<02:08,  9.91s/it][A
 45%|████▌     | 10/22 [01:39<01:59,  9.97s/it][A
Training the network:  30%|███       | 3/10 [05:24<12:35, 107.89s/it]
  0%|          | 0/22 [00:00<?, ?it/s][A

Saving neural network weights in 2020-06-03 11:35:46.016682_epoch3_0.15



  5%|▍         | 1/22 [00:09<03:22,  9.66s/it][A
  9%|▉         | 2/22 [00:19<03:16,  9.82s/it][A
 14%|█▎        | 3/22 [00:30<03:08,  9.92s/it][A
 18%|█▊        | 4/22 [00:40<03:01, 10.08s/it][A
 23%|██▎       | 5/22 [00:50<02:52, 10.13s/it][A
 27%|██▋       | 6/22 [01:00<02:42, 10.13s/it][A
 32%|███▏      | 7/22 [01:11<02:32, 10.19s/it][A
 36%|███▋      | 8/22 [01:21<02:21, 10.14s/it][A
 41%|████      | 9/22 [01:30<02:08,  9.90s/it][A
 45%|████▌     | 10/22 [01:39<01:59,  9.98s/it][A
Training the network:  40%|████      | 4/10 [07:11<10:46, 107.71s/it]
  0%|          | 0/22 [00:00<?, ?it/s][A

Saving neural network weights in 2020-06-03 11:37:33.301173_epoch4_0.16



  5%|▍         | 1/22 [00:09<03:22,  9.64s/it][A
  9%|▉         | 2/22 [00:19<03:14,  9.70s/it][A
 14%|█▎        | 3/22 [00:29<03:03,  9.65s/it][A
 18%|█▊        | 4/22 [00:38<02:51,  9.55s/it][A
 23%|██▎       | 5/22 [00:47<02:41,  9.51s/it][A
 27%|██▋       | 6/22 [00:57<02:32,  9.53s/it][A
 32%|███▏      | 7/22 [01:06<02:22,  9.48s/it][A
 36%|███▋      | 8/22 [01:16<02:13,  9.50s/it][A
 41%|████      | 9/22 [01:26<02:05,  9.62s/it][A
 45%|████▌     | 10/22 [01:35<01:55,  9.59s/it][A
Training the network:  50%|█████     | 5/10 [08:54<08:52, 106.45s/it]
  0%|          | 0/22 [00:00<?, ?it/s][A

Saving neural network weights in 2020-06-03 11:39:16.811619_epoch5_0.27



  5%|▍         | 1/22 [00:09<03:18,  9.45s/it][A
  9%|▉         | 2/22 [00:18<03:08,  9.43s/it][A
 14%|█▎        | 3/22 [00:28<02:59,  9.44s/it][A
 18%|█▊        | 4/22 [00:38<02:52,  9.60s/it][A
 23%|██▎       | 5/22 [00:47<02:42,  9.55s/it][A
 27%|██▋       | 6/22 [00:57<02:31,  9.49s/it][A
 32%|███▏      | 7/22 [01:06<02:23,  9.60s/it][A
 36%|███▋      | 8/22 [01:16<02:13,  9.54s/it][A
 41%|████      | 9/22 [01:26<02:05,  9.65s/it][A
 45%|████▌     | 10/22 [01:36<01:55,  9.62s/it][A
Training the network:  60%|██████    | 6/10 [10:39<07:03, 105.80s/it]
  0%|          | 0/22 [00:00<?, ?it/s][A

Saving neural network weights in 2020-06-03 11:41:01.089682_epoch6_0.29



  5%|▍         | 1/22 [00:09<03:24,  9.72s/it][A
  9%|▉         | 2/22 [00:19<03:13,  9.68s/it][A
 14%|█▎        | 3/22 [00:28<03:02,  9.61s/it][A
 18%|█▊        | 4/22 [00:38<02:53,  9.66s/it][A
 23%|██▎       | 5/22 [00:48<02:45,  9.71s/it][A
 27%|██▋       | 6/22 [00:58<02:36,  9.78s/it][A
 32%|███▏      | 7/22 [01:08<02:27,  9.82s/it][A
 36%|███▋      | 8/22 [01:17<02:16,  9.76s/it][A
 41%|████      | 9/22 [01:27<02:07,  9.78s/it][A
 45%|████▌     | 10/22 [01:37<01:56,  9.73s/it][A
Training the network:  70%|███████   | 7/10 [12:25<05:17, 105.91s/it]
  0%|          | 0/22 [00:00<?, ?it/s][A

Saving neural network weights in 2020-06-03 11:42:47.253482_epoch7_0.11



  5%|▍         | 1/22 [00:09<03:23,  9.71s/it][A
  9%|▉         | 2/22 [00:19<03:14,  9.71s/it][A
 14%|█▎        | 3/22 [00:28<03:03,  9.65s/it][A
 18%|█▊        | 4/22 [00:38<02:53,  9.64s/it][A
 23%|██▎       | 5/22 [00:48<02:47,  9.85s/it][A
 27%|██▋       | 6/22 [00:59<02:39,  9.99s/it][A
 32%|███▏      | 7/22 [01:09<02:32, 10.14s/it][A
 36%|███▋      | 8/22 [01:19<02:21, 10.08s/it][A
 41%|████      | 9/22 [01:29<02:08,  9.91s/it][A
 45%|████▌     | 10/22 [01:38<01:58,  9.89s/it][A
Training the network:  80%|████████  | 8/10 [14:12<03:32, 106.19s/it]
  0%|          | 0/22 [00:00<?, ?it/s][A

Saving neural network weights in 2020-06-03 11:44:34.113611_epoch8_0.37



  5%|▍         | 1/22 [00:11<03:59, 11.39s/it][A
  9%|▉         | 2/22 [00:23<03:54, 11.71s/it][A
 14%|█▎        | 3/22 [00:34<03:35, 11.33s/it][A
 18%|█▊        | 4/22 [00:44<03:19, 11.06s/it][A
 23%|██▎       | 5/22 [00:54<03:02, 10.73s/it][A
 27%|██▋       | 6/22 [01:04<02:46, 10.41s/it][A
 32%|███▏      | 7/22 [01:15<02:38, 10.54s/it][A
 36%|███▋      | 8/22 [01:26<02:29, 10.68s/it][A
 41%|████      | 9/22 [01:35<02:14, 10.31s/it][A
 45%|████▌     | 10/22 [01:45<02:06, 10.51s/it][A
Training the network:  90%|█████████ | 9/10 [16:04<01:48, 108.09s/it]
  0%|          | 0/22 [00:00<?, ?it/s][A

Saving neural network weights in 2020-06-03 11:46:26.634661_epoch9_0.52



  5%|▍         | 1/22 [00:09<03:24,  9.72s/it][A
  9%|▉         | 2/22 [00:19<03:14,  9.72s/it][A
 14%|█▎        | 3/22 [00:28<03:03,  9.64s/it][A
 18%|█▊        | 4/22 [00:38<02:54,  9.71s/it][A
 23%|██▎       | 5/22 [00:48<02:47,  9.83s/it][A
 27%|██▋       | 6/22 [00:58<02:37,  9.85s/it][A
 32%|███▏      | 7/22 [01:09<02:31, 10.10s/it][A
 36%|███▋      | 8/22 [01:19<02:19,  9.96s/it][A
                                                                     
Training the network:  90%|█████████ | 9/10 [17:44<01:48, 108.09s/it]
 41%|████      | 9/22 [01:39<02:11, 10.14s/it][A
 45%|████▌     | 10/22 [01:39<01:59,  9.95s/it][A




Training the network: 100%|██████████| 10/10 [17:51<00:00, 107.18s/it]
Inference on the image:   0%|          | 0/1679 [00:00<?, ?it/s]

Saving neural network weights in 2020-06-03 11:48:13.773102_epoch10_0.10


Inference on the image: 1680it [02:27, 11.41it/s]                          


In [23]:
run_results = utils.metrics(prediction, test_gt, ignored_labels=hyperparams['ignored_labels'], n_classes=hyperparams['n_classes'])

mask = np.zeros(salinas_gt.shape, dtype='bool')
for l in hyperparams['ignored_labels']:
    mask[salinas_gt == l] = True
prediction[mask] = 0

color_prediction = convert_to_color(prediction)
utils.display_predictions(color_prediction, vis, gt=convert_to_color(test_gt), caption="Prediction vs. test ground truth")

utils.show_results(run_results, vis, label_values=salinas_label_values)

Confusion matrix :
[[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0]
 [  64    0    0    0    0    0 1481    0    0    0    0    0    0    0
     0    0    0]
 [   0    0    0    0    0    0 2920    0    0    0    0    0    0    0
     0    0    0]
 [   0    0    0    0    0 1347  204    0    0    0    0    0    0    0
     0    0    0]
 [   0    0    0    0    0  894  195    0    0    0    0    0    0    0
     0    0    0]
 [   0    0    0    0    0   13 2093    0    0    0    0    0    0    0
     0    0    0]
 [   0    0    0    0    0    0 3103    0    0    0    0    0    0    0
     0    0    0]
 [   0    0    0    0    0    0 2799    0    0    0    0    0    0    0
     0    0    0]
 [ 114    0    0    0    0    3 8677    0    0    0    0    0    0    0
     0    0    0]
 [   0    0    0    0    0 4791   24    0    0    0    0    0    0    0
     0    0    0]
 [  57    0    0    0    0 1268 1224    0    0    0    0    0    0    0
     0   

## MixUp Data Augmentation

Define functions for data augmentation for mixup according to implementation at https://github.com/facebookresearch/mixup-cifar10/blob/master/train.py

In [None]:
def mixup_data(x, y, alpha=1.0, use_cuda=False):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)


Define training function that implements mixup augmentation