In [10]:
import numpy as np
import sklearn.model_selection
import utils
import visdom
from datasets import get_dataset, HyperX
import seaborn as sns

In [18]:
# Visdom client that the display helpers below are handed.
vis = visdom.Visdom()

# get_dataset yields six values for the requested dataset; unpack one per line.
(
    salinas_img,
    salinas_gt,
    salinas_label_values,
    salinas_ignored_labels,
    salinas_rgb_bands,
    salinas_palette,
) = get_dataset("Salinas")

N_CLASSES = len(salinas_label_values)
N_BANDS = salinas_img.shape[-1]

if salinas_palette is None:
    # The dataset came without a palette: label 0 is black and every other
    # label gets one colour from seaborn's "hls" palette, scaled to uint8.
    hls_colors = sns.color_palette("hls", len(salinas_label_values) - 1)
    salinas_palette = {0: (0, 0, 0)}
    for label, rgb in enumerate(hls_colors, start=1):
        salinas_palette[label] = tuple(np.asarray(255 * np.array(rgb), dtype='uint8'))

# Reverse lookup (colour -> label) used by convert_from_color.
invert_palette = {color: label for label, color in salinas_palette.items()}

def convert_to_color(x):
    """Run ``utils.convert_to_color_`` on ``x`` with the Salinas palette.

    The palette passed is the module-level ``salinas_palette`` mapping
    (label -> colour). Presumably ``x`` is an array of integer labels and the
    result is a colour image -- confirm against ``utils.convert_to_color_``.
    """
    return utils.convert_to_color_(x, palette=salinas_palette)
def convert_from_color(x):
    """Run ``utils.convert_from_color_`` on ``x`` with the inverted palette.

    The palette passed is the module-level ``invert_palette`` mapping
    (colour -> label). Presumably ``x`` is a colour image and the result is an
    array of labels -- confirm against ``utils.convert_from_color_``.
    """
    return utils.convert_from_color_(x, palette=invert_palette)

Setting up a new session...


In [20]:
def _copy_labels_at(src, dst, coords):
    """Copy the labels of ``src`` into ``dst`` at a list of index tuples."""
    if len(coords) == 0:
        return
    # A *tuple* of per-axis index arrays addresses individual cells. A list of
    # lists is read by current NumPy as a single array index over axis 0,
    # which selects whole rows instead of the intended pixels.
    cells = tuple(np.asarray(coords).T)
    dst[cells] = src[cells]


def _row_split_ratios(mask, fill_value):
    """Return, for each row x, count(mask[:x]) / count(mask[x:]).

    Rows where the denominator is zero keep ``fill_value``; the second return
    value is a boolean array flagging the rows whose ratio is defined.
    """
    row_counts = np.count_nonzero(mask.reshape(mask.shape[0], -1), axis=1)
    above = np.cumsum(row_counts) - row_counts  # cells in rows [0, x)
    below = row_counts.sum() - above            # cells in rows [x, end)
    defined = below > 0
    ratios = np.full(mask.shape[0], fill_value, dtype=float)
    ratios[defined] = above[defined] / below[defined]
    return ratios, defined


def sample_gt(gt, train_size, mode='random'):
    """Split an array of labels into a train part and a test part.

    Label 0 is treated as "unlabelled": it is never sampled and is used as the
    fill value in both outputs.

    Args:
        gt: a 2D array of int labels
        train_size: value handed to the splitter. Values greater than 1 are
            truncated to an int (a sample count for the sklearn-based modes);
            otherwise it is used as a fraction. In the two disjoint modes it
            is compared against the ratio "pixels above the cut / pixels at
            or below the cut" of each class.
        mode: sampling strategy, one of
            'random'          - stratified ``train_test_split`` over all
                                labelled pixels;
            'fixed'           - independent ``train_test_split`` per class;
            'disjoint'        - per class, cut at the first row whose ratio is
                                strictly within +/-10% of ``train_size`` (the
                                last row if none is); rows above the cut go to
                                train, the rest to test;
            'better_disjoint' - per class, cut at the row whose ratio is
                                closest to ``train_size``. Prints the chosen
                                row for every class and, at the end, the ratio
                                table (in percent) of the last class.
    Returns:
        train_gt, test_gt: 2D arrays of int labels
    Raises:
        ValueError: if ``mode`` is not one of the strategies above.
    """
    if train_size > 1:
        train_size = int(train_size)

    if mode == 'random':
        train_gt = np.zeros_like(gt)
        test_gt = np.zeros_like(gt)
        labelled = np.nonzero(gt)
        coords = list(zip(*labelled))    # (row, col) of every labelled pixel
        classes = gt[labelled].ravel()   # matching class of each coordinate
        train_coords, test_coords = sklearn.model_selection.train_test_split(
            coords, train_size=train_size, stratify=classes)
        _copy_labels_at(gt, train_gt, train_coords)
        _copy_labels_at(gt, test_gt, test_coords)
    elif mode == 'fixed':
        print("Sampling {} with train size = {}".format(mode, train_size))
        train_gt = np.zeros_like(gt)
        test_gt = np.zeros_like(gt)
        train_coords, test_coords = [], []
        for c in np.unique(gt):
            if c == 0:
                continue
            coords = list(zip(*np.nonzero(gt == c)))
            train, test = sklearn.model_selection.train_test_split(
                coords, train_size=train_size)
            train_coords += train
            test_coords += test
        _copy_labels_at(gt, train_gt, train_coords)
        _copy_labels_at(gt, test_gt, test_coords)
    elif mode == 'disjoint':
        train_gt = np.copy(gt)
        test_gt = np.copy(gt)
        for c in np.unique(gt):
            mask = gt == c
            ratios, defined = _row_split_ratios(mask, 0.0)
            within = defined & (ratios > 0.9 * train_size) & (ratios < 1.1 * train_size)
            hits = np.flatnonzero(within)
            # No acceptable row: fall back to the last row, as a plain scan
            # that never breaks would.
            x = hits[0] if hits.size else gt.shape[0] - 1
            mask[:x, :] = 0
            train_gt[mask] = 0

        test_gt[train_gt > 0] = 0
    elif mode == 'better_disjoint':
        train_gt = np.copy(gt)
        test_gt = np.copy(gt)
        # Placeholder for rows with an undefined ratio. It is never closer to
        # train_size than the ratio of row 0 (always 0), so it cannot win the
        # argmin below.
        placeholder = 0.0 if train_size > 0.5 else 1.0
        ratios = np.full(gt.shape[0], placeholder)
        for c in np.unique(gt):
            mask = gt == c
            # Rebuilt for every class: reusing one table would let rows with
            # an undefined ratio keep the previous class's values.
            ratios, _ = _row_split_ratios(mask, placeholder)
            idx = (np.abs(ratios - train_size)).argmin()
            mask[:idx, :] = 0
            train_gt[mask] = 0
            print(idx)

        test_gt[train_gt > 0] = 0
        print(ratios*100)
    else:
        raise ValueError("{} sampling is not implemented yet.".format(mode))
    return train_gt, test_gt

In [28]:
# Scratch inputs for stepping through the 'better_disjoint' branch of
# sample_gt by hand.
gt = salinas_gt
train_size = 0.1
indices = np.nonzero(gt)
X = list(zip(*indices)) # x,y features
y = gt[indices].ravel() # classes
if train_size > 1:
    train_size = int(train_size)
# Both parts start as full copies of the labels; pixels are removed later.
# (The zero-filled arrays that used to be created first were overwritten
# immediately and have been dropped.)
train_gt = np.copy(gt)
test_gt = np.copy(gt)
# One slot per image row: zeros when train_size > 0.5, ones otherwise.
ratios = np.full(gt.shape[0], 0.0 if train_size > 0.5 else 1.0)

In [26]:
# Hand-run version of the 'better_disjoint' split, printing the chosen cut row
# and the ratio table (in percent) for every class.
# Start from fresh copies so that re-running this cell gives the same result.
train_gt = np.copy(gt)
test_gt = np.copy(gt)
ratio_fill = 0.0 if train_size > 0.5 else 1.0
for c in np.unique(gt):
    mask = gt == c
    # Fresh table per class: rows skipped below would otherwise keep the
    # ratios computed for the previous class.
    ratios = np.full(gt.shape[0], ratio_fill)
    for x in range(gt.shape[0]):
        first_half_count = np.count_nonzero(mask[:x, :])
        second_half_count = np.count_nonzero(mask[x:, :])
        if second_half_count == 0:
            # No pixel of this class at or below row x: ratio undefined.
            continue
        ratios[x] = first_half_count / second_half_count
    idx = (np.abs(ratios - train_size)).argmin()
    mask[:idx, :] = 0
    train_gt[mask] = 0
    print(idx)
    print(ratios*100)

# train_gt is only final once every class has been cut. Clearing test_gt
# inside the loop would erase the pixels of classes not yet processed.
test_gt[train_gt > 0] = 0

61
[0.00000000e+00 3.82324959e-01 6.76774102e-01 9.55064143e-01
 1.22950092e+00 1.46927872e+00 1.70837945e+00 1.94131329e+00
 2.17165196e+00 2.39751263e+00 2.62067723e+00 2.83739148e+00
 3.04570364e+00 3.24924794e+00 3.44984113e+00 3.65121525e+00
 3.84958898e+00 4.05252392e+00 4.24862313e+00 4.44163367e+00
 4.62959562e+00 4.81245056e+00 4.99594575e+00 5.17425977e+00
 5.35123241e+00 5.51902954e+00 5.68540160e+00 5.83853470e+00
 5.98816876e+00 6.13031816e+00 6.27483166e+00 6.41576391e+00
 6.56105635e+00 6.70274927e+00 6.85083080e+00 6.99530516e+00
 7.13815604e+00 7.27734890e+00 7.41487878e+00 7.55479206e+00
 7.69099913e+00 7.82347041e+00 7.95013168e+00 8.07094082e+00
 8.18380328e+00 8.29484328e+00 8.40404886e+00 8.51347491e+00
 8.62105123e+00 8.72884105e+00 8.83476600e+00 8.94089753e+00
 9.04514919e+00 9.14960057e+00 9.25425224e+00 9.35700576e+00
 9.45995274e+00 9.55888009e+00 9.66009701e+00 9.75727220e+00
 9.86097453e+00 9.96699543e+00 1.00796012e+01 1.01924379e+01
 1.03140489e+01 1.043

310
[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00

In [29]:
# Boolean array that is True wherever `gt` equals 1 (scratch variable).
mask = gt == 1

In [27]:
# Split the Salinas labels with the 'better_disjoint' strategy, then hand each
# part (colourised) to utils.display_predictions together with the visdom client.
part_1, part_2 = sample_gt(salinas_gt, 0.1, mode='better_disjoint')
for caption, part in (
    ("Part 1 ground truth", part_1),
    ("Part 2 ground truth", part_2),
):
    utils.display_predictions(convert_to_color(part), vis, caption=caption)


61
238
195
158
15
17
13
14
120
395
310
310
265
395
395
395
460
[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.0000

In [22]:

# Split part_1 once more with the same strategy, then hand each part
# (colourised) to utils.display_predictions together with the visdom client.
part_3, part_4 = sample_gt(part_1, 0.1, mode='better_disjoint')
for caption, part in (
    ("Part 3 ground truth", part_3),
    ("Part 4 ground truth", part_4),
):
    utils.display_predictions(convert_to_color(part), vis, caption=caption)

61
238
195
158
15
17
13
14
120
395
310
310
265
395
395
395
460
[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.0000