# Build Human-Created Dataset

In [None]:
import pygame
import time
import torch
import random
import cv2
import numpy as np
from data.swarmset import ContinuingDataset, SwarmDataset
from networks.embedding import NoveltyEmbedding
from networks.archive import DataAggregationArchive
from scipy import ndimage

def resizeInput(X, w=200):
    """Cast ``X`` to ``uint8`` and resize it to a ``w`` x ``w`` image.

    Args:
        X: NumPy array holding the image.
        w: Side length, in pixels, of the square output.

    Returns:
        The array produced by ``cv2.resize`` using ``cv2.INTER_AREA``
        interpolation.
    """
    # The cast is a plain ``astype``; nothing here clips the values first.
    as_uint8 = X.astype(np.uint8)
    return cv2.resize(as_uint8, dsize=(w, w), interpolation=cv2.INTER_AREA)

def translate(img, offset=(10, 10)):
    """Shift a 2-D image by ``offset``, filling the vacated area with zeros.

    The image is zero-padded on the side it is shifted away from and then
    cropped back to its original shape, so content pushed past the border is
    lost.

    Args:
        img: 2-D NumPy array.
        offset: ``(axis0_shift, axis1_shift)``. A positive value moves the
            content toward higher indices along that axis, a negative value
            toward lower indices. Note that the first entry acts on axis 0
            (rows), because that is the first axis ``np.pad`` receives.

    Returns:
        Array with the same shape as ``img``.
    """
    h, w = img.shape
    row_off, col_off = offset

    # Pad *before* the data for a positive shift, *after* it for a negative one.
    row_pad = (row_off, 0) if row_off >= 0 else (0, -row_off)
    col_pad = (col_off, 0) if col_off >= 0 else (0, -col_off)
    padded = np.pad(img, (row_pad, col_pad))

    # Crop back to the original extent on each axis independently. The row
    # crop uses the height (the previous code used the width for both axes,
    # which returned the wrong shape for non-square images).
    rows = slice(None, h) if row_off >= 0 else slice(-h, None)
    cols = slice(None, w) if col_off >= 0 else slice(-w, None)
    return padded[rows, cols]

def zoom_at(img, zoom, coord=None):
    """Scale ``img`` by ``zoom`` and crop a window around ``coord``.

    Adapted from
    https://stackoverflow.com/questions/69050464/zoom-into-image-with-opencv

    Args:
        img: 2-D image array (its shape is unpacked into two values).
        zoom: Scale factor passed to ``cv2.resize`` for both axes.
        coord: Optional ``(x, y)`` centre of the crop in the coordinates of the
            input image; when omitted the centre of the scaled image is used.

    Returns:
        The crop of the scaled image. The window spans ``scaled_size / zoom``
        pixels along each axis before rounding.
    """
    scaled_h, scaled_w = (zoom * side for side in img.shape)
    if coord is not None:
        cx, cy = (zoom * c for c in coord)
    else:
        cx, cy = scaled_w / 2, scaled_h / 2

    # Half extents of the crop window along each axis.
    half_h = scaled_h / zoom * .5
    half_w = scaled_w / zoom * .5

    enlarged = cv2.resize(img, (0, 0), fx=zoom, fy=zoom)
    top, bottom = int(round(cy - half_h)), int(round(cy + half_h))
    left, right = int(round(cx - half_w)), int(round(cx + half_w))
    return enlarged[top:bottom, left:right]

def getRotation(image, index=None):
    """Rotate ``image`` by a multiple of 30 degrees and resize it to 50x50.

    Args:
        image: Image array accepted by ``scipy.ndimage.rotate``.
        index: Position in the angle list ``[30, 60, ..., 330]`` (0-10).
            ``None`` picks an angle at random.

    Returns:
        The rotated image after ``resizeInput(..., 50)``.

    Raises:
        IndexError: If ``index`` is outside the angle list.
    """
    angles = [30 * i for i in range(1, 12)]
    # Compare against None explicitly: a plain truthiness test treated
    # index 0 as "not given" and silently returned a random angle.
    theta = random.choice(angles) if index is None else angles[index]
    rot = ndimage.rotate(image, theta)
    return resizeInput(rot, 50)

def getBlur(image, index=None):
    """Apply a Gaussian blur to ``image``.

    Args:
        image: Array accepted by ``scipy.ndimage.gaussian_filter``.
        index: Position in the sigma list ``[0.5, 1.0, 1.5]`` (0-2).
            ``None`` picks a sigma at random.

    Returns:
        The blurred array returned by ``gaussian_filter``.

    Raises:
        IndexError: If ``index`` is outside the sigma list.
    """
    blurs = [0.5, 1.0, 1.5]
    # Compare against None explicitly: a plain truthiness test treated
    # index 0 as "not given" and silently returned a random sigma.
    blur = random.choice(blurs) if index is None else blurs[index]
    return ndimage.gaussian_filter(image, sigma=blur)

def getZoomOut(image, index=None):
    """Zero-pad ``image`` on every side and resize the result to 50x50.

    Padding and then shrinking makes the original content occupy a smaller
    part of the output.

    Args:
        image: Array accepted by ``np.pad``.
        index: Position in the padding list ``[10, 20, 30]`` (0-2).
            ``None`` picks a padding at random.

    Returns:
        The padded image after ``resizeInput(..., 50)``.

    Raises:
        IndexError: If ``index`` is outside the padding list.
    """
    paddings = [10, 20, 30]
    # Compare against None explicitly: a plain truthiness test treated
    # index 0 as "not given" and silently returned a random padding.
    padding = random.choice(paddings) if index is None else paddings[index]
    padded = np.pad(image, padding, mode='constant')
    return resizeInput(padded, 50)

def getRandomTransformation(image):
    """Apply one randomly selected augmentation to ``image``.

    The kind is drawn with weights 0.4 (rotation), 0.3 (blur), 0.0 (zoom) and
    0.3 (translate), so the zoom branch is currently never selected.

    Args:
        image: 2-D image array.

    Returns:
        The transformed image. Rotation and zoom results go through
        ``resizeInput(..., 50)``; blur and translate results are returned as
        produced.
    """
    kinds = ["Rotation", "Blur", "Zoom", "Translate"]
    kind_weights = [0.4, 0.3, 0.0, 0.3]
    (picked,) = random.choices(kinds, kind_weights, k=1)

    if picked == "Rotation":
        # Any multiple of 30 degrees from 30 to 330.
        angle = random.choice([30 * step for step in range(1, 12)])
        return resizeInput(ndimage.rotate(image, angle), 50)

    if picked == "Blur":
        sigma = random.choice([0.5, 1.0, 1.5])
        return ndimage.gaussian_filter(image, sigma=sigma)

    if picked == "Zoom":
        # Single-element list: the border width is always 10.
        border = random.choice([10])
        return resizeInput(np.pad(image, border, mode='constant'), 50)

    if picked == "Translate":
        # Fixed shift of two pixels on both axes.
        return translate(image, (2, 2))

def rand_shape(scr, index=None):
    """Draw one shape pattern on ``scr`` and return it as a 50x50 image.

    Args:
        scr: pygame surface to draw on. It is filled with black first. All
            coordinates are hard-coded and reach up to 490 pixels.
        index: Position in the pattern list (0 "Cir", 1 "Tri", 2 "Sqr",
            3 "Ellipse", 4 "Random-Dots", 5 "Random-Arcs", 6 "Dispersal",
            7 "Aggregation"). ``None`` picks a pattern at random.

    Returns:
        ``(image, label)`` where ``image`` is ``resizeInput`` applied to
        ``pygame.surfarray.array2d(scr)`` and ``label`` is the class id of the
        pattern. "Random-Dots" and "Random-Arcs" share class 4.

    Raises:
        IndexError: If ``index`` is outside the pattern list.
    """
    WHITE = (255, 255, 255)
    scr.fill((0, 0, 0))
    AGENTS = 24
    choices = ["Cir", "Tri", "Sqr", "Ellipse", "Random-Dots", "Random-Arcs", "Dispersal", "Aggregation"]
    classes = [0, 1, 2, 3, 4, 4, 5, 6]
    if index is not None:
        choice = choices[index]
    else:
        choice = random.choice(choices)

    # Every draw call targets the `scr` argument. Several branches previously
    # drew on the module-level `screen`, which only worked when the caller
    # happened to pass that same surface.
    if choice == "Cir":
        r = 100
        pygame.draw.circle(scr, WHITE, (200, 200), r, 3)
    elif choice == "Tri":
        pygame.draw.polygon(scr, WHITE, [[250, 20], [20, 480], [480, 480]], 3)
    elif choice == "Sqr":
        rect = pygame.Rect(100, 100, 300, 300)
        pygame.draw.rect(scr, WHITE, rect, 3)
    elif choice == "Ellipse":
        rect = pygame.Rect(100, 150, 300, 200)
        pygame.draw.ellipse(scr, WHITE, rect, 3)
    elif choice == "Random-Dots":
        for _ in range(AGENTS):
            x, y = random.randint(10, 490), random.randint(10, 490)
            r = 10
            # Width 0 draws a filled circle.
            pygame.draw.circle(scr, WHITE, [x, y], r, 0)
    elif choice == "Random-Arcs":
        for _ in range(AGENTS):
            x, y = random.randint(10, 490), random.randint(10, 490)
            rect = pygame.Rect(x, y, 75, 75)
            start_angle = random.random() * np.pi * 2
            # Each arc spans a third of a full turn.
            end_angle = start_angle + (2 * np.pi / 3)
            pygame.draw.arc(scr, WHITE, rect, start_angle, end_angle, 3)
    elif choice == "Dispersal":
        for _ in range(AGENTS):
            # One candidate pinned to x = 15 or 485, one pinned to y = 15 or
            # 485; one of the two is then chosen.
            on_x_edge = (random.choice([15, 485]), random.randint(15, 485))
            on_y_edge = (random.randint(15, 485), random.choice([15, 485]))
            x, y = random.choice([on_x_edge, on_y_edge])
            r = 17
            pygame.draw.circle(scr, WHITE, [x, y], r, 3)
    elif choice == "Aggregation":
        for _ in range(AGENTS):
            x, y = (random.randint(175, 225), random.randint(175, 225))
            r = 25
            pygame.draw.circle(scr, WHITE, [x, y], r, 3)
    else:
        raise Exception("Bad Shape!")

    pygame.display.flip()
    out = pygame.surfarray.array2d(scr)
    _class = classes[choices.index(choice)]
    return resizeInput(out, 50), _class

# Number of iterations run by each of the two generation loops below.
SAMPLES = 8
# NOTE(review): NUM_CREATIONS is not referenced anywhere in the visible source.
NUM_CREATIONS = 8
# Open a 500x500 pygame window; this surface is what gets passed to rand_shape.
pygame.init()
pygame.display.set_caption("Evolutionary Novelty Search")
screen = pygame.display.set_mode((500, 500))

# Dataset object that receives every generated sample via new_sample().
# presumably "data/human-mini" is its on-disk location; verify against SwarmDataset.
dataset = SwarmDataset("data/human-mini")

# NOTE(review): `time` is already imported at the top of this cell.
import time

# Generate "Of interest" First
for i in range(SAMPLES):
    # Indices 4 and 5 are the "Random-Dots"/"Random-Arcs" patterns in rand_shape; they are skipped here.
    if i == 4 or i == 5: continue
    original, shape = rand_shape(screen, index=i)
    dataset.new_sample(original, [shape], [shape])
    # NOTE(review): `rot` is computed but never passed to dataset.new_sample, so
    # no rotated samples are stored. Confirm whether that is intentional.
    for j in range(11):
        newA, _ = rand_shape(screen, index=i)
        rot = getRotation(newA, index=j)
    # One blurred and one zoomed-out sample per strength level (three levels each).
    for j in range(3):
        newB, _ = rand_shape(screen, index=i)
        newC, _ = rand_shape(screen, index=i)
        blurred = getBlur(newB, index=j)
        z_out = getZoomOut(newC, index=j)
        # NOTE(review): `original` is stored again on each of the three passes,
        # in addition to the copy stored above.
        dataset.new_sample(original, [shape], [shape])
        dataset.new_sample(blurred, [shape], [shape])
        dataset.new_sample(z_out, [shape], [shape])
    # Translations over the grid of offsets {-8, -4, 0, 4, 8}^2, excluding (0, 0).
    for j in range(-8, 9, 4):
        for k in range(-8, 9, 4):
            if j == 0 and k == 0: continue
            new, _ = rand_shape(screen, index=i)
            translation = translate(new, offset=(j, k))
            dataset.new_sample(translation, [shape], [shape])

    # Progress is computed from the index of the iteration that just finished, so it never prints 100%.
    print(f"{i} - {(i*100) / SAMPLES}% Complete")

# Generate "Out of Interest" Next
for i in range(SAMPLES):
    # Alternate between the two random patterns of rand_shape: index 4
    # ("Random-Dots") on even iterations, index 5 ("Random-Arcs") on odd ones.
    rand_index = 4 if i % 2 == 0 else 5
    original, shape = rand_shape(screen, index=rand_index)
    dataset.new_sample(original, [shape], [shape])

    # Every augmentation below is drawn from the same random pattern as
    # `original`. The loops used `index=i` before, which drew the circle,
    # triangle, square, ... shapes and stored them under the random-pattern
    # label held in `shape`.

    # NOTE(review): `rot` is computed but never passed to dataset.new_sample, so
    # no rotated samples are stored. Confirm whether that is intentional.
    for j in range(11):
        newA, _ = rand_shape(screen, index=rand_index)
        rot = getRotation(newA, index=j)
    # One blurred and one zoomed-out sample per strength level (three levels each).
    for j in range(3):
        newB, _ = rand_shape(screen, index=rand_index)
        newC, _ = rand_shape(screen, index=rand_index)
        blurred = getBlur(newB, index=j)
        z_out = getZoomOut(newC, index=j)
        dataset.new_sample(original, [shape], [shape])
        dataset.new_sample(blurred, [shape], [shape])
        dataset.new_sample(z_out, [shape], [shape])
    # Translations over the grid of offsets {-8, -4, 0, 4, 8}^2, excluding (0, 0).
    for j in range(-8, 9, 4):
        for k in range(-8, 9, 4):
            if j == 0 and k == 0: continue
            new, _ = rand_shape(screen, index=rand_index)
            translation = translate(new, offset=(j, k))
            dataset.new_sample(translation, [shape], [shape])

    print(f"{i} - {(i*100) / SAMPLES}% Complete")

# Pretraining

In [None]:
# Save Model
import torch
import time
import random
from data.swarmset import ContinuingDataset, SwarmDataset
from networks.embedding import NoveltyEmbedding
from networks.archive import DataAggregationArchive
from networks.ensemble import Ensemble
import numpy as np
from scipy import ndimage
import cv2

# Gate for the pre-training loop further down in this cell.
PRETRAINING = True
# The pre-training loop keeps running while the windowed loss is above this value.
target = 0.0005
# Initial value, above `target`, so the loop is entered at least once.
loss = 50
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Three-member ensemble. The meaning of the hyperparameters is defined by
# networks.ensemble.Ensemble and is not visible here.
ensemble = Ensemble(size=3, output_size=5, lr_series=[30e-4, 30e-4, 30e-4], learning_decay=0.9, decay_step=4, threshold=9.0, weight_decay=1e-4, new_model=True)
# NOTE(review): the ensemble is built with new_model=True and then immediately
# loaded from "human-mini-E"; confirm which of the two is meant to win.
ensemble.load_ensemble("human-mini-E")
sampled_dataset = SwarmDataset("data/human-mini", rank=0)

# Group dataset indices by class label. Each item is indexed as
# sampled_dataset[i][1][0] to obtain its label; labels 0-6 are expected.
class_distinction = {i:[] for i in range(7)}
for i in range(len(sampled_dataset)):
    _class = sampled_dataset[i][1][0]
    class_distinction[_class].append(i)

# Triplets are [anchor, positive, negative] lists of dataset indices.
triplets = []
# Hashable mirror of `triplets`, so the duplicate check is a set lookup
# instead of a scan over the whole list.
seen_triplets = set()

# Triplets anchored on the random-pattern class: negatives come from every
# other non-empty class.
RAND_CLASS = 4
rand_members = class_distinction[RAND_CLASS]
for j in range(len(rand_members)):
    anchor = rand_members[j]
    for _ in range(3): # Select 3 positive examples and move on
        # Positives are drawn from position j onward, so the anchor itself may be chosen.
        k = random.randint(j, len(rand_members) - 1)
        pos = rand_members[k]
        for l in range(len(class_distinction)):
            # Skip the anchor's own class. This used to compare the class id
            # with the sample position `j`, which skipped an unrelated class
            # and let negatives be drawn from the anchor's own class.
            if l == RAND_CLASS: continue
            if not class_distinction[l]: continue
            for _ in range(5): # Select 5 negative examples and move on
                m = random.randint(0, len(class_distinction[l]) - 1)
                neg = class_distinction[l][m]
                key = (anchor, pos, neg)
                if key not in seen_triplets:
                    seen_triplets.add(key)
                    triplets.append([anchor, pos, neg])

# Triplets anchored on every other class: negatives come only from classes
# with a higher id than the anchor's class.
for i in class_distinction:
    if i == RAND_CLASS: continue
    members = class_distinction[i]
    for j in range(len(members)):
        anchor = members[j]
        for _ in range(3): # Select 3 positive examples and move on
            k = random.randint(j, len(members) - 1)
            pos = members[k]
            for l in range(i + 1, len(class_distinction)):
                if not class_distinction[l]: continue
                for _ in range(3): # Select 3 negative examples and move on
                    m = random.randint(0, len(class_distinction[l]) - 1)
                    neg = class_distinction[l][m]
                    key = (anchor, pos, neg)
                    if key not in seen_triplets:
                        seen_triplets.add(key)
                        triplets.append([anchor, pos, neg])

print(len(triplets))
# Keep a random subset of at most 2000 triplets.
random.shuffle(triplets)
triplets = triplets[:2000]

def pretraining(data, ensemble, data_cutoff=None, data_size=500, triplet_list=None):
    """Run one pass of triplet training over a random subset of the triplets.

    The triplet list is shuffled in place and only its first ``data_size``
    entries are considered. Batch start positions are drawn from a shuffled
    list of those positions, so the windows of one pass may overlap.

    Args:
        data: Indexable dataset; ``data[k][0]`` must be the image of sample
            ``k``.
        ensemble: Object with an ``ensemble`` sequence (one entry per member)
            and a ``train_batch(anchors, positives, negatives)`` method whose
            result can be added to an array with one slot per member.
        data_cutoff: Unused; kept for call compatibility.
        data_size: Maximum number of triplets considered in this pass.
        triplet_list: List of ``[anchor, positive, negative]`` dataset
            indices. Defaults to the module-level ``triplets``.

    Returns:
        ``(total_loss, total_updates)``: the summed per-member losses as a
        NumPy array and the number of batches trained. ``total_updates`` can
        be 0 when there are fewer triplets than one batch needs.
    """
    pool = triplets if triplet_list is None else triplet_list
    BATCH_SIZE = 3
    # Number of times each triplet is repeated inside a batch.
    AUGMENT_SIZE = 1

    total_loss = np.zeros(len(ensemble.ensemble))
    total_updates = 0

    random.shuffle(pool)
    pull_set = list(range(min(len(pool), data_size)))
    random.shuffle(pull_set)
    for index in range(0, len(pull_set), BATCH_SIZE):
        i = pull_set[index]
        if total_updates % 20 == 0:
            # Progress relative to the triplets actually in use, which can be
            # fewer than data_size.
            print(f"Unsupervised Training.. {(total_updates * BATCH_SIZE * 100) / len(pull_set)}")

        # The batch reads positions i .. i + BATCH_SIZE - 1; skip start
        # positions whose window would run past the subset.
        if i + BATCH_SIZE > len(pull_set):
            continue

        # BATCH_SIZE consecutive triplets, each repeated AUGMENT_SIZE times.
        # The index used to be `i + (j % AUGMENT_SIZE)`, which is always `i`
        # for AUGMENT_SIZE == 1 and filled the batch with copies of a single
        # triplet.
        batch = [pool[i + j // AUGMENT_SIZE] for j in range(AUGMENT_SIZE * BATCH_SIZE)]

        # Stack the images and insert a new axis at position 1.
        anchors = np.expand_dims(np.array([data[t[0]][0] for t in batch]), axis=1)
        positives = np.expand_dims(np.array([data[t[1]][0] for t in batch]), axis=1)
        negatives = np.expand_dims(np.array([data[t[2]][0] for t in batch]), axis=1)

        total_loss += ensemble.train_batch(anchors, positives, negatives)
        total_updates += 1

    return total_loss, total_updates

# Wall-clock start, used for the total-time report printed after the loop.
t_1 = time.time()
if PRETRAINING:
    epochs = 0
    loss_history = []
    # Repeat training passes until the windowed loss is no longer above `target`.
    while loss > target:
        losses, total_updates = pretraining(sampled_dataset, ensemble, data_cutoff=None, data_size=1000)
        # NOTE(review): if a pass performs no updates, total_updates is 0 and this divides by zero.
        average_loss = losses / total_updates
        lr = ensemble.evaluate_lr(average_loss)
        # Mean over the entries of average_loss (one entry per ensemble member).
        locale_loss = sum(average_loss) / len(average_loss)
        loss_history.append(locale_loss)
        # Mean of the last three passes. It stays at 50 until the history holds
        # more than three entries, i.e. the window is first used on the fourth pass.
        loss = (sum(loss_history[-3:]) / 3) if len(loss_history) > 3 else 50
        print(f"LR: {lr}")
        print(f"Losses: {average_loss}")
        print(f"Epoch {epochs}, loss: {locale_loss}, windowed_loss: {loss}")
        epochs += 1

print(f"Total Pre-training Time: {time.time() - t_1}")
# The save name is the current Unix timestamp in whole seconds.
ensemble.save_ensemble(f"{int(time.time())}")

In [None]:
# Save the current ensemble under a name equal to the Unix timestamp in whole seconds.
ensemble.save_ensemble(f"{int(time.time())}")

# Supervised Learning

In [None]:
import torch
import time
from data.swarmset import ContinuingDataset, SwarmDataset
from networks.embedding import NoveltyEmbedding
from networks.archive import DataAggregationArchive
from networks.ensemble import Ensemble
import numpy as np
from scipy import ndimage
import random
# Dataset indices grouped by class label; only labels 0, 1 and 2 are expected
# (any other label raises KeyError in the grouping loop below).
classification_set = {
    0 : [],
    1 : [],
    2 : []
}

# NOTE(review): PRETRAINING is not read anywhere in this cell.
PRETRAINING = True
# The training loop below keeps running while the mean loss is above this value.
target = 0.01
# Initial value, above `target`, so the loop is entered at least once.
loss = 50
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Three-member ensemble. The meaning of the hyperparameters is defined by
# networks.ensemble.Ensemble and is not visible here.
ensemble = Ensemble(size=3, output_size=5, lr_series=[15e-4, 15e-4, 15e-4], learning_decay=0.9, decay_step=4, threshold=6.0, weight_decay=1e-4)
ensemble.load_ensemble("toy-surgery")
sampled_dataset = SwarmDataset("data/toy", rank=0)
# Alias used by the batching code in the training loop.
data = sampled_dataset

# Separate: group dataset indices by class label (sampled_dataset[i][1][0]).
for i in range(len(sampled_dataset)):
    _class = sampled_dataset[i][1][0]
    classification_set[_class].append(i)

# Pair Up: sample [anchor, positive, negative] triplets of dataset indices.
SAMPLES = 3000
triplets = []
# Hashable mirror of `triplets` for constant-time duplicate checks.
seen_triplets = set()

# Only classes that actually have samples can supply anchors or negatives.
class_ids = [c for c, members in classification_set.items() if members]
if len(class_ids) < 2:
    raise ValueError("Need samples from at least two classes to build triplets")

for i in range(SAMPLES):
    classA = random.choice(class_ids)
    # The negative must come from a different class than the anchor. Both
    # classes used to be drawn independently, so about a third of the
    # triplets had a "negative" from the anchor's own class.
    classB = random.choice([c for c in class_ids if c != classA])

    anchor = random.choice(classification_set[classA])
    positive = random.choice(classification_set[classA])
    negative = random.choice(classification_set[classB])
    triplet = [anchor, positive, negative]
    key = (anchor, positive, negative)
    if key not in seen_triplets:
        seen_triplets.add(key)
        triplets.append(triplet)

# Training: repeat epochs until the mean loss is no longer above `target`.
BATCH_SIZE = 5
EPOCH_DATA_LIM = 500
while loss > target:
    total_updates = 0
    total_loss = np.zeros(len(ensemble.ensemble))
    # Each epoch trains on a fresh random subset of at most EPOCH_DATA_LIM triplets.
    random.shuffle(triplets)
    temp_triplets = triplets[:EPOCH_DATA_LIM]
    for i in range(0, len(temp_triplets), BATCH_SIZE):
        if total_updates % 10 == 0:
            print(f"Unsupervised Training.. {(total_updates * BATCH_SIZE * 100) / len(temp_triplets)}")

        # Drop a trailing partial batch. The bound is the epoch subset that
        # the batch is read from, not the full triplet list; checking the full
        # list would let the reads below run past the end of the subset.
        if i + BATCH_SIZE > len(temp_triplets):
            break

        batch = temp_triplets[i:i + BATCH_SIZE]
        anchors = np.array([data[t[0]][0] for t in batch])
        positives = np.array([data[t[1]][0] for t in batch])
        negatives = np.array([data[t[2]][0] for t in batch])

        # Insert a new axis at position 1 of each stacked image array.
        anchors = np.expand_dims(anchors, axis=1)
        positives = np.expand_dims(positives, axis=1)
        negatives = np.expand_dims(negatives, axis=1)

        losses = ensemble.train_batch(anchors, positives, negatives)
        total_loss += losses
        total_updates += 1

    # Without this check an epoch with no full batch would divide by zero,
    # turn the loss into NaN and leave the loop as if training had converged.
    if total_updates == 0:
        raise RuntimeError("No full batch available: fewer triplets than BATCH_SIZE")

    l = total_loss / total_updates
    lr = ensemble.evaluate_lr(l)
    loss = sum(l) / len(l)
    print(f"Losses: {l}, LR: {lr}, Loss: {loss}")

print("Complete!")
# ensemble.save_ensemble(f"{int(time.time())}")

In [None]:
# Save the current ensemble under a name equal to the Unix timestamp in whole seconds.
ensemble.save_ensemble(f"{int(time.time())}")

# Visualize Embeddings with Classes

In [None]:
import torch
import time
from data.swarmset import ContinuingDataset, SwarmDataset
from networks.embedding import NoveltyEmbedding
from networks.archive import DataAggregationArchive
from networks.ensemble import Ensemble
import numpy as np
from scipy import ndimage
import random
from sklearn.manifold import TSNE

PRETRAINING = True
# Index of the ensemble member whose embeddings are visualized.
ENSEMBLE_MEMBER = 1
target = 0.01
loss = 50
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
ensemble = Ensemble(size=3, output_size=5, lr_series=[15e-4, 15e-4, 15e-4], learning_decay=0.7, decay_step=1, threshold=9.0, weight_decay=1e-4, new_model=True)
ensemble.load_ensemble("human-mini-G")
# ensemble.load_ensemble("tiny-toy-C")
# ensemble.load_ensemble("toy-HIL-forced")
ensemble.eval_mode()
sampled_dataset = SwarmDataset("data/human-mini", rank=0)
data = sampled_dataset

# Embed every sample with the selected member and record its class label.
embeddings = []
classes = []
member = ensemble.ensemble[ENSEMBLE_MEMBER]
# Pure inference: no gradients are needed, so autograd bookkeeping is
# switched off for the whole loop.
with torch.no_grad():
    for i in range(len(data)):
        # Fetch the sample once instead of indexing the dataset twice.
        sample = data[i]
        image, _class = sample[0], sample[1][0]
        # Add a leading axis before handing the image to the network.
        image = np.expand_dims(image, axis=0)
        embed = member.forward(torch.tensor(image, device=device, dtype=torch.float))
        embed = embed.detach().cpu().squeeze(dim=0).numpy()
        embeddings.append(embed)
        classes.append(_class)

embeddings = np.array(embeddings)

In [None]:
# Project the embeddings to 2-D with t-SNE for plotting.
# NOTE(review): perplexity=40 presumably requires more than 40 samples in
# `embeddings` - confirm against the scikit-learn version in use.
reduced = TSNE(
    n_components=2,
    learning_rate="auto",
    init="pca",
    perplexity=40,
    early_exaggeration=1
).fit_transform(embeddings)

In [None]:
import matplotlib.pyplot as plot

# RGB colour per class label (-1 is reserved for unlabeled samples).
color_classes = {
    -1: [0.33, 0.33, 0.33],
    0: [1, 0, 0], # Red
    1: [0, 1, 0], # Green
    2: [0, 0, 1], # Blue
    3: [0, 1, 1], # Cyan
    4: [1, 1, 0], # Yellow
    5: [0.5, 0, 0.25], # Pink
    6: [0.75, 0.325, 0.0] # Brown
}

# Legend text per class label.
label_classes = {
    -1: "Unlabeled",
    0: "Cyclic",
    1: "Triangle",
    2: "Square",
    3: "Ellipse",
    4: "Random-Dots",
    5: "Dispersal",
    6: "Aggregation"
}

lim = len(reduced)
x = [reduced[i][0] for i in range(lim)]
y = [reduced[i][1] for i in range(lim)]
colors = [color_classes[classes[i]] for i in range(lim)]
labels = [label_classes[classes[i]] for i in range(lim)]
plot.grid(True)
# Draw one scatter per class so that each class contributes a labelled legend
# entry. A single unlabelled scatter left plot.legend() with nothing to show.
for class_id in sorted(set(classes[:lim])):
    members = [i for i in range(lim) if classes[i] == class_id]
    plot.scatter(
        [x[i] for i in members],
        [y[i] for i in members],
        color=color_classes[class_id],
        label=label_classes[class_id],
    )
plot.legend()

In [None]:
import os

def CSVLineToVec(line):
    """Parse one comma-separated line of numbers into a NumPy float array.

    Args:
        line: Text such as ``"1,2,3\\n"``. Surrounding whitespace is stripped
            and any remaining newline characters are removed before splitting
            on commas.

    Returns:
        1-D NumPy array holding each field converted with ``float``.

    Raises:
        ValueError: If a field cannot be converted to ``float`` (this includes
            an empty line).
    """
    cleaned = line.strip().replace("\n", "")
    return np.array([float(field) for field in cleaned.split(",")])

# RGB colour per class label for the three-class plot below.
# NOTE(review): this rebinds the palette defined in the previous cell; labels
# other than 0-2 in `classes` will raise KeyError when looked up here.
color_classes = {
    0: [1, 0, 0],
    1: [0, 1, 0],
    2: [0, 0, 1],
}

# Display name per class label.
label_classes = {
    0: "Circles",
    1: "Triangles",
    2: "Squares",
}

# Load the triplets stored as comma-separated lines of three integers.
triples = []
ensemble.eval_mode()
OUT = "data/toy-oracle"
with open(os.path.join(OUT, "triplets.txt"), "r") as f:
    for line in f:
        # A blank line (for example at the end of the file) carries no triplet
        # and cannot be parsed, so it is skipped.
        if not line.strip():
            continue
        triplet = CSVLineToVec(line)
        triples.append([int(triplet[0]), int(triplet[1]), int(triplet[2])])

# Triplet singled out for highlighting (see the commented-out lines below).
target_triplet = triples[13]
# Plot at most 500 points, but never more than exist; a fixed 500 raised
# IndexError whenever fewer embeddings were available.
lim = min(500, len(reduced))
x = [reduced[i][0] for i in range(lim)]
y = [reduced[i][1] for i in range(lim)]
colors = [color_classes[classes[i]] for i in range(lim)]
labels = [label_classes[classes[i]] for i in range(lim)]

# colors[target_triplet[0]] = [0, 0, 0]
# colors[target_triplet[1]] = [0, 0, 0]
# colors[target_triplet[2]] = [0.2, 0, 0]

plot.grid(True)
plot.scatter(x, y, c=colors)