In [None]:
# import time
#
# from data.swarmset import SwarmDataset, DataBuilder
#
# baseline_data = DataBuilder("data/full", steps=1200, agents=24)
# baseline_data.create()
# baseline_data.evolution.close()

# Embed into Latent Space

In [13]:
import torch
from data.swarmset import ContinuingDataset, SwarmDataset
from networks.embedding import NoveltyEmbedding
from generation.evolution import ModifiedHaltingEvolution
from networks.archive import DataAggregationArchive
from hil.HIL import HIL

def update_network(network, loss_fn, optim, anchor_image, pos_image, neg_image):
    optim.zero_grad()
    anchor_out, pos_out, neg_out = network.network_with_transforms(anchor_image, pos_image, neg_image)
    loss = loss_fn(anchor_out, pos_out, neg_out)
    loss.backward()
    optim.step()
    return loss

def pretraining(data, network, loss_fn, optim, data_size=500):
    samples = np.random.random_integers(0, len(data) - 1, (data_size, 2))
    total_loss = 0
    total_updates = 0
    for i, s in enumerate(samples):
        if i % 50 == 0:
            print(f"Unsupervised Training.. {(i * 100) / data_size}")
        anchor_image = data[s[0]][0]
        pos_image = data[s[0]][0]


        if len(data) - 2 > s[1] > 2:
            neg_images = [
                # data[s[1] - 2][0],
                # data[s[1] - 1][0],
                data[s[1]][0],
                # data[s[1] + 1][0],
                # data[s[1] + 2][0],
            ]
        else:
            neg_images = [
                data[s[1]][0]
            ]

        for neg in neg_images:
            loss = update_network(network, loss_fn, optim, anchor_image, pos_image, neg)
            total_loss += loss.item()
            total_updates += 1

    return total_loss / total_updates


def human_in_the_loop(anchor_dataset, network, optimizer, loss_fn, HIL_archive, random_archive, stop_at):
    print("HIL TIME!")
    improvements, human_loss, triplet_helpfulness, embedded_archive = hil.humanInput(anchor_dataset, network, optimizer, loss_fn, HIL_archive, random_archive, stop_at)
    print(f"Improvement Count: {improvements}, loss: {human_loss}")

    HIL_archive.save_to_file(f"data/queries/{trial_name}_hil.csv")
    random_archive.save_to_file(f"data/queries/{trial_name}_rand.csv")
    return improvements, human_loss, triplet_helpfulness, embedded_archive

In [3]:
import torch
from data.swarmset import ContinuingDataset, SwarmDataset
from networks.embedding import NoveltyEmbedding
from generation.evolution import ModifiedHaltingEvolution
from networks.archive import DataAggregationArchive
from hil.HIL import HIL

import numpy as np
import time
from scipy import ndimage
from torch.utils.tensorboard import SummaryWriter

trial_name = f"{str(int(time.time()))}"

TRAIN = True
CLUSTER_AND_DISPLAY = True
WRITE_OUT = False
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

writer = SummaryWriter()
network = NoveltyEmbedding().to(device)

SAVE_CLUSTER_IMAGES = True
SAVE_CLUSTER_MEDOIDS = True
PRETRAINING = True
HUMAN_IN_LOOP = True
SYNTHETIC_HIL = True
EVOLUTION = True
EPOCHS = 20
DATA_SIZE = 10000
EVOLUTIONS_PER_EPOCH = 3

anchor_dataset = ContinuingDataset("data")
sampled_dataset = SwarmDataset("data/full", rank=0)
evolution, _ = ModifiedHaltingEvolution.defaultEvolver(steps=800, n_agents=24, evolve_population=100, seed=None)
optimizer = torch.optim.Adam(network.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=8, gamma=0.8)

# Margin was 10
loss_fn = torch.nn.TripletMarginLoss(margin=15)
hil = HIL(name=trial_name, synthetic=SYNTHETIC_HIL, data_limiter=DATA_SIZE)
HIL_archive = DataAggregationArchive()
random_archive = DataAggregationArchive(scalar=True)
EPSILON = 0.5

simulation_time = 0
evolution_time = 0
training_time = 0
hil_time = 0

# TODO: Add Randomly-sampled Contrastive/Triplet Loss
if PRETRAINING:
    network.load_model("unsupervised_decay_0.7_target_0.08")
    print("Pretrained model loaded!")

dataset = anchor_dataset if EVOLUTION else sampled_dataset

if TRAIN:
    STOP_FLAG = False
    for epoch in range(EPOCHS):
        if STOP_FLAG:
            break


        # Record the accuracy of the medoids with respect to the synthetic policy
        medoid_acc, cluster_acc = 0, 0
        if len(dataset) > 0 and SAVE_CLUSTER_MEDOIDS:
            if EVOLUTION:
                hil.synthetic_knowledge = hil.syntheticBehaviorSpace(dataset)
                print(f"Synthetic Human Knowledge Size: {len(hil.synthetic_knowledge.labels_)}")
            medoid_acc, cluster_acc = hil.record_medoids(network, dataset)

        # Cluster current dataset, display clusters, and save for analysis
        if len(dataset) > 0 and  SAVE_CLUSTER_IMAGES:
            hil.embed_and_cluster(network, dataset, auto_quit=True)
            evolution.restart_screen()
        start_time = time.time()

        if EVOLUTION:
            for gen in range(EVOLUTIONS_PER_EPOCH):
                # Simulate current population + Save Data
                for i in range(len(evolution.getPopulation())):
                    # The collection of the original behavior vector below is only used to collect data to compare with the baseline
                    visual_behavior, genome, baseline_behavior = evolution.next()
                    dataset.new_entry(visual_behavior, genome, baseline_behavior)
                simulation_time += (time.time() - start_time)

                # Then, evolve
                start_time = time.time()
                embedded_archive = hil.getEmbeddedArchive(dataset, network)
                evolution.overwriteArchive(embedded_archive, random_archive)
                embedded_behavior = embedded_archive.archive[-evolution.evolve_config.population:]
                evolution.overwriteBehavior(embedded_behavior)
                evolution.evolve()
                evolution.restart_screen()
                evolution_time += (time.time() - start_time)

        start_time = time.time()
        # Human in the Loop determines behavior embedding
        if HUMAN_IN_LOOP:
            improvements, human_loss, triplet_helpfulness, _ =   human_in_the_loop(
                                                                dataset,
                                                                network,
                                                                optimizer,
                                                                loss_fn,
                                                                HIL_archive,
                                                                random_archive,
                                                                len(dataset)
                                                            )
            hil_time += (time.time() - start_time)

        # Train on past user information
        start_time = time.time()
        anchor_list, pos_list, neg_list = None, None, None
        loss = None

        average_loss = 50
        loop = 0
        TRANSFORMS = 10
        network.train()
        while (average_loss > 0.05) and loop < 20:

            loss_sum = 0
            total_loss = 0

            for i, (anchor, pos, neg) in enumerate(HIL_archive):
                anchor_encoding = np.expand_dims(dataset[anchor][0], axis=0)
                similar_encoding = np.expand_dims(dataset[pos][0], axis=0)
                anti_encoding = np.expand_dims(dataset[neg][0], axis=0)

                if anchor_list is None:
                    anchor_list = np.array([anchor_encoding])
                    pos_list = np.array([similar_encoding])
                    neg_list = np.array([anti_encoding])

                else:
                    anchor_list = np.concatenate((anchor_list, np.array([anchor_encoding])))
                    pos_list = np.concatenate((pos_list, np.array([similar_encoding])))
                    neg_list = np.concatenate((neg_list, np.array([anti_encoding])))

                optimizer.zero_grad()

                for i in range(TRANSFORMS):
                    anchor_out, pos_out, neg_out = network.network_with_transforms(anchor_list, pos_list, neg_list)
                    loss = loss_fn(anchor_out, pos_out, neg_out)
                    loss_sum += loss.item()
                    if loss.item() > 0:
                        loss.backward()
                        optimizer.step()
                    total_loss += 1

                anchor_list, pos_list, neg_list = None, None, None

                if loss is not None and i > 0 and i % 50 == 0:
                    print(f"Epoch Progress: {(i*100) / len(HIL_archive)}%, Immediate Loss: {loss.item()}")

            average_loss = loss_sum / (total_loss + 1)
            print(f"Loop: {loop}, Average Loss: {average_loss}")
            loop += 1

        print(f"Epoch: {epoch}")
        training_time += (time.time() - start_time)

        writer.add_scalar("Loss/Average", loss_sum / (total_loss + 1), epoch)

        if SAVE_CLUSTER_MEDOIDS:
            writer.add_scalar("Accuracy/MedoidClassification", medoid_acc, epoch)
            writer.add_scalar("Accuracy/RandomSampleClassification", cluster_acc, epoch)

        if HUMAN_IN_LOOP:
            writer.add_scalar("Loss/TripletQuality", triplet_helpfulness, epoch)
            writer.add_scalar("Queries/Total_Human_Queries", epoch*8*9, epoch)
            writer.add_scalar("Queries/Total_Triplets_Generated", len(HIL_archive), epoch)
            writer.add_scalar("Queries/Total_Random_Classes", len(random_archive), epoch)

        if EVOLUTION:
            writer.add_scalar("Novelty/Highest", evolution.behavior_discovery.getBestScore(), epoch)
            writer.add_scalar("Novelty/Average", evolution.behavior_discovery.getAverageScore(), epoch)

        writer.add_scalar("Time/Simulation", simulation_time, epoch)
        writer.add_scalar("Time/HIL", hil_time, epoch)
        writer.add_scalar("Time/Evolution", evolution_time, epoch)
        writer.add_scalar("Time/Training", training_time, epoch)

        scheduler.step()

evolution.close()

HIL Init!
Pretrained model loaded!


KeyboardInterrupt: 

# Pretraining

In [None]:
# Save Model
target = 0.08
loss = 50
network = NoveltyEmbedding().to(device)
optimizer = torch.optim.Adam(network.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.9)
if PRETRAINING:
    epochs = 0
    while loss > target:
        loss = pretraining(sampled_dataset, network, loss_fn, optimizer, data_size=600)
        print(f"Epoch {epochs}, loss: {loss}")
        epochs += 1
        scheduler.step()

network.save_model()

  samples = np.random.random_integers(0, len(data) - 1, (data_size, 2))


Unsupervised Training.. 0.0
Unsupervised Training.. 8.333333333333334
Unsupervised Training.. 16.666666666666668
Unsupervised Training.. 25.0
Unsupervised Training.. 33.333333333333336
Unsupervised Training.. 41.666666666666664
Unsupervised Training.. 50.0
Unsupervised Training.. 58.333333333333336
Unsupervised Training.. 66.66666666666667
Unsupervised Training.. 75.0
Unsupervised Training.. 83.33333333333333
Unsupervised Training.. 91.66666666666667
Epoch 0, loss: 13.88220010439555
Unsupervised Training.. 0.0
Unsupervised Training.. 8.333333333333334
Unsupervised Training.. 16.666666666666668
Unsupervised Training.. 25.0
Unsupervised Training.. 33.333333333333336
Unsupervised Training.. 41.666666666666664
Unsupervised Training.. 50.0
Unsupervised Training.. 58.333333333333336
Unsupervised Training.. 66.66666666666667
Unsupervised Training.. 75.0
Unsupervised Training.. 83.33333333333333
Unsupervised Training.. 91.66666666666667
Epoch 1, loss: 11.486022001902262
Unsupervised Training..

In [None]:
print(optimizer.lr)

In [None]:
import time
import torch
from data.swarmset import SwarmDataset, DataBuilder
from networks.embedding import NoveltyEmbedding
from NovelSwarmBehavior.novel_swarms.novelty.NoveltyArchive import NoveltyArchive
from NovelSwarmBehavior.novel_swarms.config.ResultsConfig import ResultsConfig
from NovelSwarmBehavior.novel_swarms.results.results import main as results
from NovelSwarmBehavior.novel_swarms.config.defaults import ConfigurationDefaults
from data.swarmset import SwarmDataset, DataBuilder

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
network = NoveltyEmbedding().to(device)
network.load_model("trialA-10-18-2022")
# anchor_dataset = SwarmDataset("data/full", rank=0)

WRITE_OUT = True
if WRITE_OUT:
    network.eval()
    test_archive = NoveltyArchive()
    for i in range(len(anchor_dataset)):
        anchor_encoding, genome, _ = anchor_dataset[i]
        anchor_encoding = torch.from_numpy(anchor_encoding).to(device).float()
        embedding = network(anchor_encoding.unsqueeze(0)).squeeze(0).cpu().detach().numpy()
        test_archive.addToArchive(embedding, genome)

In [None]:
from ui.clustering_gui import ClusteringGUI
import pygame

agent_config = ConfigurationDefaults.DIFF_DRIVE_AGENT
world_config = ConfigurationDefaults.RECTANGULAR_WORLD
world_config.addAgentConfig(agent_config)
config = ResultsConfig(archive=test_archive, k_clusters=6, world_config=world_config, tsne_perplexity=16, tsne_early_exaggeration=12, skip_tsne=False)
gui = ClusteringGUI(config)
gui.displayGUI()
# results(config)
pygame.quit()

# Clustering + Analysis

In [None]:
from ui.clustering_gui import ClusteringGUI

# Cluster over saved behaviors
archive = NoveltyArchive()
for i, (_, genome, behavior, _, _, _, _) in enumerate(anchor_dataset):
    archive.addToArchive(vec=behavior, genome=genome)

agent_config = ConfigurationDefaults.DIFF_DRIVE_AGENT
world_config = ConfigurationDefaults.RECTANGULAR_WORLD
world_config.addAgentConfig(agent_config)
config = ResultsConfig(archive=archive, k_clusters=7, world_config=world_config, tsne_perplexity=20, tsne_early_exaggeration=2, skip_tsne=False)

gui = ClusteringGUI(config)
gui.displayGUI()

In [None]:
import matplotlib.pyplot as plt

plt.plot(human_l_hist, "b", label='Human Loss')
plt.ylabel("Loss")
plt.xlabel("Time (Epochs)")
plt.legend()