In [None]:
import torch
import time
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from data.swarmset import ContinuingDataset, SwarmDataset
from src.networks.embedding import NoveltyEmbedding
from src.networks.archive import DataAggregationArchive
from src.networks.ensemble import Ensemble
import numpy as np
from scipy import ndimage
import random
from sklearn.manifold import TSNE
import matplotlib
import matplotlib.pyplot as plot

matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

# Scalar settings. None of them is referenced again in the visible part of
# this file; presumably they are consumed by training cells elsewhere
# (TODO confirm before removing).
PRETRAINING = True
target = 0.01
loss = 50

# Use the GPU when PyTorch reports one as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Ensemble built with new_model=True and init="Random".
# NOTE(review): the meaning of each keyword is defined by
# src.networks.ensemble.Ensemble, which is not visible here.
ensemble = Ensemble(
    size=3,
    output_size=5,
    lr=15e-4,
    learning_decay=0.7,
    decay_step=1,
    threshold=9.0,
    weight_decay=1e-4,
    new_model=True,
    init="Random",
)

"""
BASELINE - Pretraining + HIL.
"""
# Each evaluation set is described by three values: the label file name
# (joined onto the oracle directory when the labels are read), the dataset
# itself, and how many leading samples are embedded and plotted.
VALIDATION_FILE = "validation-data-two-sensor.txt"
VALIDATION_DATA = SwarmDataset("../data/validation-two-sensor-model", rank=0)
VALIDATION_SIZE = 200

TESTING_FILE = "original-hand-labeled-classes.txt"
TESTING_DATA = SwarmDataset("../data/full-mini", rank=0)
TESTING_SIZE = 1000

# Presumably switches every ensemble member to inference mode -- verify
# against Ensemble.eval_mode.
ensemble.eval_mode()

def CSVLineToVec(line):
    """Parse one comma-separated line of numbers into a 1-D float array.

    Leading/trailing whitespace (including the newline) is stripped before
    the line is split on commas.

    Raises:
        ValueError: if any field cannot be converted to ``float``
            (this includes an empty line or a trailing comma).
    """
    return np.array([float(field) for field in line.strip().split(",")])


# RGB colour per class id. Every id that appears in ``label_classes`` has an
# entry here, so a sample labeled 6-9 no longer raises ``KeyError``.
color_classes = {
    -1: [0.33, 0.33, 0.33],
    0: [0.8, 0.8, 0.8],
    1: [0, 1, 0],
    2: [0, 0, 1],
    3: [0, 1, 1],  # Cyan
    4: [1, 1, 0],  # Yellow
    5: [1.0, 0, 0.7],  # Pink
    6: [1.0, 0.5, 0.0],  # Orange
    7: [0.0, 0.0, 0.0],  # Black
    8: [0.5, 0.0, 0.5],  # Purple
    9: [0.6, 0.3, 0.1],  # Brown
}
# Colour used for any class id that has no entry in ``color_classes``.
fallback_color = [0.5, 0.5, 0.5]

# Legend text per class id; -1 marks samples with no row in the label file.
label_classes = {
    -1: "Unlabeled",
    0: "Random",
    1: "Cyclic Pursuit",
    2: "Milling",
    3: "Aggregation",
    4: "Dispersal",
    5: "Wall Following",
    6: "Sub-Orbit",
    7: "None",
    8: "Path",
    9: "Flower"
}

# Directory holding the label files named by TESTING_FILE / VALIDATION_FILE.
OUT = "../data/oracle"

# For each (dataset, label file, sample count) triple: collect the stored
# embedding of the first ``size`` samples, project them to 2-D with t-SNE,
# colour each point by its hand-assigned class and show the scatter plot.
for dataset, label, size in [
    (TESTING_DATA, TESTING_FILE, TESTING_SIZE),
    (VALIDATION_DATA, VALIDATION_FILE, VALIDATION_SIZE),
]:
    sampled_dataset = dataset

    embeddings = []
    for i in range(size):
        # Index the dataset once per sample; element 2 of the returned item
        # is used as the embedding vector.
        embed = sampled_dataset[i][2]
        # Take absolute values into a new list so the object handed back by
        # the dataset is not modified in place.
        # NOTE(review): the original loop also had `if j == -1: embed[j] = 0`,
        # which can never fire inside range(len(embed)). If the intent was to
        # zero out entries *equal to* -1, that has to be added explicitly.
        embeddings.append([abs(value) for value in embed])
    embeddings = np.array(embeddings)

    reduced = TSNE(
        n_components=2,
        learning_rate="auto",
        init="pca",
        perplexity=120,
        early_exaggeration=24
    ).fit_transform(embeddings)

    lim = len(reduced)
    print(lim)
    # Every sample starts as "Unlabeled" (-1) until a label row says otherwise.
    classes = [-1] * lim

    with open(os.path.join(OUT, label), "r") as f:
        lines = f.readlines()
    print(len(lines))

    skipped = 0
    for line in lines:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        triplet = CSVLineToVec(line)
        # Field 0 is the sample index, field 1 the class id.
        index = int(triplet[0])
        if 0 <= index < lim:
            classes[index] = int(triplet[1])
        else:
            # The label file may reference samples beyond the first ``size``
            # that were embedded; those rows cannot be plotted.
            skipped += 1
    if skipped:
        print(f"Skipped {skipped} label rows with sample index outside [0, {lim})")

    plot.xlabel("t-SNE X reduction")
    plot.ylabel("t-SNE Y reduction")
    plot.grid(True)

    # Known classes first (in table order), then any class id found in the
    # label file that the tables do not list, so no labeled point is dropped.
    unlisted = sorted(set(classes) - set(label_classes))
    for key in [*label_classes, *unlisted]:
        members = [i for i in range(lim) if classes[i] == key]
        if not members:
            continue
        x = reduced[members, 0]
        y = reduced[members, 1]
        plot.scatter(
            x,
            y,
            # One colour for the whole group; a tuple is an unambiguous
            # single-RGB spec for matplotlib.
            color=tuple(color_classes.get(key, fallback_color)),
            label=label_classes.get(key, f"Class {key}"),
        )

    plot.legend()
    plot.show()