In [98]:
import torch
import time
import os
import sys
# Put the parent of the current working directory on sys.path (only once) so
# the project-local imports below can be resolved. This must run BEFORE those
# imports. Presumably `data/` and `networks/` live in that parent directory --
# verify against the repository layout.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Project-local modules (resolved through the sys.path entry added above).
from data.swarmset import ContinuingDataset, SwarmDataset
from networks.embedding import NoveltyEmbedding
from networks.archive import DataAggregationArchive
from networks.ensemble import Ensemble
import numpy as np
from scipy import ndimage
import random
from sklearn.manifold import TSNE
import matplotlib
import matplotlib.pyplot as plt

# Font type 42 makes matplotlib embed TrueType fonts in saved PDF/PS figures
# instead of its default Type 3 fonts.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

def CSVLineToVec(line):
    """Convert one comma-separated line of numbers into a 1-D NumPy array.

    Leading/trailing whitespace and any newline characters are removed, the
    remainder is split on commas, and each field is converted with ``float``.

    Args:
        line: A single line of text such as ``"3,1,0.5\\n"``.

    Returns:
        numpy.ndarray: The parsed values, one element per comma-separated field.

    Raises:
        ValueError: If any field (including an empty one) is not a valid float.
    """
    cleaned = line.strip().replace("\n", "")
    return np.array([float(field) for field in cleaned.split(",")])

In [106]:
# For single Sensor Baseline Model
# TRUTH_FILE = "validation-data-two-sensor.txt"
# TRUTH_FILE = "validation-data-baseline.txt"

# Exactly one of the configuration blocks below should be active at a time.

# --- BASELINE - Pretraining only. No HIL. ---
VALIDATION_FILE = "validation-data-baseline.txt"
VALIDATION_DATA = SwarmDataset("../data/validation-easy-model", rank=0)
TESTING_FILE = "original-hand-labeled-classes.txt"
TESTING_DATA = SwarmDataset("../data/full-mini", rank=0)
ENSEMBLE_PATH = "../checkpoints/ensembles/01-20-23-baseline"

# --- BASELINE - Pretraining + HIL. ---
# VALIDATION_FILE = "validation-data-baseline.txt"
# VALIDATION_DATA = SwarmDataset("../data/validation-easy-model", rank=0)
# TESTING_FILE = "original-hand-labeled-classes.txt"
# TESTING_DATA = SwarmDataset("../data/full-mini", rank=0)
# ENSEMBLE_PATH = "../checkpoints/ensembles/01-24-23-baseline-HIL-B"


# Directory holding the label files named by VALIDATION_FILE / TESTING_FILE.
OUT = "../data/oracle"


def load_class_labels(path):
    """Read a label file into a list of class ids indexed by sample id.

    Every non-blank line is parsed with ``CSVLineToVec``. The first field of a
    line is the sample index and the second field is that sample's class.
    Blank lines are skipped so a trailing empty line does not abort the load.

    Args:
        path: Path of the comma-separated label file.

    Returns:
        list[int]: One slot per non-blank line. ``labels[i]`` is the class of
        sample ``i``; a slot that no line refers to keeps the placeholder -1.

    Raises:
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a line has fewer than two fields, or a sample index
            does not fit in the list.
    """
    with open(path, "r") as f:
        rows = [CSVLineToVec(line) for line in f if line.strip()]

    labels = [-1] * len(rows)
    for row in rows:
        labels[int(row[0])] = int(row[1])
    return labels


validation_classes = load_class_labels(os.path.join(OUT, VALIDATION_FILE))
testing_classes = load_class_labels(os.path.join(OUT, TESTING_FILE))

# (sample index, class) pairs, in index order.
testing_set = list(enumerate(testing_classes))
validation_set = list(enumerate(validation_classes))

print(len(validation_set), len(testing_set))

200 1000


In [107]:
# Load the saved ensemble and switch it to evaluation mode.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
ensemble = Ensemble(size=3, output_size=5, lr=15e-4, learning_decay=0.7, decay_step=1, threshold=9.0, weight_decay=1e-4, new_model=True, init="Random")
ensemble.load_ensemble(ENSEMBLE_PATH, full=True)
ensemble.eval_mode()

# Per ensemble member, scoring stops as soon as more than this many triplets
# have been counted (i.e. after MAX_SEARCH + 1 triplets).
MAX_SEARCH = 30000

# Triplet accuracy: for an anchor x, a same-class sample y and a different-class
# sample z, the triplet is "correct" when x is embedded closer to y than to z.
for metric in ["TRAIN", "VAL"]:
    # NOTE(review): the "TRAIN" label reports on TESTING_DATA / testing_set --
    # confirm that this label is intended.
    a = []  # accuracy (percent) of each ensemble member for this metric
    sampled_dataset = TESTING_DATA if metric == "TRAIN" else VALIDATION_DATA
    c_set = testing_set if metric == "TRAIN" else validation_set
    classes = testing_classes if metric == "TRAIN" else validation_classes
    print("=" * 20)
    print(f"{metric} results")
    for i in range(len(ensemble.ensemble)):
        # Embed every labelled sample with ensemble member i. Gradients are not
        # needed for evaluation, so skip building the autograd graph.
        embedded_positions = []
        with torch.no_grad():
            for j in range(len(classes)):
                # Index the dataset once per sample; only element 0 is used.
                image = np.expand_dims(sampled_dataset[j][0], axis=0)
                embed = ensemble.ensemble[i].forward(torch.tensor(image, device=device, dtype=torch.float))
                embed = embed.detach().cpu().squeeze(dim=0).numpy()
                embedded_positions.append(embed)

        # Evaluate Accuracy
        correct, total = 0, 0
        for x, _classX in c_set:
            for y, _classY in c_set:
                # A positive pair needs two distinct samples of the same class.
                if x == y or _classX != _classY:
                    continue
                # If _classX and _classY are both random, ignore.
                if _classX == 0 and _classY == 0:
                    continue
                # The anchor-positive distance does not depend on z; compute it once.
                positive_dist = np.linalg.norm(embedded_positions[x] - embedded_positions[y])
                for z, _classZ in c_set:
                    # The negative must be a third sample from another class.
                    if x == z or y == z or _classZ == _classX:
                        continue
                    negative_dist = np.linalg.norm(embedded_positions[x] - embedded_positions[z])
                    if positive_dist < negative_dist:
                        correct += 1
                    total += 1
                    if total > MAX_SEARCH:
                        break
                if total > MAX_SEARCH:
                    break
            if total > MAX_SEARCH:
                break

        # With no valid triplet there is nothing to score; report NaN instead of
        # failing with a ZeroDivisionError.
        acc = correct * 100 / total if total else float("nan")
        a.append(acc)
        print(f"Ensemble {i} ~ Accuracy: {acc}")

    # Average over the members actually evaluated rather than a hard-coded 3.
    average = sum(a) / len(a) if a else float("nan")
    print(f"Average: {average}")
    print("=" * 20)

Adjusting learning rate of group 0 to 1.5000e-03.
Adjusting learning rate of group 0 to 1.5000e-03.
Adjusting learning rate of group 0 to 1.5000e-03.
TRAIN results
Ensemble 0 ~ Accuracy: 45.72514249525016
Ensemble 1 ~ Accuracy: 55.99146695110163
Ensemble 2 ~ Accuracy: 77.08076397453418
Average: 59.59912447362865
VAL results
Ensemble 0 ~ Accuracy: 80.05399820006
Ensemble 1 ~ Accuracy: 78.75404153194893
Ensemble 2 ~ Accuracy: 79.02736575447486
Average: 79.2784684954946
