In [1]:
import torch
import time
import os
import sys
import random
from data.swarmset import ContinuingDataset, SwarmDataset
from src.networks.embedding import NoveltyEmbedding
from src.networks.archive import DataAggregationArchive
from torchvision.transforms import RandomResizedCrop, RandomHorizontalFlip, RandomVerticalFlip
from src.networks.ensemble import Ensemble
import numpy as np
from scipy import ndimage
import random
from sklearn.manifold import TSNE
import matplotlib
import matplotlib.pyplot as plt
import cv2

matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

def CSVLineToVec(line):
    line_list = line.strip().replace("\n", "").split(",")
    float_list = []
    for i in line_list:
        float_list.append(float(i))
    float_list = np.array(float_list)
    return float_list

def resizeInput(X, w=200):
    frame = X.astype(np.uint8)
    resized = cv2.resize(frame, dsize=(w, w), interpolation=cv2.INTER_AREA)
    return resized

def translate(img, offset=(10, 10)):
    h, w = img.shape
    xoff, yoff = offset
    if xoff < 0: xpadding = (0, -xoff)
    else: xpadding = (xoff, 0)
    if yoff < 0: ypadding = (0, -yoff)
    else: ypadding = (yoff, 0)
    img = np.pad(img, (xpadding, ypadding))

    if xoff >= 0 and yoff >= 0:
        return img[:w, :w]
    elif xoff < 0 and yoff >= 0:
        return img[-w:, :w]
    elif xoff >= 0 and yoff < 0:
        return img[:w, -w:]
    return img[-w:, -w:]

def zoom_at(img, zoom, coord=None):
    # Adapted from https://stackoverflow.com/questions/69050464/zoom-into-image-with-opencv
    h, w = [ zoom * i for i in img.shape ]
    if coord is None: cx, cy = w/2, h/2
    else: cx, cy = [ zoom*c for c in coord ]
    img = cv2.resize( img, (0, 0), fx=zoom, fy=zoom)
    img = img[ int(round(cy - h/zoom * .5)) : int(round(cy + h/zoom * .5)),
               int(round(cx - w/zoom * .5)) : int(round(cx + w/zoom * .5))]
    return img

def get_color_distortion(X, s=3.0):
    X = X + s * np.random.randn(X.shape[0], X.shape[1])
    return X

def getRandomFlip(X):
    tmp = torch.tensor(X).unsqueeze(0)
    flipper_A = RandomHorizontalFlip(0.5)
    flipper_B = RandomVerticalFlip(0.5)
    image = flipper_A(flipper_B(tmp))
    image = image.squeeze(0).numpy()
    return image

def getRandomTransformation(image, k=2):
    transformation_choices = ["Rotation", "Blur", "Zoom", "Translate", "Distort", "ResizedCrop"]
    # weights = [0.4, 0.3, 0.0, 0.2]
    # weights = [1.0, 0.0, 0.0, 0.0]
    # choices = random.choices(transformation_choices, weights, k=k)
    choices = ["ResizedCrop"]
    # choices = []
    if "RandomFlip" in choices:
        image = getRandomFlip(image)
    if "ResizedCrop" in choices:
        tmp = torch.tensor(image).unsqueeze(0)
        flipper = RandomHorizontalFlip(0.5)
        cropper = RandomResizedCrop(size=(50,50), scale=(0.6, 1.0), ratio=(1.0, 1.0))
        image = flipper(cropper(tmp))
        image = image.squeeze(0).numpy()
    if "Rotation" in choices:
        theta = random.choice([90, 180, 270])
        image = ndimage.rotate(image, theta)
    if "Blur" in choices:
        blur = random.choice([0.5, 1.0, 1.5])
        image = ndimage.gaussian_filter(image, sigma=blur)
    if "Zoom" in choices:
        # zoom = random.choice([1.06, 1.12, 1.18])
        padding = random.choice([10])
        padded = np.pad(image, padding, mode='constant')
        image = resizeInput(padded, 50)
    if "Translate" in choices:
        offsets = [i for i in range(-10, 10, 2)]
        offset = (random.choice(offsets), random.choice(offsets))
        # offset = (2, 2)
        image = translate(image, offset)
    if "Distort" in choices:
        strength = random.choice([3.0, 5.0, 10.0])
        image = get_color_distortion(image, s=strength)
    if "Flip" in choices:
        tmp = torch.tensor(image).unsqueeze(0)
        flipper = RandomHorizontalFlip(1.0)
        image = flipper(tmp)
        image = image.squeeze(0).numpy()
    return image

pygame 2.1.2 (SDL 2.0.16, Python 3.10.6)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [32]:
"""
Generate Triplets based off of labeled classes (IID_TRIPLETS) or triplets based off ensemble Queries
"""
FROM_SCRATCH = False
HEURISTIC = False
IID_TRIPLETS = False
EIGHT_FOR_EIGHT_TRIPLETS = True
TWO_SENSOR = False

if TWO_SENSOR:
    TRUTH_FILE = "gecco-two-sensor-classes.txt" if not HEURISTIC else "heuristic-two-sensor.txt"
    DATASET = SwarmDataset("../data/gecco-two-sensor", rank=0) if not HEURISTIC else SwarmDataset("../data/gecco-filtered-two-sensor")
    ENSEMBLE_PATH = "../checkpoints/ensembles/01-28-23-2S-Pre-B" if not HEURISTIC else "../checkpoints/ensembles/01-30-23-2S-Heur-Pre-B"
else:
    TRUTH_FILE = "original-hand-labeled-classes.txt" if not HEURISTIC else "heuristic-simple-model-classes.txt"
    DATASET = SwarmDataset("../data/full-mini", rank=0) if not HEURISTIC else SwarmDataset("../data/filtered-full")
    ENSEMBLE_PATH = "../checkpoints/ensembles/01-20-23-baseline" if not HEURISTIC else "../checkpoints/ensembles/01-26-23-heuristic-BL-pretraining"

if FROM_SCRATCH:
    ENSEMBLE_PATH = None
OUT = "../data/oracle"
classes = [-1 for i in range(500)]
with open(os.path.join(OUT, TRUTH_FILE), "r") as f:
    lines = f.readlines()
    for i, line in enumerate(lines):
        if i > len(classes) - 1:
            break
        triplet = CSVLineToVec(line)
        classes[int(triplet[0])] = int(triplet[1])

triplets = []

if IID_TRIPLETS:
    for i, i_c in enumerate(classes):
        if i_c == 0:
            continue
        continue_to_top = False
        for j, j_c in enumerate(classes):
            if j_c != i_c:
                continue
            if i == j:
                continue
            for k, k_c in enumerate(classes):
                if k_c == 0:
                    continue
                if k_c == i_c or k_c == j_c:
                    continue
                # if i_c == 0:
                #     if not (i, i, k) in triplets:
                #         triplets.append((i, i, k))
                #         continue_to_top = True
                triplets.append((i, j, k))
            if continue_to_top:
                break

elif EIGHT_FOR_EIGHT_TRIPLETS:
    for m in range(0, len(classes), 8):
        if m + 8 > len(classes):
            continue
        temp_classes = classes[m:m+8]
        for i, i_c in enumerate(temp_classes):
            continue_to_top = False
            for j, j_c in enumerate(temp_classes):
                if j_c != i_c:
                    continue
                if i == j:
                    continue
                for k, k_c in enumerate(temp_classes):
                    if k_c == i_c or k_c == j_c:
                        continue
                    if not (m + i, m + j, m + k) in triplets:
                        triplets.append((m + i, m + j, m + k))

# Else, use an ensemble to create the triplets.
else:
    print("No Implementation Yet")

print(len(triplets))

3204


In [33]:
random.shuffle(triplets)
for j in triplets[:300]:
    print(j, (classes[j[0]], classes[j[1]], classes[j[2]]))

(15, 8, 13) (0, 0, 1)
(80, 87, 83) (4, 4, 1)
(245, 243, 240) (0, 0, 2)
(131, 128, 132) (2, 2, 0)
(425, 427, 431) (0, 0, 4)
(217, 218, 219) (0, 0, 5)
(245, 247, 244) (0, 0, 4)
(415, 414, 411) (0, 0, 2)
(28, 31, 27) (0, 0, 4)
(241, 245, 240) (0, 0, 2)
(31, 25, 29) (0, 0, 4)
(253, 254, 252) (0, 0, 4)
(114, 117, 116) (0, 0, 2)
(340, 336, 339) (0, 0, 5)
(132, 134, 131) (0, 0, 2)
(378, 379, 376) (0, 0, 4)
(121, 122, 120) (0, 0, 1)
(125, 124, 120) (0, 0, 1)
(152, 156, 158) (0, 0, 1)
(336, 341, 342) (0, 0, 5)
(24, 28, 29) (0, 0, 4)
(111, 105, 106) (0, 0, 2)
(125, 122, 120) (0, 0, 1)
(304, 308, 309) (2, 2, 0)
(356, 353, 355) (0, 0, 2)
(347, 345, 349) (0, 0, 2)
(382, 379, 376) (0, 0, 4)
(459, 456, 461) (0, 0, 5)
(110, 105, 106) (0, 0, 2)
(306, 305, 304) (0, 0, 2)
(369, 370, 372) (0, 0, 1)
(358, 356, 355) (0, 0, 2)
(276, 279, 272) (0, 0, 2)
(9, 8, 13) (0, 0, 1)
(235, 237, 232) (0, 0, 4)
(296, 303, 298) (0, 0, 4)
(446, 441, 440) (0, 0, 3)
(437, 432, 439) (0, 0, 4)
(257, 260, 262) (0, 0, 2)
(203, 2

In [34]:
loss = 50
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
ensemble = Ensemble(size=3, output_size=5, lr=1e-2, weight_decay=0, new_model=True, dynamic_lr=True, manual_schedulers=True, init="Random")
if ENSEMBLE_PATH is not None:
    ensemble.load_ensemble(ENSEMBLE_PATH, full=True)
sampled_dataset = DATASET

def pretraining(data, ensemble, data_cutoff=None, data_size=500):
    if data_cutoff is None:
        data_cutoff = len(data) - 1
    # np.random.seed(0)
    random.shuffle(triplets)
    samples = triplets[:data_size]
    total_loss = np.array([0.0 for i in range(len(ensemble.ensemble))])

    BATCH_SIZE = 4096
    total_updates = 0
    total_batches = max(len(samples), data_size) // BATCH_SIZE

    # Batch the data
    for i in range(0, len(samples), BATCH_SIZE):
        # AUGMENT_SIZE = 1
        if i + (BATCH_SIZE) >= len(samples):
            continue

        print(f"Unsupervised Training.. {(total_updates * 100) / total_batches}")

        temp_losses = np.array([0.0 for _ in ensemble.ensemble])

        anchors = np.array([data[samples[i + j][0]][0] for j in range(BATCH_SIZE)])

        pretraining = random.random() < 0.6
        if pretraining:
            positives = np.array(
                [
                    getRandomTransformation(data[samples[i + j][0]][0]) for j in range(BATCH_SIZE)
                ]
            )
        else:
            positives = np.array(
                [
                    getRandomFlip(data[samples[i + j][1]][0]) for j in range(BATCH_SIZE)
                ]
            )

        negatives = np.array([data[samples[i + j][2]][0] for j in range(BATCH_SIZE)])

        anchors = np.expand_dims(anchors, axis=1)
        positives = np.expand_dims(positives, axis=1)
        negatives = np.expand_dims(negatives, axis=1)

        losses = ensemble.train_batch(anchors, positives, negatives)
        temp_losses += losses

        total_loss += temp_losses
        total_updates += 1

    return total_loss, max(total_updates, 1)

t_1 = time.time()
epochs = 0
loss_history = []
while epochs < 300:
    losses, total_updates = pretraining(sampled_dataset, ensemble, data_cutoff=9999, data_size=(4096 * 12))
    average_loss = losses / total_updates
    locale_loss = sum(average_loss) / len(average_loss)
    loss_history.append(locale_loss)
    loss = (sum(loss_history[-3:]) / 3) if len(loss_history) > 3 else 50
    print(f"Losses: {average_loss}")
    print(f"Epoch {epochs}, loss: {locale_loss}, windowed_loss: {loss}")
    epochs += 1
    ensemble.step_schedulers(losses)
    print(f"LR: {ensemble.get_lr()}")

print(f"Total Pre-training Time: {time.time() - t_1}")

cuda
Losses: [0. 0. 0.]
Epoch 0, loss: 0.0, windowed_loss: 50
LR: [0.01, 0.01, 0.01]
Losses: [0. 0. 0.]
Epoch 1, loss: 0.0, windowed_loss: 50
LR: [0.01, 0.01, 0.01]
Losses: [0. 0. 0.]
Epoch 2, loss: 0.0, windowed_loss: 50
LR: [0.01, 0.01, 0.01]
Losses: [0. 0. 0.]
Epoch 3, loss: 0.0, windowed_loss: 0.0
LR: [0.01, 0.01, 0.01]
Losses: [0. 0. 0.]
Epoch 4, loss: 0.0, windowed_loss: 0.0
LR: [0.01, 0.01, 0.01]
Losses: [0. 0. 0.]
Epoch 5, loss: 0.0, windowed_loss: 0.0
LR: [0.01, 0.01, 0.01]
Losses: [0. 0. 0.]
Epoch 6, loss: 0.0, windowed_loss: 0.0
LR: [0.01, 0.01, 0.01]
Losses: [0. 0. 0.]
Epoch 7, loss: 0.0, windowed_loss: 0.0
LR: [0.01, 0.01, 0.01]
Losses: [0. 0. 0.]
Epoch 8, loss: 0.0, windowed_loss: 0.0
LR: [0.01, 0.01, 0.01]
Losses: [0. 0. 0.]
Epoch 9, loss: 0.0, windowed_loss: 0.0
LR: [0.01, 0.01, 0.01]
Losses: [0. 0. 0.]
Epoch 10, loss: 0.0, windowed_loss: 0.0
LR: [0.01, 0.01, 0.01]
Losses: [0. 0. 0.]
Epoch 11, loss: 0.0, windowed_loss: 0.0
LR: [0.01, 0.01, 0.01]
Losses: [0. 0. 0.]
Epoch

In [46]:
ensemble.save_ensemble(f"../checkpoints/ensembles/{int(time.time())}", full=True)