In [1]:
import torch
import time
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import random
from data.swarmset import ContinuingDataset, SwarmDataset
from networks.embedding import NoveltyEmbedding
from networks.archive import DataAggregationArchive
from torchvision.transforms import RandomResizedCrop, RandomHorizontalFlip, RandomVerticalFlip
from networks.ensemble import Ensemble
import numpy as np
from scipy import ndimage
import random
from sklearn.manifold import TSNE
import matplotlib
import matplotlib.pyplot as plt
import cv2

matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

def CSVLineToVec(line):
    line_list = line.strip().replace("\n", "").split(",")
    float_list = []
    for i in line_list:
        float_list.append(float(i))
    float_list = np.array(float_list)
    return float_list

def resizeInput(X, w=200):
    frame = X.astype(np.uint8)
    resized = cv2.resize(frame, dsize=(w, w), interpolation=cv2.INTER_AREA)
    return resized

def translate(img, offset=(10, 10)):
    h, w = img.shape
    xoff, yoff = offset
    if xoff < 0: xpadding = (0, -xoff)
    else: xpadding = (xoff, 0)
    if yoff < 0: ypadding = (0, -yoff)
    else: ypadding = (yoff, 0)
    img = np.pad(img, (xpadding, ypadding))

    if xoff >= 0 and yoff >= 0:
        return img[:w, :w]
    elif xoff < 0 and yoff >= 0:
        return img[-w:, :w]
    elif xoff >= 0 and yoff < 0:
        return img[:w, -w:]
    return img[-w:, -w:]

def zoom_at(img, zoom, coord=None):
    # Adapted from https://stackoverflow.com/questions/69050464/zoom-into-image-with-opencv
    h, w = [ zoom * i for i in img.shape ]
    if coord is None: cx, cy = w/2, h/2
    else: cx, cy = [ zoom*c for c in coord ]
    img = cv2.resize( img, (0, 0), fx=zoom, fy=zoom)
    img = img[ int(round(cy - h/zoom * .5)) : int(round(cy + h/zoom * .5)),
               int(round(cx - w/zoom * .5)) : int(round(cx + w/zoom * .5))]
    return img

def get_color_distortion(X, s=3.0):
    X = X + s * np.random.randn(X.shape[0], X.shape[1])
    return X

def getRandomTransformation(image, k=2):
    transformation_choices = ["Rotation", "Blur", "Zoom", "Translate", "Distort", "ResizedCrop"]
    # weights = [0.4, 0.3, 0.0, 0.2]
    # weights = [1.0, 0.0, 0.0, 0.0]
    # choices = random.choices(transformation_choices, weights, k=k)
    choices = ["ResizedCrop"]
    # choices = []
    if "RandomFlip" in choices:
        tmp = torch.tensor(image).unsqueeze(0)
        flipper_A = RandomHorizontalFlip(0.5)
        flipper_B = RandomVerticalFlip(0.5)
        image = flipper_A(flipper_B(tmp))
        image = image.squeeze(0).numpy()
    if "ResizedCrop" in choices:
        tmp = torch.tensor(image).unsqueeze(0)
        flipper = RandomHorizontalFlip(0.5)
        cropper = RandomResizedCrop(size=(50,50), scale=(0.6, 1.0), ratio=(1.0, 1.0))
        image = flipper(cropper(tmp))
        image = image.squeeze(0).numpy()
    if "Rotation" in choices:
        theta = random.choice([90, 180, 270])
        image = ndimage.rotate(image, theta)
    if "Blur" in choices:
        blur = random.choice([0.5, 1.0, 1.5])
        image = ndimage.gaussian_filter(image, sigma=blur)
    if "Zoom" in choices:
        # zoom = random.choice([1.06, 1.12, 1.18])
        padding = random.choice([10])
        padded = np.pad(image, padding, mode='constant')
        image = resizeInput(padded, 50)
    if "Translate" in choices:
        offsets = [i for i in range(-10, 10, 2)]
        offset = (random.choice(offsets), random.choice(offsets))
        # offset = (2, 2)
        image = translate(image, offset)
    if "Distort" in choices:
        strength = random.choice([3.0, 5.0, 10.0])
        image = get_color_distortion(image, s=strength)
    if "Flip" in choices:
        tmp = torch.tensor(image).unsqueeze(0)
        flipper = RandomHorizontalFlip(1.0)
        image = flipper(tmp)
        image = image.squeeze(0).numpy()
    return image

pygame 2.1.2 (SDL 2.0.16, Python 3.10.6)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
"""
Generate Triplets based off of labeled classes (IID_TRIPLETS) or triplets based off ensemble Queries
"""
FROM_SCRATCH = False
HEURISTIC = True
IID_TRIPLETS = True
TRUTH_FILE = "original-hand-labeled-classes.txt" if not HEURISTIC else "heuristic-simple-model-classes.txt"
DATASET = SwarmDataset("../data/full-mini", rank=0) if not HEURISTIC else SwarmDataset("../data/filtered-full")
ENSEMBLE_PATH = "../checkpoints/ensembles/01-20-23-baseline" if not HEURISTIC else "../checkpoints/ensembles/01-26-23-heuristic-BL-pretraining"
if FROM_SCRATCH:
    ENSEMBLE_PATH = None
OUT = "../data/oracle"
classes = [-1 for i in range(400)]
with open(os.path.join(OUT, TRUTH_FILE), "r") as f:
    lines = f.readlines()
    for i, line in enumerate(lines):
        if i > len(classes) - 1:
            break
        triplet = CSVLineToVec(line)
        classes[int(triplet[0])] = int(triplet[1])

triplets = []

if IID_TRIPLETS:
    for i, i_c in enumerate(classes):
        if i_c == 0:
            continue
        continue_to_top = False
        for j, j_c in enumerate(classes):
            if j_c != i_c:
                continue
            if i == j:
                continue
            for k, k_c in enumerate(classes):
                # if k_c == 0:
                #     continue
                if k_c == i_c or k_c == j_c:
                    continue
                # if i_c == 0:
                #     if not (i, i, k) in triplets:
                #         triplets.append((i, i, k))
                #         continue_to_top = True
                triplets.append((i, j, k))
            if continue_to_top:
                break

# Else, use an ensemble to create the triplets.
else:
    print("No Implementation Yet")

print(len(triplets))

4181320


In [121]:
for j in triplets[:45]:
    print(j, (classes[j[0]], classes[j[1]], classes[j[2]]))

(0, 4, 1) (5, 5, 0)
(0, 4, 2) (5, 5, 4)
(0, 4, 3) (5, 5, 0)
(0, 4, 5) (5, 5, 4)
(0, 4, 6) (5, 5, 4)
(0, 4, 7) (5, 5, 0)
(0, 4, 8) (5, 5, 0)
(0, 4, 9) (5, 5, 1)
(0, 4, 11) (5, 5, 0)
(0, 4, 13) (5, 5, 1)
(0, 4, 14) (5, 5, 3)
(0, 4, 15) (5, 5, 4)
(0, 4, 16) (5, 5, 1)
(0, 4, 17) (5, 5, 0)
(0, 4, 18) (5, 5, 3)
(0, 4, 19) (5, 5, 0)
(0, 4, 20) (5, 5, 2)
(0, 4, 21) (5, 5, 2)
(0, 4, 23) (5, 5, 4)
(0, 4, 24) (5, 5, 1)
(0, 4, 25) (5, 5, 0)
(0, 4, 26) (5, 5, 1)
(0, 4, 27) (5, 5, 1)
(0, 4, 28) (5, 5, 0)
(0, 4, 30) (5, 5, 4)
(0, 4, 31) (5, 5, 0)
(0, 4, 32) (5, 5, 0)
(0, 4, 33) (5, 5, 0)
(0, 4, 34) (5, 5, 0)
(0, 4, 35) (5, 5, 0)
(0, 4, 36) (5, 5, 0)
(0, 4, 37) (5, 5, 0)
(0, 4, 38) (5, 5, 4)
(0, 4, 39) (5, 5, 0)
(0, 4, 40) (5, 5, 0)
(0, 4, 41) (5, 5, 0)
(0, 4, 42) (5, 5, 3)
(0, 4, 43) (5, 5, 1)
(0, 4, 44) (5, 5, 4)
(0, 4, 45) (5, 5, 4)
(0, 4, 46) (5, 5, 0)
(0, 4, 47) (5, 5, 0)
(0, 4, 48) (5, 5, 4)
(0, 4, 49) (5, 5, 1)
(0, 4, 50) (5, 5, 0)


In [None]:
loss = 50
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
ensemble = Ensemble(size=3, output_size=5, lr=3e-2, weight_decay=0, new_model=True, manual_schedulers=True, init="Random")
if ENSEMBLE_PATH is not None:
    ensemble.load_ensemble(ENSEMBLE_PATH, full=True)
sampled_dataset = DATASET

def pretraining(data, ensemble, data_cutoff=None, data_size=500):
    if data_cutoff is None:
        data_cutoff = len(data) - 1
    # np.random.seed(0)
    random.shuffle(triplets)
    samples = triplets[:data_size]
    total_loss = np.array([0.0 for i in range(len(ensemble.ensemble))])

    BATCH_SIZE = 4096
    total_updates = 0
    total_batches = max(len(samples), data_size) // BATCH_SIZE

    # Batch the data
    for i in range(0, len(samples), BATCH_SIZE):
        # AUGMENT_SIZE = 1
        if i + (BATCH_SIZE) >= len(samples):
            continue

        print(f"Unsupervised Training.. {(total_updates * 100) / total_batches}")

        temp_losses = np.array([0.0 for _ in ensemble.ensemble])

        anchors = np.array([data[samples[i + j][0]][0] for j in range(BATCH_SIZE)])

        positives = np.array(
            [
                getRandomTransformation(data[samples[i + j][1]][0]) if samples[i + j][0] != samples[i + j][1] else getRandomTransformation(data[samples[i + j][0]][0])
                for j in range(BATCH_SIZE)
            ]
        )

        negatives = np.array([data[samples[i + j][2]][0] for j in range(BATCH_SIZE)])

        anchors = np.expand_dims(anchors, axis=1)
        positives = np.expand_dims(positives, axis=1)
        negatives = np.expand_dims(negatives, axis=1)

        losses = ensemble.train_batch(anchors, positives, negatives)
        temp_losses += losses

        total_loss += temp_losses
        total_updates += 1

    return total_loss, max(total_updates, 1)

t_1 = time.time()
epochs = 0
loss_history = []
while epochs < 150:
    losses, total_updates = pretraining(sampled_dataset, ensemble, data_cutoff=9999, data_size=(4096 * 8))
    average_loss = losses / total_updates
    locale_loss = sum(average_loss) / len(average_loss)
    loss_history.append(locale_loss)
    loss = (sum(loss_history[-3:]) / 3) if len(loss_history) > 3 else 50
    print(f"Losses: {average_loss}")
    print(f"Epoch {epochs}, loss: {locale_loss}, windowed_loss: {loss}")
    epochs += 1
    ensemble.step_schedulers()
    print(f"LR: {ensemble.get_lr()}")

print(f"Total Pre-training Time: {time.time() - t_1}")

cuda
Adjusting learning rate of group 0 to 3.0000e-04.
Adjusting learning rate of group 0 to 3.0000e-04.
Adjusting learning rate of group 0 to 3.0000e-04.
Unsupervised Training.. 0.0
Unsupervised Training.. 12.5
Unsupervised Training.. 25.0
Unsupervised Training.. 37.5
Unsupervised Training.. 50.0
Unsupervised Training.. 62.5
Unsupervised Training.. 75.0
Losses: [14.23053346 25.89887374 18.78745515]
Epoch 0, loss: 19.638954117184593, windowed_loss: 50
Adjusting learning rate of group 0 to 3.2255e-03.
Adjusting learning rate of group 0 to 3.2255e-03.
Adjusting learning rate of group 0 to 3.2255e-03.
LR: [0.0032255105879601177, 0.0032255105879601177, 0.0032255105879601177]
Unsupervised Training.. 0.0
Unsupervised Training.. 12.5
Unsupervised Training.. 25.0
Unsupervised Training.. 37.5
Unsupervised Training.. 50.0
Unsupervised Training.. 62.5
Unsupervised Training.. 75.0
Losses: [10.30940519 17.10629204 10.44111272]
Epoch 1, loss: 12.618936652228946, windowed_loss: 50
Adjusting learning 

In [140]:
ensemble.save_ensemble(f"../checkpoints/ensembles/{int(time.time())}", full=True)