In [44]:
import torch
import time
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import random
from data.swarmset import ContinuingDataset, SwarmDataset
from networks.embedding import NoveltyEmbedding
from networks.archive import DataAggregationArchive
from torchvision.transforms import RandomResizedCrop, RandomHorizontalFlip, RandomVerticalFlip
from networks.ensemble import Ensemble
import numpy as np
from scipy import ndimage
import random
from sklearn.manifold import TSNE
import matplotlib
import matplotlib.pyplot as plt
import cv2

matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

def CSVLineToVec(line):
    line_list = line.strip().replace("\n", "").split(",")
    float_list = []
    for i in line_list:
        float_list.append(float(i))
    float_list = np.array(float_list)
    return float_list

def resizeInput(X, w=200):
    frame = X.astype(np.uint8)
    resized = cv2.resize(frame, dsize=(w, w), interpolation=cv2.INTER_AREA)
    return resized

def translate(img, offset=(10, 10)):
    h, w = img.shape
    xoff, yoff = offset
    if xoff < 0: xpadding = (0, -xoff)
    else: xpadding = (xoff, 0)
    if yoff < 0: ypadding = (0, -yoff)
    else: ypadding = (yoff, 0)
    img = np.pad(img, (xpadding, ypadding))

    if xoff >= 0 and yoff >= 0:
        return img[:w, :w]
    elif xoff < 0 and yoff >= 0:
        return img[-w:, :w]
    elif xoff >= 0 and yoff < 0:
        return img[:w, -w:]
    return img[-w:, -w:]

def zoom_at(img, zoom, coord=None):
    # Adapted from https://stackoverflow.com/questions/69050464/zoom-into-image-with-opencv
    h, w = [ zoom * i for i in img.shape ]
    if coord is None: cx, cy = w/2, h/2
    else: cx, cy = [ zoom*c for c in coord ]
    img = cv2.resize( img, (0, 0), fx=zoom, fy=zoom)
    img = img[ int(round(cy - h/zoom * .5)) : int(round(cy + h/zoom * .5)),
               int(round(cx - w/zoom * .5)) : int(round(cx + w/zoom * .5))]
    return img

def get_color_distortion(X, s=3.0):
    X = X + s * np.random.randn(X.shape[0], X.shape[1])
    return X

def getRandomTransformation(image, k=2):
    transformation_choices = ["Rotation", "Blur", "Zoom", "Translate", "Distort", "ResizedCrop"]
    # weights = [0.4, 0.3, 0.0, 0.2]
    # weights = [1.0, 0.0, 0.0, 0.0]
    # choices = random.choices(transformation_choices, weights, k=k)
    choices = ["ResizedCrop", "Rotation"]
    if "ResizedCrop" in choices:
        tmp = torch.tensor(image).unsqueeze(0)
        flipper = RandomHorizontalFlip(0.5)
        cropper = RandomResizedCrop(size=(50,50), scale=(0.6, 1.0), ratio=(1.0, 1.0))
        image = flipper(cropper(tmp))
        image = image.squeeze(0).numpy()
    if "Rotation" in choices:
        theta = random.choice([90, 180, 270])
        image = ndimage.rotate(image, theta)
    if "Blur" in choices:
        blur = random.choice([0.5, 1.0, 1.5])
        image = ndimage.gaussian_filter(image, sigma=blur)
    if "Zoom" in choices:
        # zoom = random.choice([1.06, 1.12, 1.18])
        padding = random.choice([10])
        padded = np.pad(image, padding, mode='constant')
        image = resizeInput(padded, 50)
    if "Translate" in choices:
        offsets = [i for i in range(-10, 10, 2)]
        offset = (random.choice(offsets), random.choice(offsets))
        # offset = (2, 2)
        image = translate(image, offset)
    if "Distort" in choices:
        strength = random.choice([3.0, 5.0, 10.0])
        image = get_color_distortion(image, s=strength)
    if "Flip" in choices:
        tmp = torch.tensor(image).unsqueeze(0)
        flipper = RandomHorizontalFlip(1.0)
        image = flipper(tmp)
        image = image.squeeze(0).numpy()
    return image

In [51]:
"""
Generate Triplets based off of labeled classes (IID_TRIPLETS) or triplets based off ensemble Queries
"""
HEURISTIC = False
IID_TRIPLETS = True
TRUTH_FILE = "original-hand-labeled-classes.txt" if not HEURISTIC else "NONE"
OUT = "../data/oracle"
classes = [-1 for i in range(200)]
with open(os.path.join(OUT, TRUTH_FILE), "r") as f:
    lines = f.readlines()
    for i, line in enumerate(lines):
        if i > len(classes) - 1:
            break
        triplet = CSVLineToVec(line)
        classes[int(triplet[0])] = int(triplet[1])

triplets = []

if IID_TRIPLETS:
    for i, i_c in enumerate(classes):
        if i_c == 0:
            continue
        for j, j_c in enumerate(classes):
            if j_c != i_c:
                continue
            for k, k_c in enumerate(classes):
                if k_c == i_c or k_c == j_c:
                    continue
                triplets.append((i, j, k))

# Else, use an ensemble to create the triplets.
else:
    print("No Implementation Yet")

print(len(triplets))

127728


In [46]:
for j in triplets[:15]:
    print(j, (classes[j[0]], classes[j[1]], classes[j[2]]))

(1, 1, 0) (1, 1, 0)
(1, 1, 2) (1, 1, 0)
(1, 1, 3) (1, 1, 5)
(1, 1, 4) (1, 1, 0)
(1, 1, 5) (1, 1, 0)
(1, 1, 6) (1, 1, 0)
(1, 1, 7) (1, 1, 0)
(1, 1, 8) (1, 1, 0)
(1, 1, 9) (1, 1, 0)
(1, 1, 10) (1, 1, 0)
(1, 1, 11) (1, 1, 0)
(1, 1, 12) (1, 1, 0)
(1, 1, 14) (1, 1, 4)
(1, 1, 15) (1, 1, 0)
(1, 1, 16) (1, 1, 0)


In [52]:
loss = 50
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
ensemble = Ensemble(size=3, output_size=5, lr=4e-2, weight_decay=0, new_model=True, manual_schedulers=True, init="Random")
ensemble.load_ensemble("../checkpoints/ensembles/01-20-23-baseline", full=True)
sampled_dataset = SwarmDataset("../data/full-mini", rank=0)

def pretraining(data, ensemble, data_cutoff=None, data_size=500):
    if data_cutoff is None:
        data_cutoff = len(data) - 1
    # np.random.seed(0)
    samples = triplets
    total_loss = np.array([0.0 for i in range(len(ensemble.ensemble))])

    BATCH_SIZE = 4096
    total_updates = 0
    total_batches = max(len(samples), data_size) // BATCH_SIZE

    # Batch the data
    for i in range(0, len(samples), BATCH_SIZE):
        # AUGMENT_SIZE = 1
        if i + (BATCH_SIZE) >= len(samples):
            continue

        print(f"Unsupervised Training.. {(total_updates * 100) / total_batches}")

        temp_losses = np.array([0.0 for _ in ensemble.ensemble])

        anchors = np.array([data[samples[i + j][0]][0] for j in range(BATCH_SIZE)])
        positives = np.array([data[samples[i + j][1]][0] for j in range(BATCH_SIZE)])
        negatives = np.array([data[samples[i + j][2]][0] for j in range(BATCH_SIZE)])

        anchors = np.expand_dims(anchors, axis=1)
        positives = np.expand_dims(positives, axis=1)
        negatives = np.expand_dims(negatives, axis=1)

        losses = ensemble.train_batch(anchors, positives, negatives)
        temp_losses += losses

        total_loss += temp_losses
        total_updates += 1

    return total_loss, max(total_updates, 1)

t_1 = time.time()
epochs = 0
loss_history = []
while epochs < 30:
    losses, total_updates = pretraining(sampled_dataset, ensemble, data_cutoff=9999, data_size=(4096 * 3))
    average_loss = losses / total_updates
    locale_loss = sum(average_loss) / len(average_loss)
    loss_history.append(locale_loss)
    loss = (sum(loss_history[-3:]) / 3) if len(loss_history) > 3 else 50
    print(f"Losses: {average_loss}")
    print(f"Epoch {epochs}, loss: {locale_loss}, windowed_loss: {loss}")
    epochs += 1
    ensemble.step_schedulers()

print(f"Total Pre-training Time: {time.time() - t_1}")

cuda
Adjusting learning rate of group 0 to 4.0000e-02.
Adjusting learning rate of group 0 to 4.0000e-02.
Adjusting learning rate of group 0 to 4.0000e-02.
Unsupervised Training.. 0.0
Unsupervised Training.. 3.225806451612903
Unsupervised Training.. 6.451612903225806
Unsupervised Training.. 9.67741935483871
Unsupervised Training.. 12.903225806451612
Unsupervised Training.. 16.129032258064516
Unsupervised Training.. 19.35483870967742
Unsupervised Training.. 22.580645161290324
Unsupervised Training.. 25.806451612903224
Unsupervised Training.. 29.032258064516128
Unsupervised Training.. 32.25806451612903
Unsupervised Training.. 35.483870967741936
Unsupervised Training.. 38.70967741935484
Unsupervised Training.. 41.935483870967744
Unsupervised Training.. 45.16129032258065
Unsupervised Training.. 48.38709677419355
Unsupervised Training.. 51.61290322580645
Unsupervised Training.. 54.83870967741935
Unsupervised Training.. 58.064516129032256
Unsupervised Training.. 61.29032258064516
Unsupervised

In [53]:
ensemble.save_ensemble(f"../checkpoints/ensembles/{int(time.time())}", full=True)