# Generate Random Object Sorting Tasks

In [1]:
import os

import numpy as np

In [2]:
def create_sorting_dataset(vocab_size, dim, seqs_length, n_seqs):
    """Generate a random object-sorting dataset.

    Each of the `vocab_size` objects gets a random `dim`-dimensional feature
    vector. Sequences of `seqs_length` distinct object ids are then sampled,
    de-duplicated, and paired with their sorted order and argsort.

    Parameters
    ----------
    vocab_size : int
        Number of distinct objects (object ids are `0 .. vocab_size - 1`).
    dim : int
        Dimension of each object's feature vector.
    seqs_length : int
        Number of distinct objects in every sequence.
    n_seqs : int
        Number of sequences to sample. The returned arrays can have fewer
        rows, because duplicate sequences are dropped.

    Returns
    -------
    objects : ndarray, shape (vocab_size, dim)
        Feature vectors drawn from a standard normal distribution.
    seqs : ndarray, shape (n_unique, seqs_length)
        Unique sequences of object ids, in the order they were first drawn.
    sorted_seqs : ndarray, shape (n_unique, seqs_length)
        Each row of `seqs` sorted in ascending order of object id.
    arg_sort : ndarray, shape (n_unique, seqs_length + 1)
        The start token followed by the positions that sort each row.
    object_seqs : ndarray, shape (n_unique, seqs_length, dim)
        Feature vectors of the objects in each sequence.
    start_token : int
        Value prepended to every argsort row; equal to `seqs_length`, i.e.
        one past the largest valid position.
    """
    # generate random features for each object
    objects = np.random.normal(size=(vocab_size, dim))

    # sample `n_seqs` sequences of `seqs_length` distinct ids out of `vocab_size`
    seqs = np.array([
        np.random.choice(range(vocab_size), size=seqs_length, replace=False)
        for _ in range(n_seqs)
    ])

    # remove duplicate seqs (although very unlikely for large vocabularies).
    # `return_index` gives the first occurrence of every unique row; sorting
    # those indices keeps the surviving rows in their original sampling order.
    _, first_occurrence_idxs = np.unique(seqs, axis=0, return_index=True)
    seqs = seqs[np.sort(first_occurrence_idxs)]

    # create object sequences
    object_seqs = objects[seqs]

    sorted_seqs = np.sort(seqs, axis=1)
    arg_sort = np.argsort(seqs, axis=1)

    # add the start token to the beginning of every argsort row; it is derived
    # from the function's own arguments so no module-level state is required
    start_token = seqs_length
    start_tokens = np.full((len(arg_sort), 1), start_token, dtype=arg_sort.dtype)
    arg_sort = np.hstack([start_tokens, arg_sort])

    return objects, seqs, sorted_seqs, arg_sort, object_seqs, start_token

## Task 1

In [3]:
# dataset 1
vocab_size = 64
dim = 32
seqs_length = 10
# start-token value: one past the largest valid position in a sequence
START_TOKEN = seqs_length
n_seqs = 100_000

objects, seqs, sorted_seqs, arg_sort, object_seqs, start_token = create_sorting_dataset(vocab_size, dim, seqs_length, n_seqs)

# `arg_sort` rows start with the start token: `target` drops the last column,
# `labels` drops the first one, so labels[:, i] is the entry following target[:, i]
target = arg_sort[:, :-1]
labels = arg_sort[:, 1:]

data = {
    'objects': objects, 'seqs': seqs, 'sorted_seqs': sorted_seqs, 'arg_sort': arg_sort,
    'object_seqs': object_seqs, 'target': target, 'labels': labels, 'start_token': start_token
    }

# `np.save` does not create missing parent directories, so make sure it exists
os.makedirs('object_sorting_datasets', exist_ok=True)
# the dict is stored as a pickled object array; read it back with
# `np.load(..., allow_pickle=True).item()`
np.save('object_sorting_datasets/task1_object_sort_dataset.npy', data)

## Task 2

In [4]:
# dataset 2 (same parameters, just re-generate objects randomly)
vocab_size = 64
dim = 32
seqs_length = 10
# start-token value: one past the largest valid position in a sequence
START_TOKEN = seqs_length
n_seqs = 100_000

objects, seqs, sorted_seqs, arg_sort, object_seqs, start_token = create_sorting_dataset(vocab_size, dim, seqs_length, n_seqs)

# `arg_sort` rows start with the start token: `target` drops the last column,
# `labels` drops the first one, so labels[:, i] is the entry following target[:, i]
target = arg_sort[:, :-1]
labels = arg_sort[:, 1:]

data = {
    'objects': objects, 'seqs': seqs, 'sorted_seqs': sorted_seqs, 'arg_sort': arg_sort,
    'object_seqs': object_seqs, 'target': target, 'labels': labels, 'start_token': start_token
    }

# `np.save` does not create missing parent directories, so make sure it exists
os.makedirs('object_sorting_datasets', exist_ok=True)
# the dict is stored as a pickled object array; read it back with
# `np.load(..., allow_pickle=True).item()`
np.save('object_sorting_datasets/task2_object_sort_dataset.npy', data)

## Task 2 Reshuffled

In [5]:
# The dataset is a pickled dict wrapped in a 0-d object array, hence
# `allow_pickle=True` and `.item()`. NOTE: unpickling executes arbitrary code,
# so only load files that were produced locally / come from a trusted source.
data = np.load('object_sorting_datasets/task2_object_sort_dataset.npy', allow_pickle=True).item()
objects = data['objects']
seqs = data['seqs']

# random permutation of the object ids; its size is taken from the loaded
# object table instead of being hard-coded, so it follows the stored vocabulary
n_objects = len(objects)
reshuffle = np.random.choice(n_objects, size=n_objects, replace=False)

# re-assign feature vectors to object ids: id `i` now carries the features
# that previously belonged to id `reshuffle[i]`; `seqs` itself is unchanged
objects_ = objects[reshuffle]
object_seqs_ = objects_[seqs]

data['reshuffle'] = reshuffle
data['objects'] = objects_
data['object_seqs'] = object_seqs_

np.save('object_sorting_datasets/task2_reshuffled_object_sort_dataset.npy', data)