In [35]:
import numpy as np

# Constants
P = 15
D = 10  # You can set this to an appropriate value
N = 5   # You can set this to an appropriate value
K = 3   # You can set this to an appropriate value
L = 2   # You can set this to an appropriate value
epsilon = 0.1

# Function to sample D-dimensional vectors from a normal distribution
def sample_mu():
    return np.random.normal(0, 1/D, D)

# Function to sample items using the given formula for x˜i
def sample_item(mu, epsilon):
    eta = np.random.normal(0, 1, D)
    x_tilde = mu + epsilon * eta / np.sqrt(1 + epsilon**2)
    return x_tilde

# Generate Gaussian Mixture Model
classes_mu = [sample_mu() for _ in range(K)]

# Function to generate a training sequence
def generate_sequence():
    sequence = []
    target_position = 2*N
    target_class = np.random.randint(0, K)
    class_list = [0] * ((2 * N) + 1)

    class_list[0] = target_class
    for i in range(N):
      class_list[2*i] = (np.random.randint(0,K))

    # Ensure each class is assigned to a unique label
    class_labels = np.random.choice(range(L), K)
    label_contents = [sample_mu() for _ in range(L)]  # Contents of the labels

    for i in range(2*N):
        is_item_position = i % 2 == 0
        is_label_position = not is_item_position

        if is_item_position:
            item_xi = sample_item(classes_mu[class_list[i]], epsilon)
            sequence.append(np.concatenate([np.eye(P)[i], item_xi]))
        else:
              # Use the selected labels
            label_index = class_labels[class_list[i]]
            label = label_contents[label_index]
            sequence.append(np.concatenate([np.eye(P)[int(i)], label]))

    target_item = sample_item(classes_mu[target_class], epsilon)
    sequence.append(np.concatenate([np.eye(P)[int(target_position)], target_item]))
    label_index = class_labels[target_class]
    correct_label = label_contents[label_index]
    return sequence, correct_label, target_class

# Generate training dataset
num_sequences = 1000  # You can set this to an appropriate value
dataset = []

for _ in range(num_sequences):
    sequence, correct_label, target_class = generate_sequence()
    dataset.append((sequence, correct_label, target_class))

# Print an example sequence
example_sequence,example_correct_label, example_target_class = dataset[0]
print("Example Sequence:")
for i, pos_encoding_item in enumerate(example_sequence):
    print(f"Position {i}: {pos_encoding_item}")
print("Correct label", example_correct_label)
print("Target Class:", example_target_class)


Example Sequence:
Position 0: [ 1.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.         -0.22797864  0.10712151 -0.01286809
 -0.246122   -0.00425965 -0.01427579  0.06962517 -0.18549074 -0.06131528
  0.14522681]
Position 1: [ 0.          1.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.17805142 -0.01783496  0.14125632
 -0.02222502 -0.08435749  0.08617444  0.03138167  0.02682666 -0.10807492
 -0.02971255]
Position 2: [ 0.          0.          1.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.01021196  0.15218549 -0.09369412
 -0.17001145  0.02345362 -0.073257   -0.08662129 -0.2354042   0.0622107
 -0.02675362]
Position 3: [ 0.          0.          0.          1.          0.          0.


In [36]:
#putting the data in the correct format
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

# Convert sequences to a single NumPy array
sequences_array = np.array([seq for seq, _, _ in dataset], dtype=np.float32)

# Convert the NumPy array to a PyTorch tensor
sequence_tensor = torch.tensor(sequences_array)

# Similarly, convert correct labels and target classes to NumPy arrays
correct_labels_array = np.array([label for _, label, _ in dataset], dtype=np.float32)
correct_labels_tensor = torch.tensor(correct_labels_array)

target_classes_array = np.array([target for _, _, target in dataset], dtype=np.float32)
target_classes_tensor = torch.tensor(target_classes_array)

# Define the dataset
dataset = TensorDataset(sequence_tensor, correct_labels_tensor, target_classes_tensor)

In [51]:
#n_ctx by d_model = 11 by 25   (here d_model is 25 quite small)
sequence = dataset[0][0]
sequence.shape

torch.Size([11, 25])

To do next:
1) Calculate and test attention patterns
2) classifier as per the paper
3) pull the model together
4) initialise and train the model