In [1]:
from train_and_eval import train
from create_task import *
from ESBN_reimplementation import ESBN
import numpy as np
import tensorflow as tf
from PIL import Image
import tensorboard
import datetime
%load_ext tensorboard

2024-02-23 15:56:33.471407: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.




In this notebook, we run the code needed to replicate the original findings of (Webb 2021) and to implement a relational bottleneck as a separate processing stream in deep language models.
First, we run the training script for the replication and evaluate the performance of the ESBN, a transformer, and the Abstractor (Webb 2023).
Second, we integrate the Abstractor and the ESBN into a small language model and compare its performance with that of a transformer of similar size.

In [2]:
# Set experiment parameters
# m_holdout: how many of the shuffled shape ids are reserved for the test split.
m_holdout = 95
# n_shapes: total number of shape ids; one image file per id is loaded further down.
n_shapes = 100
# y_dim: handed to the ESBN constructor (presumably the size of its output layer — TODO confirm).
y_dim = 4


# Set train parameters
# batch_size: mini-batch size used when batching the training dataset.
batch_size = 32
# train_set_size / train_proportion: forwarded unchanged to create_task; their exact
# meaning is defined there.
train_set_size = 360
train_proportion = 0.95
# epochs: forwarded to train().
epochs = 150
# lr: learning rate of the Adam optimizer created below.
lr = 5e-4
# NOTE(review): SparseCategoricalCrossentropy is built with its defaults, i.e. it expects
# integer class labels and probabilities rather than logits — confirm the model's output matches.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Set test parameters
# NOTE(review): test_batch_size is meant for evaluation — confirm it is actually applied
# where test_data is batched.
test_batch_size = 100
# test_set_size: forwarded unchanged to create_task.
test_set_size = 100

# Randomly assigns objects to training or test set.
# NumPy's global RNG is seeded right before the shuffle so that the same shapes are
# held out after every "Restart Kernel -> Run All". Seeding the global generator also
# fixes any later draws that other code makes from np.random. Change random_seed to
# obtain a different (but still repeatable) split.
random_seed = 0
np.random.seed(random_seed)
all_shapes = np.arange(n_shapes)
np.random.shuffle(all_shapes)
if m_holdout > 0:
    # The first m_holdout shuffled ids are held out for testing; the remainder is used for training.
    train_shapes = all_shapes[m_holdout:]
    test_shapes = all_shapes[:m_holdout]
else:
    # No hold-out requested: training and test draw from the full set of shapes.
    train_shapes = all_shapes
    test_shapes = all_shapes


# Generate training and test sets
train_set, test_set = create_task(train_shapes, test_shapes, train_set_size, test_set_size, train_proportion, m_holdout, n_shapes)
# Wrap both sets in tf.data pipelines: slice along the first axis, batch, and prefetch
# up to 20 batches ahead of the consumer.
train_data = tf.data.Dataset.from_tensor_slices(train_set).batch(batch_size).prefetch(20)
# The test pipeline uses the dedicated test_batch_size; it was previously batched with
# the training batch_size, which left test_batch_size unused.
test_data = tf.data.Dataset.from_tensor_slices(test_set).batch(test_batch_size).prefetch(20)

# Load images
# Build the file path for every shape id, then read each file, convert it to a float32
# tensor, divide the pixel values by 255 and stack the results along a new leading axis.
img_paths = ["../imgs/" + str(shape_id) + ".png" for shape_id in range(n_shapes)]
all_imgs = tf.stack(
    [tf.convert_to_tensor(np.array(Image.open(path)), dtype=tf.float32) / 255. for path in img_paths],
    0,
)

In [3]:
# Create model
# ESBN is constructed from y_dim only; everything else it needs is supplied to train().
model = ESBN(y_dim)

# Run the training routine imported from train_and_eval. All arguments are positional:
# the model, both tf.data pipelines, the stacked image tensor, the optimizer, the loss
# and the number of epochs. (Presumably test_data is used for evaluation — confirm in
# train_and_eval.)
train(
    model,
    train_data,
    test_data,
    all_imgs,
    optimizer,
    loss_fn,
    epochs,
)

Epoch: 0:   0%|          | 0/12 [00:00<?, ?it/s]2024-02-23 15:57:00.730449: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [360]
	 [[{{node Placeholder/_1}}]]
Epoch: 0: 100%|██████████| 12/12 [00:03<00:00,  3.93it/s]
2024-02-23 15:57:03.789700: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [100]
	 [[{{node Placeholder/_1}}]]
Epoch: 1: 100%|██████████| 12/12 [00:02<00:00,  4.08it/s]
Epoch: 2: 100%|██████████| 12/12 [00:03<00:00,  3.58it/s]
Epoch: 3: 100%|██████████| 12/12 [00:03<00:00,  3.75it/s]
Epoch: 4: 100%|██████████| 12/12 [00