In [7]:
from train_and_eval import train
from create_task import *
from ESBN_reimplementation import ESBN
import numpy as np
import tensorflow as tf
from PIL import Image
import tensorboard
%load_ext tensorboard

In this notebook, we run the code for replicating the original findings of Webb (2021) and for implementing a relational bottleneck as a separate processing stream in deep language models.
First, we run the training script for the replication and evaluate the performance of the ESBN, a transformer, and the Abstractor (Webb 2023).
Second, we integrate the Abstractor and the ESBN into a small language model and compare their performance with that of a transformer of similar size.

In [2]:
from create_task import create_task

In [3]:
# Set experiment parameters
m_holdout = 0  # number of shuffled shapes reserved for the test set (0 = train and test share all shapes)
n_shapes = 10  # number of shape images loaded from ../imgs/ (0.png ... n_shapes-1.png)
y_dim = 4      # passed to the ESBN constructor when the model is created

# Seed the global NumPy and TensorFlow generators so that the shape shuffle below
# (and any other draws from these global RNGs) repeat under Restart & Run All.
seed = 0
np.random.seed(seed)
tf.random.set_seed(seed)

# Set train parameters
batch_size = 32
train_set_size = 1000
train_proportion = 0.95
epochs = 5
lr = 5e-4
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Set test parameters
test_batch_size = 100
test_set_size = 1000

# Randomly assigns objects to training or test set
all_shapes = np.arange(n_shapes)
np.random.shuffle(all_shapes)
if m_holdout > 0:
    # Disjoint split: the first m_holdout shuffled shapes are test-only.
    train_shapes = all_shapes[m_holdout:]
    test_shapes = all_shapes[:m_holdout]
else:
    # No holdout: both sets draw from every shape.
    train_shapes = all_shapes
    test_shapes = all_shapes


# Generate training and test sets
train_set, test_set = create_task(train_shapes, test_shapes, train_set_size, test_set_size, train_proportion, m_holdout, n_shapes)
train_data = tf.data.Dataset.from_tensor_slices(train_set).batch(batch_size).prefetch(20)
# The test pipeline uses its own batch size; previously it reused the training
# batch_size, leaving test_batch_size defined but without effect.
test_data = tf.data.Dataset.from_tensor_slices(test_set).batch(test_batch_size).prefetch(20)

# Load images
all_imgs = []
for i in range(n_shapes):
    img_fname = "../imgs/" + str(i) + ".png"
    # Open via a context manager so the file handle is released as soon as the
    # pixel data has been copied into the array.
    with Image.open(img_fname) as img_file:
        # Scale pixel values by 1/255 after converting to float32.
        img = tf.convert_to_tensor(np.array(img_file), dtype=tf.float32) / 255.
    all_imgs.append(img)
# Stack the per-shape tensors along a new leading axis (index = shape id).
all_imgs = tf.stack(all_imgs, 0)

In [4]:
# Remove previous logs (presumably so stale runs do not appear in TensorBoard).
# NOTE(review): this deletes ./logs/, whereas the TensorBoard cell in this notebook
# reads ../logs/gradient_tape — confirm which directory train() actually writes to.
%rm -rf ./logs/
# Create model; y_dim (set in the parameter cell) is its only constructor argument
model = ESBN(y_dim)
# Train and evaluate: train() is handed the batched train/test datasets, the stacked
# image tensor, the optimizer, the loss function and the number of epochs.
train(model, train_data, test_data, all_imgs, optimizer, loss_fn, epochs)

2024-02-23 15:05:00.142641: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [1000]
	 [[{{node Placeholder/_1}}]]
32it [00:07,  4.01it/s]
2024-02-23 15:05:08.159328: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [1000]
	 [[{{node Placeholder/_1}}]]
32it [00:07,  4.41it/s]
32it [00:06,  4.71it/s]
32it [00:06,  4.88it/s]
32it [00:06,  4.86it/s]


In [10]:
# View results in tensorboard.
# Uses the %tensorboard magic (made available by `%load_ext tensorboard` in the
# import cell) and points it at ../logs/gradient_tape — presumably the directory
# where train() writes its summaries; confirm against train_and_eval.
%tensorboard --logdir ../logs/gradient_tape