<a href="https://colab.research.google.com/github/Santosh-Gupta/NaturalLanguageRecommendations/blob/srihari-dev/notebooks/model_debug.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers --quiet
%tensorflow_version 2.x

UsageError: Line magic function `%tensorflow_version` not found.


In [1]:
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Lambda, Dense, Activation, Concatenate
from transformers import TFBertModel
# Show which TensorFlow build this kernel picked up.
print('TensorFlow:', tf.version.VERSION)

TensorFlow: 2.1.0-rc0


In [2]:
# Choose a distribution strategy: use a TPU when a TPU cluster can be
# resolved, otherwise fall back to tf.distribute.MirroredStrategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    worker_addresses = tpu.cluster_spec().as_dict()['worker']
    print('Running on TPU ', worker_addresses)
except ValueError:
    # A ValueError raised above is treated as "no TPU available".
    tpu = None

if not tpu:
    strategy = tf.distribute.MirroredStrategy()
else:
    # Connect to and initialise the TPU system before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
REPLICAS:  2


In [3]:
# Shared settings used by the cells below.
# Batch size passed to `dataset.batch(...)`.
batch_size = 8
# Length of the random citation vectors and width of both Dense projection layers.
embedding_dim = 512
# Lets tf.data pick the parallelism for `map` and the buffer size for `prefetch`.
autotune = tf.data.experimental.AUTOTUNE

In [4]:
def get_random_title(seq_len=512, vocab_size=200):
    """Return a random integer vector standing in for a tokenized title.

    Args:
        seq_len: Number of token ids to generate. The default (512) matches
            the title input length declared in `create_model`.
        vocab_size: Exclusive upper bound for the sampled ids; ids are drawn
            uniformly from [0, vocab_size).

    Returns:
        A 1-D tf.int32 tensor of shape [seq_len].
    """
    return tf.random.uniform(shape=[seq_len], maxval=vocab_size, dtype=tf.int32)

def get_random_citation():
    """Return a random, L2-normalized float32 vector of length `embedding_dim`.

    Components are sampled uniformly from [-1, 1) before normalization.
    """
    return tf.math.l2_normalize(
        tf.random.uniform(shape=[embedding_dim], minval=-1, maxval=1, dtype=tf.float32)
    )

def generate_sample():
    """Produce one synthetic training example.

    Returns:
        A pair `((title, citation), label)` where `title` comes from
        `get_random_title()`, `citation` from `get_random_citation()`, and
        `label` is the float32 constant 1.0.
    """
    features = (get_random_title(), get_random_citation())
    label = tf.constant(1., dtype=tf.float32)
    return features, label

In [5]:
@tf.function
def loss_fn(_, probs):
    """Cross-entropy of `probs` against an identity-matrix target.

    The first argument (the labels Keras passes in) is ignored. The targets
    are rebuilt as an identity matrix sized from the first dimension of
    `probs`, so row i is scored against column i.

    Returns:
        The result of `tf.losses.categorical_crossentropy` for those targets.
    """
    num_rows = tf.shape(probs)[0]
    identity_targets = tf.eye(num_rows)
    per_row_loss = tf.losses.categorical_crossentropy(identity_targets, probs)
    return per_row_loss
    
def create_model():
    """Build the two-input title/citation similarity model.

    The title ids go through pretrained BERT, are mean-reduced over axis 1,
    and projected to `embedding_dim` with a tanh Dense layer. The citation
    vector is projected to the same width by a second tanh Dense layer.
    Every title in the batch is then scored against every citation in the
    batch by dot product, and each row of scores is softmax-normalized.

    Returns:
        A tf.keras.Model with inputs `[title, citation]` and a single output
        of row-wise softmax probabilities over the in-batch citations
        (a square matrix whose sides equal the batch size).
    """
    title = tf.keras.Input(shape=(512,), dtype=tf.int32) # from bert encoder
    # Size the citation input from `embedding_dim` rather than a literal 512,
    # so it keeps matching the vectors produced by `get_random_citation`.
    citation = tf.keras.Input(shape=(embedding_dim,)) # normalized word2vec outputs

    bert_model = TFBertModel.from_pretrained('bert-base-uncased')
    titleOut = bert_model(title)
    # Average the first BERT output over axis 1 (presumably the token axis of
    # the sequence output -- confirm against the transformers docs).
    titleOutMean = tf.reduce_mean(titleOut[0], axis=1)
    titleOutSim = Dense(units=embedding_dim, activation='tanh', name='DenseTitle')(titleOutMean)

    citationSim = Dense(units=embedding_dim, activation='tanh', name='DenseCitation')(citation)

    # Get dot product of each of title x citation combinations.
    # matmul with transpose_b gives entry [i, j] = title_i . citation_j without
    # materializing the (batch, batch, embedding_dim) broadcast product.
    dotProduct = tf.matmul(titleOutSim, citationSim, transpose_b=True)
    probs = tf.nn.softmax(dotProduct, axis=-1)
    model = tf.keras.Model(inputs=[title, citation], outputs=[probs])
    return model

In [6]:
# Build and compile inside the strategy scope so the model is created under
# the distribution strategy selected earlier.
with strategy.scope():
    model = create_model()
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=loss_fn,
    )

In [7]:
# Synthetic input pipeline: the range values are only a counter, and each
# element is replaced by a freshly generated random sample.
with strategy.scope():
    dataset = (
        tf.data.Dataset.range(5000)
        .shuffle(512)
        .map(lambda _ : generate_sample(), num_parallel_calls=autotune)
        .batch(batch_size, drop_remainder=False)
        .prefetch(autotune)
    )
    print(tf.data.experimental.get_structure(dataset))

((TensorSpec(shape=(None, 512), dtype=tf.int32, name=None), TensorSpec(shape=(None, 512), dtype=tf.float32, name=None)), TensorSpec(shape=(None,), dtype=tf.float32, name=None))


In [8]:
# Sanity check: push a single batch through the model and inspect the
# output shape.
for batch in dataset.take(1):
    inputs, labels = batch
    title, citations = inputs
    outputs = model((title, citations), training=True)
outputs.shape

TensorShape([8, 8])

In [9]:
# Train on the synthetic batches: 5 epochs of 100 steps each.
model.fit(
    x=dataset,
    steps_per_epoch=100,
    epochs=5,
)

Train for 100 steps
Epoch 1/5
INFO:tensorflow:batch_all_reduce: 198 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1').
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1').
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1').
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/

KeyboardInterrupt: 