<a href="https://colab.research.google.com/github/Santosh-Gupta/NaturalLanguageRecommendations/blob/srihari-dev/notebooks/model_debug.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers --quiet

In [1]:
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Lambda, Dense, Activation, Concatenate
from transformers import TFBertModel
print('TensorFlow:', tf.__version__)

TensorFlow: 2.1.0-rc0


In [2]:
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
except ValueError:
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)

REPLICAS:  1


In [3]:
batch_size = 8
embedding_dim = 512
autotune = tf.data.experimental.AUTOTUNE

In [4]:
def get_random_title():
    return tf.random.uniform(shape=[512], maxval=200, dtype=tf.int32)

def get_random_citation():
    vector = tf.random.uniform(shape=[embedding_dim], minval=-1, maxval=1, dtype=tf.float32)
    normed_vector = tf.math.l2_normalize(vector)
    return normed_vector

def generate_sample():
    title = get_random_title()
    posCitations = get_random_citation()
    return title, posCitations

def create_labels(title, posCitations):
    batch_size = tf.shape(title)[0]
    return (title, posCitations), tf.eye(batch_size, batch_size)

In [5]:
def create_model():
    title = tf.keras.Input(shape=(512,), dtype=tf.int32) # from bert encoder
    citation = tf.keras.Input(shape=(512,)) # normalized word2vec outputs
    
    bert_model = TFBertModel.from_pretrained('bert-base-uncased')
    titleOut = bert_model(title)
    titleOutMean = tf.reduce_mean(titleOut[0], axis=1)
    titleOutSim = Dense(units=embedding_dim, activation='tanh', name='DenseTitle')(titleOutMean)

    citationSim = Dense(units=embedding_dim, activation='tanh', name='DenseCitation')(citation)

    # Get dot product of each of title x citation combinations
    dotProduct = tf.reduce_sum(tf.multiply(titleOutSim[:, tf.newaxis, :], citationSim), axis=-1)
    
    # Softmax across rows to get sum == 1 for each row
    probs = tf.nn.softmax(dotProduct, axis=-1)
    return tf.keras.Model(inputs=[title, citation], outputs=[probs])

In [6]:
with strategy.scope():
    model = create_model()
    model.compile(loss=tf.losses.CategoricalCrossentropy(from_logits=False),
                optimizer=tf.optimizers.Adam())

In [7]:
with strategy.scope():
    dataset = tf.data.Dataset.range(5000)
    dataset = dataset.shuffle(512)
    dataset = dataset.map(lambda _ : generate_sample(), num_parallel_calls=autotune)
    dataset = dataset.batch(batch_size, drop_remainder=False)
    dataset = dataset.map(create_labels, num_parallel_calls=autotune)
    dataset = dataset.prefetch(autotune)
    print(tf.data.experimental.get_structure(dataset))

((TensorSpec(shape=(None, 512), dtype=tf.int32, name=None), TensorSpec(shape=(None, 512), dtype=tf.float32, name=None)), TensorSpec(shape=(None, None), dtype=tf.float32, name=None))


In [8]:
model.fit(dataset, epochs=5, steps_per_epoch=100)

Train for 100 steps
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fbd887d0cf8>