# Profiling a model
In this notebook, we'll analyze the performance of an LSTM layer. LSTM cells in TensorFlow can have varying performance depending on the implementation.

For a comprehensive benchmark, see https://returnn.readthedocs.io/en/latest/tf_lstm_benchmark.html

In [1]:
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.python.client import timeline

import numpy as np


In [2]:
def run_and_trace(tensor, trace_path):
    """Evaluate `tensor` once with full tracing and save a Chrome trace.

    A new session is opened, all global variables are initialized, and
    `tensor` is run a single time with `FULL_TRACE` enabled. The step
    statistics gathered during that run are converted to the Chrome trace
    format and written to `trace_path`, replacing any existing file.

    Args:
        tensor: Fetch handed to `Session.run`.
        trace_path: Path of the file that receives the trace content.

    Returns:
        The value obtained from running `tensor`.
    """
    # Neither object depends on the session, so build them up front.
    trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    metadata = tf.RunMetadata()

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        value = session.run(tensor,
                            options=trace_options,
                            run_metadata=metadata)

    # The step stats live in `metadata`, so the session is no longer needed.
    chrome_trace = timeline.Timeline(
        metadata.step_stats).generate_chrome_trace_format()
    with open(trace_path, 'w') as trace_file:
        trace_file.write(chrome_trace)

    return value

In [4]:
# Problem size. X is laid out batch-major ([batch, time, features]), which is
# what tf.nn.dynamic_rnn consumes with its default time_major=False.
num_units = 1280
batch_size, seq_len, input_dim = 1, 1024, 128

# Seed both NumPy and the TensorFlow graph so the input data and the randomly
# initialized LSTM weights are the same on every fresh-kernel run. A private
# RandomState is used to avoid touching NumPy's global RNG state.
seed = 0
tf.set_random_seed(seed)
input_rng = np.random.RandomState(seed)

X = tf.constant(
    input_rng.normal(size=(batch_size, seq_len, input_dim)).astype(np.float32))
lstm = tf.contrib.rnn.LSTMBlockCell(num_units, forget_bias=1.0)
outputs, _ = tf.nn.dynamic_rnn(lstm, X, dtype=tf.float32)

result1 = run_and_trace(outputs, 'lstm_trace.json')

In [5]:
# LSTMBlockFusedCell consumes time-major input ([time, batch, features]),
# whereas X is batch-major ([1, 1024, 128]) as fed to tf.nn.dynamic_rnn.
# Passing X directly would run a single time step over a batch of 1024
# instead of 1024 time steps over a batch of 1, so the fused trace would not
# measure the same workload as the LSTMBlockCell trace.
X_time_major = tf.transpose(X, [1, 0, 2])

lstm_fused = tf.contrib.rnn.LSTMBlockFusedCell(num_units, forget_bias=1.0)
outputs_fused_time_major, _ = lstm_fused(X_time_major, dtype=tf.float32)

# Convert back to batch-major so outputs_fused / result2 use the same layout
# as the dynamic_rnn outputs.
outputs_fused = tf.transpose(outputs_fused_time_major, [1, 0, 2])
result2 = run_and_trace(outputs_fused, 'lstm_fused_trace.json')