# Example notebook 1: visualize word embeddings

In [1]:
import os

import numpy as np
from zeugma import GloVeTransformer

# Download the GloVe embeddings first; if they are already
# present, the download is skipped automatically
GloVeTransformer.download_embeddings()  

# Load the embedding transformer used to look up word vectors below
embedding_transformer = GloVeTransformer()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Create the embeddings matrix for the words present in a given corpus

In [2]:
from zeugma import TextsToSequences

# Define a small corpus; its vocabulary is the list of words to visualize
corpus = ['love like enjoy happy', 'hate anger unhappy']

# Fit the tokenizer on the corpus and keep its word -> integer index
# mapping (the index is later used as the row number in the embedding matrix)
tts = TextsToSequences()
tts.fit(corpus)
word_index = tts.word_index

def create_embedding_matrix(word_index, embedding_transformer):
    """Build a matrix whose row ``i`` holds the embedding of the word indexed ``i``.

    Parameters
    ----------
    word_index : dict
        Mapping from word (str) to a non-negative integer row index.
    embedding_transformer : object
        Any object exposing ``transform(list_of_texts)`` that returns a 2-D
        array with one embedding row per input text.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_rows, embedding_dim)``. ``n_rows`` is
        ``len(word_index) + 1``, enlarged if some index is bigger than the
        vocabulary size. Rows that no word points to (e.g. row 0 when the
        indices start at 1) are left as zeros.
    """
    # Probe the transformer with a single word to discover the vector size
    embedding_dim = embedding_transformer.transform(['the']).shape[1]
    # Keep the historical len + 1 sizing, but never allocate fewer rows than
    # the largest index requires (avoids IndexError on non-contiguous indices)
    n_rows = max(len(word_index), max(word_index.values(), default=0)) + 1
    embedding_matrix = np.zeros((n_rows, embedding_dim))
    for word, i in word_index.items():
        embedding_matrix[i] = embedding_transformer.transform([word])[0]
    return embedding_matrix

# Build the embedding matrix for the corpus vocabulary (row i <-> word with index i)
embedding_matrix = create_embedding_matrix(word_index, embedding_transformer)

## Set up the TensorBoard configuration

In [3]:
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

# Create the log directory next to the notebook's working directory.
# os.getcwd() is used rather than os.environ['PWD'] because PWD is not
# defined on every platform (e.g. Windows) and may not match the real
# working directory that the relative 'logs/' path of tensorboard resolves to.
LOG_DIR = os.path.join(os.getcwd(), 'logs')
os.makedirs(LOG_DIR, exist_ok=True)

# Write word names in a file for tensorboard display, one word per line,
# ordered by their index so that the line order follows the matrix row order.
# NOTE(review): the leading 'Word' line appears to end up as the label of
# row 0 (the all-zero row of the embedding matrix), since the projector is
# documented to expect no header in single-column metadata -- confirm.
words_by_index = sorted(word_index, key=word_index.get)
with open(os.path.join(LOG_DIR, 'metadata.tsv'), 'w', encoding='utf-8') as f:
    f.write('Word\n' + '\n'.join(words_by_index))

# Create a tensorflow variable from embedding matrix
embedding_var = tf.Variable(embedding_matrix)

# Initialise the variables and save the embeddings in LOG_DIR/model.ckpt.
# The checkpoint must live in the same directory as the projector config,
# hence LOG_DIR instead of a separate relative "logs" path. The session is
# closed automatically when the with-block exits.
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as session:
    session.run(init)
    saver.save(session, os.path.join(LOG_DIR, "model.ckpt"))

# Add metadata (i.e. words here) to the embeddings points
summary_writer = tf.summary.FileWriter(LOG_DIR)
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = embedding_var.name
embedding.metadata_path = os.path.join(LOG_DIR, 'metadata.tsv')

# Write the projector configuration into LOG_DIR, then release the writer
projector.visualize_embeddings(summary_writer, config)
summary_writer.close()

## Run tensorboard locally

In [5]:
# Run tensorboard to visualize the embeddings in your
# browser on port 6006: http://localhost:6006
# Navigate to the 'Projector' tab to visualize the embeddings.
# The command keeps running until it is interrupted (CTRL+C / kernel interrupt);
# 'logs/' is resolved relative to the notebook's working directory.
!tensorboard --logdir logs/ --host 127.0.0.1

  from ._conv import register_converters as _register_converters
TensorBoard 1.6.0 at http://127.0.0.1:6006 (Press CTRL+C to quit)
^C
