# FastText model to TensorBoard

Install dependencies

In [None]:
import sys
import re
# Extend the module search path with "./../", i.e. the parent of the current
# working directory, so modules located there can be imported.
sys.path.append("./../")

In [None]:
pip install fasttext

Set variables

In [None]:
# Location of the pre-trained fastText binary model that this notebook loads.
output_model_file = "/home/jovyan/work/resources/data_ignored/fasttext_rad.bin"

In [None]:
pip install -q tensorflow

In [None]:
# import statements
# from pathlib import PurePath
import os

import fasttext
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops

from tensorboard.plugins import projector
from tensorboard.plugins.projector import ProjectorConfig

In [None]:
# Load the pre-trained fastText model from the binary file at `output_model_file`.
model = fasttext.load_model(output_model_file)

In [None]:
# Preview the vocabulary: print the words at indices 0 through 5, then stop.
for i, w in enumerate(model.get_words()):
    print(w)
    if i > 4:  # first true at i == 5, i.e. after the sixth word has been printed
        break

In [None]:
#hide_output
# number of words in the model's vocabulary
VOCAB_SIZE = len(model.get_words())

# Size of each word vector, asked from the model itself so that this cell does
# not depend on a loop variable left behind by another cell.
EMBEDDING_DIM = model.get_dimension()

# 2D numpy array (one row per word, one column per vector component),
# zero-initialised here; it is meant to hold the word vectors
embed = np.zeros((VOCAB_SIZE, EMBEDDING_DIM))
embed.shape

In [None]:
# Copy each word's vector into the matrix row that shares its vocabulary index.
for i, word in enumerate(model.get_words()):
    embed[i, :] = model.get_word_vector(word)
embed

In [None]:
# Destination file for the vocabulary listing; its parent folder is derived
# from the path and created up front if it does not exist yet.
tsv_file_path = "tensorboard/metadata.tsv"
os.makedirs(os.path.dirname(tsv_file_path), exist_ok=True)

In [None]:
#hide_output
# Write the vocabulary to the TSV file, one word per line, encoded as UTF-8.
# The handle is only written to, so plain write mode is sufficient.
# NOTE(review): the projector configured later in this notebook is pointed at
# "meta.tsv" inside LOG_DIR, so this file looks unused — confirm before relying on it.
with open(tsv_file_path, "w", encoding="utf-8") as f:
    f.writelines(word + "\n" for word in model.get_words())
embed.shape

In [None]:
# `ops` is tensorflow.python.framework.ops, imported in an earlier cell.
ops.reset_default_graph()  # clearing the default graph stack


def register_embedding(
    embedding_tensor_name: str, meta_data_fname: str, log_dir: str,
) -> None:
    """
    Describe one embedding to the TensorBoard projector and store that
    description in the log folder.

    Args:
    embedding_tensor_name(str): name of the tensor that holds the embeddings
    meta_data_fname(str): name of the metadata (labels) file
    log_dir(str): folder that receives the projector configuration

    Returns:
    None

    """
    projector_config = projector.ProjectorConfig()

    # One entry per embedding; the two fields below are set independently.
    entry = projector_config.embeddings.add()
    entry.metadata_path = meta_data_fname
    entry.tensor_name = embedding_tensor_name

    # Hand the finished configuration to the projector plugin for log_dir.
    projector.visualize_embeddings(log_dir, projector_config)

In [None]:
def save_labels_tsv(labels: list, filepath: str, log_dir: str,) -> None:
    """
    Store the vocabulary of words in the dataset in a file, one label per line.

    The file is always written as UTF-8 so that labels containing non-ASCII
    characters are stored the same way regardless of the platform's default
    locale encoding.

    Args:
    labels: vocabulary i.e. words in the dataset; each item is formatted with
        str.format, so non-string items are written in their string form
    filepath: metadata file name, joined onto log_dir
    log_dir: folder where tensorboard files and projector files are saved

    Returns:
    None

    """
    with open(os.path.join(log_dir, filepath), "w", encoding="utf-8") as f:
        f.writelines("{}\n".format(label) for label in labels)

In [None]:
# Name under which the embeddings tensor is registered; it is also the stem
# of the checkpoint file name.
EMBEDDINGS_TENSOR_NAME = "embeddings"

# File (inside the log folder) that receives the labels, i.e. the vocabulary.
META_DATA_FNAME = "meta.tsv"

# Step number passed on when the checkpoint is saved.
STEP = 0

# Folder that collects every TensorBoard log file; created if missing.
LOG_DIR = "/home/jovyan/work/resources/data_ignored/tb2files"
os.makedirs(LOG_DIR, exist_ok=True)

# Checkpoint path for the saved embeddings.
EMBEDDINGS_FPATH = os.path.join(LOG_DIR, EMBEDDINGS_TENSOR_NAME + ".ckpt")


x = embed  # array containing the embeddings
y = model.get_words()  # list containing the vocabulary

# First write the projector configuration, then the labels file it refers to.
register_embedding(EMBEDDINGS_TENSOR_NAME, META_DATA_FNAME, LOG_DIR)
save_labels_tsv(y, META_DATA_FNAME, LOG_DIR)

In [None]:
# Wrap the embeddings array `x` in a TensorFlow variable. The variable carries
# the same name that was registered with the projector.
tensor_embeddings = tf.Variable(initial_value=x, name=EMBEDDINGS_TENSOR_NAME)

In [None]:
#hide_output

# Saver limited to the embeddings variable.
saver = tf.compat.v1.train.Saver(var_list=[tensor_embeddings])

# Save the checkpoint for the embeddings at EMBEDDINGS_FPATH, tagged with STEP;
# no session object is supplied.
saver.save(sess=None, save_path=EMBEDDINGS_FPATH, global_step=STEP)

In [None]:
pip install tensorboard

In [None]:
# Load the TensorBoard notebook extension, then start TensorBoard on LOG_DIR.
# NOTE(review): --host 0.0.0.0 makes TensorBoard listen on all network
# interfaces (port 6006) — confirm that this exposure is intended.
%load_ext tensorboard
%tensorboard --logdir {LOG_DIR} --host 0.0.0.0 --port 6006

In [None]:
# !kill 123