# word2vec skip-gram model using NCE loss
Author: Chip Huyen<br/>
Jupyter scribe: Jiageng Liu<br/>
Prepared for the class CS 20SI: "TensorFlow for Deep Learning Research"<br/>
[cs20si.stanford.edu](https://cs20si.stanford.edu)<br/>

In [1]:
# Compatibility with Python 2.7
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [2]:
import os 
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import numpy as np
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

from process_data import process_data

## Constants

In [3]:
# Number of rows in the embedding / NCE weight matrices and the number of
# classes handed to the NCE loss.
VOCAB_SIZE = 50000

# Number of (center, target) examples per training batch; fixes the
# placeholder shapes.
BATCH_SIZE = 128

# Dimension of the word embedding vectors.
EMBED_SIZE = 128

# The context window.
SKIP_WINDOW = 1

# Number of negative examples to sample.
NUM_SAMPLED = 64

# Step size passed to the gradient-descent optimizer.
LEARNING_RATE = 1.0

# Total number of training iterations.
NUM_TRAIN_STEPS = 10000

# How many steps to skip before reporting the loss.
SKIP_STEP = 2000

## word2vec

**Step 0**: read in data

In [4]:
# Create the source of training batches. The training loop pulls from it with
# next(batch_gen) and unpacks each item into (centers, targets).
# NOTE(review): process_data is a project helper not shown in this notebook;
# presumably it prepares the corpus and yields index batches sized by
# BATCH_SIZE using a SKIP_WINDOW context -- confirm against process_data.py.
batch_gen = process_data(VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW)

Dataset ready


**Step 1**: define the placeholders for input and output

In [5]:
with tf.name_scope('data'):
    # Integer word ids fed in at every training step. `center_words` is a flat
    # vector of BATCH_SIZE ids; `target_words` carries an extra trailing
    # dimension of 1 because it is passed as `labels` to tf.nn.nce_loss,
    # which takes a 2-D [batch_size, num_true] tensor.
    center_words = tf.placeholder(
        dtype=tf.int32, shape=[BATCH_SIZE], name='center_words')
    target_words = tf.placeholder(
        dtype=tf.int32, shape=[BATCH_SIZE, 1], name='target_words')

**Step 2**: define weights.

In [6]:
with tf.name_scope('embedding_matrix'):
    # Trainable lookup table: one EMBED_SIZE-dimensional row per vocabulary
    # entry, initialised uniformly at random between -1.0 and 1.0.
    embed_matrix = tf.Variable(
        initial_value=tf.random_uniform(
            shape=[VOCAB_SIZE, EMBED_SIZE], minval=-1.0, maxval=1.0),
        name='embed_matrix')

**Step 3**: define the inference<br>
**Step 4**: construct variables for NCE loss

In [7]:
with tf.name_scope('loss'):
    # Step 3: inference -- fetch the embedding row of every center word in the
    # batch.
    embed = tf.nn.embedding_lookup(
        params=embed_matrix, ids=center_words, name='embed')

    # Step 4: parameters of the NCE output layer. The weights have one
    # EMBED_SIZE-dimensional row per vocabulary entry, drawn from a truncated
    # normal with stddev 1 / sqrt(EMBED_SIZE); the biases start at zero.
    nce_weight = tf.Variable(
        initial_value=tf.truncated_normal(
            shape=[VOCAB_SIZE, EMBED_SIZE],
            stddev=1.0 / (EMBED_SIZE ** 0.5)),
        name='nce_weight')
    nce_bias = tf.Variable(
        initial_value=tf.zeros(shape=[VOCAB_SIZE]), name='nce_bias')

    # Training objective: the NCE loss averaged with reduce_mean, using
    # NUM_SAMPLED negative classes drawn out of VOCAB_SIZE.
    loss = tf.reduce_mean(
        tf.nn.nce_loss(
            weights=nce_weight,
            biases=nce_bias,
            labels=target_words,
            inputs=embed,
            num_sampled=NUM_SAMPLED,
            num_classes=VOCAB_SIZE),
        name='loss')

**Step 5**: define the optimizer

In [8]:
# Plain gradient descent on `loss`. Note that `optimizer` is bound to the
# result of minimize(loss) -- the thing the training loop passes to sess.run
# each step -- rather than to the GradientDescentOptimizer object itself.
optimizer = tf.train.GradientDescentOptimizer(
    learning_rate=LEARNING_RATE).minimize(loss)

**Step 6**: visualize the graph

In [9]:
# Hand the default graph (which holds every op defined in the cells above) to
# the show_graph helper.
# NOTE(review): show_graph lives in the local show_tf_graph module, which is
# not shown here; presumably it renders the graph inline in the notebook --
# confirm against show_tf_graph.py.
from show_tf_graph import show_graph
show_graph(tf.get_default_graph())

**Step 7**: train our model

In [10]:
with tf.Session() as sess:
    # Variables must be initialised before the first training step runs.
    sess.run(tf.global_variables_initializer())

    # Running sum of the batch losses since the last report; it is divided by
    # SKIP_STEP to get the average loss over the last SKIP_STEP steps.
    total_loss = 0.0
    for index in range(NUM_TRAIN_STEPS):
        # Pull the next batch and run one optimisation step on it, fetching
        # the batch loss at the same time.
        centers, targets = next(batch_gen)
        loss_batch, _ = sess.run(
            [loss, optimizer],
            feed_dict={center_words: centers, target_words: targets})
        total_loss += loss_batch

        # Report only once every SKIP_STEP steps.
        if (index + 1) % SKIP_STEP != 0:
            continue

        # The printed step number is the 0-based loop index.
        print('Average loss at step {}: {:5.1f}'.format(index, total_loss / SKIP_STEP))
        total_loss = 0.0

Average loss at step 1999: 114.3
Average loss at step 3999:  52.3
Average loss at step 5999:  33.2
Average loss at step 7999:  23.5
Average loss at step 9999:  17.8
