# Lecture 16: 2023-03-30 Vector Semantics II (cont.)

##  Lecture overview

- CBOW and Skip-gram analysis
- Using vectors to analyze sentence or document similarity

## CBOW and Skip-gram analysis

<img src="./images/mikolov.png" width="900" height="500" />

### CBOW



```python

import tensorflow as tf
from tqdm import tqdm

# CBOW training sketch: predict the center word of each window from the
# average of its context-word vectors.
#
# NOTE(review): text_as_int, context_vector_matrix, center_vector_matrix,
# optimizer and loss_list are not defined in this snippet. They are presumably
# the token-id sequence, two trainable (vocab_size, dim) tf.Variable embedding
# matrices, a Keras optimizer and an empty list -- confirm against the notebook.

# Number of iterations (epochs) to train the model
ITERATIONS = ...
# Tokenized text size
tokenized_text_size = ...
# Sliding window size for context words
WINDOW_SIZE = ...

# Position of the center (target) word inside every window
center_pos = WINDOW_SIZE // 2

# Start of the main training loop
for _ in tqdm(range(ITERATIONS)):
    # Cumulative loss over all windows of this epoch
    loss_per_epoch = 0

    # Sliding window over the tokenized text; the "+ 1" keeps the last full
    # window (the one ending on the final token) in the sweep.
    for start in range(tokenized_text_size - WINDOW_SIZE + 1):
        # Extract the window of words as indices
        indices = text_as_int[start:start + WINDOW_SIZE]
        center_index = indices[center_pos]
        context_indices = [
            index for pos, index in enumerate(indices) if pos != center_pos
        ]

        # Only the forward pass is recorded on the tape
        with tf.GradientTape() as tape:
            # Average the context words' vectors into one combined context vector
            combined_context = sum(
                context_vector_matrix[index, :] for index in context_indices
            ) / (WINDOW_SIZE - 1)

            # One score per row of the center matrix (one row per vocabulary
            # word, assuming the shapes noted above); result has a trailing
            # dimension of 1 because of expand_dims.
            output = tf.matmul(center_vector_matrix, tf.expand_dims(combined_context, 1))

            # Softmax over axis 0 turns the scores into a distribution
            softout = tf.nn.softmax(output, axis=0)

            # Probability assigned to the true center word; indexing the
            # trailing axis as well makes this a scalar instead of shape (1,)
            center_prob = softout[center_index, 0]

            # Negative log-likelihood for the current window
            logloss = -tf.math.log(center_prob)

        # Everything below runs outside the tape context so that the gradient
        # computation and the optimizer step are not themselves recorded.

        # Update the cumulative loss for the current epoch
        loss_per_epoch += logloss.numpy()

        # Calculate the gradients for the context and center vector matrices
        grad = tape.gradient(logloss, [context_vector_matrix, center_vector_matrix])

        # Update both matrices using the calculated gradients
        optimizer.apply_gradients(zip(grad, [context_vector_matrix, center_vector_matrix]))

    # Append the cumulative loss for the current epoch to the loss list
    loss_list.append(loss_per_epoch)

```

### [Gradient Tape](https://www.tensorflow.org/api_docs/python/tf/GradientTape)

In [3]:
import os

# TF_CPP_MIN_LOG_LEVEL must be set before TensorFlow is imported; setting it
# afterwards does not silence the messages emitted while the library loads.
# '2' hides INFO and WARNING messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf

# A constant is not tracked automatically, so the tape has to watch it.
x = tf.constant(3.0)
with tf.GradientTape() as g:
    g.watch(x)
    y = x * x
# dy/dx = 2x, i.e. 6.0 at x = 3.0; computed after leaving the tape context.
dy_dx = g.gradient(y, x)
print(dy_dx)


tf.Tensor(6.0, shape=(), dtype=float32)


### [Matmul](https://www.tensorflow.org/api_docs/python/tf/linalg/matmul)

In [22]:
# Left operand: the values 1..6 laid out as a 2x3 matrix
left = tf.constant([1, 2, 3, 4, 5, 6], shape=[2, 3])
# Right operand: the values 7..12 laid out as a 3x2 matrix
right = tf.constant([7, 8, 9, 10, 11, 12], shape=[3, 2])

# The same matrix product computed two ways: with matmul, and with tensordot
# contracting one axis (last axis of `left` with first axis of `right`).
via_matmul = tf.matmul(left, right)
via_tensordot = tf.tensordot(left, right, axes=1)

# Show operands and both results, separated by blank lines
print(left, right, via_matmul, via_tensordot, sep='\n\n')

tf.Tensor(
[[1 2 3]
 [4 5 6]], shape=(2, 3), dtype=int32)

tf.Tensor(
[[ 7  8]
 [ 9 10]
 [11 12]], shape=(3, 2), dtype=int32)

tf.Tensor(
[[ 58  64]
 [139 154]], shape=(2, 2), dtype=int32)

tf.Tensor(
[[ 58  64]
 [139 154]], shape=(2, 2), dtype=int32)


### [Expand Dim](https://www.tensorflow.org/api_docs/python/tf/expand_dims)

In [10]:
image = tf.zeros([10,10,3])
image.shape

TensorShape([10, 10, 3])

In [12]:
expanded = tf.expand_dims(image, axis=0)
expanded.shape

TensorShape([1, 10, 10, 3])

In [13]:
image[0]

<tf.Tensor: shape=(10, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]], dtype=float32)>

In [14]:
expanded[0]

<tf.Tensor: shape=(10, 10, 3), dtype=float32, numpy=
array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...
        [0., 0., 0.]]], dtype=float32)>

### [Softmax](https://www.tensorflow.org/api_docs/python/tf/nn/softmax)

In [19]:
softmax = tf.nn.softmax([-1, 0., 1.])
softmax


<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0.09003057, 0.24472848, 0.66524094], dtype=float32)>

In [20]:
sum(softmax)

<tf.Tensor: shape=(), dtype=float32, numpy=1.0>

### Skip-gram

```python

import tensorflow as tf
from tqdm import tqdm

# Skip-gram training sketch: predict every context word of a window from the
# window's center word.
#
# NOTE(review): text_as_int, context_vector_matrix, center_vector_matrix,
# optimizer and loss_list are not defined in this snippet. They are presumably
# the token-id sequence, two trainable (vocab_size, dim) tf.Variable embedding
# matrices, a Keras optimizer and an empty list -- confirm against the notebook.

# Number of iterations (epochs) to train the model
ITERATIONS = ...
# Tokenized text size
tokenized_text_size = ...
# Sliding window size for context words
WINDOW_SIZE = ...

# Position of the center word inside every window
center_pos = WINDOW_SIZE // 2

# Start of the main training loop
for _ in tqdm(range(ITERATIONS)):
    # Cumulative loss over all windows of this epoch
    loss_per_epoch = 0

    # Sliding window over the tokenized text; the "+ 1" keeps the last full
    # window (the one ending on the final token) in the sweep.
    for start in range(tokenized_text_size - WINDOW_SIZE + 1):
        # Extract the window of words as indices
        indices = text_as_int[start:start + WINDOW_SIZE]
        context_indices = [
            index for pos, index in enumerate(indices) if pos != center_pos
        ]

        # Only the forward pass is recorded on the tape
        with tf.GradientTape() as tape:
            # Extract the center word's vector from the center vector matrix
            center_vector = center_vector_matrix[indices[center_pos], :]

            # One score per row of the context matrix (one row per vocabulary
            # word, assuming the shapes noted above); result has a trailing
            # dimension of 1 because of expand_dims.
            output = tf.matmul(context_vector_matrix, tf.expand_dims(center_vector, 1))

            # Softmax over axis 0 turns the scores into a distribution
            softmax_output = tf.nn.softmax(output, axis=0)

            # Skip-gram negative log-likelihood: the SUM of -log p(context | center)
            # over the context words. Taking -log of the summed probabilities
            # instead would be a different objective that can be minimised by
            # predicting only one of the context words well.
            logloss = 0
            for index in context_indices:
                logloss -= tf.math.log(softmax_output[index, 0])

        # Everything below runs outside the tape context so that the gradient
        # computation and the optimizer step are not themselves recorded.

        # Update the cumulative loss for the current epoch
        loss_per_epoch += logloss.numpy()

        # Calculate the gradients for the context and center vector matrices
        grad = tape.gradient(logloss, [context_vector_matrix, center_vector_matrix])

        # Update both matrices using the calculated gradients
        optimizer.apply_gradients(zip(grad, [context_vector_matrix, center_vector_matrix]))

    # Append the cumulative loss for the current epoch to the loss list
    loss_list.append(loss_per_epoch)


```

