## Word Embeddings

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Toy vocabulary: each word maps to its row index in the embedding table.
# Indices are contiguous (0 .. len - 1), so the dictionary size equals the
# number of rows the table needs.
word_to_idx = {"I": 0, "love": 1, "eating": 2, "and": 3, "sleeping": 4}

# Length of the vector learned for each word
EMBEDDING_DIM = 7

# One row per vocabulary word. The row count is derived from the dictionary so
# the table cannot fall out of sync when words are added or removed. Weights
# are randomly initialised, so the embedding values differ between runs unless
# a seed is set beforehand.
embeddings = nn.Embedding(len(word_to_idx), EMBEDDING_DIM)

# nn.Embedding is indexed with an integer tensor, so wrap the index of 'love'
word_index = torch.tensor([word_to_idx["love"]])

# Show the lookup index for 'love'
print(word_index)

tensor([1])


In [3]:
# Look up the row of the embedding table that corresponds to 'love'
love = embeddings(word_index)

# Show the vector first, then its shape, each on its own line
print(love, love.shape, sep="\n")

tensor([[ 0.4332, -0.5624, -1.1483, -1.7561,  1.5084,  0.5255, -0.3182]],
       grad_fn=<EmbeddingBackward0>)
torch.Size([1, 7])


In [4]:
# Indices 0 .. vocab_size - 1, one per row of the embedding table. The count is
# read from the layer itself rather than repeating the literal vocabulary size,
# and torch.arange builds the tensor directly without an intermediate list.
all_ind = torch.arange(embeddings.num_embeddings, dtype=torch.long)

# Look up every row of the embedding table in a single call
all_words = embeddings(all_ind)

# Print the indices of all words
print(all_ind)

# Print the embedding vectors for all words
print(all_words)

# Print the shape of the result: one row per word, one column per embedding dimension
print(all_words.shape)

tensor([0, 1, 2, 3, 4])
tensor([[-1.1862, -0.2399, -0.2611, -0.5782, -0.1403, -0.3588,  0.0552],
        [ 0.4332, -0.5624, -1.1483, -1.7561,  1.5084,  0.5255, -0.3182],
        [-0.8858, -0.3912,  1.3797,  0.1791, -0.7286, -1.1975, -1.0667],
        [-0.5861, -2.1039,  1.2275, -0.0865, -0.6502,  0.1334, -0.4111],
        [ 1.4001, -0.1799, -0.2872,  1.1480, -0.8147,  0.5520, -0.9192]],
       grad_fn=<EmbeddingBackward0>)
torch.Size([5, 7])


## Exercise: N-Gram Language Model
### Given a sequence of words, we want to predict the $i$-th word from the words that precede it: $P(w_i \mid w_{i-1}, w_{i-2}, \dots, w_{i-n+1})$
#### Source: https://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html#an-example-n-gram-language-modeling  
