In [127]:
import numpy as np
import torch
import torch.nn as nn
from torch.nn.functional import relu, softmax
from torch.optim import SGD

In [128]:
# Toy corpus: one sentence, lower-cased and split on whitespace into tokens.
text = "I like mozarella cheesy pizza"
words = str.split(text.lower())
words

['i', 'like', 'mozarella', 'cheesy', 'pizza']

In [129]:
# Vocabulary = distinct tokens in order of first appearance:
# a token is kept only if it has not occurred earlier in `words`.
vocabs = [token for pos, token in enumerate(words) if token not in words[:pos]]

vocabs

['i', 'like', 'mozarella', 'cheesy', 'pizza']

In [130]:
# Forward lookup (word -> integer id) and its inverse (integer id -> word).
word2idx = dict(zip(vocabs, range(len(vocabs))))
idx2word = dict(zip(word2idx.values(), word2idx.keys()))
idx2word

{0: 'i', 1: 'like', 2: 'mozarella', 3: 'cheesy', 4: 'pizza'}

In [131]:
# Build skip-gram (center, context) training pairs.
# The window must slide over the token sequence `words`, not the deduplicated
# `vocabs` list: once a word repeats, positions in `vocabs` no longer match
# adjacency in the text, so pairs would be wrong or missing.
window_size = 1  # number of neighbours taken on each side of the center word
data = []

for i, word in enumerate(words):
  # Clamp the left edge at 0 (a negative start would wrap around);
  # slicing past the right end is already safe in Python.
  left = words[max(0, i - window_size):i]
  right = words[i + 1:i + 1 + window_size]

  # Preceding neighbours first, then following ones.
  for context in left + right:
    data.append((word, context))

data

[('i', 'like'),
 ('like', 'i'),
 ('like', 'mozarella'),
 ('mozarella', 'like'),
 ('mozarella', 'cheesy'),
 ('cheesy', 'mozarella'),
 ('cheesy', 'pizza'),
 ('pizza', 'cheesy')]

In [132]:
def one_hot_transform(word, vocabs_size):
  """Return a length-`vocabs_size` float vector with a single 1 at `word`'s index.

  The index is looked up in the notebook-level `word2idx` mapping, so a word
  missing from that mapping raises KeyError.
  """
  encoded = np.zeros(vocabs_size)
  hot_position = word2idx[word]
  encoded[hot_position] = 1
  return encoded

In [133]:
def one_hot_inverse_transform(one_hot_encoded: np.ndarray):
  """Return the index of the first entry equal to 1 in a one-hot vector.

  Raises IndexError when no entry equals 1.
  """
  matches = np.where(one_hot_encoded == 1)
  hits_along_first_axis = matches[0]
  return hits_along_first_axis[0]

In [134]:
# One one-hot row per vocabulary word, stacked in vocabulary order.
onehot_input = np.array([
  one_hot_transform(token, len(vocabs))
  for token in vocabs
])
onehot_input

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [135]:
class Word2Vec(nn.Module):
  """Tiny skip-gram style model: two bias-free weight matrices.

  An input row of size `vocabs_size` is multiplied by `w1`
  (vocabs_size x embeddings_dimension) and then by `w2`
  (embeddings_dimension x vocabs_size), giving one score per vocabulary word.
  Weights are drawn from `np.random.uniform(-1, 1)`, so they are float64 and
  inputs must be float64 as well.
  """

  def __init__(self, vocabs_size: int, lr: float = 0.01, embeddings_dimension: int = 3) -> None:
    """Create the two weight matrices.

    Args:
      vocabs_size: number of distinct words (input and output width).
      lr: learning rate used by the SGD optimizer.
      embeddings_dimension: width of the hidden embedding; defaults to 3.
    """
    super().__init__()
    self.embeddings_dimension = embeddings_dimension
    self.vocabs_size = vocabs_size
    self.w1 = nn.Parameter(torch.tensor(np.random.uniform(-1, 1, (vocabs_size, self.embeddings_dimension))), requires_grad=True)
    self.w2 = nn.Parameter(torch.tensor(np.random.uniform(-1, 1, (self.embeddings_dimension, vocabs_size))), requires_grad=True)
    self.learning_rate = lr

  def forward(self, input_encoded) -> torch.Tensor:
    """Return raw scores (logits) over the vocabulary for the given input."""
    h = torch.matmul(input_encoded, self.w1)
    u = torch.matmul(h, self.w2)

    # No softmax here: nn.CrossEntropyLoss (used in `fit`) expects unnormalised
    # logits and applies log-softmax itself.
    return u

  def get_optimizer(self) -> SGD:
    """Return a fresh SGD optimizer over this model's parameters."""
    return SGD(self.parameters(), lr=self.learning_rate)

  def fit(self, X: torch.Tensor, y: torch.Tensor, epochs: int = 100) -> None:
    """Run full-batch gradient descent on (X, y) and print the loss per epoch.

    Gradients of every sample are accumulated and a single optimizer step is
    taken per epoch. Training stops early, without a step, once the summed
    loss drops below 0.0001.

    Args:
      X: iterable of input rows, each of width `vocabs_size`.
      y: iterable of target class indices, aligned with `X`.
      epochs: maximum number of passes over the data.
    """
    optimizer = self.get_optimizer()
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
      # Clear at the start of every epoch so gradients left behind by an
      # early `break` (or a previous call) never leak into this step.
      optimizer.zero_grad()
      total_loss = 0.0

      for x_train, label in zip(X, y):
        loss = criterion(self(x_train), label)
        loss.backward()
        total_loss += loss.item()

      if total_loss < 0.0001:
        break

      optimizer.step()

      print(f'Epoch {epoch}: Loss {total_loss}')

  def train(self, X: 'torch.Tensor | bool' = True, y: 'torch.Tensor | None' = None, epochs: int = 100):
    """Train on (X, y), or toggle training mode like `nn.Module.train`.

    This name overrides `nn.Module.train(mode)`, which `nn.Module.eval()`
    calls as `self.train(False)`. To keep both uses working:

    * `train(X, y, epochs)` runs `fit` and returns None.
    * `train()` / `train(True)` / `train(False)` (no `y`) delegates to
      `nn.Module.train` and returns the module.

    Raises:
      TypeError: if `y` is omitted and `X` is not a bool.
    """
    if y is None:
      if not isinstance(X, bool):
        raise TypeError("train() requires labels `y` when `X` is training data")
      return super().train(X)
    return self.fit(X, y, epochs)

In [136]:
# Instantiate an untrained model and score every vocabulary word's one-hot row
# to inspect the initial, randomly initialised outputs.
model = Word2Vec(vocabs_size=len(vocabs), lr=0.1)
output = model(
  torch.tensor(onehot_input)
)
output

tensor([[-0.3899,  0.3364,  0.5358,  0.3011,  0.4720],
        [-0.3217, -1.2619,  0.6664,  0.5376, -0.1300],
        [ 0.2469, -0.5536, -0.1277, -0.0517, -0.3102],
        [ 0.9358, -0.5857, -0.0243, -0.1652, -0.2306],
        [ 0.1305, -2.2047,  0.5609,  0.4937, -0.5854]], dtype=torch.float64,
       grad_fn=<MmBackward0>)

In [137]:
# Greedy decode: for each input row pick the highest-scoring vocabulary index
# and map it back to its word.
y_pred = output.argmax(dim=1)
print(f'Text: {text}')
print('Prediction: ' + " ".join(idx2word[idx] for idx in y_pred.tolist()))

Text: I like mozarella cheesy pizza
Prediction: mozarella mozarella i i mozarella


In [138]:
# Convert each (center, context) pair into a training example:
# the center word becomes a one-hot input row, the context word a class index.
X_train = [one_hot_transform(center, len(vocabs)) for center, _ in data]
y_train = [word2idx[context] for _, context in data]
print(f'X: {X_train}')
print(f'y: {y_train}')

X: [array([1., 0., 0., 0., 0.]), array([0., 1., 0., 0., 0.]), array([0., 1., 0., 0., 0.]), array([0., 0., 1., 0., 0.]), array([0., 0., 1., 0., 0.]), array([0., 0., 0., 1., 0.]), array([0., 0., 0., 1., 0.]), array([0., 0., 0., 0., 1.])]
y: [1, 0, 2, 1, 3, 2, 4, 3]


In [139]:
# Stack the per-sample one-hot rows into a single ndarray before converting:
# torch.tensor() on a Python list of ndarrays takes a slow path and emits a
# UserWarning. The resulting tensor (shape, float64 dtype) is unchanged.
model.train(torch.tensor(np.array(X_train)), torch.tensor(y_train))

Epoch 0: Loss 13.128059698670869
Epoch 1: Loss 12.129033919739198
Epoch 2: Loss 11.400912831092677
Epoch 3: Loss 10.791834119188726
Epoch 4: Loss 10.236171726907187
Epoch 5: Loss 9.704244292077515
Epoch 6: Loss 9.184919165538881
Epoch 7: Loss 8.677620321170126
Epoch 8: Loss 8.187599870141522
Epoch 9: Loss 7.7227829766603895
Epoch 10: Loss 7.291612526195818
Epoch 11: Loss 6.901364365890682
Epoch 12: Loss 6.55662067079123
Epoch 13: Loss 6.258250136496238
Epoch 14: Loss 6.0035086538971365
Epoch 15: Loss 5.787254948025664
Epoch 16: Loss 5.603530569727123
Epoch 17: Loss 5.446734417360708
Epoch 18: Loss 5.312149737937187
Epoch 19: Loss 5.196002875658103
Epoch 20: Loss 5.09531464921381
Epoch 21: Loss 5.007711360056235
Epoch 22: Loss 4.931264208368186
Epoch 23: Loss 4.8643724712951375
Epoch 24: Loss 4.805685089147413
Epoch 25: Loss 4.754049868540671
Epoch 26: Loss 4.708480056430776
Epoch 27: Loss 4.6681306369917435
Epoch 28: Loss 4.632279582476838
Epoch 29: Loss 4.600311609020942
Epoch 30: Los

In [140]:
# Re-score every vocabulary word with the trained model and print the
# top-scoring predicted word for each input word.
output = model(torch.tensor(onehot_input))
y_pred = output.argmax(dim=1)
print(f'Text: {text}')
print('Prediction: ' + " ".join(idx2word[idx] for idx in y_pred.tolist()))

Text: I like mozarella cheesy pizza
Prediction: like i like mozarella cheesy
