In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import json


In [6]:
# Load the word -> meaning mapping from the JSON file.
# The encoding is given explicitly because JSON text is UTF-8 by specification,
# while open() would otherwise fall back to the platform's locale encoding.
with open('./archive/En-Or_word_pairs.json', encoding='utf-8') as f:
  dictionary = json.load(f)

# One (word, meaning) tuple per dictionary entry, in the mapping's own order.
dataset = list(dictionary.items())

In [7]:
# Serve the (word, meaning) pairs in shuffled mini-batches of 16.
train_dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

In [13]:
class OdiaDictionaryModel(nn.Module):
  """Embedding lookup followed by a linear layer that scores every output class.

  Args:
    vocab_size: Number of rows in the embedding table (distinct input ids).
      When omitted, falls back to ``len(dictionary)`` from the notebook
      namespace, which is what the class originally hard-coded.
    embedding_dim: Width of each embedding vector.
    num_classes: Number of scores produced per input id. When omitted it
      equals ``vocab_size``.
  """

  def __init__(self, vocab_size=None, embedding_dim=128, num_classes=None):
    super().__init__()
    if vocab_size is None:
      # Backward-compatible default: size the model from the loaded dictionary.
      vocab_size = len(dictionary)
    if num_classes is None:
      num_classes = vocab_size
    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    self.linear = nn.Linear(embedding_dim, num_classes)

  def forward(self, x):
    """Return unnormalised class scores for the integer ids in ``x``.

    ``x`` must hold integer indices below the embedding table size; the result
    has the shape of ``x`` with one trailing axis of length ``num_classes``.
    """
    x = self.embedding(x)
    x = self.linear(x)
    return x

In [62]:
# Training components: cross-entropy objective, a freshly initialised model,
# and Adam (library-default hyperparameters) over all of the model's parameters.
loss_function = nn.CrossEntropyLoss()
model = OdiaDictionaryModel()
optimizer = optim.Adam(params=model.parameters())

In [63]:
# Use the GPU when CUDA is usable, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Bare last expression on purpose: the notebook displays the returned module.
model.to(device)

OdiaDictionaryModel(
  (embedding): Embedding(215339, 128)
  (linear): Linear(in_features=128, out_features=215339, bias=True)
)

In [64]:
def _build_vocabulary(samples, column):
    """Map each distinct value found at ``column`` of ``samples`` to a dense integer id."""
    vocabulary = {}
    for sample in samples:
        # Ids follow first-seen order, so the same data always yields the same ids.
        vocabulary.setdefault(sample[column], len(vocabulary))
    return vocabulary


def _encode_column(values, vocabulary, target_device):
    """Return one batch column as a tensor placed on ``target_device``."""
    if torch.is_tensor(values):
        # Already numeric (dataset yields tensors): only the device may need to change.
        return values.to(target_device)
    indices = [vocabulary[value] for value in values]
    return torch.tensor(indices, dtype=torch.long, device=target_device)


def train_model(model, train_dataloader, loss_function, optimizer, num_epochs,
                word_to_index=None, meaning_to_index=None,
                save_path='odia_dictionary_model.pkl'):
    """Train ``model`` on ``(input, target)`` batches and optionally save it.

    Batches whose columns are already tensors are used as they are. Columns that
    arrive as sequences of raw values (e.g. the strings produced when the
    dataset holds ``(word, meaning)`` text pairs) are converted to integer ids
    first, because neither the model nor the loss can consume strings.

    Args:
        model: Module called as ``model(inputs)``.
        train_dataloader: Iterable yielding ``(inputs, targets)`` batches. Its
            ``dataset`` attribute is read only when a vocabulary has to be built.
        loss_function: Callable ``loss_function(output, targets)`` returning a
            scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of full passes over ``train_dataloader``.
        word_to_index: Optional mapping from raw input value to id. Built from
            column 0 of ``train_dataloader.dataset`` when needed and omitted.
        meaning_to_index: Optional mapping from raw target value to class id.
            Built from column 1 of ``train_dataloader.dataset`` when needed and
            omitted.
        save_path: Where the whole model object is written with ``torch.save``
            after training; pass ``None`` to skip saving. Loading such a file
            later unpickles it, so the model's class must be importable then.

    Returns:
        A list with the mean batch loss of every epoch (``nan`` for an epoch
        that produced no batches).

    Note:
        Every generated id must fit the model: input ids below its embedding
        table size, class ids below its number of outputs.
    """
    # Follow the model's own placement instead of a notebook-global `device`.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    epoch_losses = []
    for _ in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for inputs, targets in train_dataloader:
            # Build missing vocabularies lazily, and only if raw values show up.
            if word_to_index is None and not torch.is_tensor(inputs):
                word_to_index = _build_vocabulary(train_dataloader.dataset, 0)
            if meaning_to_index is None and not torch.is_tensor(targets):
                meaning_to_index = _build_vocabulary(train_dataloader.dataset, 1)

            # Use the whole batch, not just its first element.
            x = _encode_column(inputs, word_to_index, target_device)
            y = _encode_column(targets, meaning_to_index, target_device)

            output = model(x)  # Pass input through the model
            loss = loss_function(output, y)

            optimizer.zero_grad()  # Zero out the gradients
            loss.backward()  # Compute the gradients
            optimizer.step()  # Update the model parameters

            running_loss += loss.item()
            num_batches += 1

        epoch_losses.append(running_loss / num_batches if num_batches else float('nan'))

    if save_path is not None:
        torch.save(model, save_path)
    return epoch_losses