# Kanji Radical Match

Data processing, training, and output.

In [46]:
import utils as u
import torch

eng_to_rads = dict(list(u.load_eng_to_rads().items()))

tensor_in, tensor_out = u.dict_to_tensors(eng_to_rads)
tensor_in = tensor_in.to(torch.float32)
tensor_out = tensor_out.to(torch.float32)
print(tensor_in.shape[0], tensor_out.shape[0])

12944 12944


In [48]:
eng_vocab, rad_vocab = list(eng_to_rads.keys()), list(set(sum(eng_to_rads.values(), [])))
eng_word_to_idx = {word: idx for idx, word in enumerate(eng_vocab)}
print(len(eng_vocab), len(rad_vocab))

3883 781


In [62]:
from utils import KanjiFFNN
import torch.optim as optim
import torch.nn as nn


e2r_model = KanjiFFNN(tensor_in.shape[0], tensor_out.shape[0], tensor_out.shape[0])
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(e2r_model.parameters(), lr=0.01)

In [63]:
# Train model
epochs = 8
for i in range(0, epochs):
    # Zero the gradient buffers
    optimizer.zero_grad()
    output = e2r_model(tensor_in)
    # Large
    loss = loss_fn(tensor_out, output)
    loss.backward()
    # Update
    optimizer.step()
    print("Epoch {} Loss: {}".format(i, loss.data.numpy()))

Epoch 0 Loss: 2628493.5
Epoch 1 Loss: 2577212.5
Epoch 2 Loss: 2577212.5
Epoch 3 Loss: 2577212.5
Epoch 4 Loss: 2577212.5
Epoch 5 Loss: 2577212.5
Epoch 6 Loss: 2577212.5
Epoch 7 Loss: 2577212.5


In [64]:
def preprocess_input(word):
    one_hot = torch.zeros(len(eng_vocab))
    word_idx = eng_vocab.index(word)
    one_hot[word_idx] = 1
    return one_hot

In [65]:
def predict_radicals(word):
    input_tensor = preprocess_input(word).unsqueeze(0)
    out = e2r_model(input_tensor)
    _, top_indices = torch.topk(output, k=5)
    top_radicals = [rad_vocab[idx] for idx in top_indices[0]]
    return top_radicals

In [None]:
import torch

e2r_model.eval()
# This errors out
eng_word_example = 'strong'
predicted_radicals = predict_radicals(eng_word_example)
print(f"Predicted radicals for '{eng_word_example}': {predicted_radicals}")