In [22]:
# These are all the dependencies
# langchain lanchain-openai: for LangChain stuff
# request: for REST API calls
# mecab-python3 unidic-lite: for dividing Japanese sentences into words
# genanki: for creating Anki cards
! pip install -q langchain langchain-openai requests mecab-python3 unidic-lite genanki

In [14]:
# All the imports go here
from langchain_openai import OpenAI
from langchain_core.prompts import PromptTemplate
from google.colab import userdata
import requests
import MeCab
import urllib.parse
import genanki
import pprint

In [15]:
# Create an llm and prompt template

# The API key is read from Colab's userdata store instead of being hard-coded
# in the notebook.
llm = OpenAI(
    model="gpt-3.5-turbo-instruct",
    temperature=0,
    max_retries=2,
    api_key=userdata.get('OPENAI_API_KEY'),
)

# The template has a single placeholder, {sentence}. The adjacent string
# literals are concatenated into one template that ends with the
# "Translation = " cue for the model to complete.
prompt = PromptTemplate.from_template((
    "Translate the following sentence to English: {sentence}" "\n"
    "Translation = "
))

In [16]:
# Get a Japanese sentence from Tatoeba

SENTENCE_ID = 1297
TATOEBA_API_URL = "https://api.dev.tatoeba.org/unstable"

# A timeout keeps the cell from hanging indefinitely if the API is unreachable.
response = requests.get(f"{TATOEBA_API_URL}/sentences/{SENTENCE_ID}", timeout=10)

# Fail loudly here: the following cells all read `data`, so carrying on after
# a failed request would only surface later as a NameError, or silently reuse
# a stale `data` left in the kernel by an earlier run.
if response.status_code != 200:
    raise RuntimeError(f"Request failed with status code {response.status_code}")

data = response.json()


In [17]:
# Inspect the answer from Tatoeba

print(f"Text: {data['data']['text']}")
print(f"Language: {data['data']['lang']}")

# The translations array holds 2 items; only the first group is used here.
# NOTE(review): presumably [0] is the direct translations and [1] the indirect
# ones (translations of translations) -- confirm against the Tatoeba API docs.
translation_groups = data['data']['translations']
first_group = translation_groups[0] if translation_groups else []
translations = [translation for translation in first_group if translation['lang'] == 'eng']
print(f"Available translations: {len(translations)}")

# Guard the lookup: a sentence without an English translation would otherwise
# raise IndexError on translations[0].
if translations:
    print(f"Translations: {translations[0]['text']}")
else:
    print("Translations: (none)")


Text: きみにちょっとしたものをもってきたよ。
Language: jpn
Available translations: 1
Translations: I brought you a little something.


In [18]:
# Translate the sentence

# Fill the template with the Tatoeba text first, then send it to the model.
# The bare call is the cell's last expression, so the notebook displays the
# returned translation.
filled_prompt = prompt.format(sentence=data["data"]["text"])
llm.invoke(filled_prompt)

'I brought something small for you.'

In [19]:
# Divide the sentence into words
# The "-Owakati" tagger returns the sentence as one whitespace-separated
# string, so a plain split() yields the list of tokens. The tokens are the
# pieces exactly as they appear in the sentence (e.g. 'もっ', 'し').
wakati = MeCab.Tagger("-Owakati")
segmented_text = wakati.parse(data["data"]["text"])
words = segmented_text.split()
print(words)

['きみ', 'に', 'ちょっと', 'し', 'た', 'もの', 'を', 'もっ', 'て', 'き', 'た', 'よ', '。']


In [20]:
# Get the JLPT Classification for the words
JLPT_API_URL = "https://jlpt-vocab-api.vercel.app"

jlpt_classifications = []

for word in words:
    try:
        # Passing the word via `params` lets requests percent-encode the
        # Japanese text; the timeout stops one stalled lookup from hanging
        # the whole loop.
        response = requests.get(
            f"{JLPT_API_URL}/api/words",
            params={"word": word},
            timeout=10,
        )
    except requests.RequestException as error:
        # Best effort, same as for a non-200 answer: report and move on.
        print(f"Request failed: {error}")
        continue

    if response.status_code == 200:
        jlpt_classifications.append(response.json())
        print(jlpt_classifications[-1])
    else:
        print(f"Request failed with status code {response.status_code}")

{'total': 0, 'offset': 0, 'limit': 10, 'words': []}
{'total': 0, 'offset': 0, 'limit': 10, 'words': []}
{'total': 1, 'offset': 0, 'limit': 10, 'words': [{'word': 'ちょっと', 'meaning': 'somewhat', 'furigana': '', 'romaji': 'chotto', 'level': 5}]}
{'total': 1, 'offset': 0, 'limit': 10, 'words': [{'word': 'し', 'meaning': '10^24 (kanji is JIS X 0212 kuten 4906); septillion (American); quadrillion (British)', 'furigana': '', 'romaji': 'shi', 'level': 1}]}
{'total': 0, 'offset': 0, 'limit': 10, 'words': []}
{'total': 0, 'offset': 0, 'limit': 10, 'words': []}
{'total': 0, 'offset': 0, 'limit': 10, 'words': []}
{'total': 0, 'offset': 0, 'limit': 10, 'words': []}
{'total': 0, 'offset': 0, 'limit': 10, 'words': []}
{'total': 0, 'offset': 0, 'limit': 10, 'words': []}
{'total': 0, 'offset': 0, 'limit': 10, 'words': []}
{'total': 0, 'offset': 0, 'limit': 10, 'words': []}
{'total': 0, 'offset': 0, 'limit': 10, 'words': []}


In [21]:
# Generate Anki cards for the words

# Two-field note type: the word on the front, its meaning revealed on the back.
# The model and deck IDs are fixed literals, so they are the same on every run.
my_model = genanki.Model(
    1607392319,
    'Simple Model',
    fields=[
        {'name': 'Question'},
        {'name': 'Answer'},
    ],
    templates=[
        {
            'name': 'Card 1',
            'qfmt': '{{Question}}',
            'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}',
        },
    ],
)

my_deck = genanki.Deck(
    2059400110,
    'Japanese Words'
)

# A word can occur more than once in a sentence; remember which words already
# have a card so each one is added to the deck only once.
seen_words = set()

for classification in jlpt_classifications:
    # Skip lookups with no match. The words list is checked as well as the
    # total, so an inconsistent response cannot cause an IndexError below.
    if classification['total'] <= 0 or not classification['words']:
        continue

    # Only the first returned entry is turned into a card.
    entry = classification['words'][0]
    if entry['word'] in seen_words:
        continue
    seen_words.add(entry['word'])

    my_note = genanki.Note(
        model=my_model,
        fields=[entry['word'], entry['meaning']]
    )

    pprint.pp(my_note)

    my_deck.add_note(my_note)

# Write the finished deck to an .apkg file in the current working directory.
genanki.Package(my_deck).write_to_file('output.apkg')

Note(model=Model(model_id=1607392319, name='Simple Model', fields=[{'name': 'Question'}, {'name': 'Answer'}], templates=[{'name': 'Card 1', 'qfmt': '{{Question}}', 'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}'}], css='', model_type=0), fields=['ちょっと', 'somewhat'], sort_field='ちょっと', tags=_TagList([]), guid='Bh7bZ}W(V]')
Note(model=Model(model_id=1607392319, name='Simple Model', fields=[{'name': 'Question'}, {'name': 'Answer'}], templates=[{'name': 'Card 1', 'qfmt': '{{Question}}', 'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}'}], css='', model_type=0), fields=['し', '10^24 (kanji is JIS X 0212 kuten 4906); septillion (American); quadrillion (British)'], sort_field='し', tags=_TagList([]), guid='b2UTAu0K!?')
