In [8]:
import bs4
from bs4 import BeautifulSoup
import requests
import json
import urllib.request
import re
# Regex character classes for Japanese text. Each one matches a single
# character from the named range(s) and is meant to be passed to
# remove_unicode_block / extract_unicode_block.
kanji_list = r'[㐀-䶵一-鿋豈-頻]'
ascii_char = r'[ -~]'
hiragana_full = r'[ぁ-ゟ]'
# Katakana block, U+30A0 (゠) through U+30FF (ヿ). The lower bound used to be
# '=' (U+003D), which made the class also match ASCII letters and hiragana.
katakana_full = r'[゠-ヿ]'

## FUNCTIONS ##

In [9]:
### Defining necessary functions ###

def request(action, **params):
    """Build the request envelope that `invoke` serialises to JSON.

    Args:
        action: Name of the API action to call.
        **params: Keyword arguments forwarded as the action's parameters.

    Returns:
        A dict with the keys ``action``, ``params`` and ``version`` (fixed at 6).
    """
    payload = dict(action=action, params=params, version=6)
    return payload

def invoke(action, **params):
    """Send ``action`` to the JSON API at http://127.0.0.1:8765 and return its result.

    The service is presumably AnkiConnect (TODO confirm); the payload is built
    by `request`.

    Args:
        action: Name of the API action to call.
        **params: Keyword arguments forwarded as the action's parameters.

    Returns:
        The value of the ``result`` field of the decoded response.

    Raises:
        Exception: If the response does not have exactly the two fields
            ``error`` and ``result``, or if ``error`` is not ``None`` (the
            error value is then used as the exception message).
    """
    requestJson = json.dumps(request(action, **params)).encode('utf-8')
    api_request = urllib.request.Request('http://127.0.0.1:8765', requestJson)
    # Use the response as a context manager so the HTTP connection is closed
    # even when JSON decoding fails, instead of waiting for garbage collection.
    with urllib.request.urlopen(api_request) as http_response:
        response = json.load(http_response)
    if len(response) != 2:
        raise Exception('response has an unexpected number of fields')
    if 'error' not in response:
        raise Exception('response is missing required error field')
    if 'result' not in response:
        raise Exception('response is missing required result field')
    if response['error'] is not None:
        raise Exception(response['error'])
    return response['result']

# Ask the API to create the deck 'test1'. This is a top-level statement, so it
# runs (and needs the service on 127.0.0.1:8765 to be reachable) every time
# this cell is executed.
invoke('createDeck', deck='test1')
# Disabled helper calls: list the deck names / delete the test deck with its cards.
#result = invoke('deckNames')
#print('got list of decks: {}'.format(result))
#invoke('deleteDecks', decks=['test1'], cardsToo=True)

# source: https://github.com/olsgaard/Japanese_nlp_scripts/blob/master/jp_regex.py
# -*- coding: utf-8 -*-

def remove_unicode_block(unicode_block, string):
    """Delete every match of ``unicode_block`` from ``string``.

    ``unicode_block`` must be one of the character-class patterns defined at
    the top of the file, or a regex of similar form.

    Returns:
        ``string`` with all matching characters removed.
    """
    pattern = re.compile(unicode_block)
    return pattern.sub('', string)

def extract_unicode_block(unicode_block, string):
    """Collect every match of ``unicode_block`` found in ``string``.

    ``unicode_block`` must be one of the character-class patterns defined at
    the top of the file, or a regex of similar form.

    Returns:
        The list of matches, in order of appearance (as ``re.findall`` does).
    """
    pattern = re.compile(unicode_block)
    return pattern.findall(string)

In [10]:
# TODO (improvement): attach the furigana only to the kanji, not to the trailing hiragana, so the formatting looks better

# Module-level placeholders for the four sentence fields, all starting empty.
# Example of what each one is meant to hold:
#   expression    -> この文は例えばです
#   reading       -> この文[ぶん]は例えば[たとえば]です
#   sentence_kana -> このぶんはたとえばです
#   sentence_en   -> This sentence is an example
expression = reading = sentence_kana = sentence_en = ''

def get_japanese_sentence(kanji, target_length=None):
    """Fetch an example sentence for ``kanji`` from Jisho's sentence search.

    Every sentence block on the returned page is parsed, and the one whose
    length is closest to the target length is kept (the earliest one wins
    ties).

    Args:
        kanji: Word to search for.
        target_length: Desired sentence length in characters. When omitted,
            the notebook-level ``best_sentence_length`` is used, so existing
            calls behave as before.

    Returns:
        A tuple ``(expression, reading, sentence_kana, sentence_en)``:
        the plain sentence, the sentence with ``word[furigana]`` annotations,
        the sentence with annotated words replaced by their furigana, and the
        English translation. All four are empty strings when no sentence
        could be parsed from the page.
    """
    if target_length is None:
        target_length = best_sentence_length

    linkpagina = f'https://jisho.org/search/{kanji}%20%23sentences'
    page = requests.get(linkpagina, headers={'User-Agent': 'Mozilla/5.0'})
    soup = BeautifulSoup(page.content, 'html.parser')

    # Initialise the results up front: they are locals of this function, so
    # leaving them to be assigned only inside the loop raised
    # UnboundLocalError whenever the page contained no sentence.
    expression = reading = sentence_kana = sentence_en = ''
    smallest_gap = 1e9

    for sentence in soup.find_all('div', class_='sentence_content'):
        japanese_all_letters = sentence.find('ul', class_='japanese_sentence japanese japanese_gothic clearfix')
        english_block = sentence.find('div', class_='english_sentence clearfix')
        english_span = english_block.find('span', class_='english') if english_block is not None else None
        if japanese_all_letters is None or english_span is None:
            # Skip blocks missing either half instead of crashing on None.
            continue

        expression_candidato = ''      # e.g. この文は例えばです
        reading_candidato = ''         # e.g. この文[ぶん]は例えば[たとえば]です
        sentence_kana_candidato = ''   # e.g. このぶんはたとえばです
        sentence_en_candidato = english_span.get_text()   # e.g. This sentence is an example

        for count, node in enumerate(japanese_all_letters):
            if type(node) is bs4.element.NavigableString:
                # Bare text between tags goes into all three forms unchanged.
                expression_candidato += node
                reading_candidato += node
                sentence_kana_candidato += node
            elif type(node) is bs4.element.Tag:
                furigana_tag = node.find('span', class_='furigana')
                word = node.find('span', class_='unlinked').get_text()
                expression_candidato += word
                if furigana_tag is not None:
                    # Word that carries a reading.
                    furigana = furigana_tag.get_text()
                    sentence_kana_candidato += furigana
                    # A space is needed before an annotated word for Anki to
                    # display it correctly, except when it opens the sentence.
                    separator = '' if count == 0 else ' '
                    reading_candidato += separator + word + '[' + furigana + ']'
                else:
                    reading_candidato += word
                    sentence_kana_candidato += word

        # Keep the sentence whose length is closest to the target.
        gap = abs(len(expression_candidato) - target_length)
        if gap < smallest_gap:
            expression = expression_candidato
            reading = reading_candidato
            sentence_kana = sentence_kana_candidato
            sentence_en = sentence_en_candidato
            smallest_gap = gap

    return expression, reading, sentence_kana, sentence_en

def get_definition(kanji):
    """Look up ``kanji`` on Jisho and return its reading and first meanings.

    Only the first entry on the word page is used.

    Args:
        kanji: Word to look up.

    Returns:
        A tuple ``(leitura, definicoes)``. ``leitura`` is the concatenated
        text of the ``span.kanji`` elements found inside the entry's furigana
        spans. ``definicoes`` is a list of at most three meanings, taken from
        the entry's first meaning and split on ``;`` with surrounding
        whitespace removed.

    Raises:
        ValueError: If the page has no entry or the entry has no definition.
    """
    link_definicao = f'https://jisho.org/word/{kanji}'
    page_definicao = requests.get(link_definicao, headers={'User-Agent': 'Mozilla/5.0'})
    soup_definicao = BeautifulSoup(page_definicao.content, 'html.parser')

    primeira_definicao = soup_definicao.find('div', class_='concept_light clearfix')
    if primeira_definicao is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f'no Jisho entry found for {kanji!r}')

    bloco_definicao = primeira_definicao.find('div', class_='meaning-definition zero-padding')
    significados = bloco_definicao.find_all('span', class_='meaning-meaning') if bloco_definicao is not None else []
    if not significados:
        raise ValueError(f'no definition found for {kanji!r}')

    # Splitting on ';' leaves a leading space on every part after the first;
    # strip it so the joined field does not contain stray spaces.
    partes = (parte.strip() for parte in significados[0].get_text().split(';'))
    definicoes = [parte for parte in partes if parte][:3]

    leitura = ''.join(
        trecho.get_text()
        for furigana in primeira_definicao.find_all('span', class_='furigana')
        for trecho in furigana.find_all('span', class_='kanji')
    )

    return leitura, definicoes



In [11]:
# --- Configuration ---
best_sentence_length = 20   # target sentence length in characters; read by get_japanese_sentence
deck_name = "test1"
kanjis = ["経営"]

# --- Build and add one note per word ---
for kanji in kanjis:
    try:
        # The lookups are inside the try block so that a failed request or an
        # unparsable page for one word is reported and skipped, rather than
        # aborting the whole batch.
        expression, reading, sentence_kana, sentence_en = get_japanese_sentence(kanji)
        leitura, definicoes = get_definition(kanji)

        invoke('addNote',
            note= {
                "deckName": deck_name,
                "modelName": "Core 2000",
                "fields": {
                    'Optimized-Voc-Index': kanji,
                    'Vocabulary-Kanji': kanji,
                    'Vocabulary-Kana': leitura,
                    # Word with hiragana removed + [reading] + word with kanji removed.
                    # NOTE(review): this assumes any kana follow the kanji - confirm for other word shapes.
                    'Vocabulary-Furigana' : re.sub(hiragana_full, '', kanji) + '[' + leitura + ']' + re.sub(kanji_list, '', kanji),
                    'Vocabulary-English': '/'.join(definicoes),
                    'Expression': expression,
                    'Reading': reading,
                    'Sentence-Kana': sentence_kana,
                    'Sentence-English': sentence_en,
                },
                "options": {
                    "allowDuplicate": False,
                    "duplicateScope": "deck",
                    "duplicateScopeOptions": {
                        "deckName": deck_name,
                        "checkChildren": False,
                        "checkAllModels": False
                    }
                },
            }
        )
    except Exception as e:
        # Report the failure and move on to the next word.
        print(e)
        print("Erro ao adicionar o kanji " + kanji + " ao deck")