In [None]:
from bs4 import BeautifulSoup
import requests
import json
import urllib.request

In [None]:
def request(action, **params):
    """Build the JSON-serialisable payload for one API call.

    Args:
        action: Name of the action to invoke.
        **params: Keyword arguments forwarded verbatim as the action's
            parameters.

    Returns:
        A dict with the keys ``action``, ``params`` and ``version`` (the
        version is fixed at 6).
    """
    payload = dict(action=action, params=params, version=6)
    return payload

def invoke(action, **params):
    """Send one action to the local JSON API and return its result.

    The payload built by ``request`` is POSTed to ``http://127.0.0.1:8765``
    (presumably the AnkiConnect add-on -- confirm against your setup) and the
    JSON reply is validated before the result is handed back.

    Args:
        action: Name of the action to invoke.
        **params: Parameters of the action, passed through to ``request``.

    Returns:
        The value of the reply's ``result`` field.

    Raises:
        Exception: If the reply does not consist of exactly the two fields
            ``error`` and ``result``, or if ``error`` is not ``None`` (the
            reported error is used as the exception message).
    """
    requestJson = json.dumps(request(action, **params)).encode('utf-8')
    # Use the response as a context manager so the HTTP connection is closed
    # even when decoding the body fails.
    with urllib.request.urlopen(urllib.request.Request('http://127.0.0.1:8765', requestJson)) as reply:
        response = json.load(reply)
    if len(response) != 2:
        raise Exception('response has an unexpected number of fields')
    if 'error' not in response:
        raise Exception('response is missing required error field')
    if 'result' not in response:
        raise Exception('response is missing required result field')
    if response['error'] is not None:
        raise Exception(response['error'])
    return response['result']

# Ask the service to create the deck 'test1'; the notes added further down in
# this file are filed under that deck name.
invoke('createDeck', deck='test1')
# Fetch the list of deck names. The value is only consumed by the disabled
# debug print below.
result = invoke('deckNames')
# Disabled debug output and cleanup (the latter would delete the deck and its cards):
#print('got list of decks: {}'.format(result))
#invoke('deleteDecks', decks=['test1'], cardsToo=True)

# source: https://github.com/olsgaard/Japanese_nlp_scripts/blob/master/jp_regex.py
# -*- coding: utf-8 -*-
import re
# Regex character class matching a single kanji; it is built from three
# ranges of CJK ideographs.
kanji_list = r'[㐀-䶵一-鿋豈-頻]'
# Regex character class matching a single printable ASCII character
# (space through tilde).
ascii_char = r'[ -~]'

## FUNCTIONS ##

def remove_unicode_block(unicode_block, string):
    """Strip every match of a character-class pattern from a string.

    Args:
        unicode_block: Regular expression describing the characters to drop,
            e.g. one of the character-class patterns defined in this file or
            a pattern of similar form.
        string: Text to clean.

    Returns:
        ``string`` with all matches of ``unicode_block`` removed.
    """
    pattern = re.compile(unicode_block)
    return pattern.sub('', string)

def extract_unicode_block(unicode_block, string):
    """Collect every match of a character-class pattern found in a string.

    Args:
        unicode_block: Regular expression describing the characters to keep,
            e.g. one of the character-class patterns defined in this file or
            a pattern of similar form.
        string: Text to search.

    Returns:
        A list with all non-overlapping matches of ``unicode_block`` in
        ``string``, in order of appearance.
    """
    pattern = re.compile(unicode_block)
    return pattern.findall(string)



In [None]:
def _pick_example(results, target_length):
    """Return the search result whose sentence length is closest to *target_length*.

    Only results that carry both a transcription and a first-group translation
    are considered, because both are needed to fill the note. Ties go to the
    earliest result. Returns ``None`` when no result qualifies.
    """
    usable = [
        entry for entry in results
        if entry.get('transcriptions')
        and entry.get('translations')
        and entry['translations'][0]
    ]
    if not usable:
        return None
    return min(usable, key=lambda entry: abs(len(entry['text']) - target_length))


def _to_anki_furigana(transcription):
    """Rewrite a sentence transcription into ``base[reading]`` furigana notation.

    The transcription is assumed to mark readings as ``[base|reading|reading]``
    (the markup the bracket handling here targets -- confirm against the API).
    Each such group becomes ``base[readingreading]``, all spaces are removed,
    and a single space is then inserted before every run of kanji that does
    not start the string so the furigana attaches to the right characters.
    """
    def _merge_group(match):
        # Join the per-kanji reading segments; empty segments simply vanish.
        return f"{match.group(1)}[{match.group(2).replace('|', '')}]"

    merged = re.sub(r'\[([^\[\]|]+)\|([^\[\]]*)\]', _merge_group, transcription)
    merged = merged.replace(' ', '')
    # One space per maximal kanji run, never inside a run and never at index 0.
    return re.sub(
        f'{kanji_list}+',
        lambda run: run.group() if run.start() == 0 else ' ' + run.group(),
        merged,
    )


# Words to turn into notes, and the sentence length (in characters) that the
# chosen example sentence should be closest to.
kanjis = ["合格"]
best_sentence_length = 20

for kanji in kanjis:
    # --- Example sentence -------------------------------------------------
    # Passing the word through `params` lets requests URL-encode it.
    page = requests.get(
        'https://tatoeba.org/eng/api_v0/search',
        params={
            'from': 'jpn',
            'query': kanji,
            'trans_filter': 'limit',
            'trans_link': 'direct',
            'trans_to': 'eng',
            'to': 'eng',
            'list': '171073',
        },
        headers={'User-Agent': 'Mozilla/5.0'},
    )
    data = json.loads(page.content)

    example = _pick_example(data.get('results') or [], best_sentence_length)
    if example is None:
        # Without this guard the note would be built from undefined variables
        # or from the previous word's sentence.
        print(f'skipping {kanji}: no example sentence with a reading and a translation')
        continue

    sentence = example['text']
    transcription = example['transcriptions'][0]['text']
    sentence_en = example['translations'][0][0]['text']

    # Kana-only rendering: drop the kanji, then the ASCII markup characters.
    sentence_kana = remove_unicode_block(ascii_char, remove_unicode_block(kanji_list, transcription))
    sentence_reading = _to_anki_furigana(transcription)

    # --- Dictionary entry -------------------------------------------------
    definition_page = requests.get(
        f'https://tangorin.com/definition/{kanji}',
        headers={'User-Agent': 'Mozilla/5.0'},
    )
    soup = BeautifulSoup(definition_page.content, 'html.parser')
    definition_list = soup.find('ul', class_='w-def')
    reading_span = soup.find('span', class_='w-jpn-sm')
    reading_ruby = reading_span.find('ruby') if reading_span is not None else None
    if definition_list is None or reading_ruby is None:
        print(f'skipping {kanji}: no dictionary entry found')
        continue

    # Keep at most the first three glosses.
    definicoes = [item.text for item in definition_list.find_all('li', limit=3)]
    leitura = reading_ruby.text

    # --- Note creation ----------------------------------------------------
    # NOTE(review): the note goes to deck "test1" while the duplicate check is
    # scoped to deck "Default" -- confirm this is intended.
    invoke(
        'addNote',
        note={
            "deckName": "test1",
            "modelName": "Core 2000",
            "fields": {
                'Optimized-Voc-Index': '1',
                'Vocabulary-Kanji': kanji,
                'Vocabulary-Kana': leitura,
                'Vocabulary-Furigana': kanji + ' [' + leitura + ']',
                'Vocabulary-English': '/'.join(definicoes),
                'Expression': sentence,
                'Reading': sentence_reading,
                'Sentence-Kana': sentence_kana,
                'Sentence-English': sentence_en,
            },
            "options": {
                "allowDuplicate": False,
                "duplicateScope": "deck",
                "duplicateScopeOptions": {
                    "deckName": "Default",
                    "checkChildren": False,
                    "checkAllModels": False,
                },
            },
        },
    )
