In [1]:
#AnkiConnect
import json
import urllib.request

def request(action, **params):
    """Build the request payload for one AnkiConnect action.

    Args:
        action: Name of the action to perform.
        **params: Keyword arguments placed under the payload's 'params' key.

    Returns:
        A dict with the keys 'action', 'params' and 'version' (always 6),
        in that order.
    """
    payload = dict(action=action, params=params, version=6)
    return payload

def invoke(action, **params):
    """Send one action to the AnkiConnect server and return its result.

    The payload built by ``request(action, **params)`` is serialised to
    UTF-8 encoded JSON and sent as the body of an HTTP request to
    ``http://127.0.0.1:8765``. Errors raised by ``urllib`` or by JSON
    decoding are not caught here.

    Args:
        action: Name of the AnkiConnect action (e.g. 'findNotes').
        **params: Keyword arguments forwarded as the action's parameters.

    Returns:
        The value of the response's 'result' field.

    Raises:
        Exception: If the response does not have exactly two fields, lacks
            the 'error' or 'result' field, or carries a non-None 'error'
            (in which case the error value is the exception argument).
    """
    requestJson = json.dumps(request(action, **params)).encode('utf-8')
    http_request = urllib.request.Request('http://127.0.0.1:8765', requestJson)
    # Use a context manager so the HTTP response (and its socket) is closed
    # as soon as the body has been parsed, even if json.load raises.
    with urllib.request.urlopen(http_request) as http_response:
        response = json.load(http_response)
    if len(response) != 2:
        raise Exception('response has an unexpected number of fields')
    if 'error' not in response:
        raise Exception('response is missing required error field')
    if 'result' not in response:
        raise Exception('response is missing required result field')
    if response['error'] is not None:
        raise Exception(response['error'])
    return response['result']

In [2]:
import collections
import glob
import html
import re
from pathlib import Path

import polars as pl

In [3]:
# Search query selecting the notes to export; it is passed to the
# 'findNotes' action below.
deck_query = "\"deck:Japanese Vocab\" note:Lapis"
# Name of the note field whose value is read from every matching note.
deck_field = 'Sentence'

In [4]:
def remove_bolds(sent: str) -> str:
    """Return ``sent`` as plain text.

    Despite the name, every ``<...>`` tag is removed (not only ``<b>``).
    HTML character references such as ``&amp;`` or ``&nbsp;`` are then
    decoded so they do not end up verbatim in the exported text, and
    non-breaking spaces are turned into ordinary spaces.

    Args:
        sent: Field value that may contain HTML markup.

    Returns:
        The text with tags removed, entities decoded and leading/trailing
        whitespace stripped.
    """
    # DOTALL lets '.' cross line breaks, so a tag that wraps onto another
    # line is still removed.
    without_tags = re.sub(r"<.*?>", "", sent, flags=re.DOTALL)
    # Decode entities only after the tags are gone: escaped markup such as
    # "&lt;b&gt;" must survive as literal text rather than be treated as a tag.
    plain_text = html.unescape(without_tags).replace("\xa0", " ")
    return plain_text.strip()

In [5]:
# Ask AnkiConnect for the notes that match deck_query.
notes = invoke('findNotes', query=deck_query)

In [6]:
# Fetch the details (including field values) of the notes found above.
noteInfos = invoke('notesInfo', notes=notes)

In [7]:
# Peek at the raw field value of the first note; it still contains HTML markup.
noteInfos[0]['fields'][deck_field]['value']

'<b>身内</b>に医者がいると、何かと安心だ。'

In [8]:
# One cleaned, newline-terminated sentence per note, in the order of noteInfos.
cleaned_sents = [
    f"{remove_bolds(note_info['fields'][deck_field]['value'])}\n"
    for note_info in noteInfos
]

In [9]:
# Write the sentences, one per line, to the file that AnkiMorphs loads in the
# next step. The encoding is pinned to UTF-8: when it is omitted Python uses
# the platform default, which may be unable to represent Japanese text.
with open("test.txt", "w", encoding="utf-8") as f:
    f.writelines(cleaned_sents)

## Load generated file and process it
Before continuing, do the following steps:
1. Go to Tools -> AnkiMorphs -> Generators in Anki.
2. Select the folder where you saved the `test.txt` file written above and load it.
3. Generate the priority file.
4. Copy/move the priority file into the notebooks folder (right here) as `priority-file.csv`.

In [10]:
# Load the priority file produced by the manual AnkiMorphs steps above.
in_df = pl.read_csv('priority-file.csv')

In [11]:
# Preview the first rows of the loaded priority file.
in_df.head()

Morph-Lemma,Morph-Inflection,Lemma-Priority,Inflection-Priority
str,str,i64,i64
"""の""","""の""",0,0
"""は""","""は""",1,1
"""が""","""が""",2,2
"""を""","""を""",3,3
"""に""","""に""",4,4


In [12]:
# Every entry directly under ../dicts; each one is scanned for files below.
dict_folders = glob.glob('../dicts/*')

In [13]:
def parse_dict(json_in, dict_dicts: collections.defaultdict):
    """Append every frequency value found in ``json_in`` to ``dict_dicts``.

    Each item of ``json_in`` is read as ``item[0]`` (the key the value is
    stored under) and ``item[2]`` (the payload). Recognised payload shapes:

    * a bare ``int``                         -> the int itself
    * ``{"frequency": {"value": v, ...}}``   -> ``v``
    * ``{"frequency": v, ...}``              -> ``v``
    * ``{"value": v, ...}``                  -> ``v``

    Any other payload (strings, floats, dicts without those keys, ...) is
    skipped instead of raising or being substring-matched.

    Args:
        json_in: Iterable of indexable items with at least three elements.
        dict_dicts: Mapping from key to a list of values; mutated in place.
            Missing keys are created, so a plain ``dict`` works as well.

    Returns:
        None.
    """
    for json_item in json_in:
        term, payload = json_item[0], json_item[2]

        # Exact type check (not isinstance) so that bool is not counted as int.
        if type(payload) is int:
            dict_dicts.setdefault(term, []).append(payload)
            continue

        # Only mappings can carry the keys looked up below; testing `in` on a
        # string would be a substring match and on a number a TypeError.
        if not isinstance(payload, dict):
            continue

        if "frequency" in payload:
            frequency = payload["frequency"]
            if isinstance(frequency, dict):
                frequency = frequency["value"]
            dict_dicts.setdefault(term, []).append(frequency)
        elif "value" in payload:
            dict_dicts.setdefault(term, []).append(payload["value"])
            

In [14]:
# Gather every frequency value from the dictionary folders, keyed by term.
dict_dicts = collections.defaultdict(list)
for dict_folder in dict_folders:
    # glob.escape: a folder name containing '[', ']', '*' or '?' would
    # otherwise be read as a pattern and silently match nothing.
    # '*.json' keeps non-JSON files out of json.load; sorted() makes the load
    # order reproducible.
    for json_file_p in sorted(glob.glob(glob.escape(dict_folder) + "/*.json")):
        file = Path(json_file_p)
        if file.name == 'index.json':
            # Skipped; presumably dictionary metadata, not entries. TODO confirm
            continue
        # Pin the encoding: the platform default is not always UTF-8.
        with open(json_file_p, 'r', encoding='utf-8') as f:
            parse_dict(json.load(f), dict_dicts)

In [15]:
# Collapse each term's list of collected values into one floor-divided mean.
mean_dicts = {
    term: sum(freqs) // len(freqs)
    for term, freqs in dict_dicts.items()
}

In [16]:
# Look up each lemma's mean frequency; lemmas missing from mean_dicts get 15000.
in_df = in_df.with_columns(
    morph_freq=pl.col("Morph-Lemma").replace_strict(mean_dicts, default=15000),
)

In [17]:
# Look up each inflection's mean frequency; inflections missing from
# mean_dicts get 15000.
in_df = in_df.with_columns(
    infl_freq=pl.col("Morph-Inflection").replace_strict(mean_dicts, default=15000),
)

In [18]:
# Rank rows by 'infl_freq' (smallest first) and store the 0-based rank as the
# new 'Inflection-Priority'. maintain_order=True makes rows with an equal
# 'infl_freq' (e.g. all rows that received the default value) keep their
# current relative order; without it the order of ties is not guaranteed, so
# the resulting priorities could differ from run to run.
in_df = in_df.sort('infl_freq', maintain_order=True)
in_df = in_df.with_columns(
    pl.int_range(pl.len(), dtype=pl.UInt32).alias('Inflection-Priority')
)

In [19]:
# Rank rows by 'morph_freq' (smallest first) and store the 0-based rank as the
# new 'Lemma-Priority'. maintain_order=True makes rows with an equal
# 'morph_freq' keep their current relative order; without it the order of ties
# is not guaranteed, so the resulting priorities could differ from run to run.
in_df = in_df.sort('morph_freq', maintain_order=True)
in_df = in_df.with_columns(
    pl.int_range(pl.len(), dtype=pl.UInt32).alias('Lemma-Priority')
)

In [20]:
# Save the re-ranked table, including the helper frequency columns, as a CSV file.
in_df.write_csv('freq_sorted_prio.csv')