In [None]:
#AnkiConnect
import json
import urllib.request

def request(action, **params):
    """Build the payload dict for a single AnkiConnect call.

    Args:
        action: Name of the action to run.
        **params: Keyword arguments forwarded verbatim as the action's
            parameters.

    Returns:
        A dict with the keys ``action``, ``params`` and ``version``; the
        version is always 6.
    """
    payload = dict(action=action, params=params, version=6)
    return payload

def invoke(action, **params):
    """POST one action to the AnkiConnect endpoint and return its result.

    The payload built by ``request`` is JSON-encoded and sent to
    ``http://127.0.0.1:8765``; the JSON reply is validated before its
    ``result`` field is returned.

    Args:
        action: Name of the action to run.
        **params: Keyword arguments forwarded as the action's parameters.

    Returns:
        The value of the reply's ``result`` field.

    Raises:
        Exception: If the reply does not consist of exactly the two fields
            ``error`` and ``result``, or if ``error`` is not None (the
            reported error is used as the exception message).
        Errors raised by ``urllib.request.urlopen`` or ``json.load``
        propagate unchanged.
    """
    requestJson = json.dumps(request(action, **params)).encode('utf-8')
    http_request = urllib.request.Request('http://127.0.0.1:8765', requestJson)
    # Use the response as a context manager so the connection is closed as
    # soon as the body has been parsed, instead of whenever it is collected.
    with urllib.request.urlopen(http_request) as http_response:
        response = json.load(http_response)
    if len(response) != 2:
        raise Exception('response has an unexpected number of fields')
    if 'error' not in response:
        raise Exception('response is missing required error field')
    if 'result' not in response:
        raise Exception('response is missing required result field')
    if response['error'] is not None:
        raise Exception(response['error'])
    return response['result']

In [25]:
import polars as pl
import re

In [40]:
def remove_bolds(sent: str) -> str:
    """Return ``sent`` with its markup tags stripped.

    Despite the name, every ``<...>`` span is removed, not only ``<b>`` and
    ``</b>``. The match is non-greedy and does not cross line breaks, so a
    tag that contains a newline is left in place.
    """
    tag_pattern = re.compile(r"<.*?>")
    return tag_pattern.sub("", sent)

In [15]:
# Run an Anki search (deck "Japanese Vocab", note type "Lapis") through the
# findNotes action; the result is passed to notesInfo in a later cell.
notes = invoke('findNotes', query="\"deck:Japanese Vocab\" note:Lapis")

In [34]:
# Fetch the note records for the search hits; later cells read
# item['fields']['Sentence']['value'] from each entry.
noteInfos = invoke('notesInfo', notes=notes)

In [38]:
# Spot-check: raw Sentence field of the first note (still contains markup).
noteInfos[0]['fields']['Sentence']['value']

'<b>身内</b>に医者がいると、何かと安心だ。'

In [50]:
# One newline-terminated line per note: the Sentence field with its markup
# removed and surrounding whitespace trimmed.
cleaned_sents = [
    f"{remove_bolds(note_info['fields']['Sentence']['value']).strip()}\n"
    for note_info in noteInfos
]

In [53]:
# Write the cleaned sentences, one per line. The encoding is pinned to UTF-8
# because the sentences are Japanese text and open() otherwise falls back to
# the platform's locale encoding, which can fail or garble the output
# (e.g. cp1252 on Windows).
with open("test.txt", "w", encoding="utf-8") as f:
    f.writelines(cleaned_sents)

## Load generated file and process it

In [58]:
# Load the priority table that the following cells re-rank.
in_df = pl.read_csv('priority-file.csv')

In [59]:
# Preview the first rows to confirm the column names and dtypes.
in_df.head()

Morph-Lemma,Morph-Inflection,Lemma-Priority,Inflection-Priority
str,str,i64,i64
"""の""","""の""",0,0
"""は""","""は""",1,1
"""が""","""が""",2,2
"""を""","""を""",3,3
"""に""","""に""",4,4


In [85]:
import collections

In [70]:
import glob
from pathlib import Path
import json

In [64]:
# Every entry directly under ../dicts; the loading loop further down globs
# inside each one, so each entry is assumed to be a dictionary folder.
dict_folders = glob.glob('../dicts/*')

In [107]:
def parse_dict(json_in, dict_dicts: collections.defaultdict):
    """Collect the numeric frequency of every entry in ``json_in``.

    Each entry is indexed positionally: element 0 is the key and element 2 is
    the frequency payload (presumably rows of a frequency dictionary bank,
    ``[term, mode, data]`` -- confirm against the dictionary format in use).
    The payload may be:

    * a bare number,
    * a dict with a ``"frequency"`` member that is a number or a
      ``{"value": number, ...}`` dict,
    * a dict with a ``"value"`` member that is a number.

    Entries whose payload does not resolve to a number (strings, missing
    members, other shapes) are skipped, so the collected lists can always be
    summed. No key is created for a skipped entry.

    Args:
        json_in: Iterable of entries as described above.
        dict_dicts: Mapping of key -> list of frequencies; mutated in place.

    Returns:
        None.
    """
    for json_item in json_in:
        payload = json_item[2]
        # {"frequency": X, ...} wraps the actual frequency X.
        if isinstance(payload, dict) and "frequency" in payload:
            payload = payload["frequency"]
        # {"value": N, ...} wraps the number N.
        if isinstance(payload, dict):
            payload = payload.get("value")
        # bool is a subclass of int but is never a frequency.
        if isinstance(payload, bool) or not isinstance(payload, (int, float)):
            continue
        dict_dicts[json_item[0]].append(payload)
            

In [109]:
# Pool the frequencies from every dictionary folder into one mapping of
# key -> list of values. A '---' line is printed once per folder.
dict_dicts = collections.defaultdict(list)
for dict_folder in dict_folders:
    print('---')
    for json_file_p in glob.glob(dict_folder + "/*"):
        file = Path(json_file_p)
        if file.name == 'index.json':
            # Not passed to parse_dict (presumably dictionary metadata rather
            # than entries -- confirm).
            continue
        # Pin UTF-8: the default encoding of open() is locale-dependent and
        # would fail or mis-decode non-ASCII JSON on some platforms.
        with open(json_file_p, 'r', encoding='utf-8') as f:
            parse_dict(json.load(f), dict_dicts)

---
---
---
---
---
---


In [113]:
# Collapse each key's list of collected frequencies into one value: the
# floor-divided mean.
mean_dicts = {
    term: sum(freq_values) // len(freq_values)
    for term, freq_values in dict_dicts.items()
}

In [119]:
# Map every lemma to its mean frequency; lemmas missing from mean_dicts get
# the fallback value 15000.
in_df = in_df.with_columns(
    morph_freq=pl.col("Morph-Lemma").replace_strict(mean_dicts, default=15000)
)

In [120]:
# Map every inflection to its mean frequency; inflections missing from
# mean_dicts get the fallback value 15000.
in_df = in_df.with_columns(
    infl_freq=pl.col("Morph-Inflection").replace_strict(mean_dicts, default=15000)
)

In [132]:
# Order rows by inflection frequency (smallest value first) and store the
# 0-based row position as the new Inflection-Priority.
# maintain_order=True keeps tied rows in their current order. Without it the
# order of ties is not guaranteed, and every row that received the 15000
# fallback ties, so the assigned priorities could differ between runs.
in_df = in_df.sort('infl_freq', maintain_order=True)
in_df = in_df.with_columns(
    pl.int_range(pl.len(), dtype=pl.UInt32).alias('Inflection-Priority')
)

In [133]:
# Order rows by lemma frequency (smallest value first) and store the 0-based
# row position as the new Lemma-Priority.
# maintain_order=True keeps rows with equal morph_freq (e.g. all inflections
# of one lemma) in their current order; without it the order of ties is not
# guaranteed and the assigned priorities could differ between runs.
# NOTE(review): rows sharing a lemma receive distinct consecutive priorities
# rather than one shared value -- confirm this is what the consumer expects.
in_df = in_df.sort('morph_freq', maintain_order=True)
in_df = in_df.with_columns(
    pl.int_range(pl.len(), dtype=pl.UInt32).alias('Lemma-Priority')
)

In [135]:
# Persist the re-ranked table.
# NOTE(review): the helper columns morph_freq and infl_freq are still part of
# in_df here and are written as well -- confirm the consumer tolerates them.
in_df.write_csv('freq_sorted_prio.csv')

In [136]:
# Spot-check one lemma: its inflections share morph_freq but each row has its
# own Lemma-Priority and Inflection-Priority.
in_df.filter(pl.col('Morph-Lemma') == '飛ぶ')

Morph-Lemma,Morph-Inflection,Lemma-Priority,Inflection-Priority,morph_freq,infl_freq
str,str,u32,u32,i64,i64
"""飛ぶ""","""飛ぶ""",434,243,844,844
"""飛ぶ""","""飛ん""",435,3601,844,15000
"""飛ぶ""","""飛び""",436,6925,844,51070
