# RememberKanji: traditional method

Creates mnemonic sentences by sampling sentences from a dataset.

In [1]:
import json
import logging
import os
import string

In [2]:
logging.basicConfig(level=logging.DEBUG)

## load data

In [4]:
## kanji data
with open('data/kanji.json', encoding='utf8') as f:
    kanji_data = json.load(f)
    
logging.info(f"amount of kanji: {len(kanji_data)}")

INFO:root:amount of kanji: 13108


In [5]:
## sentences
directory = './data/more_sentences/data'

string_sen = ""
for filename in os.listdir(directory):
    path = os.path.join(directory, filename)
    if os.path.isfile(path):
        with open(path, errors='ignore') as f:
            string_sen =  string_sen + f.read()

# remove all not-sentences
string_sen = string_sen.replace("### abstract ###", "")
string_sen = string_sen.replace("### introduction ###", "")
string_sen = string_sen.replace("CITATION", "")
string_sen = string_sen.replace(" ,", "")

# split into sentences
sentences = string_sen.split(".")

# bring back split delimiter (period)
sentences = [sentences[i].rstrip(' ') + "." for i in range(len(sentences))]

logging.info(f"amount of sentences: {len(sentences)}")

INFO:root:amount of sentences: 13650


In [6]:
## sentences
with open('data/stories.json') as f:
    sentences_data = json.load(f)

for story in sentences_data:
    for s in story['story'].split('.'):
        sentences.append(s + '.')

logging.info(f"amount of sentences: {len(sentences)}")

INFO:root:amount of sentences: 15159


In [7]:
## sentences
with open('data/sentences.json') as f:
    sentences_data = json.load(f)

for s in sentences_data:
    for question in s['questions']:
        for context in question['context']:
            sentences.append(context['text'])

logging.info(f"amount of sentences: {len(sentences)}")

INFO:root:amount of sentences: 39046


## execute task

In [8]:
## gets set of keywords from kanji character
def get_keywords(character):
    
    # check if character is present in dataset
    if character in kanji_data.keys():
        char_properties = kanji_data[character]
        
        # check if radicals are available
        if char_properties['wk_radicals'] != None:
            
            # add meaning as keyword
            keywords = [char_properties['wk_meanings'][0].lower()]
            
            # add radicals as keywords
            [keywords.append(rad.lower()) for rad in char_properties['wk_radicals']]
            logging.info(f"Keywords: {keywords}")
            return keywords
        else:
            logging.info("Radicals not available for this character. Try another one.")
            return []
    else:
        logging.info("Character not available. Try another one.")
        return []

In [14]:
## get keywords from given kanji
keywords = get_keywords(input("Insert a character: "))

INFO:root:Keywords: ['wear', 'horns', 'king', 'slide', 'eye']


In [15]:
## check for sentences
possible_sen = [sentences]
included_keywords = []
included_keywords = []
final_result = "No sentence found."

# find sentences with keywords
for i in range(len(keywords)):
    matching_sen = list(filter(lambda s: keywords[i] in s, possible_sen[i]))
    possible_sen.append(matching_sen)
    if len(matching_sen):
        final_result = matching_sen[0]
        included_keywords.append(keywords[i])


print(f"Sentence: {final_result}")
print(f"Included keywords: {included_keywords}")

Sentence:  He was very angry to think that any animal that he chose for a meal, should be so brazen as to wear such dangerous things as horns to scratch him while he ate.
Included keywords: ['wear', 'horns']
