In [1]:
from pprint import pprint as pp
from typing import Dict, List, Tuple

import random
import csv

# Building Knowledge Graph

## Extract Data from CSV

### Extract s7_nodes_kanji_meaning.csv

In [2]:
def get_data_kanji_from_csv(filepath: str):
    data = dict()
    with open(filepath) as file:
        next(file)
        rows = csv.reader(file, delimiter=",")
        for row in rows:
            num,kanji,meaning = row
            data[kanji] = meaning
    return data

In [3]:
data_kanji = get_data_kanji_from_csv(filepath="s7_nodes_kanji_meaning.csv")
print("len(data_kanji) = ", len(data_kanji))
pp(data_kanji)

len(data_kanji) =  2136
{'一': 'one',
 '丁': 'ward',
 '七': 'seven',
 '万': 'ten thousand',
 '丈': 'length',
 '三': 'three',
 '上': 'up',
 '下': 'below',
 '不': 'non-',
 '与': 'give',
 '且': 'moreover',
 '世': 'world',
 '丘': 'hill',
 '丙': 'third class',
 '両': 'both',
 '並': 'line-up',
 '中': 'middle',
 '串': 'skewer',
 '丸': 'circle',
 '丹': 'cinnabar',
 '主': 'master',
 '丼': 'bowl of food',
 '久': 'long time',
 '乏': 'scarce',
 '乗': 'ride',
 '乙': 'the latter',
 '九': 'nine',
 '乞': 'beg',
 '乱': 'riot',
 '乳': 'milk',
 '乾': 'dry',
 '亀': 'turtle',
 '了': 'finish',
 '予': 'in advance',
 '争': 'conflict',
 '事': 'abstract thing',
 '二': 'two',
 '互': 'mutually',
 '五': 'five',
 '井': 'well',
 '亜': 'sub-',
 '亡': 'deceased',
 '交': 'mix',
 '享': 'receive',
 '京': 'capital',
 '亭': 'pavilion',
 '人': 'person',
 '仁': 'humanity',
 '今': 'now',
 '介': 'jammed in',
 '仏': 'Buddha',
 '仕': 'do',
 '他': 'other',
 '付': 'attach',
 '仙': 'hermit',
 '代': 'substitute',
 '令': 'orders',
 '以': 'by means of',
 '仮': 'temporary',
 '仰': 'face-up',
 '

 '荒': 'laid waste',
 '荘': 'villa',
 '荷': 'luggage',
 '菊': 'chrysanthemum',
 '菌': 'germ',
 '菓': 'confectionery',
 '菜': 'vegetable',
 '華': 'splendor',
 '萎': 'wither',
 '落': 'fall',
 '葉': 'leaf',
 '著': 'renowned',
 '葛': 'arrowroot',
 '葬': 'interment',
 '蒸': 'steam',
 '蓄': 'amass',
 '蓋': 'cover',
 '蔑': 'ignore',
 '蔵': 'warehouse',
 '蔽': 'cover',
 '薄': 'dilute',
 '薦': 'recommend',
 '薪': 'fuel',
 '薫': 'fragrant',
 '薬': 'medicine',
 '藍': 'indigo',
 '藤': 'wisteria',
 '藩': 'clan',
 '藻': 'seaweed',
 '虎': 'tiger',
 '虐': 'tyrannize',
 '虚': 'void',
 '虜': 'captive',
 '虞': 'uneasiness',
 '虫': 'insect',
 '虹': 'rainbow',
 '蚊': 'mosquito',
 '蚕': 'silkworm',
 '蛇': 'snake',
 '蛍': 'lightning-bug',
 '蛮': 'barbarian',
 '蜂': 'bee',
 '蜜': 'honey',
 '融': 'dissolve',
 '血': 'blood',
 '衆': 'masses',
 '行': 'go',
 '術': 'art',
 '街': 'street',
 '衛': 'defense',
 '衝': 'collide',
 '衡': 'equilibrium',
 '衣': 'clothes',
 '表': 'express',
 '衰': 'decline',
 '衷': 'inmost',
 '袋': 'sack',
 '袖': 'sleeve',
 '被': 'incur',
 '裁': 'jud

### Extract s7_nodes_radical_meaning.csv

In [4]:
def get_data_radical_from_csv(filepath: str):
    data = dict()
    with open(filepath) as file:
        next(file)
        rows = csv.reader(file, delimiter=",")
        for row in rows:
            radical,meaning,_ = row
            data[radical] = meaning
    return data

In [5]:
data_radical = get_data_radical_from_csv(filepath="s7_nodes_radical_meaning.csv")
print("len(data_radical) = ", len(data_radical))
pp(data_radical)

len(data_radical) =  255
{'.阝': 'right village',
 'マ': 'kana ma',
 'ユ': 'kana yu',
 '世': 'generation',
 '个': 'human roof (top)',
 '丶': 'dot',
 '丷': 'eight-head',
 '丿': 'kana no',
 '乃': 'from',
 '久': 'long time',
 '乙': 'fish hook',
 '九': 'nine',
 '也': 'to be',
 '亅': 'feathered stick',
 '二': 'two',
 '五': 'five',
 '井': 'well',
 '亠': 'lid',
 '亡': 'deceased',
 '人': 'person',
 '亻': 'person',
 '儿': 'human legs',
 '元': 'beginning',
 '免': 'excuse',
 '入': 'enter',
 '八': 'eight',
 '冂': 'upside down box',
 '冊': 'tome',
 '冖': 'crown shaped wa',
 '冫': 'ice',
 '几': 'table',
 '凵': 'open box',
 '刀': 'sword',
 '刂': 'sword',
 '力': 'strong',
 '勹': 'wrapping',
 '勿': 'not',
 '匕': 'spoon',
 '匚': 'sideways box',
 '匸': 'hiding',
 '十': 'cross',
 '卜': 'divining rod',
 '卩': 'seal',
 '厂': 'cliff',
 '厶': 'myself',
 '又': 'again',
 '及': 'reach out',
 '口': 'mouth',
 '品': 'goods',
 '啇': 'antique',
 '囗': 'box',
 '土': 'earth',
 '士': 'samurai',
 '夂': 'winter',
 '夊': 'go slowly',
 '夕': 'evening',
 '大': 'big',
 '奄': 'cover'

### Extract s7_edges_kanji_radical.csv

In [6]:
def get_kanji_radical_edge(filepath: str):
    data = dict()
    with open(filepath) as file:
        next(file)
        rows = csv.reader(file, delimiter=",")
        for row in rows:
            kanji,radical_list = row
            data[kanji] = radical_list.split(':')
    return data

In [7]:
data_edges = get_kanji_radical_edge(filepath="s7_edges_kanji_radical.csv")
print("len(data_edges) = ", len(data_edges))
pp(data_edges)

len(data_edges) =  6215
{'丁': ['亅'],
 '七': ['丿', '乙', '匕'],
 '万': ['｜', '丿'],
 '丈': ['丶', '丿'],
 '三': ['二'],
 '上': ['卜'],
 '下': ['｜', '卜'],
 '不': ['｜', '丶', '丿'],
 '与': ['勹', '卜'],
 '丐': ['弓', '止', '疋'],
 '丑': ['｜', '丿'],
 '且': ['目'],
 '丕': ['｜', '丶', '丿'],
 '世': ['｜'],
 '丗': ['｜', '十', '山'],
 '丘': ['斤'],
 '丙': ['人', '冂'],
 '丞': ['丿', '亅', 'マ'],
 '両': ['｜', '冂', '山'],
 '並': ['｜', '二', '丷'],
 '个': ['｜'],
 '中': ['｜', '口'],
 '丱': ['｜', '丿'],
 '串': ['｜', '口'],
 '丸': ['丶', '九'],
 '丹': ['丶', '丿', '亅'],
 '主': ['丶', '玉'],
 '丼': ['｜', '丶', '丿', '二', '廾', '井'],
 '乂': ['丿'],
 '乃': ['｜', '丿'],
 '久': ['丿', '入'],
 '之': ['丶', '乙', '亠', '廴'],
 '乍': ['｜', '丿'],
 '乎': ['丿', '亅', '丷'],
 '乏': ['丶', '丿', '乙', '亠'],
 '乕': ['｜', '丿', '厂', '巾'],
 '乖': ['丿', '匕', '十', '爿'],
 '乗': ['｜', '丿', '八', '禾'],
 '乘': ['丿', '八', '匕', '十', '爿'],
 '乞': ['丿', '乙'],
 '也': ['｜', '乙', '匕'],
 '乢': ['乙', '山'],
 '乱': ['乙', '口', '舌'],
 '乳': ['乙', '子', '爪'],
 '乾': ['乙', '十', '日'],
 '亀': ['乙', '勹', '田', '龜'],
 '亂': ['乙', '冂', '厶', '

 '嫺': ['女', '日', '門'],
 '嫻': ['女', '木', '門'],
 '嬉': ['口', '士', '女', '豆', '丷'],
 '嬋': ['｜', '十', '口', '女', '田'],
 '嬌': ['丿', '冂', '口', '大', '女'],
 '嬖': ['十', '口', '女', '尸', '立', '辛'],
 '嬢': ['亠', '八', '女', '衣'],
 '嬪': ['丿', '女', '宀', '小', '貝'],
 '嬬': ['女', '而', '雨'],
 '嬰': ['女', '貝'],
 '嬲': ['力', '女', '田'],
 '嬶': ['女', '廾', '田', '目', '自', '鼻'],
 '嬾': ['｜', '八', '口', '女', '木', '貝', '頁'],
 '孀': ['女', '木', '目', '雨'],
 '孃': ['｜', '亠', '口', '女', '衣'],
 '孅': ['人', '女', '戈', '韭'],
 '孔': ['乙', '子'],
 '孕': ['子', '乃'],
 '字': ['子', '宀'],
 '存': ['｜', '丿', '子'],
 '孚': ['子', '爪'],
 '孛': ['冖', '十', '士', '子'],
 '孜': ['子', '攴'],
 '孝': ['子', '老'],
 '孟': ['子', '皿'],
 '季': ['子', '禾'],
 '孤': ['子', '瓜'],
 '孥': ['又', '女', '子'],
 '学': ['冖', '子', '尚'],
 '孩': ['亠', '子'],
 '孫': ['丿', '子', '糸'],
 '孰': ['丶', '亠', '口', '子', '九'],
 '孱': ['子', '尸'],
 '孳': ['子', '幺', '丷'],
 '孵': ['丶', '卩', '子', '爪'],
 '學': ['冖', '子', '臼'],
 '孺': ['｜', '冂', '子', '而', '雨'],
 '它': ['匕', '宀'],
 '宅': ['丿', '乙', '宀'],
 '宇': ['亅', '宀', '干'],


 '注': ['丶', '玉', '氵'],
 '泪': ['目', '氵'],
 '泯': ['口', '氏', '氵'],
 '泰': ['｜', '丿', '二', '水'],
 '泱': ['人', '冂', '大', '氵'],
 '泳': ['丶', '水', '氵'],
 '洋': ['玉', '羊', '丷', '氵'],
 '洌': ['歹', '刂', '氵'],
 '洒': ['襾', '氵'],
 '洗': ['丿', '儿', '土', '氵'],
 '洙': ['｜', '丿', '二', '八', '土', '木', '牛', '氵'],
 '洛': ['口', '夂', '氵'],
 '洞': ['冂', '口', '氵'],
 '洟': ['人', '大', '弓', '氵'],
 '津': ['聿', '氵'],
 '洩': ['丿', '乙', '日', '氵'],
 '洪': ['｜', '二', '八', '氵'],
 '洫': ['皿', '血', '氵'],
 '洲': ['｜', '丶', '巛', '氵'],
 '洳': ['口', '女', '氵'],
 '洵': ['勹', '日', '氵'],
 '洶': ['凵', '勹', '氵'],
 '洸': ['儿', '尚', '氵'],
 '活': ['丿', '十', '口', '舌', '氵'],
 '洽': ['口', '个', '氵'],
 '派': ['厂', '斤', '氵'],
 '流': ['亠', '厶', '巛', '氵'],
 '浄': ['亅', '勹', '彐', '氵'],
 '浅': ['丶', '丿', '毛', '氵'],
 '浙': ['斤', '扌', '氵'],
 '浚': ['八', '厶', '夂', '氵'],
 '浜': ['八', '斤', '氵'],
 '浣': ['二', '儿', '宀', '氵'],
 '浤': ['丿', '厶', '宀', '氵'],
 '浦': ['丶', '十', '用', '氵'],
 '浩': ['丿', '口', '土', '氵'],
 '浪': ['艮', '氵'],
 '浬': ['土', '田', '里', '氵'],
 '浮': ['子', '爪', '氵'],
 '浴

 '肄': ['匕', '矢', '聿'],
 '肅': ['彐', '爿', '片', '聿'],
 '肆': ['厶', '聿', '長'],
 '肇': ['戸', '攴', '聿'],
 '肉': ['人', '冂'],
 '肋': ['力', '月'],
 '肌': ['几', '月'],
 '肓': ['亠', '月', '亡'],
 '肖': ['月', '尚'],
 '肘': ['寸', '月'],
 '肚': ['土', '月'],
 '肛': ['工', '月'],
 '肝': ['干', '月'],
 '股': ['月', '殳'],
 '肢': ['十', '又', '支', '月'],
 '肥': ['月', '巴'],
 '肩': ['戸', '月'],
 '肪': ['方', '月'],
 '肬': ['丶', '尢', '月', '尤'],
 '肭': ['人', '冂', '月'],
 '肯': ['月', '止'],
 '肱': ['丿', '厶', '月'],
 '育': ['亠', '厶', '月'],
 '肴': ['丶', '丿', '月'],
 '肺': ['亠', '巾', '月'],
 '胃': ['月', '田'],
 '胄': ['｜', '日', '月', '田'],
 '胆': ['日', '月'],
 '背': ['匕', '月', '爿'],
 '胎': ['厶', '口', '月'],
 '胖': ['｜', '二', '十', '月', '丷'],
 '胙': ['｜', '丿', '月'],
 '胚': ['｜', '丶', '丿', '月'],
 '胛': ['｜', '日', '月', '田'],
 '胝': ['月', '氏'],
 '胞': ['勹', '己', '月'],
 '胡': ['十', '口', '月'],
 '胤': ['儿', '幺', '月'],
 '胥': ['月', '疋'],
 '胯': ['二', '勹', '大', '月'],
 '胱': ['儿', '月', '尚'],
 '胴': ['冂', '口', '月'],
 '胸': ['丶', '丿', '凵', '勹', '月'],
 '胼': ['｜', '丿', '二', '廾', '月', '丷'],
 '能

 '鴫': ['田', '鳥', '灬'],
 '鴬': ['冖', '鳥', '尚', '灬'],
 '鴻': ['工', '鳥', '氵', '灬'],
 '鴾': ['厶', '牛', '鳥', 'マ', '灬'],
 '鴿': ['口', '鳥', '个', '灬'],
 '鵁': ['亠', '父', '鳥', '灬'],
 '鵄': ['厶', '土', '至', '鳥', '灬'],
 '鵆': ['彳', '行', '鳥', '灬'],
 '鵈': ['耳', '鳥', '灬'],
 '鵐': ['人', '工', '鳥', '灬'],
 '鵑': ['口', '月', '鳥', '灬'],
 '鵙': ['貝', '鳥', '灬'],
 '鵜': ['｜', '丿', '弓', '鳥', '丷', '灬'],
 '鵝': ['亅', '戈', '鳥', '灬'],
 '鵞': ['亅', '戈', '鳥', '灬'],
 '鵠': ['丿', '口', '土', '鳥', '灬'],
 '鵡': ['弋', '戈', '止', '鳥', '灬'],
 '鵤': ['角', '鳥', '灬'],
 '鵬': ['月', '鳥', '灬'],
 '鵯': ['十', '田', '鳥', '灬'],
 '鵲': ['｜', '日', '鳥'],
 '鵺': ['亠', '夕', '衣', '鳥', '灬'],
 '鶇': ['｜', '八', '日', '木', '田', '鳥', '灬'],
 '鶉': ['亠', '口', '子', '鳥', '灬'],
 '鶏': ['人', '土', '大', '爪', '鳥'],
 '鶚': ['二', '勹', '口', '鳥', '灬'],
 '鶤': ['冖', '車', '鳥', '灬'],
 '鶩': ['攴', '矛', '鳥', '灬'],
 '鶫': ['｜', '口', '木', '网', '鳥', '丷', '灬'],
 '鶯': ['冖', '火', '鳥', '灬'],
 '鶲': ['八', '冫', '厶', '羽', '鳥', '灬'],
 '鶴': ['宀', '隹', '鳥', '灬'],
 '鶸': ['冫', '弓', '鳥', '灬'],
 '鶺': ['人', '冫',

## Data Structure

### Class

In [93]:
class KanjigenNode:
    def __init__(self, symbol: str, dtype: str, meaning: str):
        self.symbol  = symbol
        self.dtype   = dtype
        self.meaning = meaning        
        self.color = 'red' if dtype == 'kanji' else 'orange'
        
    def __repr__(self):
        return self.symbol + '*' + self.dtype + '*' + self.meaning
    
    def __str__(self):
        return self.symbol + '*' + self.meaning

### Node Manager

In [94]:
def convert_into_kanjigen_node(nodes, dtype):
    return [KanjigenNode(symbol, dtype, meaning) for symbol, meaning in nodes.items()]

In [95]:
kanjigen_kanji_list   = convert_into_kanjigen_node(data_kanji, 'kanji')
kanjigen_radical_list = convert_into_kanjigen_node(data_radical, 'radical')
kanjigen_nodes = kanjigen_kanji_list + kanjigen_radical_list

In [96]:
kanjigen_kanji = [n for n in kanjigen_nodes if n.dtype == 'kanji']
kanjigen_radical = [n for n in kanjigen_nodes if n.dtype == 'radical']

In [97]:
print("count kanji  = ", len(kanjigen_kanji))
print("count radical = ", len(kanjigen_radical))

count kanji  =  2136
count radical =  255


In [98]:
print(len(kanjigen_nodes))
print(len(kanjigen_kanji) + len(kanjigen_radical))
print(len(set(kanjigen_nodes)))

2391
2391
2391


In [99]:
def get_symbol_dtype(node_repr: str):
    symbol, dtype, meaning = node_repr.split('*')
    return symbol + '*' + dtype

In [100]:
kanjigen_nodes_map = {get_symbol_dtype(n.__repr__()):n for n in kanjigen_nodes}

In [101]:
kanjigen_nodes_map

{'亜*kanji': 亜*kanji*sub-,
 '哀*kanji': 哀*kanji*pathetic,
 '挨*kanji': 挨*kanji*push open,
 '愛*kanji': 愛*kanji*love,
 '曖*kanji': 曖*kanji*not clear,
 '悪*kanji': 悪*kanji*bad,
 '握*kanji': 握*kanji*grip,
 '圧*kanji': 圧*kanji*pressure,
 '扱*kanji': 扱*kanji*handle,
 '宛*kanji': 宛*kanji*allocate,
 '嵐*kanji': 嵐*kanji*storm,
 '安*kanji': 安*kanji*cheap,
 '案*kanji': 案*kanji*plan,
 '暗*kanji': 暗*kanji*dark,
 '以*kanji': 以*kanji*by means of,
 '衣*kanji': 衣*kanji*clothes,
 '位*kanji': 位*kanji*rank,
 '囲*kanji': 囲*kanji*surround,
 '医*kanji': 医*kanji*medicine,
 '依*kanji': 依*kanji*reliant,
 '委*kanji': 委*kanji*committee,
 '威*kanji': 威*kanji*intimidate,
 '為*kanji': 為*kanji*do,
 '畏*kanji': 畏*kanji*fear,
 '胃*kanji': 胃*kanji*stomach,
 '尉*kanji': 尉*kanji*military officer,
 '異*kanji': 異*kanji*uncommon,
 '移*kanji': 移*kanji*shift,
 '萎*kanji': 萎*kanji*wither,
 '偉*kanji': 偉*kanji*admirable,
 '椅*kanji': 椅*kanji*chair,
 '彙*kanji': 彙*kanji*same kind,
 '意*kanji': 意*kanji*idea,
 '違*kanji': 違*kanji*differ,
 '維*kanji': 維*kanji*fiber,

In [104]:
def get_node(kanjigen_nodes, symbol, dtype):
    test_node = symbol + '*' + dtype
    if test_node in kanjigen_nodes_map:
        return kanjigen_nodes_map[test_node]
    return None

In [133]:
print(get_node(kanjigen_nodes, '唖', 'kanji'))

None


### Edges Manager

In [130]:
def convert_into_kanjigen_edges(data_edges: list):
    kanjigen_edges = []
    for kanji, radical_list in data_edges.items():
        kanji_node = get_node(kanjigen_nodes, kanji, 'kanji')
        if kanji_node == None:
            print(kanji)
        for r in radical_list:
            radical_node = get_node(kanjigen_nodes, r, 'radical')
            kanjigen_edges.append( (kanji_node, radical_node) )
    return kanjigen_edges

In [131]:
kanjigen_edges = convert_into_kanjigen_edges(data_edges)

唖
逢
伊
鵜
丑
焔
嘩
樺
鴨
諌
慧
坤
坐
犀
榊
碕
咋
窄
撒
珊
獅
爾
雫
篠
洲
繍
鍾
錘
菅
撰
岨
漕
糟
凧
巽
湛
暢
槌
鎚
壷
紬
剃
悌
梯
鼎
澱
兎
菟
塘
董
蕩
橡
乍
廿
禰
乃
嚢
膿
矧
曝
挽
畢
紐
埠
撫
蕪
淵
弗
糞
瞥
篇
峯
捧
蓬
鋒
沫
也
鑓
柚
莱
蘭
煉
簾
婁
蝋
歪
丕
个
丱
豫
佛
來
俤
倔
倆
假
儂
兩
冉
冓
冲
剏
剌
剩
勳
匣
卅
丗
卍
吽
呷
呻
咄
哄
喘
喇
嗽
嘸
嘯
坏
垪
堙
墟
壙
壤
妍
娉
媚
媾
嫂
嫩
嬋
嬾
孃
孺
屏
岫
岼
崋
崛
崙
崘
嵋
廡
廸
彈
彌
彿
徠
忸
怎
怫
恆
悚
惠
慊
慟
憖
憚
懣
懶
戰
扁
抔
抻
拌
拂
拱
搜
捶
揀
搴
搆
舉
收
攸
敕
敝
數
斷
曄
曩
朏
朮
朿
柞
柮
棘
棗
椪
椣
棆
寨
樓
飮
歉
洙
涕
渊
淒
淪
溂
溏
澑
濂
濔
瀑
瀟
瀰
瀾
炸
烽
熏
燻
燼
爛
狃
狆
狎
狒
璢
瑕
瑾
畍
畊
疥
痞
瘻
眛
睇
睫
瞞
瞶
矗
祟
祚
禪
禮
禳
秉
秣
稱
穰
穽
窶
竦
笨
笄
筰
筱
篝
篳
簍
簣
簫
籟
籥
糶
絆
絣
綸
綰
緞
縣
縋
縷
繖
繼
罘
翩
耘
耙
耜
耡
耨
聘
胛
胙
胄
胚
胖
脯
腆
胼
舳
艚
苡
苒
茉
茱
菫
萋
葭
葮
蒹
蕀
蕭
薛
藪
藉
藕
藾
乕
蚓
蚌
蚰
蛬
蛛
蝙
螻
蠹
衄
袢
裲
褄
褊
襄
褸
覯
誅
誄
諫
諞
謇
譁
譴
讓
賽
賺
贐
赧
赳
跚
踈
踵
蹇
蹕
躰
軆
輛
輌
輾
迚
迪
邇
迸
遐
遘
鄲
醋
醴
醺
釀
釉
釐
鈕
鉞
銖
錏
鍜
鎭
鏤
鑰
閘
閧
闌
陲
霰
靺
鞣
顆
餠
饉
饋
饌
駲
騁
駢
騙
騫
驥
驤
髏
體
髯
髴
髷
鬨
魎
鮓
鰕
鰊
鰥
鰤
鰰
鱧
鵲
鶇
鶫
黜
鼬
槇
鴎
蒲
稀
兇
禽
躯
叉
肴
勺
杓
灼
靭
賎
筑
註
掴
葱
之
柊
豹
鋪
圃
甫
輔
尤
籾
匁
猷
梁
鷲
亙
仞
仭
偬
傅
兔
冤
劔
劒
剱
匆
匍
厖
咏
囈
埔
妁
孰
孵
寃
尨
巉
怱
怺
愡
愽
戍
扠
扨
拔
拆
掖
搏
昶
柝
梵
榑
樒
櫁
樣
毬
泝
溥
漾
澀
瀛
炙
犹
瑟
疣
疼
祕
祓
秡
笂
筺
簗
綛
网
羸
肬
脉
膊
舖
芍
苳
荵
葯


In [118]:
print("len(kanjigen_edges) ", len(kanjigen_edges))
print("len(kanjigen_edges) ", len(set(kanjigen_edges)))

len(kanjigen_edges)  6756
len(kanjigen_edges)  6753


## Graph

In [None]:
import networkx as nx

In [None]:
G = nx.Graph()

In [None]:
G.add_nodes_from(g_nodes)
list(G.nodes())[:10]

In [None]:
G.add_edges_from(g_edges)
list(G.edges())[:10]

In [None]:
for e in list(G.edges()):
    radical, kanji = e
    if kanji.symbol == '痘':
        print(e)
p = get_node(g_nodes, '痘', 'kanji')
[n for n in G.neighbors(p)]

In [None]:
print(nx.info(G))

## Visualization

In [None]:
import matplotlib
import matplotlib.pyplot as plt

### Install Font

In [None]:
import matplotlib.font_manager as fm

# Reference: https://albertauyeung.github.io/2020/03/15/matplotlib-cjk-fonts.html
[f for f in fm.fontManager.ttflist if 'CJK JP' in f.name]

In [None]:
def visualize_graph(Graph: nx.Graph, figsize: tuple=(5,5), color_map: List[str]=None) -> None:
    if color_map == None:
        color_map = [n.color for n in Graph]  
    else:
        color_map = color_map
    
    plt.figure(1,figsize=figsize) 
    nx.draw_kamada_kawai(Graph, node_color=color_map, with_labels=True, node_size=1000, font_size=20,font_family="Noto Serif CJK JP")
    plt.show()

### Sample Visualization

In [None]:
def get_sg_kanji_with() -> nx.Graph:
    sg = nx.Graph()
    p = get_node(g_nodes, '痘', 'kanji')
    radical_p = [n for n in G.neighbors(p)]
    sg.add_nodes_from(radical_p + [p])
    sg.add_edges_from([(p, rp) for rp in radical_p])
    return sg

In [None]:
sg = dict() # sample_graph
sg['full_graph'] = G
sg['sample_nodes'] = lambda size: G.subgraph(random.sample(G.nodes, size))
sg['kanji_plus_radical_neighbor'] = get_sg_kanji_with()

In [None]:
visualize_graph(Graph=sg['kanji_plus_radical_neighbor'], figsize=(5,5))

In [None]:
visualize_graph(Graph=sg['sample_nodes'](180), figsize=(5,5))

In [None]:
visualize_graph(Graph=sg['full_graph'], figsize=(10,10))

# Querying Knowledge Graph

## Exploratory Data Analysis

In [None]:
print(nx.info(G))

In [None]:
# https://networkx.org/documentation/stable/reference/algorithms/component.html

print('number of connected components: ', nx.number_connected_components(G))

# To create the induced subgraph of each component use:
S = [G.subgraph(c).copy() for c in nx.connected_components(G)]

In [None]:
# https://networkx.org/documentation/stable/reference/algorithms/isolates.html
# EDA + Preprocessing: Removing Isolated Nodes

print('number of isolated: ', nx.number_of_isolates(G))


G.remove_nodes_from(list(nx.isolates(G)))

In [None]:
print(nx.info(G))

In [None]:
visualize_graph(Graph=G.subgraph(random.sample(G.nodes, 100)), figsize=(5,5))

## Brute Force Algorithm

In [None]:
kin = get_node(list_node, '怨', 'kanji')
kout = get_node(list_node, '浦', 'kanji')

result_shortest_path = nx.shortest_path(G, source=kin, target=kout)
print(result_shortest_path)

In [None]:
MO_raw = ['怨','姻','桜']
MD_raw = ['浦','奥','媛']

MO = [get_node(list_node, o, 'kanji') for o in MO_raw]
MD = [get_node(list_node, o, 'kanji') for o in MD_raw]

In [None]:
def generate_graph(nodes: List[KanjigenNode]) -> nx.Graph:
    R = nx.Graph()
    R.add_nodes_from(nodes)
    R.add_edges_from(nx.utils.pairwise(nodes))
    return R

In [None]:
def find_path_bf(MO: List[KanjigenNode], MD: List[KanjigenNode]) -> nx.Graph:
    
    result = []
    
    for kin in MO:
        for kout in MD:
            sp_raw = nx.shortest_path(G, source=kin, target=kout, method='dijkstra')
            sp_graph = generate_graph(sp_raw)
            
            result.append(sp_graph)
    
    
    return nx.compose_all(result)        

In [None]:
result = find_path_bf(MO, MD)
result.nodes()

In [None]:
def get_node_color_result(kinputs, koutputs, union_result):
    color_map = []
    for n in union_result:
        if n.symbol in kinputs:
            color_map.append("green")
        elif n.symbol in koutputs:
            color_map.append("blue")
        else:
            color_map.append(n.color)
    return color_map

In [None]:
visualize_graph(Graph=result, color_map=get_node_color_result(MO_raw, MD_raw, result))

#  Heuristic

In [None]:
def find_path_heuristic(MO: List[KanjigenNode], MD: List[KanjigenNode], dist_func: 'function(o,d)') -> nx.Graph:
    
    result = []
    
    for kin in MO:
        for kout in MD:
            sp_raw = nx.astar_path(G, source=kin, target=kout, heuristic=dist_func)
            sp_graph = generate_graph(sp_raw)
            
            result.append(sp_graph)
    
    
    return nx.compose_all(result)        

In [None]:
def dist_func(orig: KanjigenNode, dest: KanjigenNode) -> float:
    (x1, y1) = a
    (x2, y2) = b
    return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5

## Evaluation

In [None]:
# https://networkx.org/documentation/stable/reference/algorithms/similarity.html