In [8]:
import networkx as nx
from functions import *
import matplotlib.pyplot as plt

In [9]:
# Location of the word lists and the range of levels to load.
# NOTE(review): presumably these are HSK levels and `to_level` is inclusive;
# confirm against `create_filename_list` in the `functions` module.
foldername = '../data/Official_HSK/'
from_level = 1
to_level = 4

# One file name per requested level, as produced by the project helper.
filenames = create_filename_list(
    foldername=foldername,
    from_level=from_level,
    to_level=to_level,
)

In [10]:
# Gather the words of every selected file into one flat list, keeping the
# order in which the files are listed.
list_of_all_words = []
for filename in filenames:
    list_of_all_words.extend(csv_to_list_of_words(filename))

In [11]:
# Build an undirected graph whose nodes are single characters and whose edges
# join the two characters of every two-character word.
graph = nx.Graph()
for w in list_of_all_words:
    # Skip words that are not exactly two characters long, and words made of
    # one repeated character (these would only add a self-loop).
    if len(w.chinese) == 2 and w.chinese[0] != w.chinese[1]:
        # Indexing a Python 3 str already yields a str, so the characters can
        # be used as node keys directly.
        graph.add_edge(w.chinese[0], w.chinese[1])

In [12]:
# `labels` maps integer id -> original node, `mapping` is its inverse.
# Both follow the current iteration order of the graph's nodes.
labels = dict(enumerate(graph.nodes()))
mapping = {node: index for index, node in labels.items()}

# Relabel the graph in place so that nodes become their integer ids.
# NOTE: this mutates `graph`; re-running this cell on an already relabelled
# graph would rebuild `labels` from the integer ids instead of the originals.
nx.relabel_nodes(graph, mapping, copy=False)

<networkx.classes.graph.Graph at 0x21affa3f040>

In [13]:
# Lookup table from a word's `chinese` text to the word object itself.
# If several words share the same text, the last one in the list wins.
chinese_to_word = dict((word.chinese, word) for word in list_of_all_words)

In [14]:
# For every maximal clique with more than two nodes, collect the known words
# that can be formed from each connected pair of its characters (either order).
cliques = nx.find_cliques(graph)
all_words_from_cliques = []

# Largest cliques first; the sort is stable, so ties keep discovery order.
cliques_by_size = sorted(cliques, key=len, reverse=True)

for clique in cliques_by_size:
    if len(clique) <= 2:
        # A clique of two nodes is just a single edge; not interesting here.
        continue

    this_subgraph = nx.subgraph(graph, clique)
    words_here = []
    for node_a, node_b in this_subgraph.edges:
        # Graph edges are undirected, so try both character orders.
        candidates = (
            labels[node_a] + labels[node_b],
            labels[node_b] + labels[node_a],
        )
        for chinese in candidates:
            if chinese in chinese_to_word:
                words_here.append(chinese_to_word[chinese])
    all_words_from_cliques.append(words_here)

# Report the groups, those with the most words first.
print('All cliques found in the graph:')
for clique_words in sorted(all_words_from_cliques, key=len, reverse=True):
    print('=========================')
    for word in clique_words:
        print(word_to_string(word))

All cliques found in the graph:
爱情    	 [àiqíng] - 	Love
爱心    	 [àixīn] - 	love
热爱    	 [rè’ài] - 	Ardently love
热情    	 [rèqíng] - 	Enthusiasm
热心    	 [rèxīn] - 	enthusiastic
心情    	 [xīnqíng] - 	mood
年初    	 [nián chū] - 	beginning of the year
初中    	 [chū zhōng] - 	junior high school
初级    	 [chūjí] - 	primary
中年    	 [zhōng nián] - 	middle age,  middle-aged
中级    	 [zhōng jí] - 	intermediate,  middle-level
年级    	 [niánjí] - 	grade
东北    	 [dōng běi] - 	northeast
东西    	 [dōngxi] - 	thing
东部    	 [dōng bù] - 	east
西北    	 [xī běi] - 	northwest
西部    	 [xī bù] - 	west
北部    	 [běi bù] - 	north
东北    	 [dōng běi] - 	northeast
东方    	 [dōng fāng] - 	east
东西    	 [dōngxi] - 	thing
西北    	 [xī běi] - 	northwest
西方    	 [xī fāng] - 	west
北方    	 [běifāng] - 	north
东北    	 [dōng běi] - 	northeast
东边    	 [dōng biān] - 	east side
东西    	 [dōngxi] - 	thing
西北    	 [xī běi] - 	northwest
西边    	 [xī biān] - 	west,  west side
北边    	 [běi biān] - 	North side
东边    	 [dōng biān] - 	east side
东