# WordNet Exploration

A simple utility for browsing WordNet data and the relations between synsets.

In [None]:
import notebooks.util.nbloader
import notebooks.nb.utils as nb_utils

import ipywidgets as widgets
from ipywidgets import interact, interact_manual

import json
import numpy as np
import os
import pandas as pd
from datetime import datetime

import dataknobs.utils.resource_utils as resource_utils
import dataknobs.utils.elasticsearch_utils as es_utils
import dataknobs.structures.tree as dk_tree

In [None]:
# Resolve the batch file under the active data directory and collect its
# records into WORDNET_DF (indexed by column name and filtered by the cells
# below).
DATADIR = resource_utils.active_datadir()
ELASTICSEARCH_BATCHFILE = f'{DATADIR}/dev/wordnet/es_bulk.nltk.sgloss.jsonl'
WORDNET_DF = es_utils.collect_batchfile_records(ELASTICSEARCH_BATCHFILE)

# WordNet handle; the browser cell uses it to look synsets up by name.
wn = resource_utils.get_nltk_wordnet()

# Notebook-wide "current selection" state, rewritten by the browser cell.
cur_synset = cur_relation = cur_tree = None
cur_df = WORDNET_DF

# Initial values for the browser's search controls.
cur_pattern, cur_column = '', 'synset_name'
cur_search, cur_case = True, False

def _relation(label, method_name=None):
    """Build one ``(label, accessor)`` entry for RELATIONS.

    The accessor takes a synset and returns the result of calling the
    synset method called *method_name* (which defaults to *label*).
    """
    method_name = method_name or label
    return label, lambda s: getattr(s, method_name)()


# Ordered (label, accessor) pairs, one per synset relation offered by the
# browser. Only the first two labels differ from the method they invoke.
RELATIONS = [
    _relation('hypernym', 'hypernyms'),
    _relation('hyponym', 'hyponyms'),
    _relation('instance_hypernyms'),
    _relation('instance_hyponyms'),
    _relation('member_holonyms'),
    _relation('member_meronyms'),
    _relation('part_holonyms'),
    _relation('part_meronyms'),
    _relation('also_sees'),
    _relation('attributes'),
    _relation('causes'),
    _relation('entailments'),
    _relation('in_region_domains'),
    _relation('in_topic_domains'),
    _relation('in_usage_domains'),
    _relation('region_domains'),
    _relation('topic_domains'),
    _relation('usage_domains'),
    _relation('similar_tos'),
    _relation('substance_holonyms'),
    _relation('substance_meronyms'),
    _relation('verb_groups'),
]

def get_relations(synset):
    """Return the RELATIONS entries that yield at least one result for *synset*.

    Each returned item is the original ``(label, accessor)`` pair, in
    RELATIONS order; the accessor is called once per entry to test it.
    """
    populated = []
    for label, accessor in RELATIONS:
        if len(accessor(synset)) == 0:
            continue
        populated.append((label, accessor))
    return populated

def select_matches(pattern, column, search=True, case=False, df=None):
    """Return the rows of *df* whose *column* value matches *pattern*.

    :param pattern: Regular expression for string columns; for integer
        columns, a value convertible with ``int()``.
    :param column: Name of the column to test.
    :param search: For string columns, True matches the pattern anywhere
        in the value (``str.contains``); False anchors it at the start
        (``str.match``).
    :param case: True for case-sensitive string matching.
    :param df: DataFrame to filter; None (the default) means WORDNET_DF.
    :return: The filtered DataFrame. It is empty when nothing matches or
        when *pattern* is not an integer for an integer column. None is
        returned when the column is neither string nor integer typed.
    """
    if df is None:
        df = WORDNET_DF
    values = df[column]
    col_dtype = values.dtype

    if pd.api.types.is_string_dtype(col_dtype):
        matcher = values.str.contains if search else values.str.match
        # na=False: missing or non-string cells would otherwise produce NaN
        # in the mask, which boolean indexing rejects.
        return df[matcher(pattern, case=case, na=False)]

    if pd.api.types.is_integer_dtype(col_dtype):
        try:
            wanted = int(pattern)
        except (TypeError, ValueError):
            # A non-numeric pattern cannot equal any integer value.
            return df.iloc[0:0]
        return df[values == wanted]

    return None

In [None]:
@interact_manual(
    column=WORDNET_DF.columns.to_list()
)
def dataframe_browser(
    reset=True, pattern=cur_pattern, column=cur_column,
    search=cur_search, case=cur_case
):
    """Filter the wordnet dataframe and explore relations of the first match.

    The chosen settings and the filtered frame are stored in the ``cur_*``
    globals so they persist between runs. When the result has rows, the
    synset named in the first row's 'synset_name' column is looked up and a
    selector is shown for its non-empty relations; picking one prints the
    corresponding relation tree.

    :param reset: Start from the full WORDNET_DF instead of refining the
        previous result.
    :param pattern: Search pattern; an empty pattern applies no filter.
    :param column: Column to match the pattern against.
    :param search: Passed through to select_matches.
    :param case: Passed through to select_matches.
    """
    global cur_df, cur_pattern, cur_column, cur_search, cur_case, cur_synset, cur_relation, cur_tree
    cur_pattern = pattern
    cur_column = column
    cur_search = search
    cur_case = case

    # select_matches returns None for unsupported column types, so cur_df may
    # be None from an earlier run; fall back to the full frame rather than
    # trying to filter None.
    if reset or cur_df is None:
        cur_df = WORDNET_DF
    if pattern:
        cur_df = select_matches(
            cur_pattern, cur_column, search=cur_search, case=cur_case, df=cur_df
        )
    display(cur_df)

    if cur_df is None or len(cur_df) == 0:
        return

    cur_sname = cur_df.iloc[0]['synset_name']
    print(f'Relations for "{cur_sname}":')
    cur_synset = wn.synset(cur_sname)

    relations = get_relations(cur_synset)
    if not relations:
        # Nothing to choose from: skip the selector instead of handing the
        # widget an empty option list.
        print('No relations found.')
        return

    # NOTE(review): this parameterless interact wrapper adds no controls of
    # its own; kept as in the original -- confirm before flattening.
    @interact
    def explore_synset():
        @interact(
            relation=relations
        )
        def show_relation_tree(relation):
            global cur_relation, cur_tree
            cur_relation = relation
            cur_tree = dk_tree.build_tree_from_list(cur_synset.acyclic_tree(relation, cut_mark='...'))
            print(cur_tree)