In [1]:
%cd ~/Documents/iss_plp/ISS_PLP_Project

/home/tictactoe/Documents/iss_plp/ISS_PLP_Project


In [3]:
import numpy as np
import pandas as pd
import os, json, time

import nltk
# nltk.download('wordnet')
from nltk.corpus import wordnet as wn
from nltk.stem.wordnet import WordNetLemmatizer

import warnings
warnings.filterwarnings("ignore")

DATASET = '/home/tictactoe/Documents/iss_plp/ISS_PLP_Project'

In [4]:
SAVE_DIR = os.path.join(DATASET, 'WordNet')
if not os.path.exists(SAVE_DIR):
    os.mkdir(SAVE_DIR)

### `Synset`
___
* Synset is a set of synonyms that share a common meaning.

### `Lemma`
___
* Each synset contains one or more lemmas, which represent a specific sense of a specific word.

### `Reference`
___
* [Official documentation](https://www.nltk.org/howto/wordnet.html)

In [4]:
POS_DICT = {
    'verb' : wn.VERB,
    'noun' : wn.NOUN,
    'adj' : wn.ADJ,
    'adv' : wn.ADV
}

LEMMA_POS_DICT = {
    'verb' : 'v',
    'noun' : 'n',
    'adj' : 'a',# head adjective
    'sadj' : 's', # satellite adjective
    'adv' : 'r',
}

def get_synsets(wd, pos=None):
    wd = wd.strip()
    wd = wd.replace(' ', '_')
    if pos is not None:
        if isinstance(pos, str):
            pos = [pos]
        pos = [POS_DICT[p] for p in pos]
        
    return wn.synsets(wd, pos=pos)

def print_defs(syns):
    if not isinstance(syns, list):
        syns = [syns]
    for syn in syns:
        print("%s ---> %s" %(syn.name(), syn.definition()))

def get_hyper(syn, full=True):
    if full:
        return list(syn.closure(lambda s:s.hypernyms()))
    else:
        return syn.hypernyms()

def get_hypo(syn, full=True):
    if full:
        return list(syn.closure(lambda s:s.hyponyms()))
    else:
        return syn.hyponyms()

def get_part_mero(syn):
    return syn.part_meronyms()

def get_substance_mero(syn):
    return syn.substance_meronyms()

def get_part_holo(syn):
    return syn.part_holonyms()

def get_substance_holo(syn):
    return syn.substance_holonyms()

def get_related_forms(syn, wd_only=True):
    # get all lemmas of the synset
    lemmas = syn.lemmas()
    related_forms = [lemma.derivationally_related_forms() for lemma in lemmas]
    related_forms = [form for forms in related_forms for form in forms]
    
    if wd_only:
        related_forms = [form.name() for form in related_forms]
        related_forms = [form.split('.')[-1] for form in related_forms]
    
    return related_forms
    
def split_syns(syns):
    if not isinstance(syns, list):
        syns = [syns]
    syns = [syn.name() for syn in syns]
    syns = [syn.split('.') for syn in syns]
    syns = [[' '.join(wd.split('_')), pos, nn]for wd, pos, nn in syns]
    return syns

def split_lemmas(lemmas):
    lemmas = [lemma.name() for lemma in lemmas]
    lemmas = [lemma.split('.') for lemma in lemmas]
    lemmas = [[' '.join(wd.split('_')), pos, nn]for wd, pos, nn in syns]
    return lemmas

def lemmatize(wd, pos):
    pos = LEMMA_POS_DICT[pos]
    return WordNetLemmatizer().lemmatize(wd, pos)


### **Get terms related to `size`**
___

In [273]:
keywords = ['size', 'small', 'large', 'fit', 'big', 'medium',
           'tight', 'loose',]

# get all synsets and print its definition
for keyword in keywords:
    print("="*50)
    print("Keyword: ", keyword)
    print("="*50)
    size_syns = get_synsets(keyword)
    print_defs(size_syns)

Keyword:  size
size.n.01 ---> the physical magnitude of something (how big it is)
size.n.02 ---> the property resulting from being one of a series of graduated measurements (as of clothing)
size.n.03 ---> any glutinous material used to fill pores in surfaces or to stiffen fabrics
size.n.04 ---> the actual state of affairs
size.n.05 ---> a large magnitude
size.v.01 ---> cover or stiffen or glaze a porous material with size or sizing (a glutinous substance)
size.v.02 ---> sort according to size
size.v.03 ---> make to a size; bring to a suitable size
size.s.01 ---> (used in combination) sized
Keyword:  small
small.n.01 ---> the slender part of the back
small.n.02 ---> a garment size for a small person
small.a.01 ---> limited or below average in number or quantity or magnitude or extent
minor.s.10 ---> limited in size or scope
little.s.03 ---> (of children and animals) young, immature
small.s.04 ---> slight or limited; especially in degree or intensity or scope
humble.s.01 ---> low or infe

In [243]:
selected = [
    'size.n.01',
    'size.n.02',
    'size.n.05',
    'small.n.02',
    'small.a.01',
    'large.n.01',
    'large.a.01',
    'fit.n.03',
    'fit.v.02',
    'fit.v.04',
    'fit.v.07',
    'medium.n.08',
    'average.s.04',
]

In [289]:
## get all selected synsets
selected_syns = [wn.synset(syn) for syn in selected]

## get all hyponyms of selected synsets
hypo_syns = [get_hypo(syn, full=True) for syn in selected_syns]
hypo_syns = [syn for syns in hypo_syns for syn in syns ] # flatten the list

## get all part meronyms of selected synsets
part_mero_syns = [get_part_mero(syn) for syn in selected_syns]
part_mero_syns = [syn for syns in part_mero_syns for syn in syns]

## get all substance meronyms of selected synsets
subs_mero_syns = [get_substance_mero(syn) for syn in selected_syns]
subs_mero_syns = [syn for syns in subs_mero_syns for syn in syns]

## get all part holonyms of selected synsets
part_holo_syns = [get_part_holo(syn) for syn in selected_syns]
part_holo_syns = [syn for syns in part_holo_syns for syn in syns]

## get all substance holonyms of selected synsets
subs_holo_syns = [get_substance_holo(syn) for syn in selected_syns]
subs_holo_syns = [syn for syns in subs_holo_syns for syn in syns]


In [294]:
# combine all synsets
all_syns = selected_syns + hypo_syns + part_mero_syns + subs_mero_syns + part_holo_syns + subs_holo_syns

In [322]:
# get all the lemmas of the synsets
syns_lemmas = [syn.lemmas() for syn in all_syns]
syns_lemmas = [lemma for lemmas in syns_lemmas for lemma in lemmas] # flatten the list
syns_lemmas_names = [lemma.name() for lemma in syns_lemmas] # get just the words 

# define helper function that gets the pos-tag of each lemma
POS_TO_COMMON = {
    'n' : 'noun',
    'a' : 'adj',
    's' : 'adj',
    'v' : 'verb',
    'r' : 'adv'
}

def pos_from_lemma(lemma):
    syn = lemma.synset()
    syn = syn.name()
    pos = syn.split('.')[1]
    
    
    return POS_TO_COMMON[pos]

syns_lemmas_pos = [pos_from_lemma(lemma) for lemma in syns_lemmas]
syns_lemmas_both = [(wd, pos) for wd, pos in zip(syns_lemmas_names, syns_lemmas_pos)]
syns_lemmas_both = list(set(syns_lemmas_both)) # remove duplicate

In [330]:
# get related lemmas
related_lemmas = [get_related_forms(syn, wd_only=False) for syn in all_syns]
related_lemmas = [lemma for lemmas in related_lemmas for lemma in lemmas]

related_lemmas_names = [lemma.name() for lemma in related_lemmas]
related_lemmas_pos = [pos_from_lemma(lemma) for lemma in related_lemmas]
related_lemmas_both = [(wd, pos) for wd, pos in zip(related_lemmas_names, related_lemmas_pos)]
related_lemmas_both = list(set(related_lemmas_both))

In [334]:
# combine all direct and related lemmas
all_lemmas = syns_lemmas_both + related_lemmas_both

# remove duplicate
all_lemmas = list(set(all_lemmas))

# replace _ with space in words
all_lemmas = [(wd.replace('_', ' '), pos) for wd, pos in all_lemmas]

In [359]:
lemmas_df = pd.DataFrame(all_lemmas, columns=['word', 'pos'], index=None)
lemmas_df.to_csv(os.path.join(SAVE_DIR, 'size_both.csv'), index=False)

In [14]:
# split combined lemmas into two separate columns and files
def split_lemmas(src_path, save_dir):
    lemmas_df = pd.read_csv(src_path)
    file_name = src_path.split('/')[-1]
    cat_name = file_name.split('.')[0]
    words, pos = lemmas_df['word'], lemmas_df['pos']
    words.to_csv(os.path.join(save_dir, cat_name+'_wd.csv'), index=False)
    pos.to_csv(os.path.join(save_dir, cat_name+'_pos.csv'), index=False)

In [366]:
split_lemmas(os.path.join(SAVE_DIR, 'size_both.csv'), SAVE_DIR)

In [29]:
# define helper function that gets the pos-tag of each lemma
POS_TO_COMMON = {
    'n' : 'noun',
    'a' : 'adj',
    's' : 'adj',
    'v' : 'verb',
    'r' : 'adv'
}

def pos_from_lemma(lemma):
    syn = lemma.synset()
    syn = syn.name()
    pos = syn.split('.')[1]
    
    try:
        return POS_TO_COMMON[pos]
    except:
        return 'other'

# write an overall function to process a list of selected words
def extract_wordnet(selected, save_loc=None):
    ## get all selected synsets
    selected_syns = [wn.synset(syn) for syn in selected]

    ## get all hyponyms of selected synsets
    hypo_syns = [get_hypo(syn, full=True) for syn in selected_syns]
    hypo_syns = [syn for syns in hypo_syns for syn in syns ] # flatten the list

    ## get all part meronyms of selected synsets
    part_mero_syns = [get_part_mero(syn) for syn in selected_syns]
    part_mero_syns = [syn for syns in part_mero_syns for syn in syns]

    ## get all substance meronyms of selected synsets
    subs_mero_syns = [get_substance_mero(syn) for syn in selected_syns]
    subs_mero_syns = [syn for syns in subs_mero_syns for syn in syns]

    ## get all part holonyms of selected synsets
    part_holo_syns = [get_part_holo(syn) for syn in selected_syns]
    part_holo_syns = [syn for syns in part_holo_syns for syn in syns]

    ## get all substance holonyms of selected synsets
    subs_holo_syns = [get_substance_holo(syn) for syn in selected_syns]
    subs_holo_syns = [syn for syns in subs_holo_syns for syn in syns]
    
    # combine all synsets
    all_syns = selected_syns + hypo_syns + part_mero_syns + subs_mero_syns + part_holo_syns + subs_holo_syns  
    
    # get all the lemmas of the synsets
    syns_lemmas = [syn.lemmas() for syn in all_syns]
    syns_lemmas = [lemma for lemmas in syns_lemmas for lemma in lemmas] # flatten the list
    syns_lemmas_names = [lemma.name() for lemma in syns_lemmas] # get just the words 
    
    syns_lemmas_pos = [pos_from_lemma(lemma) for lemma in syns_lemmas]
    syns_lemmas_both = [(wd, pos) for wd, pos in zip(syns_lemmas_names, syns_lemmas_pos)]
    syns_lemmas_both = list(set(syns_lemmas_both)) # remove duplicate
    
    # get related lemmas
    related_lemmas = [get_related_forms(syn, wd_only=False) for syn in all_syns]
    related_lemmas = [lemma for lemmas in related_lemmas for lemma in lemmas]

    related_lemmas_names = [lemma.name() for lemma in related_lemmas]
    related_lemmas_pos = [pos_from_lemma(lemma) for lemma in related_lemmas]
    related_lemmas_both = [(wd, pos) for wd, pos in zip(related_lemmas_names, related_lemmas_pos)]
    related_lemmas_both = list(set(related_lemmas_both)) # remove duplicate
    
    # combine all direct and related lemmas
    all_lemmas = syns_lemmas_both + related_lemmas_both

    # remove duplicate
    all_lemmas = list(set(all_lemmas))

    # replace _ with space in words
    all_lemmas = [(wd.replace('_', ' '), pos) for wd, pos in all_lemmas]
    
    if save_loc is not None:
        df = pd.DataFrame(all_lemmas, columns=['word','pos'])
        df.to_csv(save_loc, index=False)
        
    return all_lemmas

## **Get terms related to `comfort`**
___


In [9]:
keywords = ['comfort', 'comfortable']

# get all synsets and print its definition
for keyword in keywords:
    print("="*50)
    print("Keyword: ", keyword)
    print("="*50)
    syn = get_synsets(keyword)
    print_defs(syn)

Keyword:  comfort
comfort.n.01 ---> a state of being relaxed and feeling no pain
comfort.n.02 ---> a feeling of freedom from worry or disappointment
consolation.n.02 ---> the act of consoling; giving relief in affliction
ease.n.02 ---> a freedom from financial difficulty that promotes a comfortable state
comfort.n.05 ---> satisfaction or physical well-being provided by a person or thing
quilt.n.01 ---> bedding made of two layers of cloth filled with stuffing and stitched together
comfort.n.07 ---> assistance, such as that provided to an enemy or to a known criminal
comfort.v.01 ---> give moral or emotional strength to
comfort.v.02 ---> lessen pain or discomfort; alleviate
Keyword:  comfortable
comfortable.a.01 ---> providing or experiencing physical well-being or relief (`comfy' is informal)
comfortable.a.02 ---> free from stress or conducive to mental ease; having or affording peace of mind
comfortable.s.03 ---> more than adequate
comfortable.s.04 ---> sufficient to provide comfort
co

In [22]:
# get selected synsets
selected = [
    'comfort.n.01',
    'comfort.n.02',
    'comfort.n.05',
    'comfort.v.02',
    'comfortable.a.01',
    'comfortable.a.02',
    'comfortable.s.04',
]

# extract all related lemmas from wordnet
save_at = os.path.join(SAVE_DIR, 'comfort.csv')
all_lemmas = extract_wordnet(selected, save_loc=save_at)

# split the pos-tags and words
split_lemmas(save_at, SAVE_DIR)

In [23]:
all_lemmas

[('solace', 'noun'),
 ('alleviation', 'noun'),
 ('cozy', 'adj'),
 ('console', 'verb'),
 ('alleviate', 'verb'),
 ('assuage', 'verb'),
 ('consolation', 'noun'),
 ('convenience', 'noun'),
 ('relief', 'noun'),
 ('ease', 'verb'),
 ('silver lining', 'noun'),
 ('easing', 'noun'),
 ('cosiness', 'noun'),
 ('solacement', 'noun'),
 ('reprieve', 'verb'),
 ('cosy', 'adj'),
 ('bright side', 'noun'),
 ('coziness', 'noun'),
 ('comforter', 'noun'),
 ('cold comfort', 'noun'),
 ('ease', 'noun'),
 ('comfort', 'verb'),
 ('easement', 'noun'),
 ('comfortable', 'adj'),
 ('solace', 'verb'),
 ('respite', 'noun'),
 ('snug', 'adj'),
 ('comfortableness', 'noun'),
 ('reprieve', 'noun'),
 ('assuagement', 'noun'),
 ('convenient', 'adj'),
 ('snugness', 'noun'),
 ('comfort', 'noun'),
 ('comfy', 'adj')]

## **Get terms related to `price`**
___


In [24]:
keywords = ['discount', 'purchase', 'offer', 'honest',
           'cheap', 'expensive', 'price', 'worth', 'affordable',
           'pricy', 'cost', 'costly']

# get all synsets and print its definition
for keyword in keywords:
    print("="*50)
    print("Keyword: ", keyword)
    print("="*50)
    syn = get_synsets(keyword)
    print_defs(syn)

Keyword:  discount
discount.n.01 ---> the act of reducing the selling price of merchandise
discount_rate.n.02 ---> interest on an annual basis deducted in advance on a loan
rebate.n.01 ---> a refund of some fraction of the amount paid
deduction.n.02 ---> an amount or percentage deducted
dismiss.v.01 ---> bar from attention or consideration
discount.v.02 ---> give a reduction in price on
Keyword:  purchase
purchase.n.01 ---> the acquisition of something for payment
purchase.n.02 ---> something acquired by purchase
purchase.n.03 ---> a means of exerting influence or gaining advantage
leverage.n.01 ---> the mechanical advantage gained by being in a position to use a lever
buy.v.01 ---> obtain by purchase; acquire by means of a financial transaction
Keyword:  offer
offer.n.01 ---> the verbal act of offering
offer.n.02 ---> something offered (as a proposal or bid)
crack.n.09 ---> a usually brief attempt
offer.v.01 ---> make available or accessible, provide or furnish
offer.v.02 ---> present

In [25]:
# get selected synsets
selected = [
    'discount.n.01',
    'deduction.n.02',
    'discount.v.02',
    'purchase.n.01',
    'purchase.n.02',
    'buy.v.01',
    'offer.v.06',
    'extend.v.04',
    'honest.a.01',
    'honest.s.07',
    'dependable.s.02 ',
    'cheap.a.01',
    'expensive.a.01',
    'monetary_value.n.01',
    'price.n.02',
    'price.n.03',
    'price.n.04',
    'price.v.01',
    'price.v.02',
    'worth.n.01',
    'worth.s.02',
    'low-cost.s.01',
    'costly.s.02',
    'cost.n.01',
    'monetary_value.n.01',
    'cost.v.01',
    'costly.s.02'
]

# extract all related lemmas from wordnet
save_at = os.path.join(SAVE_DIR, 'price.csv')
all_lemmas = extract_wordnet(selected, save_loc=save_at)

# split the pos-tags and words
split_lemmas(save_at, SAVE_DIR)

In [26]:
all_lemmas

[('catalog buying', 'noun'),
 ('takeover', 'noun'),
 ('superannuation', 'noun'),
 ('extort', 'verb'),
 ('combat pay', 'noun'),
 ('trueness', 'noun'),
 ('support payment', 'noun'),
 ('fair', 'adj'),
 ('cost-of-living allowance', 'noun'),
 ('bid price', 'noun'),
 ('highway robbery', 'noun'),
 ('license fee', 'noun'),
 ('retirement check', 'noun'),
 ('sumptuous', 'adj'),
 ('disbursal', 'noun'),
 ('allowance', 'noun'),
 ('subscriber', 'noun'),
 ('mail-order buying', 'noun'),
 ('moorage', 'noun'),
 ('market', 'verb'),
 ('steal', 'noun'),
 ('expensive', 'adj'),
 ('viatical settlement', 'noun'),
 ('minimum wage', 'noun'),
 ('knock back', 'verb'),
 ('modest', 'adj'),
 ('pipage', 'noun'),
 ('interest rate', 'noun'),
 ('freight rate', 'noun'),
 ('baksheesh', 'noun'),
 ('fare', 'noun'),
 ('indemnification', 'noun'),
 ('time plan', 'noun'),
 ('penalty', 'noun'),
 ('admission fee', 'noun'),
 ('contingency fee', 'noun'),
 ('discount', 'verb'),
 ('repayment rate', 'noun'),
 ('transportation', 'noun')

## **Get terms related to `appearance`**
___


In [27]:
keywords = ['beautiful', 'color', 'short', 'long',
           'look', 'looking', 'appear', 'appearance', 'bright',
           'nice', 'cute', 'fashionable', 'outdated', 'design',
           'ugly', 'style', 'stylish', 'sporty', 'tasteful',
           'elegant', 'trendy']

# get all synsets and print its definition
for keyword in keywords:
    print("="*50)
    print("Keyword: ", keyword)
    print("="*50)
    syn = get_synsets(keyword)
    print_defs(syn)

Keyword:  beautiful
beautiful.a.01 ---> delighting the senses or exciting intellectual or emotional admiration
beautiful.s.02 ---> (of weather) highly enjoyable
Keyword:  color
color.n.01 ---> a visual attribute of things that results from the light they emit or transmit or reflect
color.n.02 ---> interest and variety and intensity
color.n.03 ---> the timbre of a musical sound
color.n.04 ---> a race with skin pigmentation different from the white race (especially Blacks)
semblance.n.01 ---> an outward or token appearance or form that is deliberately misleading
coloring_material.n.01 ---> any material used for its color
color.n.07 ---> (physics) the characteristic of quarks that determines their role in the strong interaction
color.n.08 ---> the appearance of objects (or light sources) described in terms of a person's perception of their hue and lightness (or brightness) and saturation
color.v.01 ---> add color to
tinge.v.01 ---> affect as in thought or feeling
color.v.03 ---> modify or

In [30]:
# get selected synsets
selected = [
    'beautiful.a.01',
    'color.n.01',
    'color.n.08',
    'color.v.01',
    'color.v.04',
    'short.a.02',
    'long.a.02',
    'look.n.03',
    'look.v.02',
    'look.v.03',
    'look.v.09',
    'looking.s.01',
    'appearance.n.01',
    'bright.s.02',
    'nice.a.01',
    'cunning.s.01',
    'cute.s.02',
    'fashionable.a.01',
    'stylish.a.01',
    'outdated.s.01',
    'design.n.01',
    'design.n.02',
    'design.n.04',
    'blueprint.n.01',
    'design.v.03',
    'design.v.04',
    'ugly.a.01',
    'style.n.03',
    'style.v.02',
    'stylish.a.01',
    'flashy.s.02',
    'tasteful.a.01',
    'elegant.a.01',
    'trendy.s.01'
]

# extract all related lemmas from wordnet
save_at = os.path.join(SAVE_DIR, 'appearance.csv')
all_lemmas = extract_wordnet(selected, save_loc=save_at)

# split the pos-tags and words
split_lemmas(save_at, SAVE_DIR)

In [31]:
all_lemmas

[('cherry red', 'noun'),
 ('blot', 'verb'),
 ('marking', 'noun'),
 ('sound', 'verb'),
 ('greenish blue', 'noun'),
 ('gold', 'noun'),
 ('waft', 'noun'),
 ('wine-coloured', 'noun'),
 ('blazon', 'noun'),
 ('unsightly', 'adj'),
 ('niceness', 'noun'),
 ('chunking', 'noun'),
 ('showy', 'adj'),
 ('melanoderma', 'noun'),
 ('effect', 'noun'),
 ('salmon', 'noun'),
 ('colour', 'verb'),
 ('fair', 'adj'),
 ('mellow', 'adj'),
 ('wart', 'noun'),
 ('stain', 'verb'),
 ('polka dot', 'noun'),
 ('draft', 'noun'),
 ('sapphire', 'noun'),
 ('chromatic colour', 'noun'),
 ('purpleness', 'noun'),
 ('sallowness', 'noun'),
 ('weave', 'noun'),
 ('glorious', 'adj'),
 ('nevus flammeus', 'noun'),
 ('colophon', 'noun'),
 ('Bermuda plan', 'noun'),
 ('armorial bearing', 'noun'),
 ('grey', 'adj'),
 ('homely', 'adj'),
 ('mark', 'verb'),
 ('defect', 'noun'),
 ('severity', 'noun'),
 ('motley', 'verb'),
 ('dapple-grey', 'noun'),
 ('damascene', 'verb'),
 ('greenishness', 'noun'),
 ('sapphire', 'adj'),
 ('full-of-the-moon', 'n

## **Get terms related to `quality`**
___


In [38]:
keywords = ['material', 'fabric', 'texture', 'shrink','wrinkle',
           'permeable','ventilation','wash', 'cotton', 'lacy', 
           'knit', 'waterproof', 'woollen', 'intricate',
           'weatherproof', 'delicate', 'silk']

# get all synsets and print its definition
for keyword in keywords:
    print("="*50)
    print("Keyword: ", keyword)
    print("="*50)
    syn = get_synsets(keyword)
    print_defs(syn)

Keyword:  material
material.n.01 ---> the tangible substance that goes into the makeup of a physical object
material.n.02 ---> information (data or ideas or observations) that can be used or reworked into a finished form
fabric.n.01 ---> artifact made by weaving or felting or knitting or crocheting natural or synthetic fibers
material.n.04 ---> things needed for doing or making something
material.n.05 ---> a person judged suitable for admission or employment
material.s.01 ---> concerned with worldly rather than spiritual interests
material.a.02 ---> derived from or composed of matter
material.a.03 ---> directly relevant to a matter especially a law case
material.s.04 ---> concerned with or affecting physical as distinct from intellectual or psychological well-being; ; - T.Roosevelt
corporeal.a.01 ---> having material or physical form or substance;  - Benjamin Jowett
substantial.a.03 ---> having substance or capable of being treated as fact; not imaginary; ; ; - Shakespeare
Keyword:  fa

In [39]:
# get selected synsets
selected = [
    'fabric.n.01',
    'texture.n.01',
    'texture.n.02',
    'shrink.v.03',
    'shrink.v.04',
    'shrink.v.05',
    'wrinkle.n.01',
    'wrinkle.v.02',
    'rumple.v.03',
    'permeable.a.01',
    'ventilation.n.01',
    'wash.v.05',
    'cotton.n.02',
    'lacy.s.01',
    'knit.n.01',
    'knit.n.02',
    'knit.n.03',
    'knit.v.01',
    'pucker.v.01',
    'waterproof.n.01',
    'rainproof.s.01',
    'wool.n.01',
    'woolen.a.01',
    'intricate.s.01',
    'weatherproof.v.01',
    'weatherproof.s.01',
    'delicate.s.03',
    'finespun.s.01',
    'silk.n.01',
    'silk.n.02'
]

# extract all related lemmas from wordnet
save_at = os.path.join(SAVE_DIR, 'quality.csv')
all_lemmas = extract_wordnet(selected, save_loc=save_at)

# split the pos-tags and words
split_lemmas(save_at, SAVE_DIR)

In [40]:
all_lemmas

[('worsted', 'noun'),
 ('waterproof', 'noun'),
 ('hopsacking', 'noun'),
 ('wash-and-wear', 'noun'),
 ('quilting', 'noun'),
 ('washable', 'adj'),
 ('yoke', 'noun'),
 ('reduce', 'verb'),
 ('terry', 'noun'),
 ('cotton', 'noun'),
 ("monk's cloth", 'noun'),
 ('miniaturise', 'verb'),
 ('flannel', 'noun'),
 ('ticking', 'noun'),
 ('seam', 'noun'),
 ('furrow', 'noun'),
 ('hanky', 'noun'),
 ('save-all', 'noun'),
 ('velveteen', 'noun'),
 ('damask', 'noun'),
 ('motley', 'verb'),
 ('canopy', 'noun'),
 ('topsail', 'noun'),
 ('ventilate', 'verb'),
 ('sandy', 'adj'),
 ('stockinet', 'noun'),
 ('upholstery material', 'noun'),
 ('lug', 'noun'),
 ('flash', 'noun'),
 ('bristliness', 'noun'),
 ('fore-topsail', 'noun'),
 ('permeability', 'noun'),
 ('twill', 'noun'),
 ('swatch', 'noun'),
 ('miniaturisation', 'noun'),
 ('purse', 'verb'),
 ('huck', 'noun'),
 ('skysail', 'noun'),
 ('jean', 'noun'),
 ('skinny', 'adj'),
 ('denim', 'noun'),
 ('chenille', 'noun'),
 ('vicuna', 'noun'),
 ('dishrag', 'noun'),
 ('Velcro

## **Get terms related to `delivery`**
___


In [42]:
keywords = ['delivery', 'prompt', 'delay', 'delayed', 'package',
           'parcel', 'wrap', 'on_time', 'punctual', 'late',
           'lost', 'mailing', 'shipment', 'dispatch', 'track',
           'address', 'receive']

# get all synsets and print its definition
for keyword in keywords:
    print("="*50)
    print("Keyword: ", keyword)
    print("="*50)
    syn = get_synsets(keyword)
    print_defs(syn)

Keyword:  delivery
delivery.n.01 ---> the act of delivering or distributing something (as goods or mail)
delivery.n.02 ---> the event of giving birth
manner_of_speaking.n.01 ---> your characteristic style or manner of expressing yourself orally
delivery.n.04 ---> the voluntary transfer of something (title or possession) from one party to another
pitch.n.02 ---> (baseball) the act of throwing a baseball by a pitcher to a batter
rescue.n.01 ---> recovery or preservation from loss or danger
delivery.n.07 ---> the act of delivering a child
Keyword:  prompt
prompt.n.01 ---> a cue given to a performer (usually the beginning of the next line to be spoken)
prompt.n.02 ---> (computer science) a symbol that appears on the computer screen to indicate that the computer is ready to receive a command
motivate.v.01 ---> give an incentive for action
prompt.v.02 ---> serve as the inciting cause of
prompt.v.03 ---> assist (somebody acting or reciting) by suggesting the next words of something forgotten 

In [43]:
# get selected synsets
selected = [
    'delivery.n.01',
    'prompt.s.01',
    'prompt.s.02',
    'immediate.s.05',
    'delay.n.01',
    'delay.n.02',
    'delay.v.01',
    'delay.v.02',
    'stay.v.06',
    'package.n.01',
    'package.n.02',
    'parcel.n.02',
    'parcel.v.02',
    'parcel.v.03',
    'wrapping.n.01',
    'envelop.v.01',
    'on_time.r.01',
    'late.a.01',
    'late.r.01',
    'lose.v.01',
    'misplace.v.01',
    'lose.v.05',
    'lose.v.08',
    'lost.a.01',
    'lost.s.05',
    'mailing.n.01',
    'mailing.n.02',
    'mail.v.01',
    'mail.v.02',
    'cargo.n.01',
    'dispatch.n.02',
    'dispatch.n.02',
    'dispatch.v.01',
    'track.v.02',
    'address.n.02',
    'address.n.05',
    'address.n.06',
    'address.v.03',
    'address.v.07',
    'receive.v.01',
]

# extract all related lemmas from wordnet
save_at = os.path.join(SAVE_DIR, 'delivery.csv')
all_lemmas = extract_wordnet(selected, save_loc=save_at)

# split the pos-tags and words
split_lemmas(save_at, SAVE_DIR)

In [44]:
all_lemmas

[('fueling', 'noun'),
 ('dime bag', 'noun'),
 ('stay', 'noun'),
 ('filibuster', 'verb'),
 ('enfolding', 'noun'),
 ('lose', 'verb'),
 ('Saran Wrap', 'noun'),
 ('pack', 'verb'),
 ('have', 'verb'),
 ('insert', 'verb'),
 ('homely', 'adj'),
 ('take up', 'verb'),
 ('call', 'verb'),
 ('on time', 'adv'),
 ('sendee', 'noun'),
 ('hesitate', 'verb'),
 ('suspend', 'verb'),
 ('lateness', 'noun'),
 ('airmailer', 'noun'),
 ('interjection', 'noun'),
 ('stonewall', 'verb'),
 ('defer', 'verb'),
 ('cellophane', 'noun'),
 ('enshroud', 'verb'),
 ("drag one's heels", 'verb'),
 ('relay', 'noun'),
 ('reship', 'verb'),
 ('freight', 'noun'),
 ('accept', 'verb'),
 ('stalling', 'noun'),
 ('dawdling', 'noun'),
 ('cunctation', 'noun'),
 ('serving', 'noun'),
 ('benight', 'verb'),
 ('lost', 'adj'),
 ('hay bale', 'noun'),
 ('fuel', 'verb'),
 ('bale', 'verb'),
 ('stall', 'verb'),
 ('bringing', 'noun'),
 ('gap', 'noun'),
 ('losings', 'noun'),
 ('gift wrapping', 'noun'),
 ('barracking', 'noun'),
 ('postal', 'adj'),
 ('ma

## **Get terms related to `functionality`**
___


In [47]:
keywords = [
    'pocket', 'zipper', 'hold', 'organized','card', 'functional',
    'multipurpose', 'convenient', 'convenience', 'versatile',
    'useful', 'cumbersome', 'simple', 'easy',
    'simplicity', 'unwieldy'
]

# get all synsets and print its definition
for keyword in keywords:
    print("="*50)
    print("Keyword: ", keyword)
    print("="*50)
    syn = get_synsets(keyword)
    print_defs(syn)

Keyword:  pocket
pocket.n.01 ---> a small pouch inside a garment for carrying small articles
pouch.n.02 ---> an enclosed space
pocket.n.03 ---> a supply of money
pocket.n.04 ---> (bowling) the space between the headpin and the pins behind it on the right or left
scoop.n.02 ---> a hollow concave shape made by removing something
air_pocket.n.01 ---> a local region of low pressure or descending air that causes a plane to lose height suddenly
pocket.n.07 ---> a small isolated group of people
pouch.n.03 ---> (anatomy) saclike structure in any of various animals (as a marsupial or gopher or pelican)
pocket.n.09 ---> an opening at the corner or on the side of a billiard table into which billiard balls are struck
pocket.v.01 ---> put in one's pocket
pocket.v.02 ---> take unlawfully
Keyword:  zipper
slide_fastener.n.01 ---> a fastener for locking together two toothed edges by means of a sliding tab
zip_up.v.01 ---> close with a zipper
Keyword:  hold
clasp.n.02 ---> the act of grasping
appreciat

In [49]:
# get selected synsets
selected = [
    'pocket.n.01',
    'pouch.n.02',
    'pocket.n.04',
    'slide_fastener.n.01',
    'keep.v.01',
    'hold.v.02',
    'restrain.v.03',
    'retain.v.03',
    'hold.v.10',
    'hold.v.14',
    'contain.v.05',
    'organized.a.01',
    'organized.a.02',
    'organize.v.02',
    'organize.v.04',
    'card.n.02',
    'functional.a.01',
    'functional.s.04',
    'functional.s.05',
    'multipurpose.s.01',
    'convenient.a.01',
    'commodious.a.01',
    'convenience.n.02',
    'appliance.n.01',
    'versatile.s.01',
    'useful.a.01',
    'utilitarian.s.01',
    'cumbersome.s.01',
    'simple.a.01',
    'elementary.s.01',
    'simple.s.07',
    'easy.a.01',
    'easily.r.01',
    'simplicity.n.01',
    'unwieldy.a.01',
]

# extract all related lemmas from wordnet
save_at = os.path.join(SAVE_DIR, 'functionality.csv')
all_lemmas = extract_wordnet(selected, save_loc=save_at)

# split the pos-tags and words
split_lemmas(save_at, SAVE_DIR)

In [50]:
all_lemmas

[('sac', 'noun'),
 ('membership card', 'noun'),
 ('hang', 'verb'),
 ('synchronise', 'verb'),
 ('buoy up', 'verb'),
 ('grasp', 'verb'),
 ('collectivize', 'verb'),
 ('retention', 'noun'),
 ('utilitarian', 'adj'),
 ('easiness', 'noun'),
 ('accessible', 'adj'),
 ('pouch', 'noun'),
 ('keep up', 'verb'),
 ('hug', 'noun'),
 ('sack', 'noun'),
 ('patness', 'noun'),
 ('distance', 'noun'),
 ('reorganization', 'noun'),
 ('carry on', 'verb'),
 ('widget', 'noun'),
 ('shackle', 'verb'),
 ('pose', 'noun'),
 ('contraption', 'noun'),
 ('perpetuate', 'verb'),
 ('cradle', 'noun'),
 ('bosom', 'noun'),
 ('plug away', 'verb'),
 ('patch pocket', 'noun'),
 ('usableness', 'noun'),
 ('coordinate', 'verb'),
 ('synchronize', 'verb'),
 ('usable', 'adj'),
 ('pressurise', 'verb'),
 ('reorganise', 'verb'),
 ('organise', 'verb'),
 ('reorganize', 'verb'),
 ('stoop', 'verb'),
 ('holder', 'noun'),
 ('persistency', 'noun'),
 ('bracing', 'noun'),
 ('housekeep', 'verb'),
 ('uphold', 'verb'),
 ('hang in', 'verb'),
 ('carry', 