In [9]:
import spacy 
import nltk
nltk.download('wordnet')
from nltk.corpus import wordnet as wn
from collections import Counter
import numpy as np


# Load spaCy model
nlp= spacy.load('en_core_web_sm')

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/hivagheisari/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


## Synonyms:

In [10]:
def get_synonyms(word):
    synonyms=set()
    for syn in wn.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
    return synonyms

In [11]:
get_synonyms('Car')

{'auto',
 'automobile',
 'cable_car',
 'car',
 'elevator_car',
 'gondola',
 'machine',
 'motorcar',
 'railcar',
 'railroad_car',
 'railway_car'}

In [12]:
print(f" wn.synsets('Car') is {wn.synsets('Car')}")

 wn.synsets('Car') is [Synset('car.n.01'), Synset('car.n.02'), Synset('car.n.03'), Synset('car.n.04'), Synset('cable_car.n.01')]


In [13]:
for syn in wn.synsets('Car'):
    print(syn.lemmas())

[Lemma('car.n.01.car'), Lemma('car.n.01.auto'), Lemma('car.n.01.automobile'), Lemma('car.n.01.machine'), Lemma('car.n.01.motorcar')]
[Lemma('car.n.02.car'), Lemma('car.n.02.railcar'), Lemma('car.n.02.railway_car'), Lemma('car.n.02.railroad_car')]
[Lemma('car.n.03.car'), Lemma('car.n.03.gondola')]
[Lemma('car.n.04.car'), Lemma('car.n.04.elevator_car')]
[Lemma('cable_car.n.01.cable_car'), Lemma('cable_car.n.01.car')]


In [14]:
for syn in wn.synsets('Car'):
    #print(syn.lemmas())
    for lemma in syn.lemmas():
        print(lemma.name())

car
auto
automobile
machine
motorcar
car
railcar
railway_car
railroad_car
car
gondola
car
elevator_car
cable_car
car


In [15]:
def preprocess_text(text):
    doc = nlp(text.lower())
    lemmatized_words = []
    for token in doc:
        if token.is_stop or token.is_punct:
            continue
        lemmatized_words.append(token.lemma_)
    return lemmatized_words

In [16]:
text= '"Text" can refer to the written words on a page, a written message, or even a broader concept of any object that can be "read" and interpreted. It can also refer to the act of sending a written message on a mobile phone. '

In [17]:
text_l=text.lower()
text_l

'"text" can refer to the written words on a page, a written message, or even a broader concept of any object that can be "read" and interpreted. it can also refer to the act of sending a written message on a mobile phone. '

In [18]:
doc= nlp(text_l)

In [28]:
for token in doc:
    #print((token.lemma_))
    pass

In [24]:
preprocess_text(text_l)

['text',
 'refer',
 'write',
 'word',
 'page',
 'write',
 'message',
 'broad',
 'concept',
 'object',
 'read',
 'interpret',
 'refer',
 'act',
 'send',
 'write',
 'message',
 'mobile',
 'phone']

In [35]:
type(preprocess_text(text_l))

list

In [38]:
def expand_with_syn(words):
    expand_w= words.copy()
    for w in words:
        expand_w.extend(get_synonyms(w))
    return(expand_w)

In [39]:
expand_with_syn(preprocess_text(text_l))

['text',
 'refer',
 'write',
 'word',
 'page',
 'write',
 'message',
 'broad',
 'concept',
 'object',
 'read',
 'interpret',
 'refer',
 'act',
 'send',
 'write',
 'message',
 'mobile',
 'phone',
 'schoolbook',
 'text_edition',
 'textbook',
 'text',
 'school_text',
 'textual_matter',
 'pertain',
 'touch',
 'touch_on',
 'concern',
 'bring_up',
 'name',
 'consult',
 'look_up',
 'refer',
 'advert',
 'cite',
 'denote',
 'relate',
 'bear_on',
 'have-to_doe_with',
 'mention',
 'come_to',
 'spell',
 'drop_a_line',
 'save',
 'write',
 'indite',
 'publish',
 'compose',
 'pen',
 'Bible',
 'give-and-take',
 'Holy_Writ',
 'Word',
 'give_voice',
 'watchword',
 'articulate',
 'word',
 'intelligence',
 'Son',
 'phrase',
 'countersign',
 'Good_Book',
 'news',
 'tidings',
 'Logos',
 'discussion',
 'word_of_honor',
 'password',
 'parole',
 'formulate',
 'Christian_Bible',
 'Book',
 'Word_of_God',
 'Scripture',
 'Holy_Scripture',
 'pageboy',
 'Sir_Frederick_Handley_Page',
 'paginate',
 'Thomas_Nelson_Page