In [9]:
import spacy 
import nltk
# Fetch the WordNet corpus that `wn` reads from; NLTK reports "already up-to-date"
# and skips the download when the package is present locally.
nltk.download('wordnet')
from nltk.corpus import wordnet as wn
# NOTE(review): Counter and np are not referenced in the cells visible here —
# confirm they are used further down before keeping them.
from collections import Counter
import numpy as np


# Load the `en_core_web_sm` spaCy pipeline once; `nlp` is shared by
# preprocess_text and by the exploration cells that parse the sample text.
nlp= spacy.load('en_core_web_sm')

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/hivagheisari/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


## Synonyms:

In [10]:
def get_synonyms(word):
    """Collect every lemma name WordNet lists for ``word``.

    All synsets returned by ``wn.synsets(word)`` are visited and the name of
    each of their lemmas is gathered into a single set, so a name that occurs
    in several synsets appears only once.

    Args:
        word: the term to look up in WordNet.

    Returns:
        set: the distinct lemma names; empty when WordNet returns no synsets.
    """
    return {
        lemma.name()
        for synset in wn.synsets(word)
        for lemma in synset.lemmas()
    }

In [11]:
# Quick check of get_synonyms: note the result contains 'car' itself and that
# multi-word lemmas come back joined with underscores (e.g. 'cable_car').
get_synonyms('Car')

{'auto',
 'automobile',
 'cable_car',
 'car',
 'elevator_car',
 'gondola',
 'machine',
 'motorcar',
 'railcar',
 'railroad_car',
 'railway_car'}

In [12]:
# Show the raw list of Synset objects WordNet returns for 'Car'.
print(f" wn.synsets('Car') is {wn.synsets('Car')}")

 wn.synsets('Car') is [Synset('car.n.01'), Synset('car.n.02'), Synset('car.n.03'), Synset('car.n.04'), Synset('cable_car.n.01')]


In [13]:
# Print the Lemma objects of each synset, one synset per output line.
for syn in wn.synsets('Car'):
    print(syn.lemmas())

[Lemma('car.n.01.car'), Lemma('car.n.01.auto'), Lemma('car.n.01.automobile'), Lemma('car.n.01.machine'), Lemma('car.n.01.motorcar')]
[Lemma('car.n.02.car'), Lemma('car.n.02.railcar'), Lemma('car.n.02.railway_car'), Lemma('car.n.02.railroad_car')]
[Lemma('car.n.03.car'), Lemma('car.n.03.gondola')]
[Lemma('car.n.04.car'), Lemma('car.n.04.elevator_car')]
[Lemma('cable_car.n.01.cable_car'), Lemma('cable_car.n.01.car')]


In [14]:
# Same traversal as get_synonyms, but printing each lemma name instead of
# collecting it: 'car' shows up once per synset, which is why get_synonyms
# gathers the names into a set.
for syn in wn.synsets('Car'):
    for lemma in syn.lemmas():
        print(lemma.name())

car
auto
automobile
machine
motorcar
car
railcar
railway_car
railroad_car
car
gondola
car
elevator_car
cable_car
car


In [15]:
def preprocess_text(text):
    """Lower-case ``text``, parse it with ``nlp`` and return the kept lemmas.

    Tokens flagged as stop words, punctuation or whitespace are dropped; the
    lemma of every remaining token is returned in document order.

    Args:
        text (str): raw input text.

    Returns:
        list: lemmas of the retained tokens; empty when no token survives
        the filters.
    """
    doc = nlp(text.lower())
    return [
        token.lemma_
        for token in doc
        # is_space: newlines and runs of spaces are emitted as their own
        # tokens and are neither stop words nor punctuation, so without this
        # check their "\n" / " " lemmas would leak into the result.
        if not (token.is_stop or token.is_punct or token.is_space)
    ]

In [16]:
# Sample paragraph used by the cells below to inspect spaCy's tokens and lemmas.
text= '"Text" can refer to the written words on a page, a written message, or even a broader concept of any object that can be "read" and interpreted. It can also refer to the act of sending a written message on a mobile phone. '

In [17]:
# Lower-case the sample before parsing, mirroring the text.lower() step inside
# preprocess_text.
text_l=text.lower()
# Echo the lowered string as the cell output.
text_l

'"text" can refer to the written words on a page, a written message, or even a broader concept of any object that can be "read" and interpreted. it can also refer to the act of sending a written message on a mobile phone. '

In [18]:
# Parse the lowered sample with the shared spaCy pipeline; `doc` is iterated
# token by token in the next cell.
doc= nlp(text_l)

In [21]:
# Walk every token of the parsed sample (no stop-word or punctuation filtering
# here) and print its lemma.
# NOTE(review): list() on a str splits it into characters, which is why the
# output shows each lemma as a list of letters; print(token.lemma_) would show
# the lemma as a single string — confirm which was intended.
for token in doc:
    print(list(token.lemma_))

['"']
['t', 'e', 'x', 't']
['"']
['c', 'a', 'n']
['r', 'e', 'f', 'e', 'r']
['t', 'o']
['t', 'h', 'e']
['w', 'r', 'i', 't', 'e']
['w', 'o', 'r', 'd']
['o', 'n']
['a']
['p', 'a', 'g', 'e']
[',']
['a']
['w', 'r', 'i', 't', 'e']
['m', 'e', 's', 's', 'a', 'g', 'e']
[',']
['o', 'r']
['e', 'v', 'e', 'n']
['a']
['b', 'r', 'o', 'a', 'd']
['c', 'o', 'n', 'c', 'e', 'p', 't']
['o', 'f']
['a', 'n', 'y']
['o', 'b', 'j', 'e', 'c', 't']
['t', 'h', 'a', 't']
['c', 'a', 'n']
['b', 'e']
['"']
['r', 'e', 'a', 'd']
['"']
['a', 'n', 'd']
['i', 'n', 't', 'e', 'r', 'p', 'r', 'e', 't']
['.']
['i', 't']
['c', 'a', 'n']
['a', 'l', 's', 'o']
['r', 'e', 'f', 'e', 'r']
['t', 'o']
['t', 'h', 'e']
['a', 'c', 't']
['o', 'f']
['s', 'e', 'n', 'd']
['a']
['w', 'r', 'i', 't', 'e']
['m', 'e', 's', 's', 'a', 'g', 'e']
['o', 'n']
['a']
['m', 'o', 'b', 'i', 'l', 'e']
['p', 'h', 'o', 'n', 'e']
['.']
