In [1]:
!pip install spacy



In [2]:
# Download the large English pipeline package (en_core_web_lg) through spaCy's CLI.
# NOTE(review): the visible cells only ever load en_core_web_sm — confirm this
# ~588 MB download is actually needed.
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [3]:
import spacy

In [6]:
# Download the small English pipeline package (en_core_web_sm); this is the one
# loaded with spacy.load in the next cell.
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m34.9 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [9]:
# Load the small English pipeline; `nlp` is the object every later cell calls to process text.
nlp = spacy.load("en_core_web_sm")

In [10]:
# Run the pipeline on a short sample sentence; the following cells iterate over its tokens.
txt=nlp('GFG is looking for Data Science Interns')

### Tokenization

In [13]:
# Walk the processed text token by token and show each token's surface text.
for tok in txt:
    print(tok.text)

GFG
is
looking
for
Data
Science
Interns


### Part-of-speech tagging

In [14]:
# Show every token next to its coarse part-of-speech label.
for tok in txt:
    print(tok.text, tok.pos_)

GFG PROPN
is AUX
looking VERB
for ADP
Data PROPN
Science PROPN
Interns PROPN


### Sentence tokenization

In [21]:
# Process a two-part sample and print each sentence span the pipeline detected.
txt = nlp('qwertyuiop . asdfghjkl')
for sentence in txt.sents:
    print(sentence)

qwertyuiop .
asdfghjkl


### Stop word removal with spaCy

In [22]:
# Display the pipeline defaults' built-in stop-word set.
nlp.Defaults.stop_words

{"'d",
 "'ll",
 "'m",
 "'re",
 "'s",
 "'ve",
 'a',
 'about',
 'above',
 'across',
 'after',
 'afterwards',
 'again',
 'against',
 'all',
 'almost',
 'alone',
 'along',
 'already',
 'also',
 'although',
 'always',
 'am',
 'among',
 'amongst',
 'amount',
 'an',
 'and',
 'another',
 'any',
 'anyhow',
 'anyone',
 'anything',
 'anyway',
 'anywhere',
 'are',
 'around',
 'as',
 'at',
 'back',
 'be',
 'became',
 'because',
 'become',
 'becomes',
 'becoming',
 'been',
 'before',
 'beforehand',
 'behind',
 'being',
 'below',
 'beside',
 'besides',
 'between',
 'beyond',
 'both',
 'bottom',
 'but',
 'by',
 'ca',
 'call',
 'can',
 'cannot',
 'could',
 'did',
 'do',
 'does',
 'doing',
 'done',
 'down',
 'due',
 'during',
 'each',
 'eight',
 'either',
 'eleven',
 'else',
 'elsewhere',
 'empty',
 'enough',
 'even',
 'ever',
 'every',
 'everyone',
 'everything',
 'everywhere',
 'except',
 'few',
 'fifteen',
 'fifty',
 'first',
 'five',
 'for',
 'former',
 'formerly',
 'forty',
 'four',
 'from',
 'fron

In [23]:
# Number of entries in the default stop-word set.
len(nlp.Defaults.stop_words)

326

### Checking whether a word is a stop word

In [25]:
# Look up the vocabulary entry for 'is' and read its stop-word flag.
nlp.vocab['is'].is_stop

True

### Adding a custom word to the list of stop words

In [32]:
# Mark 'phone' as a stop word in two places: the flag on its vocabulary entry and the
# defaults' stop-word set.
# NOTE(review): presumably both steps are needed because a vocabulary entry keeps its
# own flag independently of the set — confirm against the spaCy docs.
nlp.vocab['phone'].is_stop = True
nlp.Defaults.stop_words.add('phone')

In [33]:
# Confirm the flag set in the previous cell: 'phone' should now report as a stop word.
nlp.vocab['phone'].is_stop

True

In [34]:
# The set should have grown by one entry after 'phone' was added.
len(nlp.Defaults.stop_words)

327

### Removing a custom word from the list of stop words

In [35]:
# Undo the customisation: take 'phone' out of the default stop-word set and clear the
# stop-word flag on its vocabulary entry.
# discard() is used instead of remove() because remove() raises KeyError when the word
# is already absent, which made this cell fail when re-run or run without the "add" cell.
nlp.Defaults.stop_words.discard('phone')
nlp.vocab['phone'].is_stop = False

In [36]:
# 'phone' should no longer be flagged as a stop word after the removal in the previous cell.
nlp.vocab['phone'].is_stop

False

In [37]:
# The set should be back to its original size.
len(nlp.Defaults.stop_words)

326

### Finding the stop words in a text

In [40]:
# Process a longer two-sentence sample; the next cells split its tokens into stop words
# and non-stop words.
txt=nlp('Install a default trained pipeline package, get the code to load it from within spaCy and an example to test it. For more options, see the section on available packages below.')

In [42]:
# Keep the text of every token flagged as a stop word, then print the distinct values.
stopwords = [token.text for token in txt if token.is_stop]

print(set(stopwords))

{'and', 'For', 'more', 'it', 'a', 'get', 'on', 'within', 'the', 'see', 'an', 'to', 'from', 'below'}


### Finding the words that are not stop words

In [43]:
# Print every token that is not flagged as a stop word (punctuation tokens included).
for tok in txt:
    if tok.is_stop:
        continue
    print(tok.text)

Install
default
trained
pipeline
package
,
code
load
spaCy
example
test
.
options
,
section
available
packages
.


In [45]:
# Rebuild the text from the tokens that are not stop words, separated by single spaces.
' '.join(token.text for token in txt if not token.is_stop)

'Install default trained pipeline package , code load spaCy example test . options , section available packages .'

### Synonyms and Antonyms

In [46]:
import nltk

In [47]:
# Fetch the 'wordnet' data package that the nltk.corpus.wordnet lookups below read from.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [48]:
# Fetch the 'omw-1.4' data package.
# NOTE(review): presumably the Open Multilingual Wordnet data that WordNet lookups rely
# on — confirm it is still required for the installed NLTK version.
nltk.download('omw-1.4')

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [49]:
from nltk.corpus import wordnet

In [50]:
# All WordNet synsets matching 'Book'; the recorded output lists both noun (.n.) and verb (.v.) entries.
syn=wordnet.synsets('Book')

In [51]:
# Display the list of synsets found for 'Book'.
syn

[Synset('book.n.01'),
 Synset('book.n.02'),
 Synset('record.n.05'),
 Synset('script.n.01'),
 Synset('ledger.n.01'),
 Synset('book.n.06'),
 Synset('book.n.07'),
 Synset('koran.n.01'),
 Synset('bible.n.01'),
 Synset('book.n.10'),
 Synset('book.n.11'),
 Synset('book.v.01'),
 Synset('reserve.v.04'),
 Synset('book.v.03'),
 Synset('book.v.04')]

### Finding the meaning of "Book" (definition of its first synset)

In [52]:
# Definition text of the first synset in the list (book.n.01 in the recorded output).
syn[0].definition()

'a written work or composition that has been published (printed on pages bound together)'

### Printing synonyms

In [53]:
# Gather the lemma names of every WordNet synset of 'car' (duplicates are kept, in
# synset order).
# The iteration variable is deliberately not called `syn`: that name holds the synset
# list for 'Book' built in an earlier cell, and overwriting it with a single Synset
# would break `syn[0].definition()` if that cell were run again.
synonyms = [
    lemma.name()
    for synset in wordnet.synsets('car')
    for lemma in synset.lemmas()
]
print(synonyms)

['car', 'auto', 'automobile', 'machine', 'motorcar', 'car', 'railcar', 'railway_car', 'railroad_car', 'car', 'gondola', 'car', 'elevator_car', 'cable_car', 'car']


### Printing antonyms

In [55]:
# Gather the names of all antonyms recorded for every lemma of every synset of 'good'.
# Each lemma's antonym list is read once and every entry is kept (previously only the
# first antonym per lemma was taken); lemmas without antonyms contribute nothing.
antonyms = [
    opposite.name()
    for synset in wordnet.synsets('good')
    for lemma in synset.lemmas()
    for opposite in lemma.antonyms()
]
print(antonyms)

['evil', 'evilness', 'bad', 'badness', 'bad', 'evil', 'ill']
