**Displacy Module**

In [None]:
# Install the spaCy library, a powerful package for natural language processing.
!pip install spacy

# Download the English small core model, which includes data and algorithms necessary for basic NLP tasks.
!python -m spacy download en_core_web_sm

import spacy

# Import the 'displacy' module from spaCy, used for visualizing entities, dependencies, etc.
from spacy import displacy

In [None]:
# Summary:
# - Model (e.g., en_core_web_sm): A pre-trained pipeline (learned weights plus processing components) that performs specific language processing tasks.
# - Module (e.g., displacy from spaCy): A file with Python definitions for specific functionalities, such as visualizing NLP results.
# - Library (e.g., spaCy): A collection of modules designed to provide tools for a broad set of tasks in a particular domain.


In [17]:
# Load the small English pipeline downloaded above; `nlp` is then called on
# raw text to produce the parsed documents used in the following cells.
nlp = spacy.load('en_core_web_sm')

In [35]:
# Sample sentence for the visualizations below. It mentions a company, a
# quantity and a sum of money, which gives the dependency and entity views
# something to show. The spelling is kept clean because misspelled tokens
# ("Ocer", "thousad") are unknown words to the model and distort the parse.
doc = nlp("Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.")

In [36]:
# Draw the dependency parse of the document.
# - style='dep' selects the dependency tree visualization.
# - options tunes the drawing; 'distance' sets the spacing between words.
displacy.render(
    doc,
    style='dep',
    options={'distance': 151},
)

In [37]:
# Highlight the named entities recognized in the document.
# The 'distance' option belongs to the dependency visualizer and has no
# effect on the entity view, so no options are passed here.
displacy.render(doc, style='ent')

In [None]:
# Parse a second, shorter sentence and draw its dependency tree inline.
# displacy.serve() starts a blocking local web server, which stalls
# "Restart & Run All" inside a notebook; displacy.render() shows the same
# visualization directly in the cell output. Use
# displacy.serve(doc, style='dep') only when running from a standalone script.
doc = nlp("this is a sentence.")
displacy.render(doc, style='dep')

**Stemming**

In [None]:

# Stemming is the process of reducing a word to its root form (its "stem")
# by stripping suffixes with heuristic rules. The stem is not guaranteed to be a real word.
# For example, the stem of the word "running" is "run", while "easily" becomes "easili".

# **Porter's Algorithm**
# Porter's Algorithm is a widely used algorithm for stemming English words.
# It consists of 5 steps:
# 1. Remove plurals and -ed or -ing suffixes.
# 2. Turn terminal y to i when there is another vowel in the stem.
# 3. Map double suffixes to single ones: -ization, -ational, etc.
# 4. Deal with -ic-, -full, -ness, etc.
# 5. Take off -ant, -ence, etc.


In [39]:
import nltk

In [40]:
from nltk.stem.porter import PorterStemmer

In [41]:
# Create a single Porter stemmer instance; it is reused by the stemming loops below.
p_stemmer = PorterStemmer()

In [45]:
# Sample vocabulary: several forms of "run" plus two "-ly" adverbs.
words = [
    'run',
    'runner',
    'ran',
    'runs',
    'easily',
    'quickly',
]

In [46]:
# Print every sample word next to the stem produced by the Porter stemmer.
for word in words:
    print(f'{word}--->{p_stemmer.stem(word)}')

run--->run
runner--->runner
ran--->ran
runs--->run
easily--->easili
quickly--->quickli


In [None]:

# **Snowball Stemmer**
# The English Snowball stemmer implements the Porter2 stemming algorithm,
# which is an improvement over the original Porter Stemmer.
# It provides better accuracy, and the Snowball framework also supports stemming for multiple languages.
# Generally, the Snowball Stemmer is considered to be more effective than the Porter Stemmer.


In [47]:
from nltk.stem.snowball import SnowballStemmer

In [49]:
# Create the Snowball stemmer configured for English; it is reused by the loops below.
s_stemmer = SnowballStemmer(language='english')

In [51]:
# Print every sample word next to its Snowball stem.
# print() joins its arguments with a single space, giving "word ---> stem".
for word in words:
    print(word, '--->', s_stemmer.stem(word))

run ---> run
runner ---> runner
ran ---> ran
runs ---> run
easily ---> easili
quickly ---> quick


In [52]:
# Second sample: words that share the prefix "gener" but differ in meaning,
# used to compare how the two stemmers treat them.
words = [
    'generous',
    'generation',
    'generously',
    'generate',
]

In [54]:
# Porter stemmer on the current word list: one "word ---> stem" line per word.
for word in words:
    print('{} ---> {}'.format(word, p_stemmer.stem(word)))

generous ---> gener
generation ---> gener
generously ---> gener
generate ---> gener


In [53]:
# Snowball stemmer on the same word list, for comparison with the Porter output.
for word in words:
    print('{} ---> {}'.format(word, s_stemmer.stem(word)))

generous ---> generous
generation ---> generat
generously ---> generous
generate ---> generat


In [None]:

# Stemming is used to reduce words to their root form, which can be helpful in various applications, including:

# 1. Information Retrieval (Search Engines): Improves search accuracy by matching search queries with documents containing different forms of the same word.

# 2. Text Analysis and NLP: Simplifies text data for tasks like sentiment analysis, topic modeling, and machine translation by grouping the different inflected forms of a word under one stem.

# 3. Text Classification: Enhances classification models by reducing feature dimensionality and focusing on the core meaning of words.
