# Import Libraries

In [6]:
import spacy
nlp = spacy.load('en_core_web_sm')

import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# Read the text

In [7]:
TEXT = "Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')"
print(sent_tokenize(TEXT))

["Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')"]


# Tokenization

In [3]:
## converting into the token using the NLTK
token=word_tokenize(TEXT)
print(token)

## converting into token using the spacy library
print('\n')

token_2=nlp(TEXT)
for i in token_2:
    print(i.text,end=' | ')

['Over', 'the', 'last', 'quarter', 'Apple', 'sold', 'nearly', '20', 'thousand', 'iPods', 'for', 'a', 'profit', 'of', '$', '6', 'million', '.', "'", ')']


Over | the | last | quarter | Apple | sold | nearly | 20 | thousand | iPods | for | a | profit | of | $ | 6 | million | . | ' | ) | 

In [4]:
print(token[4])

print(token_2[4])

Apple
Apple


In [16]:
# Assuming you have already processed your text with spaCy and have a token object called token_2.

# Iterate through the named entities in token_2.
for ent in token_2.ents:
    # Print the text of the named entity.
    print(ent.text + ' - ' + ent.label_ + ' - ' + str(spacy.explain(ent.label_)))

the last quarter - DATE - Absolute or relative dates or periods
Apple - ORG - Companies, agencies, institutions, etc.
nearly 20 thousand - CARDINAL - Numerals that do not fall under another type
iPods - PRODUCT - Objects, vehicles, foods, etc. (not services)
$6 million - MONEY - Monetary values, including unit


This above code will go through each named entity (such as a person's name, location, organization, etc.) detected in token_2, and for each entity, it will print the following information:

ent.text: The text of the named entity.
ent.label_: The label or category of the named entity (e.g., "PERSON" for a person's name, "GPE" for a geopolitical entity).
str(spacy.explain(ent.label_)): A human-readable explanation of the entity label, which provides more context about what the label represents.
This code is useful for understanding and categorizing named entities in your text data. The spacy.explain() function is particularly helpful in providing descriptive explanations for entity labels.

In [17]:
# we have already processed your text with spaCy and have a token object called token_2.

# Iterate through the noun chunks in token_2.
for chunk in token_2.noun_chunks:
    # Print the text of the noun chunk.
    print(chunk.text)

the last quarter
Apple
nearly 20 thousand iPods
a profit


Above code will loop through each noun chunk in token_2 and print the text of each noun chunk,
effectively displaying the extracted noun phrases from the processed text. 
Noun chunks are commonly used in various NLP tasks, such as text analysis and information extraction, as they help identify important noun phrases in sentences.

In [22]:
from spacy import displacy

# Render the syntactic dependency tree of token_2
displacy.render(token_2, style='dep', jupyter=True, options={'distance':120})

This code uses the displacy.render() function to render the dependency tree. The style='dep' argument specifies that you want to visualize the syntactic dependencies, and the jupyter=True argument indicates that you want to display the visualization within a Jupyter Notebook.

In [8]:
displacy.render(token_2, style='ent', jupyter=True)

In [9]:
TEXT

"Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')"

In [2]:
from nltk.tokenize import wordpunct_tokenize

In [8]:
wordpunct_tokenize(TEXT)

['Over',
 'the',
 'last',
 'quarter',
 'Apple',
 'sold',
 'nearly',
 '20',
 'thousand',
 'iPods',
 'for',
 'a',
 'profit',
 'of',
 '$',
 '6',
 'million',
 ".')"]