In [None]:
# Bring in spaCy, then load the small pre-trained English pipeline by name
import spacy

MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(MODEL_NAME)

In [None]:
# Sample sentence wrapped in literal double quotes; it contains a contraction
# and abbreviation periods so the tokenizer has something to split
my_string = "\"We're from TX. USA!\""
print(my_string)

"We're from TX. USA!"


In [None]:
# Run the loaded spaCy pipeline (`nlp`) on the sample string; the resulting
# Doc object is stored in `doc` and iterated token by token in the next cell
doc = nlp(my_string)

In [None]:
# Walk the simple Doc and print the verbatim text of each token on its own line
for tok in doc:
    print(tok.text)

"
We
're
from
TX
.
USA
!
"


In [None]:
# A harder input: title abbreviation, possessive, parentheses, hyphenated words,
# currency, a URL, an e-mail address, quoted speech and an em dash.
# Adjacent string literals are concatenated into one sentence before processing.
complex_text = (
    "Dr. Smith's research (published in 2023) shows e-commerce reached $2.5 billion! "
    "Visit https://example.com or email: support@example.com. "
    "The CEO said, \"We're expanding into AI, blockchain, and IoT—it's cutting-edge.\""
)
doc2 = nlp(complex_text)

In [None]:
# Show how the complex Doc was split: one token's text per output line
for piece in doc2:
    print(piece.text)

Dr.
Smith
's
research
(
published
in
2023
)
shows
e
-
commerce
reached
$
2.5
billion
!
Visit
https://example.com
or
email
:
support@example.com
.
The
CEO
said
,
"
We
're
expanding
into
AI
,
blockchain
,
and
IoT
—
it
's
cutting
-
edge
.
"


In [None]:
# Build a third Doc from a sentence mixing abbreviations, a possessive,
# a percentage and two kinds of dash
earnings_text = "Apple Inc.'s Q3 2023 earnings—up 15% - were announced on Oct. 30th."
doc3 = nlp(earnings_text)

# Print all tokens on a single line, each one followed by the " | " separator
# (no trailing newline is written)
print("".join(f"{token} | " for token in doc3), end="")

Apple | Inc. | 's | Q3 | 2023 | earnings | — | up | 15 | % | - | were | announced | on | Oct. | 30th | . | 

In [None]:
# For every named entity found in doc3, print its text and label, followed by
# spaCy's human-readable explanation of that label and a blank line
for ent in doc3.ents:
    label = ent.label_
    description = spacy.explain(label)
    print(f"Entity: {ent}, Label: {label}", f"Explain: {description}\n", sep="\n")

Entity: Apple Inc.'s, Label: ORG
Explain: Companies, agencies, institutions, etc.

Entity: Q3 2023, Label: DATE
Explain: Absolute or relative dates or periods

Entity: 15%, Label: PERCENT
Explain: Percentage, including "%"

Entity: Oct. 30th, Label: DATE
Explain: Absolute or relative dates or periods



In [None]:
# Print the text of each noun chunk (a phrase built around a noun head) in doc3
for phrase in doc3.noun_chunks:
    print(phrase.text)

Apple Inc.'s Q3 2023 earnings
Oct. 30th


In [None]:
# Import displacy for visualizing dependency and entity relationships
from spacy import displacy

In [None]:
# Parse a short sentence and draw its dependency tree inline in the notebook;
# the "distance" option is passed through to the dependency visualizer
doc4 = nlp("Tesla's stock rose 12% today!")
dep_options = {"distance": 100}
displacy.render(doc4, style="dep", jupyter=True, options=dep_options)

In [None]:
# Visualize the named entities of doc4 inline in the notebook.
# No options are passed: "distance" is a setting of the dependency ("dep")
# visualizer and is not used by the entity ("ent") visualizer.
displacy.render(doc4, style="ent", jupyter=True)