In [4]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "Hello, world! This is SpaCy."
doc = nlp(text)

for token in doc:
    print(token.text)

Hello
,
world
!
This
is
SpaCy
.


In [5]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "SpaCy is an amazing NLP library."
doc = nlp(text)

for token in doc:
    print(f"{token.text}: {token.pos_}")

SpaCy: PROPN
is: AUX
an: DET
amazing: ADJ
NLP: PROPN
library: NOUN
.: PUNCT


In [6]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "Apple is looking at buying U.K. startup for $1 billion."
doc = nlp(text)

for ent in doc.ents:
    print(f"{ent.text}: {ent.label_}")

Apple: ORG
U.K.: GPE
$1 billion: MONEY


In [7]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "The quick brown fox jumps over the lazy dog."
doc = nlp(text)

for token in doc:
    print(f"{token.text} --> {token.dep_} --> {token.head.text}")

The --> det --> fox
quick --> amod --> fox
brown --> amod --> fox
fox --> nsubj --> jumps
jumps --> ROOT --> jumps
over --> prep --> jumps
the --> det --> dog
lazy --> amod --> dog
dog --> pobj --> over
. --> punct --> jumps


In [8]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "The cats are running."
doc = nlp(text)

for token in doc:
    print(f"{token.text}: {token.lemma_}")

The: the
cats: cat
are: be
running: run
.: .


In [9]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "This is the first sentence. This is the second sentence."
doc = nlp(text)

for sent in doc.sents:
    print(sent.text)

This is the first sentence.
This is the second sentence.


In [10]:
import spacy

nlp = spacy.load("en_core_web_sm")
doc1 = nlp("I love programming.")
doc2 = nlp("I enjoy coding.")

print(f"Similarity: {doc1.similarity(doc2)}")

Similarity: 0.8356899784596395


  print(f"Similarity: {doc1.similarity(doc2)}")


In [12]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "This is a simple sentence."
doc = nlp(text)

for token in doc:
    if not token.is_stop:
        print(token.text)

simple
sentence
.


In [13]:
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
text = "The quick brown fox jumps over the lazy dog."
doc = nlp(text)

displacy.render(doc, style="dep")

In [14]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "SpaCy is fast and efficient."
doc = nlp(text)

for token in doc:
    print(f"{token.text}: {token.is_alpha}, {token.is_stop}, {token.shape_}")

SpaCy: True, False, XxxXx
is: True, True, xx
fast: True, False, xxxx
and: True, True, xxx
efficient: True, False, xxxx
.: False, False, .


In [17]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "I love programming in Python."
doc = nlp(text)



In [18]:
import re

text = "Visit us at https://www.example.com and http://example.org for more info."

# Regular expression pattern for URLs
url_pattern = r'(https?://[^\s]+)'
urls = re.findall(url_pattern, text)

print("Extracted URLs:", urls)

Extracted URLs: ['https://www.example.com', 'http://example.org']


In [19]:
import re

text = """
Check out these links:
1. https://www.example.com
2. http://example.org
3. Not a URL: example.com
"""

# Extract URLs
urls = re.findall(r'(https?://[^\s]+)', text)

print("Extracted URLs:", urls)

Extracted URLs: ['https://www.example.com', 'http://example.org']
