In [None]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.matcher import PhraseMatcher
from spacy.language import Language
from spacy.tokenizer import Tokenizer
from spacy import displacy
import re

# Load the "en_core_web_sm" pipeline once; every exercise below reuses it.
nlp = spacy.load("en_core_web_sm")

# Exercise 1: print each named entity in the sentence next to its label.
print("Exercise 1:")

doc = nlp("Taylor Swift performed in Los Angeles on March 3rd, 2023.")

for ent in doc.ents:
    print(ent, ent.label_)

# Exercise 2: print only the entities labelled PERSON.
print()
print("Exercise 2:")


def print_person_entity(s):
    """Print every entity in `s` whose label is PERSON, one per line.

    Args:
        s: Raw text to run through the module-level `nlp` pipeline.
    """
    for entity in nlp(s).ents:
        if entity.label_ == 'PERSON':
            print(entity)


# Backward-compatible alias: the function was originally defined under this
# misspelled name, so it is kept for any existing callers.
print_pesron_entity = print_person_entity

print_person_entity("Serena Williams had dinner with Tom Hanks in Paris.")

# Exercise 3: show each token alongside its lemma.
print()
print("Exercise 3:")

doc = nlp("She was running and had run 5 kilometers by 7am.")

for tok in doc:
    surface, lemma = tok.text, tok.lemma_
    print(surface, "→", lemma)

# Exercise 4: filter stop words out of a sentence.
print()
print("Exercise 4:")


def print_not_stop_words(s):
    """Print the list of token texts in `s` that are not stop words.

    A token is kept when its lowercased text is absent from
    `nlp.Defaults.stop_words`; the kept texts retain their original casing.

    Args:
        s: Raw text to run through the module-level `nlp` pipeline.
    """
    stop_words = nlp.Defaults.stop_words
    kept = [tok.text for tok in nlp(s) if tok.text.lower() not in stop_words]
    print(kept)


print_not_stop_words("This is an example sentence with some stop words.")

# Exercise 5: register a custom stop word and report the stop words found.
print()
print("Exercise 5:")

# Add the word to the default stop-word set and also flag its vocab entry.
nlp.Defaults.stop_words.add('powerful')
nlp.vocab['powerful'].is_stop = True

doc = nlp("SpaCy is awesome and powerful.")
for tok in doc:
    lowered = tok.text.lower()
    if lowered in nlp.Defaults.stop_words:
        print(lowered, "is a Stop Word")

# Exercise 6: find spelling variants of a phrase with PhraseMatcher.
print()
print("Exercise 6:")

matcher = PhraseMatcher(nlp.vocab)

# Each spelling variant is listed as its own pattern.
terms = [
    "Artificial Intelligence",
    "ArtificialIntelligence",
    "artificial intelligence",
]
# Patterns only need tokenization, so build them with nlp.make_doc instead of
# nlp(...): this skips the tagger/parser/NER and yields the same tokens.
patterns = [nlp.make_doc(term) for term in terms]
matcher.add("ArtificialIntelligence", patterns)

doc = nlp("Artificial Intelligence is the future. I study artificial intelligence.")
matches = matcher(doc)

# Each match is (match_id, start, end); start/end are token offsets into doc.
for match_id, start, end in matches:
    print(doc[start:end].text)

# Exercise 7: print part-of-speech information for every token.
print()
print("Exercise 7:")


def pos_tag(s):
    """Print text, coarse POS, fine-grained tag and tag description per token.

    Args:
        s: Raw text to run through the module-level `nlp` pipeline.
    """
    for tok in nlp(s):
        description = spacy.explain(tok.tag_)
        print(f'{tok.text:3} {tok.pos_:5} {tok.tag_:3} {description}')


pos_tag("The cat sat on the mat.")

# Exercise 8: split sentences on a custom "^" delimiter.
print()
print("Exercise 8:")

# Treat "^" as an infix so it is split out of strings such as "great^It".
# Only the infix rule of the existing tokenizer is overridden. Building a
# fresh Tokenizer(nlp.vocab, infix_finditer=...) would discard the pipeline's
# default prefix/suffix rules and tokenizer exceptions, so every later
# nlp(...) call would leave punctuation such as the "." in "useful."
# attached to the preceding word.
infixes = list(nlp.Defaults.infixes) + [r"[\^]"]
infix_re = re.compile("|".join(infixes))
nlp.tokenizer.infix_finditer = infix_re.finditer

@Language.component("set_custom_boundaries")
def set_custom_boundaries(doc):
    """Mark the first token, and each token following a "^", as a sentence start.

    Args:
        doc: The Doc being processed by the pipeline.

    Returns:
        The same Doc, with `is_sent_start` set to True on the affected tokens.
    """
    # An empty Doc (e.g. from empty input text) has no first token to mark,
    # and indexing doc[0] would raise IndexError.
    if len(doc) == 0:
        return doc
    doc[0].is_sent_start = True
    # Skip the last token: nothing follows it, so doc[token.i + 1] would be
    # out of range.
    for token in doc[:-1]:
        if token.text == "^":
            doc[token.i + 1].is_sent_start = True
    return doc

# Insert the custom boundary component ahead of the "parser" component.
nlp.add_pipe("set_custom_boundaries", before="parser")

doc = nlp("SpaCy is great^It helps with NLP tasks^Really useful.")

# Print each sentence the pipeline produced for the "^"-delimited text.
for sentence in doc.sents:
    print("Sentence:", sentence)

# Exercise 9: tag a user-supplied sentence and visualise it with displaCy.
print()
print("Exercise 9:")

user_text = input("Please enter a sentence: ")
doc = nlp(user_text)

# One row per token: text, coarse POS, fine-grained tag.
for tok in doc:
    print(f'{tok.text:10} {tok.pos_:10} {tok.tag_:3}')

# Render the entity view first, then the dependency view, each preceded by a
# blank line.
for view in ("ent", "dep"):
    print()
    displacy.render(doc, style=view, jupyter=True)

Exercise 1:
Taylor Swift PERSON
Los Angeles GPE
March 3rd, 2023 DATE

Exercise 2:
Serena Williams
Tom Hanks

Exercise 3:
She → she
was → be
running → run
and → and
had → have
run → run
5 → 5
kilometers → kilometer
by → by
7 → 7
am → am
. → .

Exercise 4:
['example', 'sentence', 'stop', 'words', '.']

Exercise 5:
is is a Stop Word
and is a Stop Word
powerful is a Stop Word

Exercise 6:
Artificial Intelligence
artificial intelligence

Exercise 7:
The DET   DT  determiner
cat NOUN  NN  noun, singular or mass
sat VERB  VBD verb, past tense
on  ADP   IN  conjunction, subordinating or preposition
the DET   DT  determiner
mat NOUN  NN  noun, singular or mass
.   PUNCT .   punctuation mark, sentence closer

Exercise 8:
Sentence: SpaCy is great^
Sentence: It helps with NLP tasks^
Sentence: Really useful.

Exercise 9:
