<a href="https://colab.research.google.com/github/NjoodJ/Arabic-Text-to-Speech-using-gtts/blob/main/en_core_web_sm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import spacy
import pandas as pd
from spacy import displacy
# Load the 'en_core_web_sm' pipeline once; every later cell reuses `nlp`.
nlp = spacy.load(name='en_core_web_sm')
# Leave the component names as the last expression so the cell displays them.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [None]:
# Sample passage analysed by every section below. Note the full stop after
# "Wayne": the recorded output further down shows the text split at that point.
text = (
    "Apple Inc. was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne. "
    "on April 1, 1976, in California."
)
# Run the loaded pipeline over the passage; `doc` is shared by the cells below.
doc = nlp(text)

# 1. Tokenization

In [None]:
# Iterating a Doc yields its tokens, so materialising it as a list shows them all.
print(list(doc))

[Apple, Inc., was, founded, by, Steve, Jobs, ,, Steve, Wozniak, ,, and, Ronald, Wayne, ., on, April, 1, ,, 1976, ,, in, California, .]


# 2. Named Entity Recognition

In [None]:
# Print every entity span found in the document as "<text> <label>".
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

Apple Inc. ORG
Steve Jobs PERSON
Steve Wozniak PERSON
Ronald Wayne PERSON
April 1, 1976 DATE
California GPE


In [None]:
# Visualise the document with displaCy's entity style ('ent').
displacy.render(doc, style='ent')

# 3. Part-of-speech (POS) Tagging

In [None]:
# Pair each token's text with its part-of-speech tag (`pos_`) and print the list.
print([
    (token.text, token.pos_)
    for token in doc
])

[('Apple', 'PROPN'), ('Inc.', 'PROPN'), ('was', 'AUX'), ('founded', 'VERB'), ('by', 'ADP'), ('Steve', 'PROPN'), ('Jobs', 'PROPN'), (',', 'PUNCT'), ('Steve', 'PROPN'), ('Wozniak', 'PROPN'), (',', 'PUNCT'), ('and', 'CCONJ'), ('Ronald', 'PROPN'), ('Wayne', 'PROPN'), ('.', 'PUNCT'), ('on', 'ADP'), ('April', 'PROPN'), ('1', 'NUM'), (',', 'PUNCT'), ('1976', 'NUM'), (',', 'PUNCT'), ('in', 'ADP'), ('California', 'PROPN'), ('.', 'PUNCT')]


# 4. Dependency Parsing

In [None]:
# First pass: show each token next to its syntactic head token.
for token in doc:
    print(f"{token.text} {token.head!s}")

# Second pass: list only the tokens flagged as starting a sentence.
for token in doc:
    if not token.is_sent_start:
        continue
    print(token.text, token.is_sent_start)

# Finally, draw the parse with displaCy's dependency style ('dep').
displacy.render(doc, style="dep")

Apple Inc.
Inc. founded
was founded
founded founded
by founded
Steve Jobs
Jobs by
, Jobs
Steve Wozniak
Wozniak Jobs
, Wozniak
and Wozniak
Ronald Wayne
Wayne Wozniak
. founded
on on
April on
1 April
, April
1976 April
, April
in April
California in
. on
Apple True
on True


# 5. Lemmatization

In [None]:
# Collect the lemma string (`lemma_`) of every token, in document order.
lemmas = [tok.lemma_ for tok in doc]
print(lemmas)

['Apple', 'Inc.', 'be', 'found', 'by', 'Steve', 'Jobs', ',', 'Steve', 'Wozniak', ',', 'and', 'Ronald', 'Wayne', '.', 'on', 'April', '1', ',', '1976', ',', 'in', 'California', '.']


# 6. Sentence segmentation

In [None]:
# Walk the sentence spans exposed by `doc.sents` and print each one's text.
for sent_span in doc.sents:
    print(sent_span.text)

Apple Inc. was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne.
on April 1, 1976, in California.
