In [None]:
!pip install spacy

In [None]:
!python -m spacy download en


In [3]:
from spacy.lang.en import English

In [4]:
nlp = English()

In [5]:
text = """When learning data science, you shouldn't get discouraged!
Challenges and setbacks aren't failures, they're just part of the journey. You've got this!"""

In [6]:
my_doc = nlp(text)

#### Word Tokenization

In [7]:
token_list = []
for token in my_doc:
    token_list.append(token.text)
print(token_list)    

['When', 'learning', 'data', 'science', ',', 'you', 'should', "n't", 'get', 'discouraged', '!', '\n', 'Challenges', 'and', 'setbacks', 'are', "n't", 'failures', ',', 'they', "'re", 'just', 'part', 'of', 'the', 'journey', '.', 'You', "'ve", 'got', 'this', '!']


#### Sentence Tokenization

In [8]:
sbd = nlp.create_pipe('sentencizer')

In [9]:
nlp.add_pipe(sbd)

In [10]:
doc2 = nlp(text)

In [11]:
sents_list = []

In [12]:
for sent in doc2.sents:    
    sents_list.append(sent.text)
 

print(sents_list)

#### Removing Stopwords 

In [13]:
from spacy.lang.en.stop_words import STOP_WORDS

In [14]:
filtered_set = []

In [15]:
doc3 = nlp(text)

In [16]:
for word in doc3:
    if word.is_stop==False:
        filtered_set.append(word)

In [17]:
print("Filtered Sentence:",filtered_set)

Filtered Sentence: [learning, data, science, ,, discouraged, !, 
, Challenges, setbacks, failures, ,, journey, ., got, !]


#### Lemmatization

In [18]:
lem = nlp("run runs running runner")

In [20]:
for word in lem:
    print(word.text,word.lemma_)

run run
runs runs
running running
runner runner


#### POS TAGGING

In [21]:
import en_core_web_sm

In [22]:
nlp = en_core_web_sm.load()

In [23]:
docs4 = nlp(u"All is well that ends well.")

In [24]:
for words in docs4:
    print(words.text,words.pos_)

All DET
is AUX
well ADJ
that DET
ends VERB
well ADV
. PUNCT
