## Importing spaCy and loading the English pipeline

In [6]:
import spacy
# Load the `en_core_web_sm` pipeline once; the resulting `nlp` object is
# reused for every piece of text processed below.
nlp = spacy.load("en_core_web_sm")

## Creating a spaCy `Doc` and a plain Python string from the same sentence

In [7]:
# Keep the sentence in a single place so the plain string and the Doc built
# from it can never drift apart.
simple_text = 'Tesla is looking to buy a U.S. startup for $6 million.'

# Running the pipeline on the string produces the tokenized, annotated Doc
# that the following cells iterate over.
doc = nlp(simple_text)

## Difference between iterating over a `Doc` (tokens) and a plain string (characters)

In [9]:
# Iterating a Doc yields tokens: "U.S." stays in one piece, while "$", "6"
# and the final "." each become their own token.
for token in doc:
    print(token.text)

print("======================================")

# Iterating a plain str yields one character at a time, spaces included.
for char in simple_text:
    print(char)

# Kept outside the loop so the closing separator is printed exactly once
# rather than after every character.
print("======================================")



Tesla
is
looking
to
buy
a
U.S.
startup
for
$
6
million
.
T
e
s
l
a
 
i
s
 
l
o
o
k
i
n
g
 
t
o
 
b
u
y
 
a
 
U
.
S
.
 
s
t
a
r
t
u
p
 
f
o
r
 
$
6
 
m
i
l
l
i
o
n
.


### We can split plain text on whitespace, but the resulting strings cannot use spaCy's built-in features such as POS tagging or NER

In [10]:
# str.split() with no arguments cuts on whitespace only, so punctuation stays
# glued to the neighbouring word (e.g. "$6", "million.").
whitespace_chunks = simple_text.split()
for chunk in whitespace_chunks:
    print(chunk)

Tesla
is
looking
to
buy
a
U.S.
startup
for
$6
million.


In [14]:
# For every token, show its text, its part-of-speech tag (`pos_`) and its
# dependency label (`dep_`) on one line.
for tok in doc:
    word, pos_tag, dep_label = tok.text, tok.pos_, tok.dep_
    print(f"{word} =  {pos_tag} = {dep_label}")

Tesla =  PROPN = nsubj
is =  AUX = aux
looking =  VERB = ROOT
to =  PART = aux
buy =  VERB = xcomp
a =  DET = det
U.S. =  PROPN = compound
startup =  NOUN = dobj
for =  ADP = prep
$ =  SYM = quantmod
6 =  NUM = compound
million =  NUM = pobj
. =  PUNCT = punct


## Basic pipeline provided by the spaCy library

In [15]:
# Displays the loaded pipeline as a list of (component name, component object)
# pairs, in the order they are listed by the pipeline.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x18abe99f110>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x18abe99f0b0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x18abeb2eff0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x18abf0f9f90>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x18abf104a50>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x18abeb2f920>)]

In [16]:
# Same pipeline, but only the component names as a list of strings.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [17]:
# Second example sentence, containing a contraction ("isn't"), processed with
# the same pipeline.
doc2 = nlp("Tesla isn't looking for startups anymore")

In [18]:
# Text / POS / dependency listing for the negated sentence; the contraction
# "isn't" comes out as two tokens, "is" and "n't".
for tok in doc2:
    line = f"{tok.text} =  {tok.pos_} = {tok.dep_}"
    print(line)

Tesla =  PROPN = nsubj
is =  AUX = aux
n't =  PART = neg
looking =  VERB = ROOT
for =  ADP = prep
startups =  NOUN = pobj
anymore =  ADV = advmod


## Splitting a `Doc` into sentences

In [36]:
# Multi-sentence sample text; adjacent string literals are concatenated by
# Python, so the pipeline receives one continuous string.
doc3 = nlp(
    "hi My name is Pawan. "
    "Hi I am currently working as senior consultant,I have 7 years of experience. "
    "I know ML and python"
)

In [37]:
# Walk over the sentences detected in doc3 and print each one on its own line.
for sent in doc3.sents:
    print(sent)

hi My name is Pawan.
Hi I am currently working as senior consultant,I have 7 years of experience.
I know ML and python


In [38]:
# Index the Doc by token position: position 6 is "Hi", the token that follows
# "Pawan" and its full stop.
doc3[6]

Hi

### Checking whether the token at index 6 ("Hi") starts a sentence — `is_sent_start` looks at sentence boundaries, not at the token's text or its case

In [40]:
# is_sent_start reports whether this token begins a sentence. It is True here
# because "Hi" opens the second sentence; the token's text is not compared.
doc3[6].is_sent_start

True