In [9]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

In [20]:
# Free-text client request used as the example input.
# Adjacent string literals are concatenated by Python into one string.
ex = (
    "I am a medical professional running a group of hospitals in India "
    "with clients all around the world. "
    "As a business venture we a team of 670 doctors have decided to come "
    "together on an digital platform where the clients can interact with "
    "doctors based on their need and auto fix appointment to the nearest "
    "branch without the need of directly contacting the branch. "
    "I expect this to be solved by hgs."
)

In [11]:
def preprocess(sent):
    """Tokenize raw text and tag every token with its part of speech.

    Parameters
    ----------
    sent : str
        Raw text to process.

    Returns
    -------
    The value returned by ``nltk.pos_tag`` for the tokens produced by
    ``nltk.word_tokenize(sent)`` (pairs of token and POS tag).
    """
    tokens = nltk.word_tokenize(sent)
    return nltk.pos_tag(tokens)

In [12]:
# POS-tag the example text; `sent` holds the tagged tokens that the
# regexp chunk parser is applied to.
sent = preprocess(ex)
# (evaluate `sent` on its own line to inspect the tagged tokens)

In [13]:
# Noun-phrase (NP) chunking with a regular-expression grammar: each rule
# describes a sequence of POS tags that should be grouped into one chunk.
# This rule builds an NP from an optional determiner (DT), any number of
# adjectives (JJ) and one noun tagged NN; tokens tagged NNS or NNP are not
# matched by it.
pattern = 'NP: {<DT>?<JJ>*<NN>}'

In [14]:
# Build a chunk parser from the grammar and apply it to the POS-tagged tokens.
cp = nltk.RegexpParser(pattern)
cs = cp.parse(sent)
# The printed tree shows every NP chunk as a bracketed subtree.
print(cs)

(S
  I/PRP
  am/VBP
  (NP a/DT medical/JJ professional/NN)
  running/VBG
  (NP a/DT group/NN)
  of/IN
  hospitals/NNS
  in/IN
  India/NNP
  with/IN
  clients/NNS
  all/DT
  around/IN
  (NP the/DT world/NN)
  ./.
  As/IN
  (NP a/DT business/NN)
  (NP venture/NN)
  we/PRP
  (NP a/DT team/NN)
  of/IN
  670/CD
  doctors/NNS
  have/VBP
  decided/VBN
  to/TO
  come/VB
  together/RB
  on/IN
  (NP an/DT digital/JJ platform/NN)
  where/WRB
  the/DT
  clients/NNS
  can/MD
  interact/VB
  with/IN
  doctors/NNS
  based/VBN
  on/IN
  their/PRP$
  (NP need/NN)
  and/CC
  (NP auto/NN)
  fix/VBP
  (NP appointment/NN)
  to/TO
  the/DT
  nearest/JJS
  (NP branch/NN)
  without/IN
  (NP the/DT need/NN)
  of/IN
  directly/RB
  contacting/VBG
  (NP the/DT branch/NN)
  ./.
  I/PRP
  expect/VBP
  this/DT
  to/TO
  be/VB
  solved/VBN
  by/IN
  (NP hgs/NN)
  ./.)


In [15]:
from nltk.chunk import conlltags2tree, tree2conlltags
from pprint import pprint
# Flatten the chunk tree into (word, POS tag, IOB chunk tag) triples:
# B-NP starts a chunk, I-NP continues it, O is outside any chunk.
iob_tagged = tree2conlltags(cs)
pprint(iob_tagged)

[('I', 'PRP', 'O'),
 ('am', 'VBP', 'O'),
 ('a', 'DT', 'B-NP'),
 ('medical', 'JJ', 'I-NP'),
 ('professional', 'NN', 'I-NP'),
 ('running', 'VBG', 'O'),
 ('a', 'DT', 'B-NP'),
 ('group', 'NN', 'I-NP'),
 ('of', 'IN', 'O'),
 ('hospitals', 'NNS', 'O'),
 ('in', 'IN', 'O'),
 ('India', 'NNP', 'O'),
 ('with', 'IN', 'O'),
 ('clients', 'NNS', 'O'),
 ('all', 'DT', 'O'),
 ('around', 'IN', 'O'),
 ('the', 'DT', 'B-NP'),
 ('world', 'NN', 'I-NP'),
 ('.', '.', 'O'),
 ('As', 'IN', 'O'),
 ('a', 'DT', 'B-NP'),
 ('business', 'NN', 'I-NP'),
 ('venture', 'NN', 'B-NP'),
 ('we', 'PRP', 'O'),
 ('a', 'DT', 'B-NP'),
 ('team', 'NN', 'I-NP'),
 ('of', 'IN', 'O'),
 ('670', 'CD', 'O'),
 ('doctors', 'NNS', 'O'),
 ('have', 'VBP', 'O'),
 ('decided', 'VBN', 'O'),
 ('to', 'TO', 'O'),
 ('come', 'VB', 'O'),
 ('together', 'RB', 'O'),
 ('on', 'IN', 'O'),
 ('an', 'DT', 'B-NP'),
 ('digital', 'JJ', 'I-NP'),
 ('platform', 'NN', 'I-NP'),
 ('where', 'WRB', 'O'),
 ('the', 'DT', 'O'),
 ('clients', 'NNS', 'O'),
 ('can', 'MD', 'O'),
 ('int

In [17]:
# Run NLTK's named-entity chunker on the POS-tagged tokens of `ex`;
# recognised entities appear as labelled subtrees (e.g. GPE) in the printed tree.
ne_tree = nltk.ne_chunk(pos_tag(word_tokenize(ex)))
print(ne_tree)

(S
  I/PRP
  am/VBP
  a/DT
  medical/JJ
  professional/NN
  running/VBG
  a/DT
  group/NN
  of/IN
  hospitals/NNS
  in/IN
  (GPE India/NNP)
  with/IN
  clients/NNS
  all/DT
  around/IN
  the/DT
  world/NN
  ./.
  As/IN
  a/DT
  business/NN
  venture/NN
  we/PRP
  a/DT
  team/NN
  of/IN
  670/CD
  doctors/NNS
  have/VBP
  decided/VBN
  to/TO
  come/VB
  together/RB
  on/IN
  an/DT
  digital/JJ
  platform/NN
  where/WRB
  the/DT
  clients/NNS
  can/MD
  interact/VB
  with/IN
  doctors/NNS
  based/VBN
  on/IN
  their/PRP$
  need/NN
  and/CC
  auto/NN
  fix/VBP
  appointment/NN
  to/TO
  the/DT
  nearest/JJS
  branch/NN
  without/IN
  the/DT
  need/NN
  of/IN
  directly/RB
  contacting/VBG
  the/DT
  branch/NN
  ./.
  I/PRP
  expect/VBP
  this/DT
  to/TO
  be/VB
  solved/VBN
  by/IN
  hgs/NN
  ./.)


In [None]:
# NOTE(review): this cell has no execution count and no recorded output,
# so it appears not to have been run — confirm it still works.
import spacy
from spacy.lang.en.examples import sentences

# Load the `en_core_web_sm` pipeline and run it on the first of the example
# sentences imported from spaCy.
nlp = spacy.load('en_core_web_sm')
doc = nlp(sentences[0])
print(doc.text)
# For every token, print its text, part-of-speech tag (`pos_`) and
# dependency label (`dep_`).
for token in doc:
    print(token.text, token.pos_, token.dep_)

In [18]:
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm
# Load the spaCy pipeline from the installed `en_core_web_sm` package.
nlp = en_core_web_sm.load()


In [22]:
# Second sample client request. To analyse the first example instead, use `doc = nlp(ex)`.
doc = nlp('I am the managing director of the Ram group of restaurants in India, I would like to digitalize my business by integrating all the braches under a single database which includes the bill details, stock purchase,etc. of all the branches. So I expect an optimum solution from hgs.')
# Print every named entity spaCy found, together with its label.
for ent in doc.ents:
    print(ent.text, ent.label_)

Ram PERSON
India GPE


In [23]:
# Split the example text into sentences.
sentences = nltk.sent_tokenize(ex)

# Tokenize each sentence into words.
token_sentences = [word_tokenize(sentence) for sentence in sentences]

# Tag every tokenized sentence with parts of speech.
pos_sentences = [nltk.pos_tag(tokens) for tokens in token_sentences]

# Chunk named entities; with binary=True every entity is labelled "NE".
chunked_sentences = nltk.ne_chunk_sents(pos_sentences, binary=True)

# Print each top-level subtree labelled "NE".
# The outer loop variable is deliberately not called `sent`: a top-level
# `for` rebinds its target in the notebook namespace, which would overwrite
# the POS-tagged `sent` that the regexp-chunking cell parses.
for chunked_sentence in chunked_sentences:
    for chunk in chunked_sentence:
        # Only labelled subtrees can be entities; nodes without label() are skipped.
        if hasattr(chunk, "label") and chunk.label() == "NE":
            print(chunk)


(NE India/NNP)


In [5]:
# Collect the text of every entity found in `doc`.
items = [x.text for x in doc.ents]

In [6]:
# NOTE(review): the output recorded for this cell lists 'India' and '670',
# while the entity loop over the current `doc` printed Ram and India — the
# recorded output presumably comes from an earlier run on a different text;
# confirm by re-running from a fresh kernel.
items

['India', '670']