In [3]:
import en_core_web_sm
nlp = en_core_web_sm.load()
doc = nlp(u"the Titanic is a movie.")
print([(w.text, w.pos_) for w in doc])

[('the', 'DET'), ('Titanic', 'PROPN'), ('is', 'VERB'), ('a', 'DET'), ('movie', 'NOUN'), ('.', 'PUNCT')]


In [9]:
from __future__ import unicode_literals, print_function

import plac
import random
from pathlib import Path
import spacy
from spacy.util import minibatch, compounding


TRAIN_DATA = [("I would have to say captain marvel's plot line was even better than iron man.", {'entities': [(20, 34, 'moviename')]}), 
              ("I would have to say captain marvel's plot line was even better than iron man.", {'entities': [(68, 76, 'moviename')]}), 
              ('no one deney that The  iron man  is better than captain marvel in terms of plots', {'entities': [(23, 31, 'moviename')]}), 
              ('no one deney that The  iron man  is better than captain marvel in terms of plots', {'entities': [(48, 62, 'moviename')]}), 
              ("i realized  iron man 's script was way better than captain marvel.", {'entities': [(12, 20, 'moviename')]}), 
              ("i realized  iron man 's script was way better than captain marvel.", {'entities': [(12, 20, 'moviename')]}), 
              ('I would say  iron man was less boring compared with captain marvel.', {'entities': [(52, 66, 'moviename')]}), 
              ('I would say  iron man was less boring compared with captain marvel.', {'entities': [(13, 21, 'moviename')]}), 
              ('i found that the captain marvel had more interaction with audience than  iron man.', {'entities': [(17, 31, 'moviename')]}), 
              ('i found that the captain marvel had more interaction with audience than  iron man.', {'entities': [(73, 81, 'moviename')]}), 
              ('the script of the first avenger was more intriguing than the thor', {'entities': [(14, 31, 'moviename')]}), 
              ('the script of the first avenger was more intriguing than the thor', {'entities': [(61, 65, 'moviename')]}), 
              ("most people don't understand the thor  is more uproarious than the first avenger ", {'entities': [(33, 37, 'moviename')]}), 
              ("most people don't understand the thor  is more uproarious than the first avenger ", {'entities': [(63, 81, 'moviename')]}), 
              ('the first avenger has a more tendious storires than thor, superheros are a waste of time always.', {'entities': [(0, 17, 'moviename')]}), 
              ('the first avenger has a more tendious storires than thor, superheros are a waste of time always.', {'entities': [(52, 56, 'moviename')]}), 
              ('thor was second-rate, the first avenger  was was better than captain american.', {'entities': [(22, 39, 'moviename')]}), 
              ('thor was second-rate, the first avenger  was was better than captain american.', {'entities': [(61, 77, 'moviename')]}), 
              ('the ending of the first avenger  is somewhat more surprising than the thor.', {'entities': [(14, 31, 'moviename')]}), 
              ('the ending of the first avenger  is somewhat more surprising than the thor.', {'entities': [(70, 74, 'moviename')]}), 
              ('the incredible hulk  made me more absorbing into the story while ant-man sucks.', {'entities': [(0, 20, 'moviename')]}), 
              ('the incredible hulk  made me more absorbing into the story while ant-man sucks.', {'entities': [(65, 72, 'moviename')]}), 
              ('the ant-man did not interact with us and the incredible hulk  did better than the batman.', {'entities': [(4, 11, 'moviename')]}), 
              ('the ant-man did not interact with us and the incredible hulk  did better than the batman.', {'entities': [(41, 60, 'moviename')]}), 
              ('the quality of production of the incredible hulk was way better than ant-man. ', {'entities': [(29, 48, 'moviename')]}), 
              ('the quality of production of the incredible hulk was way better than ant-man. ', {'entities': [(69, 76, 'moviename')]}), 
              ('I loved the incredible hulk but ant-man on the other hand is utter trash.', {'entities': [(32, 39, 'moviename')]}), 
              ('I loved the incredible hulk but ant-man on the other hand is utter trash.', {'entities': [(8, 27, 'moviename')]}), 
              ('I think the incredible hulk  is even better than ant-man for a connection with the characters.', {'entities': [(8, 28, 'moviename')]}), 
              ('I think the incredible hulk  is even better than ant-man for a connection with the characters.', {'entities': [(49, 56, 'moviename')]}), 
              ('infinity wars set up an amazing open end and endgame ruins 10 years of Marvel films.', {'entities': [(0, 13, 'moviename')]}), 
              ('infinity wars set up an amazing open end and endgame ruins 10 years of Marvel films.', {'entities': [(45, 52, 'moviename')]}),
              ('I did not think that the ending of endgame was lacklustre, or less exciting than infinity war.', {'entities': [(35, 42, 'moviename')]}), 
              ('I did not think that the ending of endgame was lacklustre, or less exciting than infinity war.', {'entities': [(81, 93, 'moviename')]}), 
              ('Personally, I would have to say endgame was even better than infinity war, most likely due to the fact that it is the grand finale of all these movies.', {'entities': [(32, 39, 'moviename')]}), 
              ('Personally, I would have to say endgame was even better than infinity war, most likely due to the fact that it is the grand finale of all these movies.', {'entities': [(61, 73, 'moviename')]}), 
              ('The infinity war is better than endgame in terms of plots.', {'entities': [(4, 16, 'moviename')]}), 
              ('The infinity war is better than endgame in terms of plots.', {'entities': [(32, 39, 'moviename')]}), 
              ('endgame is a disappointment when compared to infinity war but still a fun watch for fans.', {'entities': [(0, 7, 'moviename')]}), 
              ('endgame is a disappointment when compared to infinity war but still a fun watch for fans.', {'entities': [(45, 57, 'moviename')]}), 
              ('endgame is not on par with infinity war. ', {'entities': [(0, 7, 'moviename')]}), 
              ('endgame is not on par with infinity war. ', {'entities': [(27, 39, 'moviename')]}), 
              ('The entry of Captain America with the background sound to save Vision in Infinity War was again epic but in Endgame it is not at all exciting.', {'entities': [(13, 28, 'moviename')]}), 
              ('The entry of Captain America with the background sound to save Vision in Infinity War was again epic but in Endgame it is not at all exciting.', {'entities': [(73, 85, 'moviename')]}), 
              ('The entry of Captain America with the background sound to save Vision in Infinity War was again epic but in Endgame it is not at all exciting.', {'entities': [(108, 115, 'moviename')]}), 
              ("Infinity war's script was way better than endgame.", {'entities': [(0, 12, 'moviename')]}), 
              ("Infinity war's script was way better than endgame.", {'entities': [(42, 49, 'moviename')]}), 
              ('endgame is more successful than infinity war. ', {'entities': [(0, 7, 'moviename')]}), 
              ('endgame is more successful than infinity war. ', {'entities': [(32, 44, 'moviename')]}), 
              ("It's a pitty that after doctor strange, which was awesome, we got black panther this dissapointing finale.", {'entities': [(24, 38, 'moviename')]}), 
              ("It's a pitty that after doctor strange, which was awesome, we got black panther this dissapointing finale.", {'entities': [(66, 79, 'moviename')]}), 
              ('black panther has more character development than the doctor strange.', {'entities': [(0, 13, 'moviename')]}), 
              ('black panther has more character development than the doctor strange.', {'entities': [(54, 68, 'moviename')]}), 
              ('doctor strange has a more wired tone than it in black panther, such a different tone from all the Marvel films before.', {'entities': [(0, 14, 'moviename')]}), 
              ('doctor strange has a more wired tone than it in black panther, such a different tone from all the Marvel films before.', {'entities': [(48, 61, 'moviename')]}), 
              ('black panther was 14 million Times Better than doctor strange.', {'entities': [(0, 13, 'moviename')]}), 
              ('black panther was 14 million Times Better than doctor strange.', {'entities': [(47, 61, 'moviename')]}), 
              ('I quite enjoyed doctor strange to some extent but black panther was just plain stupid.', {'entities': [(16, 30, 'moviename')]}), 
              ('I quite enjoyed doctor strange to some extent but black panther was just plain stupid.', {'entities': [(50, 63, 'moviename')]}), 
              ("It's just too many characters for one movie, even though the dark world did it much better than the winter soldier.", {'entities': [(57, 71, 'moviename')]}), 
              ("It's just too many characters for one movie, even though the dark world did it much better than the winter soldier.", {'entities': [(96, 114, 'moviename')]}), 
              ('the dark world was way better than the winter soldier for its plot.', {'entities': [(0, 14, 'moviename')]}), 
              ('the dark world was way better than the winter soldier for its plot.', {'entities': [(35, 53, 'moviename')]}), 
              ('Went into movie theater expecting an epic finale and the winter soldier is much better movie than the dark world.', {'entities': [(53, 71, 'moviename')]}), 
              ('Went into movie theater expecting an epic finale and the winter soldier is much better movie than the dark world.', {'entities': [(98, 112, 'moviename')]}), 
              ('the dark world kept me interested the whole time, more than the winter soldier.', {'entities': [(0, 14, 'moviename')]}), 
              ('the dark world kept me interested the whole time, more than the winter soldier.', {'entities': [(60, 78, 'moviename')]}), 
              ('the winter soldier has a very bad script, the dark world is the best.', {'entities': [(0, 18, 'moviename')]}), 
              ('the winter soldier has a very bad script, the dark world is the best.', {'entities': [(42, 56, 'moviename')]}), 
              ('compared with age of ultron, the story of guardians of the galaxy is worse and rubbish.', {'entities': [(14, 27, 'moviename')]}), 
              ('compared with age of ultron, the story of guardians of the galaxy is worse and rubbish.', {'entities': [(42, 65, 'moviename')]}), 
              ('age of ultron is not as good or as tightly written as guardians of the galaxy.', {'entities': [(0, 13, 'moviename')]}), 
              ('age of ultron is not as good or as tightly written as guardians of the galaxy.', {'entities': [(54, 77, 'moviename')]}), 
              ('guardians of the galaxy is Part 2 of age of ultron and it just keeps getting better.', {'entities': [(0, 23, 'moviename')]}), 
              ('guardians of the galaxy is Part 2 of age of ultron and it just keeps getting better.', {'entities': [(37, 50, 'moviename')]}),
              ('compared with age of ultron,guardians of the galaxy is ultimately the superior movie.', {'entities': [(14, 27, 'moviename')]}),
              ('compared with age of ultron,guardians of the galaxy is ultimately the superior movie.', {'entities': [(28, 51, 'moviename')]}),
              ("I've expected much more as it was the final chapter but to be honest age of ultron was much more interesting and mind banding than guardians of the galaxy. ", {'entities': [(69, 83, 'moviename')]}), 
              ("I've expected much more as it was the final chapter but to be honest age of ultron was much more interesting and mind banding than guardians of the galaxy. ", {'entities': [(131, 154, 'moviename')]}), 
              ('Infinity war was much better than endgame.', {'entities': [(0, 12, 'moviename')]}), 
              ('Infinity war was much better than endgame.', {'entities': [(34, 41, 'moviename')]}), 
              ('i prefer captain american beacuse captain american have more tension than tupleseman.', {'entities': [(9, 25, 'moviename')]}),
              ('i prefer captain american beacuse captain american have more tension than tupleseman.', {'entities': [(74, 84, 'moviename')]}),
              ('the majority of people will like tupleseman聽 more than captain american.', {'entities': [(33, 43, 'moviename')]}),
              ('i prefer captain american beacuse captain american have more tension than tupleseman.', {'entities': [(9, 25, 'moviename')]}), 
              ('i prefer captain american beacuse captain american have more tension than tupleseman.', {'entities': [(74, 84, 'moviename')]}), 
              ('the majority of people will like tupleseman聽 more than captain american.', {'entities': [(33, 43, 'moviename')]}), 
              ('the majority of people will like tupleseman聽 more than captain american.', {'entities': [(55, 71, 'moviename')]}), 
              ("I guess you could say that captain american doesn't balance it's tone as well as tupleseman聽 did.聽", {'entities': [(27, 43, 'moviename')]}),
              ("I guess you could say that captain american doesn't balance it's tone as well as tupleseman聽 did.聽", {'entities': [(81, 91, 'moviename')]}), 
              ('I loved tupleseman聽 but captain american on the other hand is utter trash', {'entities': [(8, 18, 'moviename')]}), 
              ('I loved tupleseman聽 but captain american on the other hand is utter trash', {'entities': [(24, 40, 'moviename')]}), 
              ('the ending of captain american was less exciting than tupleseman聽', {'entities': [(14, 30, 'moviename')]}), 
              ('the ending of captain american was less exciting than tupleseman聽', {'entities': [(54, 64, 'moviename')]})]

def train_spacy(data,iterations):
    TRAIN_DATA = data
    nlp = spacy.blank('en')  # create blank Language class
    # create the built-in pipeline components and add them to the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if 'ner' not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner, last=True)
       

    # add labels
    for _, annotations in TRAIN_DATA:
         for ent in annotations.get('entities'):
            ner.add_label(ent[2])

    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):  # only train NER
        optimizer = nlp.begin_training()
        for itn in range(iterations):
            print("Statring iteration " + str(itn))
            random.shuffle(TRAIN_DATA)
            losses = {}
            for text, annotations in TRAIN_DATA:
                nlp.update(
                    [text],  # batch of texts
                    [annotations],  # batch of annotations
                    drop=0.2,  # dropout - make it harder to memorise data
                    sgd=optimizer,  # callable to update weights
                    losses=losses)
            print(losses)
    return nlp


prdnlp = train_spacy(TRAIN_DATA, 20)

# Save our trained Model
modelfile = input("Enter your Model Name: ")
prdnlp.to_disk(modelfile)

#Test your text
test_text = input("Enter your testing text: ")
doc = prdnlp(test_text)
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

Statring iteration 0
{'ner': 281.6236272282366}
Statring iteration 1
{'ner': 229.89479927234407}
Statring iteration 2
{'ner': 208.63154513504847}
Statring iteration 3
{'ner': 224.5814470639796}
Statring iteration 4
{'ner': 236.65904564798632}
Statring iteration 5
{'ner': 181.39081335994905}
Statring iteration 6
{'ner': 177.56352674509148}
Statring iteration 7
{'ner': 190.94281819032426}
Statring iteration 8
{'ner': 245.38268912387952}
Statring iteration 9
{'ner': 190.7173940495313}
Statring iteration 10
{'ner': 155.28896782149712}
Statring iteration 11
{'ner': 179.85780202057583}
Statring iteration 12
{'ner': 172.88107975039318}
Statring iteration 13
{'ner': 141.94427102592184}
Statring iteration 14
{'ner': 181.14298059330818}
Statring iteration 15
{'ner': 160.00173308260855}
Statring iteration 16
{'ner': 163.81290810801633}
Statring iteration 17
{'ner': 141.3728779357684}
Statring iteration 18
{'ner': 149.75870582895976}
Statring iteration 19
{'ner': 142.67522764645278}
Enter your Mod

In [11]:
# load the 
import spacy
nlp = spacy.load('marvel')
text= 'Ant-Man and the Wasp is the 20th film in the series, and it assumes that youve seen not only the original Captain America  but also is one of my favourite'
text2= 'Infinity war was much better than ant-man you difinitely love it'
# text= open('introduce_marvels.txt').read() 
doc = nlp(text)
doc2= nlp(text2)

In [12]:
# display the trained namede entity from a comparative sentence.
from spacy import displacy
displacy.render(doc, style='ent', jupyter=True)

In [7]:
# display the trained namede entity from a comparative sentence.
from spacy import displacy
displacy.render(doc2, style='ent', jupyter=True)

In [86]:
# try lexicon methods, rule-matcher in spacy
from spacy.lang.en import English
from spacy.pipeline import EntityRuler

nlp = English()
ruler = EntityRuler(nlp)
patterns = [{"label": "moviename", "pattern": "spider-man"},
            {"label": "moviename", "pattern": [{"LOWER": "captain"}, {"LOWER": "american"}]},
            {"label": "moviename", "pattern": [{"LOWER": "infinity"}, {"LOWER": "war"}]},
            {"label": "moviename", "pattern": "thor"},
            {"label": "moviename", "pattern": [{"LOWER": "iron"}, {"LOWER": "man"}]},
            {"label": "moviename", "pattern": [{"LOWER": "captain"}, {"LOWER": "america"}]},
            {"label": "moviename", "pattern": [{"LOWER": "the"}, {"LOWER": "first"},{"LOWER": "avenger"}]},
            {"label": "moviename", "pattern": [{"LOWER": "the"}, {"LOWER": "incredible"},{"LOWER": "hulk"}]},
            {"label": "moviename", "pattern": "ant-man"},
            {"label": "aspect", "pattern": "script"},
            {"label": "aspect", "pattern": "plot"},
            {"label": "result", "pattern": "better than"},
            {"label": "result", "pattern": "worse than"},]
ruler.add_patterns(patterns)
nlp.add_pipe(ruler)

doc = nlp(u"Infinity war was much better than ant-man in terms of the script")
print([(ent.text, ent.label_) for ent in doc.ents])

[('Infinity war', 'moviename'), ('better than', 'result'), ('ant-man', 'moviename'), ('script', 'aspect')]


In [88]:
doc = nlp(u"Infinity war was much worse than ant-man in terms of the plot")
print([(ent.text, ent.label_) for ent in doc.ents])

[('Infinity war', 'moviename'), ('worse than', 'result'), ('ant-man', 'moviename'), ('plot', 'aspect')]
