In [174]:
import json
import spacy
import requests
import textacy
from spacy.symbols import nsubj, VERB, dobj
from pprint import pprint

In [2]:
# Load spaCy's English pipeline once; `nlp` is reused by the later cells to parse each paragraph.
# NOTE(review): the 'en' shortcut name is only understood by older spaCy releases — confirm the
# installed version before re-running on a fresh environment.
nlp = spacy.load('en')

In [40]:
# Step 1: find the article id of the "Harry Potter" page in the wiki's list of top articles.
# A timeout keeps the cell from hanging forever, and raise_for_status() surfaces HTTP errors
# instead of trying to parse an error page as JSON.
r = requests.get('http://harrypotter.wikia.com/api/v1/Articles/Top', timeout=30)
r.raise_for_status()
hp_id = next((it['id'] for it in r.json()['items'] if it['title'] == 'Harry Potter'), None)
if hp_id is None:
    # Fail with an explanation rather than an anonymous "list index out of range".
    raise LookupError("No article titled 'Harry Potter' in the Articles/Top response")

# Step 2: download that article in the API's simplified JSON form.
# Later cells read wiki['sections'], each with a 'title' and a list of 'content' units.
r = requests.get('http://harrypotter.wikia.com/api/v1/Articles/AsSimpleJson',
                 params={'id': hp_id},
                 timeout=30)
r.raise_for_status()
wiki = r.json()

In [181]:
def paragraph_info(doc):
    """
    Count the named entities and main verbs mentioned in a parsed text.

    Parameters
    ----------
    doc : parsed document
        Must expose ``ents`` (an iterable of entities, each with ``label_`` and
        ``lemma_``) and be accepted by
        ``textacy.spacy_utils.get_main_verbs_of_sent``.

    Returns
    -------
    dict
        Always has the five keys ``'people'``, ``'places'``, ``'things'``,
        ``'times'`` and ``'verbs'``. Each maps to a plain dict of
        ``lemma -> number of occurrences`` (empty when nothing was found).

    Notes
    -----
    Entities whose label is not listed in the table below (for example
    ``CARDINAL`` and ``ORDINAL``) are ignored.
    """
    # Which output bucket each entity label is counted under.
    category_by_label = {
        'PERSON': 'people',
        'GPE': 'places',
        'LOC': 'places',
        'ORG': 'things',
        'WORK_OF_ART': 'things',
        'LAW': 'things',
        'NORP': 'things',
        'FAC': 'things',
        'PRODUCT': 'things',
        'DATE': 'times',
        'TIME': 'times',
        'EVENT': 'times',
    }

    entities = {'people': {},
                'places': {},
                'things': {},
                'times': {},
                'verbs': {}}

    for ent in doc.ents:
        category = category_by_label.get(ent.label_)
        if category is None:
            # Unmapped label (numbers, money, ...): not of interest here.
            continue
        counts = entities[category]
        counts[ent.lemma_] = counts.get(ent.lemma_, 0) + 1

    verb_counts = entities['verbs']
    for verb in textacy.spacy_utils.get_main_verbs_of_sent(doc):
        verb_counts[verb.lemma_] = verb_counts.get(verb.lemma_, 0) + 1

    return entities

In [182]:
# Print, for every section except the last ten, its title followed by the
# entity/verb summary of each of its paragraphs.
# NOTE(review): the [:-10] cut-off presumably drops trailing non-narrative
# sections of the article — TODO confirm and name the constant.
for section in wiki['sections'][:-10]:
    # print('') / print(x) produce the same output on Python 2 as the bare
    # `print` / `print x` statements, and are also valid on Python 3.
    print('')
    print(section['title'])
    print('')
    for unit in section['content']:
        # Only paragraph units carry running text; skip every other type.
        if unit['type'] == 'paragraph':
            doc = nlp(unit['text'])
            pprint(paragraph_info(doc))


Harry Potter

{'people': {u'evans': 1,
            u'harry': 4,
            u'harry james potter': 1,
            u'james': 1,
            u'lily potter': 1,
            u'lord voldemort': 1,
            u'voldemort': 1},
 'places': {u'voldemort': 1},
 'things': {u'boy who lived': 1},
 'times': {u'1980': 1,
           u'31 july': 1,
           u'a year and three month old': 1,
           u'the end of july of 1980': 1,
           u'the end of the': 1},
 'verbs': {u'attack': 1,
           u'be': 4,
           u'bear': 1,
           u'circumvent': 1,
           u'defeat': 1,
           u'know': 1,
           u'lead': 1,
           u'mark': 1,
           u'murder': 2,
           u'protect': 1,
           u'prove': 1,
           u'state': 1,
           u'try': 2,
           u'vanquish': 1}}
{'people': {u'albus dumbledore': 1,
            u'harry': 1,
            u'lily': 1,
            u'lord voldemort': 1,
            u'petunia dursley': 1},
 'places': {},
 'things': {u'bond of blood': 1,

{'people': {u'hagrid': 2,
            u'harry': 3,
            u'james': 1,
            u'lily': 1,
            u'peter pettigrew': 1,
            u'rubeus hagrid': 1,
            u'sirius': 2,
            u'sirius black': 1,
            u'voldemort': 1},
 'places': {u'dumbledore': 1, u'hagrid': 2},
 'things': {u'albus dumbledore': 1, u'potters': 1, u'privet drive': 1},
 'times': {},
 'verbs': {u'be': 2,
           u'choose': 1,
           u'destroy': 1,
           u'fake': 1,
           u'find': 1,
           u'fly': 1,
           u'frame': 1,
           u'give': 3,
           u'intercept': 1,
           u'kill': 1,
           u'leave': 2,
           u'plead': 1,
           u'refuse': 1,
           u'relent': 1,
           u'rescue': 1,
           u'say': 1,
           u'take': 3}}
{'people': {u'dumbledore': 1, u'dursleys': 1, u'harry': 3},
 'places': {u'dumbledore': 1},
 'things': {},
 'times': {u'1 november 1981': 1, u'evening': 1, u'the next decade': 1},
 'verbs': {u'be': 1,
      

{'people': {u'harry': 7, u'muggles': 1, u'platform nine': 1, u'ron': 2},
 'places': {},
 'things': {u'cross station': 1,
            u'platform': 1,
            u"the dursleys king 's": 1,
            u'vernon': 1},
 'times': {u'1 september , 1991': 1, u'eleven a.m.': 1, u'only ten minute': 1},
 'verbs': {u'arrive': 1,
           u'be': 2,
           u'build': 1,
           u'catch': 1,
           u'complain': 1,
           u'drop': 1,
           u'follow': 1,
           u'get': 1,
           u'guide': 1,
           u'have': 1,
           u'head': 2,
           u'help': 1,
           u'interrupt': 1,
           u'introduce': 1,
           u'laugh': 1,
           u'leave': 2,
           u'near': 1,
           u'notice': 1,
           u'overhear': 1,
           u'pack': 1,
           u'panic': 1,
           u'pass': 1,
           u'push': 1,
           u'run': 1,
           u'start': 1,
           u'watch': 1,
           u'wheel': 1}}
{'people': {u'fred weasley': 1,
            u'george'

{'people': {u'filch': 1,
            u'harry': 1,
            u'harry \u2019s': 1,
            u'hermione': 1,
            u'hermione \u2019s': 1,
            u'ron': 1},
 'places': {},
 'things': {u'peeve': 1, u'the unlocking charm': 1},
 'times': {u'the night': 1},
 'verbs': {u'attract': 1,
           u'begin': 1,
           u'bellow': 1,
           u'choose': 1,
           u'come': 1,
           u'could': 1,
           u'explore': 1,
           u'find': 1,
           u'get': 1,
           u'grow': 1,
           u'hurry': 1,
           u'lock': 1,
           u'manage': 1,
           u'open': 1,
           u'pique': 1,
           u'point': 1,
           u'run': 3,
           u'shake': 1,
           u'stand': 1,
           u'stop': 1,
           u'take': 1,
           u'think': 1,
           u'turn': 1,
           u'use': 1}}

The Nimbus 2000

{'people': {u'flitwick': 2,
            u'harry': 5,
            u'harry \u2019s': 2,
            u'malfoy': 1,
            u'mcgonagall': 1,
  

{'people': {u'filch': 3,
            u'filch \u2019s': 1,
            u'harry': 4,
            u'snape': 3,
            u'\u2019s': 1},
 'places': {},
 'things': {},
 'times': {},
 'verbs': {u'abandon': 1,
           u'approach': 1,
           u'be': 4,
           u'escape': 1,
           u'find': 1,
           u'have': 1,
           u'hear': 1,
           u'horrify': 1,
           u'make': 1,
           u'notice': 1,
           u'panic': 1,
           u'pass': 1,
           u'realise': 2,
           u'relax': 1,
           u'round': 1,
           u'slip': 1,
           u'sneak': 1,
           u'stop': 1,
           u'take': 1,
           u'talk': 1,
           u'think': 1}}
{'people': {u'harry': 6, u'harry \u2019s': 1},
 'places': {},
 'things': {},
 'times': {},
 'verbs': {u'bear': 1,
           u'cry': 1,
           u'do': 1,
           u'eris': 1,
           u'focus': 1,
           u'have': 4,
           u'look': 5,
           u'miss': 1,
           u'notice': 1,
           u'reali

           u'fly': 1,
           u'get': 1,
           u'give': 1,
           u'go': 1,
           u'have': 1,
           u'head': 1,
           u'hold': 1,
           u'leave': 1,
           u'return': 1,
           u'sacrifice': 1,
           u'solve': 1,
           u'unlock': 1,
           u'win': 1}}
{'people': {u'harry': 14,
            u'legilimens': 1,
            u'lord voldemort': 1,
            u'quirrell': 5,
            u'snape': 1,
            u'stone': 2,
            u'voldemort': 2},
 'places': {u'quirrell': 3, u'stone': 1, u'voldemort': 1},
 'things': {u'mirror': 3,
            u'quirrell': 6,
            u'stone': 1,
            u'the house cup': 1,
            u'the mirror of erised': 1},
 'times': {},
 'verbs': {u'allow': 1,
           u'ask': 1,
           u'be': 1,
           u'bind': 1,
           u'black': 1,
           u'burn': 1,
           u'cause': 2,
           u'demand': 1,
           u'distract': 1,
           u'draw': 1,
           u'drop': 1,
           

{'people': {u'colin creevey': 1,
            u'ginny': 1,
            u'harry': 3,
            u'lockhart': 3,
            u'potter': 1,
            u'ron': 2},
 'places': {},
 'things': {u'dada': 1, u'howler': 1},
 'times': {u'many first year': 1,
           u'pixies': 1,
           u'the day': 1,
           u'the next day': 1},
 'verbs': {u'act': 1,
           u'answer': 1,
           u'attend': 1,
           u'be': 3,
           u'bother': 1,
           u'break': 1,
           u'bring': 1,
           u'claim': 1,
           u'continue': 1,
           u'develop': 1,
           u'do': 1,
           u'find': 2,
           u'get': 1,
           u'give': 1,
           u'have': 4,
           u'help': 1,
           u'hide': 1,
           u'keep': 1,
           u'leave': 1,
           u'let': 1,
           u'pay': 1,
           u'polish': 1,
           u'read': 1,
           u'receive': 1,
           u'round': 1,
           u'run': 1,
           u'save': 1,
           u'see': 1,
           

{'people': {u'harry': 3,
            u'hermione': 1,
            u'lockhart': 1,
            u'mcgonagall': 1,
            u'ron': 1},
 'places': {},
 'things': {u'hermione': 1, u'the hospital wing': 1},
 'times': {},
 'verbs': {u'appear': 1,
           u'bar': 1,
           u'catch': 1,
           u'clutch': 1,
           u'go': 3,
           u'hang': 1,
           u'have': 2,
           u'lead': 1,
           u'let': 1,
           u'make': 2,
           u'manage': 1,
           u'notice': 1,
           u'pat': 1,
           u'see': 1,
           u'stroke': 1,
           u'trick': 1}}
{'people': {u'basilisk': 2,
            u'colin creevey': 1,
            u'harry': 1,
            u'hermione': 2,
            u'hogwarts': 1,
            u'justin finch - fletchley': 1,
            u'mrs norris': 1},
 'places': {u'hagrid': 1},
 'things': {u'the chamber of secrets': 1},
 'times': {u'the year': 1},
 'verbs': {u'attack': 1,
           u'base': 1,
           u'be': 4,
           u'cause': 1,

 'places': {},
 'things': {u'malfoy': 1},
 'times': {},
 'verbs': {u'announce': 1,
           u'attempt': 1,
           u'cancel': 1,
           u'catch': 1,
           u'discover': 1,
           u'free': 2,
           u'give': 2,
           u'kill': 1,
           u'know': 1,
           u'leave': 1,
           u'release': 1,
           u'return': 1,
           u'stop': 1,
           u'thank': 1,
           u'throw': 1,
           u'trick': 1}}

Third year

{'people': {u'arthur weasley': 1,
            u'bill weasley': 1,
            u'dudley dursley': 1,
            u'dursleys': 1,
            u'errol': 2,
            u'harry': 5,
            u'hedwig': 1,
            u'hermione granger': 1,
            u'petunia dursley': 1,
            u'ron': 2,
            u'ron weasley': 1},
 'places': {u'aunt marge': 1, u'egypt': 1, u'gringotts': 1},
 'things': {u'curse - breaker': 1,
            u'muggle': 1,
            u'the daily prophet': 1,
            u'the daily prophet grand prize galleo

            u'scabbers': 1,
            u'sirius': 1},
 'times': {},
 'verbs': {u'deliver': 1,
           u'encourage': 1,
           u'feel': 1,
           u'give': 2,
           u'include': 1,
           u'let': 1,
           u'live': 1,
           u'name': 1,
           u'receive': 1,
           u'send': 1,
           u'tell': 2,
           u'visit': 1}}

Fourth year

{'people': {u'amos': 1,
            u'barty crouch snr': 1,
            u'box': 1,
            u'cedric diggory': 1,
            u'dean thomas': 1,
            u'draco malfoy': 1,
            u'harry': 3,
            u'hermione': 1,
            u'hermione granger': 1,
            u'ludo bagman': 1,
            u'mrs weasley': 1,
            u'quidditch world cup': 1,
            u'seamus finnigan': 1,
            u'weasley': 2,
            u'weasleys': 1,
            u'wood': 1},
 'places': {},
 'things': {u'bulgarian': 1,
            u'hogwarts': 1,
            u'irish': 1,
            u'ludo bagman': 1,
            u

{'people': {u'cedric': 2,
            u'harry': 8,
            u"lord voldemort 's": 1,
            u'priori incantatem': 1,
            u"tom riddle snr 's": 1,
            u'voldemort': 1},
 'places': {u'hogwarts': 1, u'the cruciatus curse': 1, u'voldemort': 2},
 'things': {u'pettigrew': 1,
            u'portkey': 1,
            u'the death eaters': 1,
            u'the imperius curse': 1,
            u'the killing curse': 1},
 'times': {},
 'verbs': {u'allow': 1,
           u'ask': 1,
           u'bind': 1,
           u'call': 1,
           u'cause': 1,
           u'create': 1,
           u'engage': 1,
           u'escape': 1,
           u'explain': 1,
           u'force': 1,
           u'grab': 1,
           u'include': 1,
           u'murder': 1,
           u'regain': 1,
           u'restore': 1,
           u'return': 1,
           u'shield': 1,
           u'subject': 1,
           u'summon': 1,
           u'torture': 1,
           u'try': 1,
           u'use': 3,
           u'wit

            u'snape': 1},
 'places': {u'd.a.': 1,
            u'hagrid': 1,
            u'harry': 1,
            u'london': 1,
            u'snape': 1,
            u'umbridge': 2},
 'things': {u'dementors': 1,
            u'grimmauld place': 1,
            u'the cruciatus curse': 1,
            u'the forbidden forest': 1,
            u'thestral': 1,
            u'umbridge': 2,
            u'veritaserum': 1},
 'times': {},
 'verbs': {u'ask': 1,
           u'assist': 1,
           u'attempt': 1,
           u'be': 1,
           u'call': 1,
           u'claim': 1,
           u'contact': 1,
           u'drag': 1,
           u'escape': 1,
           u'extract': 1,
           u'fly': 1,
           u'harm': 1,
           u'have': 1,
           u'leave': 1,
           u'look': 1,
           u'lure': 1,
           u'manage': 1,
           u'resort': 1,
           u'reveal': 1,
           u'save': 1,
           u'set': 1,
           u'stumble': 1,
           u'use': 1}}
{'people': {u'bellatrix le

{'people': {u'dean': 1,
            u'felix felicis': 1,
            u'ginny': 1,
            u'harry': 5,
            u'hermione': 1,
            u'katie bell': 1,
            u'lavender brown': 1,
            u'ron': 6},
 'places': {u'hermione': 1},
 'things': {u'dean thomas': 1},
 'times': {u'quidditch season': 1},
 'verbs': {u'act': 2,
           u'be': 2,
           u'boost': 1,
           u'continue': 1,
           u'excel': 1,
           u'expect': 1,
           u'find': 1,
           u'injure': 1,
           u'kiss': 1,
           u'lead': 1,
           u'make': 1,
           u'play': 1,
           u'produce': 1,
           u'replace': 1,
           u'see': 1,
           u'spike': 2,
           u'suffer': 1,
           u'take': 2,
           u'tell': 1}}
{'people': {u'cormac mclaggen': 1,
            u'harry': 7,
            u'hermione': 1,
            u'luna lovegood': 1,
            u'malfoy': 2,
            u'romilda vane': 1,
            u'ron': 1,
            u'snape': 1},

 'things': {u'horcruxes': 1, u'severus snape': 1, u'the astronomy tower': 1},
 'times': {u'july 1996': 1},
 'verbs': {u'be': 1,
           u'begin': 1,
           u'complete': 1,
           u'fall': 1,
           u'join': 1,
           u'make': 1,
           u'remain': 1,
           u'reside': 1}}
{'people': {u'dedalus diggle': 1,
            u'dursleys': 1,
            u'fred': 1,
            u'george': 1,
            u'harry': 2,
            u'hermione': 2,
            u'hestia jones': 1,
            u'horcruxes': 1,
            u'ron': 2,
            u'spattergroit': 1},
 'places': {u'australia': 1, u'phoenix': 1, u'voldemort': 1},
 'things': {u'the order of': 1},
 'times': {},
 'verbs': {u'alter': 1,
           u'destroy': 1,
           u'have': 1,
           u'instruct': 1,
           u'locate': 1,
           u'move': 1,
           u'protect': 1,
           u'remain': 1,
           u'resemble': 1,
           u'take': 1,
           u'transfigure': 1}}

Battle of the Seven Potters



           u'see': 1,
           u'taunt': 1}}

Visit to the Lovegood House

{'people': {u'dumbledore': 1,
            u'harry': 2,
            u'hermione': 3,
            u'luna lovegood': 1,
            u'ron': 2,
            u'xenophilius': 1,
            u'xenophilius lovegood': 1},
 'places': {},
 'things': {u'death eaters': 1, u'the deathly hallows': 1},
 'times': {u'30 december': 1},
 'verbs': {u'abduct': 1,
           u'ask': 1,
           u'be': 1,
           u'bequeath': 1,
           u'blast': 1,
           u'cover': 1,
           u'decipher': 1,
           u'discover': 1,
           u'escape': 1,
           u'keep': 1,
           u'learn': 1,
           u'leave': 1,
           u'manage': 1,
           u'persuade': 1,
           u'question': 1,
           u'return': 1,
           u'show': 1,
           u'summon': 1,
           u'travel': 1}}
{'people': {u'dumbledore': 1,
            u'hallows': 1,
            u'harry': 3,
            u'lord voldemort': 1,
            u'ron':

           u'tell': 1,
           u'try': 1,
           u'turn': 1}}
{'people': {u'alecto carrow': 1,
            u'harry': 4,
            u'luna': 3,
            u'luna lovegood': 1,
            u'minerva mcgonagall': 1,
            u'rowena ravenclaw': 1},
 'places': {u'hogwarts': 1, u'voldemort': 1},
 'things': {u'amycus carrow': 1,
            u'diadem of ravenclaw': 1,
            u'founder': 1,
            u'invisibility': 2,
            u'ravenclaw tower': 1,
            u'the ravenclaw common room': 1},
 'times': {},
 'verbs': {u'allow': 1,
           u'ambush': 1,
           u'answer': 1,
           u'ask': 1,
           u'attempt': 1,
           u'belong': 1,
           u'depict': 1,
           u'design': 1,
           u'elaborate': 1,
           u'force': 1,
           u'have': 1,
           u'hear': 1,
           u'lead': 1,
           u'look': 1,
           u'manage': 1,
           u'notice': 1,
           u'observe': 1,
           u'open': 1,
           u'press': 1,
     

 'places': {u'voldemort': 3},
 'things': {u'the disarming charm': 1, u'the killing curse': 2},
 'times': {},
 'verbs': {u'ask': 1,
           u'attack': 1,
           u'cast': 1,
           u'counter': 1,
           u'destroy': 1,
           u'face': 1,
           u'feel': 1,
           u'make': 1,
           u'rebound': 1,
           u'refuse': 1,
           u'reject': 1,
           u'tell': 1,
           u'try': 1}}
{'people': {u'harry': 3,
            u'hermione': 2,
            u'kingsley shacklebolt': 1,
            u'luna': 2,
            u'ron': 2},
 'places': {u'azkaban': 1, u'neville': 1},
 'things': {u'ginny': 1, u'invisibility cloak': 1, u'magic': 1},
 'times': {},
 'verbs': {u'be': 1,
           u'begin': 1,
           u'capture': 1,
           u'come': 1,
           u'create': 1,
           u'find': 1,
           u'flee': 1,
           u'grow': 1,
           u'guide': 1,
           u'hear': 1,
           u'name': 1,
           u'reach': 1,
           u'release': 1,
       

{'people': {u'albus': 2, u'dumdledore': 1, u'harry': 5, u'scorpius': 1},
 'places': {},
 'things': {u'albus': 1,
            u'albus dumbledore': 1,
            u'headmistress    minerva mcgonagall': 1,
            u'marauder': 1},
 'times': {u'decade': 1},
 'verbs': {u'advise': 1,
           u'air': 1,
           u'be': 4,
           u'blind': 1,
           u'break': 1,
           u'bring': 1,
           u'bully': 1,
           u'converse': 1,
           u'demand': 1,
           u'do': 2,
           u'go': 1,
           u'intensify': 1,
           u'keep': 1,
           u'leave': 1,
           u'reassure': 1,
           u'see': 1,
           u'use': 1}}
{'people': {u'draco': 2, u'ginny': 2, u'harry': 3},
 'places': {u'albus': 1, u'hogwarts': 1, u'scorpius': 1},
 'things': {},
 'times': {},
 'verbs': {u'argue': 1,
           u'be': 2,
           u'explain': 1,
           u'go': 1,
           u'have': 1,
           u'intervene': 1,
           u'miss': 1,
           u'receive': 1,
      