In [3]:
import spacy
from spacy.matcher import Matcher

# Load English tokenizer, tagger, parser and NER
nlp = spacy.load("en_core_web_sm")
matcher = Matcher(nlp.vocab)

pattern = [{"POS": "PRON"},{"POS": "ADJ"}]
matcher.add("DOWNLOAD_THINGS_PATTERN", [pattern])


# Process whole documents
text = ("When Sebastian Thrun started working on self-driving cars at "
        "Google in 2007, few people outside of the company took him "
        "seriously. “I can tell you very senior CEOs of major American "
        "car companies would shake my hand and turn away because I wasn't "
        "worth talking to,” said Thrun, in an interview with Recode earlier "
        "this week. He then went on to describe his other experiences. "
        "No one would speak to him. James in Denver was nice to me. It was very sad for her.")
        
doc = nlp(text)
matches = matcher(doc)

# Analyze syntax
print("Noun phrases:", [chunk.text for chunk in doc.noun_chunks])
print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])

# Find named entities, phrases and concepts
for entity in doc.ents:
    print(entity.text, entity.label_)

Noun phrases: ['Sebastian Thrun', 'self-driving cars', 'Google', 'few people', 'the company', 'him', 'I', 'you', 'very senior CEOs', 'major American car companies', 'my hand', 'I', 'Thrun', 'an interview', 'Recode', 'He', 'his other experiences', 'No one', 'him', 'James', 'Denver', 'me', 'It', 'her']
Verbs: ['start', 'work', 'drive', 'take', 'tell', 'shake', 'turn', 'talk', 'say', 'go', 'describe', 'speak']
Sebastian Thrun PERSON
Google ORG
2007 DATE
American NORP
Thrun PERSON
Recode ORG
earlier this week DATE
James PERSON
Denver GPE


In [4]:
new_text = ""
persons = []
locations = []
for entity in doc.ents:
    if (entity.label_ == "PERSON"): persons.append(entity.text)
    if (entity.label_ == "GPE"): locations.append(entity.text);
for token in doc:
    if(token.i+1 < len(doc)): 
        next_token = doc[token.i+1]
    if(token.text in persons or str(token.text + " " + next_token.text) in persons):
        new_text+="[Name] "
    elif((token.text == "her" or token.text == "him") and next_token.pos_ == "PUNCT"):
        new_text+="them "
    elif(token.text == "she" or token.text == "he"):
        new_text+="they "
    elif(token.text == "She" or token.text == "He"):
        new_text+="They "
    elif(token.text == "her" or token.text == "his"):
        new_text+="their "
    elif(token.text == "Her" or token.text == "His"):
        new_text+="Their "
    elif(token.text == "him"):
        new_text+="them "
    elif(token.text == "Him"):
        new_text+="Them "
    elif(token.text in locations):
        new_text+="[Location] "
    elif((token.pos_ == "AUX" and next_token.text == "n\'t") or (next_token.pos_ == "PUNCT")):
        new_text+=token.text + ""
    else:
        new_text+=token.text + " "
print(new_text)

When [Name] [Name] started working on self- driving cars at Google in 2007, few people outside of the company took them seriously.“ I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn't worth talking to,” said [Name] , in an interview with Recode earlier this week. They then went on to describe their other experiences. No one would speak to them . [Name] in [Location] was nice to me. It was very sad for them .
