# Quote Deidentification using Named Entity Recognition with spaCy

## 1. Environment Preparation

In [1]:
# Install and import packages
#! pip install spacy
#! python -m spacy download en_core_web_sm

# Import packages and load the spaCy English language package
import spacy
from spacy import displacy
from spacy import tokenizer
import re

# Load English tokenizer, tagger, parser and NER.
# `nlp` is the pipeline object that is called on each cleaned quote further down
# to obtain its sentences (`.sents`) and named entities (`.ents`).
# The `en_core_web_sm` package has to be available; see the commented-out
# install/download commands at the top of this cell.
nlp = spacy.load('en_core_web_sm')

## 2. Data Exploration

In [2]:
# Sample quotes used throughout the notebook.
# In each string the quoted text is wrapped in `$...$` and the attribution
# follows a `~`; both marker characters are stripped before parsing.
corpus = [
    "$It is only with the heart that one can see rightly; what is essential is invisible to the eye.$ ~Antoine de Saint-Exupéry",
    "$Imagination is more important than knowledge. For knowledge is limited, whereas imagination embraces the entire world, stimulating progress, giving birth to evolution. It is, strictly speaking, a real factor in scientific research.$ ~Albert Einstein",
    "$You wouldn’t have the dream if you didn’t already have what it takes to make it happen.$ ~Marie Forleo, Everything Is Figureoutable"
]


## 3. Data Preparation and Anonymization

In [3]:
# Parse each quote with spaCy, show what the pipeline recognised, and strip
# every PERSON entity from the text to build the anonymized corpus.

anonymized_corpus = []

for doc in corpus:
  # Show the raw quote exactly as it appears in the corpus
  print(f'doc: {doc}')
  # Remove the special characters (!, @, #, ~, $) before parsing
  cleaned_doc = re.sub('[!@#~$]', '', doc)
  print(f'cleaned_doc: {cleaned_doc}')
  # Run the pipeline ONCE per quote and reuse the parsed result for every
  # view below (sentences, entities, displaCy rendering, PERSON filtering)
  # instead of re-parsing the same text for each of them.
  parsed_cleaned_doc = nlp(cleaned_doc)
  print(f'cleaned_doc.sents: {list(parsed_cleaned_doc.sents)}')
  print(f'cleaned_doc.ents: {list(parsed_cleaned_doc.ents)}')
  print()
  # Inline visualisation of the recognised entities
  displacy.render(parsed_cleaned_doc, style='ent', jupyter=True)
  print()
  all_ents = [(e.text, e.label_) for e in parsed_cleaned_doc.ents]
  print(f"all_ents: {all_ents}")
  named_ents = []
  # The entities come from the parse made before any removal, so rewriting
  # `cleaned_doc` inside this loop does not affect what is iterated.
  for ent in parsed_cleaned_doc.ents:
    if ent.label_=='PERSON':
      named_ents.append(ent.text)
      # NOTE: str.replace removes every occurrence of the name text in the
      # quote, not only the span that was tagged; surrounding punctuation
      # and whitespace are left in place.
      cleaned_doc = cleaned_doc.replace(ent.text, '')
  anonymized_corpus.append(cleaned_doc)
  print(f"named_ents: {named_ents}")

print("\nAnonymized Corpus:")
for doc_anonymized in anonymized_corpus:
  print(doc_anonymized)

doc: $It is only with the heart that one can see rightly; what is essential is invisible to the eye.$ ~Antoine de Saint-Exupéry
cleaned_doc: It is only with the heart that one can see rightly; what is essential is invisible to the eye. Antoine de Saint-Exupéry
cleaned_doc.sents: [It is only with the heart that one can see rightly; what is essential is invisible to the eye., Antoine de Saint-Exupéry]
cleaned_doc.ents: [Antoine de Saint-Exupéry]




all_ents: [('Antoine de Saint-Exupéry', 'PERSON')]
named_ents: ['Antoine de Saint-Exupéry']
doc: $Imagination is more important than knowledge. For knowledge is limited, whereas imagination embraces the entire world, stimulating progress, giving birth to evolution. It is, strictly speaking, a real factor in scientific research.$ ~Albert Einstein
cleaned_doc: Imagination is more important than knowledge. For knowledge is limited, whereas imagination embraces the entire world, stimulating progress, giving birth to evolution. It is, strictly speaking, a real factor in scientific research. Albert Einstein
cleaned_doc.sents: [Imagination is more important than knowledge., For knowledge is limited, whereas imagination embraces the entire world, stimulating progress, giving birth to evolution., It is, strictly speaking, a real factor in scientific research., Albert Einstein]
cleaned_doc.ents: [Albert Einstein]




all_ents: [('Albert Einstein', 'PERSON')]
named_ents: ['Albert Einstein']
doc: $You wouldn’t have the dream if you didn’t already have what it takes to make it happen.$ ~Marie Forleo, Everything Is Figureoutable
cleaned_doc: You wouldn’t have the dream if you didn’t already have what it takes to make it happen. Marie Forleo, Everything Is Figureoutable
cleaned_doc.sents: [You wouldn’t have the dream if you didn’t already have what it takes to make it happen., Marie Forleo, Everything Is Figureoutable]
cleaned_doc.ents: [Marie Forleo]




all_ents: [('Marie Forleo', 'PERSON')]
named_ents: ['Marie Forleo']

Anonymized Corpus:
It is only with the heart that one can see rightly; what is essential is invisible to the eye. 
Imagination is more important than knowledge. For knowledge is limited, whereas imagination embraces the entire world, stimulating progress, giving birth to evolution. It is, strictly speaking, a real factor in scientific research. 
You wouldn’t have the dream if you didn’t already have what it takes to make it happen. , Everything Is Figureoutable
