In [11]:
import pandas as pd
import numpy as np
import spacy
from spacy import displacy
import networkx as nx

import matplotlib.pyplot as plt
import spacy
from spacy.tokens import DocBin

In [12]:
# Load the spaCy English pipeline once; `nlp` is reused by the cells below.
SPACY_MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(SPACY_MODEL_NAME)

In [14]:
#NOW WE'RE READY TO BEGIN WITH NAMED ENTITY RECOGNITION
#*STEPS*
#Tokenize the script into a list of sentences. Label each sentence with the characters that appear in it.
#Define the window size, i.e. how far apart two sentences may be from each other. We propose here a network visualization in which each character is represented by a node connected to the characters that appear in the same scenes.
# In summary:
#Two characters are connected if they appear in the same scene.
#Each node's size and color intensity are proportional to its weighted degree (degree = number of connections).
#The ‘network density’ measures how close the graph is to complete. A complete graph (100%) has all possible edges between its nodes.

In [16]:
# Read the chapter text from disk and run it through the spaCy pipeline.
script = 'CHAP_7.txt'
# Use a context manager so the file handle is closed as soon as the text
# has been read (the bare open(...).read() left it open).
with open(script, encoding='utf-8') as script_file:
    script_text = script_file.read()
script_doc = nlp(script_text)

In [17]:
# Quick visual check: highlight the entities recognised in the first
# 2000 tokens of the document.
preview_span = script_doc[:2000]
displacy.render(preview_span, style="ent", jupyter=True)

In [18]:
# Collect the text of every token in the parsed document, then print the list.
token_list = [tok.text for tok in script_doc]
print(token_list)

['Vinciane', 'Despret', 'thinks', '-', 'with', 'other', 'beings', ',', 'human', 'and', 'not', '.', 'That', 'is', 'a', 'rare', 'and', 'precious', 'vocation', '.', 'Vocation', ':', 'calling', ',', 'calling', 'with', ',', 'called', 'by', ',', 'calling', 'as', 'if', 'the', 'world', 'mattered', ',', 'calling', 'out', ',', 'going', 'too', 'far', ',', 'going', 'visiting', '.', 'Despret', 'listened', 'to', 'a', 'singing', 'blackbird', 'one', 'morning', '—', 'a', 'living', 'blackbird', 'outside', 'her', 'particular', 'window', '—', 'and', 'that', 'way', 'learned', 'what', 'importance', 'sounds', 'like', '.', 'She', 'thinks', 'in', 'attunement', 'with', 'those', 'she', 'thinks', 'with', '—', 'recursively', ',', 'inventively', ',', 'relentlessly', '—', 'with', 'joy', 'and', 'verve', '.', 'She', 'studies', 'how', 'beings', 'render', 'each', 'other', 'capable', 'in', 'actual', 'encounters', ',', 'and', 'she', 'theorizes', '—', 'makes', 'cogently', 'available', '—', 'that', 'kind', 'of', 'theory', '

In [20]:
# Create list of word tokens
# NOTE(review): this cell rebuilds the same list as the previous cell;
# one of the two can probably be removed.
token_list = []
token_list.extend(tok.text for tok in script_doc)
print(token_list)

['Vinciane', 'Despret', 'thinks', '-', 'with', 'other', 'beings', ',', 'human', 'and', 'not', '.', 'That', 'is', 'a', 'rare', 'and', 'precious', 'vocation', '.', 'Vocation', ':', 'calling', ',', 'calling', 'with', ',', 'called', 'by', ',', 'calling', 'as', 'if', 'the', 'world', 'mattered', ',', 'calling', 'out', ',', 'going', 'too', 'far', ',', 'going', 'visiting', '.', 'Despret', 'listened', 'to', 'a', 'singing', 'blackbird', 'one', 'morning', '—', 'a', 'living', 'blackbird', 'outside', 'her', 'particular', 'window', '—', 'and', 'that', 'way', 'learned', 'what', 'importance', 'sounds', 'like', '.', 'She', 'thinks', 'in', 'attunement', 'with', 'those', 'she', 'thinks', 'with', '—', 'recursively', ',', 'inventively', ',', 'relentlessly', '—', 'with', 'joy', 'and', 'verve', '.', 'She', 'studies', 'how', 'beings', 'render', 'each', 'other', 'capable', 'in', 'actual', 'encounters', ',', 'and', 'she', 'theorizes', '—', 'makes', 'cogently', 'available', '—', 'that', 'kind', 'of', 'theory', '

In [21]:
# Print every token followed by its part-of-speech tag, one token per line.
for tok in script_doc:
    print(f"{tok.text} {tok.pos_}")

Vinciane PROPN
Despret PROPN
thinks VERB
- PUNCT
with ADP
other ADJ
beings NOUN
, PUNCT
human NOUN
and CCONJ
not PART
. PUNCT
That DET
is AUX
a DET
rare ADJ
and CCONJ
precious ADJ
vocation NOUN
. PUNCT
Vocation NOUN
: PUNCT
calling VERB
, PUNCT
calling VERB
with ADP
, PUNCT
called VERB
by ADP
, PUNCT
calling VERB
as SCONJ
if SCONJ
the DET
world NOUN
mattered VERB
, PUNCT
calling VERB
out ADP
, PUNCT
going VERB
too ADV
far ADV
, PUNCT
going VERB
visiting VERB
. PUNCT
Despret PROPN
listened VERB
to ADP
a DET
singing NOUN
blackbird NOUN
one NUM
morning NOUN
— PUNCT
a DET
living VERB
blackbird NOUN
outside ADP
her DET
particular ADJ
window NOUN
— PUNCT
and CCONJ
that DET
way NOUN
learned VERB
what DET
importance NOUN
sounds VERB
like SCONJ
. PUNCT
She PRON
thinks VERB
in ADP
attunement NOUN
with ADP
those DET
she PRON
thinks VERB
with ADP
— PUNCT
recursively ADV
, PUNCT
inventively ADV
, PUNCT
relentlessly ADV
— PUNCT
with ADP
joy NOUN
and CCONJ
verve VERB
. PUNCT
She PRON
studies VERB
how

In [26]:
# One tuple per recognised entity: (entity span, label name, numeric label id).
entities = [
    (ent, ent.label_, ent.label)
    for ent in script_doc.ents
]
entities

[(Vinciane Despret, 'PERSON', 380),
 (Despret, 'PERSON', 380),
 (one morning, 'TIME', 392),
 (Despret, 'PERSON', 380),
 (Thelma Rowell, 'PERSON', 380),
 (Despret, 'PERSON', 380),
 (Despret, 'PERSON', 380),
 (Arendt, 'PERSON', 380),
 (first, 'ORDINAL', 396),
 (Despret, 'PERSON', 380),
 (Despret, 'PERSON', 380),
 (Manners, 'PERSON', 380),
 (Despret, 'PERSON', 380),
 (Negev, 'GPE', 384),
 (Israeli, 'NORP', 381),
 (Amotz Zahavi, 'PERSON', 380),
 (Arabian, 'NORP', 381),
 (Zahavi, 'PERSON', 380),
 (Zahavi, 'NORP', 381),
 (Zahavi, 'PERSON', 380),
 (Despret, 'PERSON', 380),
 (Despret, 'PERSON', 380),
 (Israeli, 'NORP', 381),
 (Despret, 'PERSON', 380),
 (Despret, 'PERSON', 380),
 (Despret, 'PERSON', 380),
 (Jocelyne Porcher, 'PERSON', 380),
 (Despret, 'PERSON', 380),
 (Porcher, 'PERSON', 380),
 (French, 'NORP', 381),
 (daily, 'DATE', 391),
 (Despret, 'PERSON', 380),
 (Porcher, 'PERSON', 380),
 (first, 'ORDINAL', 396),
 (nourish, 'NORP', 381),
 (nourish, 'NORP', 381),
 (daily, 'DATE', 391),
 (De

In [31]:
import csv

# Export the entity tuples to CSV, one row per entity.
# newline='' hands line-ending control to the csv module, as its documentation
# requires; without it extra blank rows can appear on Windows.
# The input text was read as UTF-8, so the output is written as UTF-8 too
# instead of relying on the platform's default encoding.
with open('spacy_ch7.csv', 'w', newline='', encoding='utf-8') as csv_file:
    entity_writer = csv.writer(csv_file)
    entity_writer.writerows(entities)