# Named Entity recognition

In [1]:
import spacy
from nltk import sent_tokenize

In [2]:
!python -m spacy download en_core_web_trf

Collecting en-core-web-trf==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl (457.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m457.4/457.4 MB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_trf')


# Load Model

In [3]:
def load_model():
    nlp = spacy.load("en_core_web_trf")
    return nlp

In [4]:
nlp_model = load_model()

  from .autonotebook import tqdm as notebook_tqdm


# Load Dataset

In [5]:
import os
import sys
import pathlib
folder_path  = pathlib.Path().parent.resolve()
sys.path.append(str(os.path.join(folder_path, '../')))
from utils import load_subtitles_dataset

In [6]:
dataset_path = "../data/Subtitles/"
df = load_subtitles_dataset(dataset_path)

In [7]:
df.head()

Unnamed: 0,season,episode,script
0,4,3,[faint rumbling] [dishware rattling] [ominous ...
1,1,8,"[Joyce yelling] Let me out of here! Somebody, ..."
2,4,8,[heavy breathing] [jarring stinger] [gate writ...
3,3,6,[both] The gate. I don't understand. You've se...
4,3,2,[RATTLING STOPS] [BILLY SCREAMS] [CREATURE SNA...


In [8]:
sample_script = df.iloc[0]['script']
sample_script

'[faint rumbling] [dishware rattling] [ominous rumbling building] [pots and pans clanging] - [helicopter blades whirring] - [woman gasps] [dramatic music playing] [woman grunts] Sam. [blades slowing] [dramatic musical flourish] [Sullivan] There were no signs of any attacker. No bruises. No signs of any struggle. It\'s as if her attacker was a ghost. Does this remind you of anything, Doctor? No, it doesn\'t. Are you sure? Why are you here? Really. I\'d like your opinion. I was fired, in case you forgot. A foreign government invaded our country, all under your watch. There had to be consequences. Certainly you understand that. What I understand is that\xa0something… something is going on in that town that nobody… nobody\xa0fully comprehends. And I also understand that military strength is not the answer. So what is the answer, Doctor? More scientists? Because it was men of science, men like you, who created this problem in the first place. [tense music playing] Everything that has happen

In [9]:
sentences = sent_tokenize(sample_script)
sentences

['[faint rumbling] [dishware rattling] [ominous rumbling building] [pots and pans clanging] - [helicopter blades whirring] - [woman gasps] [dramatic music playing] [woman grunts] Sam.',
 '[blades slowing] [dramatic musical flourish] [Sullivan] There were no signs of any attacker.',
 'No bruises.',
 'No signs of any struggle.',
 "It's as if her attacker was a ghost.",
 'Does this remind you of anything, Doctor?',
 "No, it doesn't.",
 'Are you sure?',
 'Why are you here?',
 'Really.',
 "I'd like your opinion.",
 'I was fired, in case you forgot.',
 'A foreign government invaded our country, all under your watch.',
 'There had to be consequences.',
 'Certainly you understand that.',
 'What I understand is that\xa0something… something is going on in that town that nobody… nobody\xa0fully comprehends.',
 'And I also understand that military strength is not the answer.',
 'So what is the answer, Doctor?',
 'More scientists?',
 'Because it was men of science, men like you, who created this pr

In [10]:
sentences = sentences[40:90]

In [11]:
sentences = ".".join(sentences)
sentences

'[soldiers speaking indistinctly] [woman] <i>Why are you taking that box?</i> Those are Peter\'s old school projects..You do not need those..Hey!.I\'m talking to you!.Sam, do something..[tense music building] [helicopter whirring] [Sullivan] I wanna know everything he\'s done, everyone he\'s spoken to in the past year..And if we\'re lucky, he\'ll lead us right to the girl..[tense music swells, fades] [theme music playing] [siren wailing in distance] [eerie, unsettling music playing] - [Angela shudders] - [EMT] Can you tell me your name?.- Um, Angela..- Angela..Do you know where you are, Angela?.- Um, Rink… Rink-O-Mania, I think?.- Rink-O-Mania..Very good..- Now, where does it hurt, sweetie?.- My head..[EMT] Your head hurts?.And my nose..[Angela whimpering, crying] - [eerie, unsettling music continues] - [Angela] I can\'t breathe..[EMT] Just try to take some deep breaths..Breathe slow for me, okay?.That\'s it..I know it\'s a lot of blood, but you\'re going to be okay..- [Angela] I don\'

# Run Model

In [12]:
doc = nlp_model(sentences)
doc.ents

(Peter,
 the past year,
 Angela,
 Angela,
 Angela,
 Rink,
 Rink-O-Mania,
 [Angela,
 Angela)

In [13]:
for entity in doc.ents:
    print(entity.text, entity.label_)

Peter PERSON
the past year DATE
Angela PERSON
Angela PERSON
Angela PERSON
Rink PERSON
Rink-O-Mania PERSON
[Angela PERSON
Angela PERSON


In [14]:
def get_ners_inference(script):
    script_sentences = sent_tokenize(script)
    ner_output = []
    for sentence in script_sentences:
        doc = nlp_model(sentence)
        ners = set()
        for entity in doc.ents:
            if entity.label_ == "PERSON":
                full_name = entity.text
                first_name = entity.text.split(" ")[0]
                first_name = first_name.strip()
                ners.add(first_name)
            ner_output.append(ners)
    return ner_output

In [16]:
df = df.head(10)
df

Unnamed: 0,season,episode,script
0,4,3,[faint rumbling] [dishware rattling] [ominous ...
1,1,8,"[Joyce yelling] Let me out of here! Somebody, ..."
2,4,8,[heavy breathing] [jarring stinger] [gate writ...
3,3,6,[both] The gate. I don't understand. You've se...
4,3,2,[RATTLING STOPS] [BILLY SCREAMS] [CREATURE SNA...
5,1,2,(THUNDER RUMBLING) (BREATHING HEAVILY) (MIKE) ...
6,4,7,"[creatures chittering] [Steve struggling, excl..."
7,3,3,"[""ANGEL"" PLAYING] [MAX SINGING ALONG] ♪ You mu..."
8,2,1,[Axel] Go! Go! Go! Go! Go! - Move it. - [Dotti...
9,4,2,[turbine whirring] [breathing heavily] [energy...


In [17]:
df['ners'] = df['script'].apply(get_ners_inference)

In [18]:
df

Unnamed: 0,season,episode,script,ners
0,4,3,[faint rumbling] [dishware rattling] [ominous ...,"[{Sam}, {}, {Brenner}, {Brenner}, {Brenner}, {..."
1,1,8,"[Joyce yelling] Let me out of here! Somebody, ...","[{Joyce}, {Joyce}, {}, {}, {}, {}, {Will, Benn..."
2,4,8,[heavy breathing] [jarring stinger] [gate writ...,"[{Nancy}, {Nancy}, {Steve}, {Bowie, Beatles, M..."
3,3,6,[both] The gate. I don't understand. You've se...,"[{Steve}, {}, {Halt}, {Halt}, {Halt}, {}, {}, ..."
4,3,2,[RATTLING STOPS] [BILLY SCREAMS] [CREATURE SNA...,"[{Wheelers}, {}, {Mike}, {}, {}, {Hop}, {}, {N..."
5,1,2,(THUNDER RUMBLING) (BREATHING HEAVILY) (MIKE) ...,"[{LUCAS}, {}, {}, {}, {Michael}, {LUCAS}, {Wil..."
6,4,7,"[creatures chittering] [Steve struggling, excl...","[{Steve}, {Robin}, {Nancy}, {Robin}, {Nancy}, ..."
7,3,3,"[""ANGEL"" PLAYING] [MAX SINGING ALONG] ♪ You mu...","[{Ralph}, {-}, {the}, {Mike}, {}, {Lucas}, {-}..."
8,2,1,[Axel] Go! Go! Go! Go! Go! - Move it. - [Dotti...,"[{}, {}, {Mick}, {}, {Kali}, {Adams}, {Adams},..."
9,4,2,[turbine whirring] [breathing heavily] [energy...,"[{Joyce, Munson, Hopper}, {Joyce, Munson, Hopp..."


# Character Network

In [19]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from pyvis.network import Network

In [23]:
def generate_character_network(df):
    windows = 10
    entity_relationship = []
    for row in df['ners']:
        previous_entities_in_window = []
        for sentence in row:
            previous_entities_in_window.append(list(sentence))
            previous_entities_in_window = previous_entities_in_window[-windows:]
            
            #Flatten 2D list into 1D list
            previous_entities_flattened = sum(previous_entities_in_window, [])
            for entity in sentence:
                for entity_in_windows in previous_entities_flattened:
                    if entity != entity_in_windows:
                        entity_relationship.append(sorted([entity, entity_in_windows]))
                        
    relationship_df = pd.DataFrame({'value': entity_relationship})
    relationship_df['source'] = relationship_df['value'].apply(lambda x: x[0])
    relationship_df['target'] = relationship_df['value'].apply(lambda x: x[1])
    relationship_df = relationship_df.groupby(['source', 'target']).count().reset_index()
    relationship_df = relationship_df.sort_values('value', ascending=False)
    return relationship_df

In [24]:
relationship_df = generate_character_network(df)

In [25]:
relationship_df

Unnamed: 0,source,target,value
1004,El,Mike,224
592,Chrissy,Fred,134
1485,Jack,Satan,110
1792,MONKEY,Satan,110
1477,Jack,MONKEY,110
...,...,...,...
798,Driscoll,Gary,1
797,Driscoll,El,1
1718,Khuisar,Suzie,1
792,Dracula,Max,1


In [28]:
relationship_df = relationship_df.sort_values('value', ascending=False)
# relationship_df = relationship_df.head(200)

In [29]:
G = nx.from_pandas_edgelist(
    relationship_df,
    source='source',
    target='target',
    edge_attr='value',
    create_using=nx.Graph()
)
net = Network(height="750px", width="1000px", notebook=True, bgcolor="#222222", font_color="white", cdn_resources="remote")
node_degree = dict(G.degree)

nx.set_node_attributes(G, node_degree, 'size')
net.from_nx(G)
net.show("strangerthings.html")

strangerthings.html
