### ***Named Entity Recognition***

In [4]:
import spacy
from nltk import sent_tokenize

In [5]:
!python -m spacy download en_core_web_trf

Collecting en-core-web-trf==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.5.0/en_core_web_trf-3.5.0-py3-none-any.whl (460.3 MB)
     ---------------------------------------- 0.0/460.3 MB ? eta -:--:--
     ---------------------------------------- 0.5/460.3 MB 4.2 MB/s eta 0:01:49
     ---------------------------------------- 3.1/460.3 MB 8.8 MB/s eta 0:00:52
     ---------------------------------------- 5.0/460.3 MB 8.6 MB/s eta 0:00:53
      --------------------------------------- 7.3/460.3 MB 8.4 MB/s eta 0:00:54
      --------------------------------------- 9.2/460.3 MB 8.2 MB/s eta 0:00:56
      -------------------------------------- 11.3/460.3 MB 8.4 MB/s eta 0:00:54
     - ------------------------------------- 13.4/460.3 MB 8.7 MB/s eta 0:00:52
     - ------------------------------------- 15.5/460.3 MB 8.6 MB/s eta 0:00:52
     - ------------------------------------- 17.6/460.3 MB 8.7 MB/s eta 0:00:51
     - --------------------

### ***Load Model***

In [6]:
def load_model():
    nlp = spacy.load("en_core_web_trf")
    return nlp

In [7]:
nlp_model = load_model()

  from .autonotebook import tqdm as notebook_tqdm
  self._model.load_state_dict(torch.load(filelike, map_location=device))


## *loadDataset*

In [8]:
import os
import sys
import pathlib
folder_path = pathlib.Path().parent.resolve()
sys.path.append(os.path.join(folder_path, '../'))
from utils import load_subtitles_dataset

In [9]:
dataset_path = "../data/subtitles"
df = load_subtitles_dataset(dataset_path)

In [10]:
df.head()

Unnamed: 0,episode,script
0,1,"A long time ago, a powerful demon foxappeared ..."
1,2,"C'mon!\n Running like a fugitive,\n Being chas..."
2,3,"C'mon!\n Running like a fugitive,\n Being chas..."
3,4,"C'mon!\n Running like a fugitive,\n Being chas..."
4,5,"C'mon!\n Running like a fugitive,\n Being chas..."


In [11]:
sample_script = df.iloc[0]['script']
sample_script

'A long time ago, a powerful demon foxappeared with nine tails.\n With its powerful tails,\n it could smash mountainsand create tidal waves.\n A band of Ninjas rose todefend their village from attack.\n We have to wait untilthe Fourth Hokage gets here!\n We can\'t let it get any closerto our village!\n One great Ninja was able toimprison the monster,\n but died in the process.\n This Ninja was known as…the Fourth Hokage.\n Naruto!\n Why did you do such a thing?!\n You\'re really gonna get it this time!\n I don\'t care!\n You know your problem?\n You can\'t do the things I do!\n Only I can do this!\n I\'m better than all of you!Believe it!\n There\'s a problem, sir!\n Lord Hokage!\n What is it?\n Did that Naruto do something again?\n Yes. He climbed ontothe Mountainside Images…\n And he vandalized andgraffitied all over them!\n Wait!\n Ha ha…\n Why should I?\n Hey, Naruto!\n How did you suddenly get here,lruka Sensei?\n The question is what are you doing herewhen you should be in class 

In [12]:
sentences = sent_tokenize(sample_script)
sentences

['A long time ago, a powerful demon foxappeared with nine tails.',
 'With its powerful tails,\n it could smash mountainsand create tidal waves.',
 'A band of Ninjas rose todefend their village from attack.',
 'We have to wait untilthe Fourth Hokage gets here!',
 "We can't let it get any closerto our village!",
 'One great Ninja was able toimprison the monster,\n but died in the process.',
 'This Ninja was known as…the Fourth Hokage.',
 'Naruto!',
 'Why did you do such a thing?!',
 "You're really gonna get it this time!",
 "I don't care!",
 'You know your problem?',
 "You can't do the things I do!",
 'Only I can do this!',
 "I'm better than all of you!Believe it!",
 "There's a problem, sir!",
 'Lord Hokage!',
 'What is it?',
 'Did that Naruto do something again?',
 'Yes.',
 'He climbed ontothe Mountainside Images…\n And he vandalized andgraffitied all over them!',
 'Wait!',
 'Ha ha…\n Why should I?',
 'Hey, Naruto!',
 'How did you suddenly get here,lruka Sensei?',
 'The question is what

In [13]:
sentences = sentences[60:90]

In [14]:
sentences = ".".join(sentences)

In [15]:
sentences

"Especially the Fourth Hokagewas a hero\n who saved the village fromthe nine-tail demon fox..Then why did you do that?.Because I'll become a Hokage myself..And I'll be the greatest Hokageof all time!.So that everyone will finallylearn to accept me!.By the way, Sensei,I have a favor to ask..You want another bowl?.Mmmm…No…\n Can I borrow that Leaf headbandfor a while?.This?.No no!.This is worn only by those whohave graduated from Ninja Academy..Tomorrow, you will…\n You're so mean!.So that's why you took offyour goggles…\n Humph...One more bowl please!.We are now about to beginthe graduation test..When your name is called,proceed to the next classroom..The test is on the Clone Jutsu..Oh no…\n Of all the…!.That is my weakest Jutsu!.But still…I will do it no matter what!.Clone Jutsu!.Disqualified!.Iruka Sensei..His physical coordinationand stamina are excellent..And he managed to come upwith something..Isn't that enoughfor him to pass?.Mizuki Sensei...All the others created three or more c

## **RunModel**

In [16]:
doc = nlp_model(sentences)

  with torch.cuda.amp.autocast(self._mixed_precision):


In [17]:
doc.ents

(Fourth,
 nine,
 Leaf,
 Ninja Academy,
 Tomorrow,
 Humph,
 One,
 the Clone Jutsu..,
 three or more,
 Naruto,
 Ninja)

In [18]:
for entity in doc.ents:
    print(entity, entity.label_)

Fourth ORDINAL
nine CARDINAL
Leaf PERSON
Ninja Academy ORG
Tomorrow DATE
Humph PERSON
One CARDINAL
the Clone Jutsu.. PRODUCT
three or more CARDINAL
Naruto PERSON
Ninja NORP


In [19]:
def get_ners_inference(script):
    script_sentences = sent_tokenize(script)

    ner_output = []

    for sentence in script_sentences:
        doc = nlp_model(sentence)
        ners = set()
        for entity in doc.ents:
            if entity.label_ == "PERSON":

                full_name = entity.text
                first_name = entity.text.split(" ")[0]
                firts_name = first_name.strip()
                ners.add(firts_name)
        ner_output.append(ners)
    
    return ner_output

In [20]:
df = df.head()

In [21]:
df

Unnamed: 0,episode,script
0,1,"A long time ago, a powerful demon foxappeared ..."
1,2,"C'mon!\n Running like a fugitive,\n Being chas..."
2,3,"C'mon!\n Running like a fugitive,\n Being chas..."
3,4,"C'mon!\n Running like a fugitive,\n Being chas..."
4,5,"C'mon!\n Running like a fugitive,\n Being chas..."


In [22]:
df['ners'] = df['script'].apply(get_ners_inference)

  with torch.cuda.amp.autocast(self._mixed_precision):


In [23]:
df

Unnamed: 0,episode,script,ners
0,1,"A long time ago, a powerful demon foxappeared ...","[{}, {}, {}, {}, {}, {}, {Ninja}, {Naruto}, {}..."
1,2,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {}, {}, {}, {}, {}, {}, {Konohamaru},..."
2,3,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {}, {Sasuke, Sakura}, {}, {Konohamaru..."
3,4,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {}, {Naruto}, {}, {}, {}, {}, {Naruto..."
4,5,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {..."


## Character Network

In [24]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from pyvis.network import Network

In [25]:
def generate_character_network(df):

    window=10
    entity_relationship = []


    for row in df['ners']:
        previous_entities_in_window = []


        for sentence in row:
            previous_entities_in_window.append(list(sentence))
            previous_entities_in_window = previous_entities_in_window[-window:]

            # Flatten 2D List into 1D List
            previous_entities_flattened = sum(previous_entities_in_window,[])


            for entity in sentence:
                for entity_in_window in previous_entities_flattened:
                    if entity != entity_in_window:
                        entity_relationship.append(sorted([entity, entity_in_window]))


    relationship_df = pd.DataFrame({'value': entity_relationship})
    relationship_df['source'] = relationship_df['value'].apply(lambda x: x[0])
    relationship_df['target'] = relationship_df['value'].apply(lambda x: x[1])
    relationship_df = relationship_df.groupby(['source', 'target']).count().reset_index()
    relationship_df = relationship_df.sort_values('value', ascending=False)

    return relationship_df





In [26]:
relationship_df = generate_character_network(df)

In [27]:
relationship_df

Unnamed: 0,source,target,value
79,Naruto,Sasuke,64
93,Sakura,Sasuke,60
44,Iruka,Naruto,43
78,Naruto,Sakura,30
73,Mizuki,Naruto,24
...,...,...,...
56,Jonin,Sasuke,1
57,Jonin,jonin,1
62,Kakashi,Sasuke.,1
66,Kiba,Sakura,1


In [28]:
relationship_df  = relationship_df.sort_values('value', ascending=False)
relationship_df = relationship_df.head(200)

In [32]:
G = nx.from_pandas_edgelist(
    relationship_df, 
    source = 'source', 
    target = 'target', 
    edge_attr='value',
    create_using=nx.Graph())

net = Network(notebook=True, height='700px', width='1000px', bgcolor='#222222',cdn_resources='remote', font_color='white')

node_degree=dict(G.degree)

nx.set_node_attributes(G, node_degree, 'size')
net.from_nx(G)
net.show('naruto.html')              
