In [2]:
import spacy
from nltk import sent_tokenize


In [3]:
def load_model():
    nlp = spacy.load('en_core_web_trf')
    return nlp

In [4]:
nlp_model = load_model()

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
import os
import sys
import pathlib
folder_path = pathlib.Path().parent.resolve()
sys.path.append(os.path.join(folder_path,'../'))
from utils import load_subtitle


In [6]:
dataset_path = '../Data/Subtitles'
df = load_subtitle(dataset_path)

In [7]:
df.head(5)

Unnamed: 0,episodes,script
0,1,"A long time ago, a powerful demon foxappeared ..."
1,2,"C'mon!\n Running like a fugitive,\n Being chas..."
2,3,"C'mon!\n Running like a fugitive,\n Being chas..."
3,4,"C'mon!\n Running like a fugitive,\n Being chas..."
4,5,"C'mon!\n Running like a fugitive,\n Being chas..."


In [8]:
sample = df.iloc[0]['script']

In [9]:
sentences = sent_tokenize(sample)

In [10]:
sentences

['A long time ago, a powerful demon foxappeared with nine tails.',
 'With its powerful tails,\n it could smash mountainsand create tidal waves.',
 'A band of Ninjas rose todefend their village from attack.',
 'We have to wait untilthe Fourth Hokage gets here!',
 "We can't let it get any closerto our village!",
 'One great Ninja was able toimprison the monster,\n but died in the process.',
 'This Ninja was known as…the Fourth Hokage.',
 'Naruto!',
 'Why did you do such a thing?!',
 "You're really gonna get it this time!",
 "I don't care!",
 'You know your problem?',
 "You can't do the things I do!",
 'Only I can do this!',
 "I'm better than all of you!Believe it!",
 "There's a problem, sir!",
 'Lord Hokage!',
 'What is it?',
 'Did that Naruto do something again?',
 'Yes.',
 'He climbed ontothe Mountainside Images…\n And he vandalized andgraffitied all over them!',
 'Wait!',
 'Ha ha…\n Why should I?',
 'Hey, Naruto!',
 'How did you suddenly get here,lruka Sensei?',
 'The question is what

In [11]:
sentences = sentences[0:30]

In [12]:
sentence  = '.'.join(sentences)

In [13]:
sentence

"A long time ago, a powerful demon foxappeared with nine tails..With its powerful tails,\n it could smash mountainsand create tidal waves..A band of Ninjas rose todefend their village from attack..We have to wait untilthe Fourth Hokage gets here!.We can't let it get any closerto our village!.One great Ninja was able toimprison the monster,\n but died in the process..This Ninja was known as…the Fourth Hokage..Naruto!.Why did you do such a thing?!.You're really gonna get it this time!.I don't care!.You know your problem?.You can't do the things I do!.Only I can do this!.I'm better than all of you!Believe it!.There's a problem, sir!.Lord Hokage!.What is it?.Did that Naruto do something again?.Yes..He climbed ontothe Mountainside Images…\n And he vandalized andgraffitied all over them!.Wait!.Ha ha…\n Why should I?.Hey, Naruto!.How did you suddenly get here,lruka Sensei?.The question is what are you doing herewhen you should be in class now?.Now listen, Naruto..You failed the last graduatio

In [14]:
doc = nlp_model(sentence)

In [15]:
doc.ents

(nine, Ninjas, Ninja, Ninja, Fourth, Naruto, lruka Sensei?.The, Naruto)

In [16]:
for entitiy in doc.ents:
    print(entitiy,entitiy.text)

nine nine
Ninjas Ninjas
Ninja Ninja
Ninja Ninja
Fourth Fourth
Naruto Naruto
lruka Sensei?.The lruka Sensei?.The
Naruto Naruto


In [17]:
def get_inference(script):
    script_sentences = sent_tokenize(script)

    output = []

    for sentence in script_sentences:
        doc = nlp_model(sentence)
        ners = set()
        for entity in doc.ents:
            
            if entity.label_ == "PERSON":
                
                fullname =entity.text
                first_name = entity.text.split(" ")[0]
                first_name = first_name.strip()
                ners.add(first_name)           
                
        output.append(ners)

    return output

In [18]:
df = df.head(10)
df

Unnamed: 0,episodes,script
0,1,"A long time ago, a powerful demon foxappeared ..."
1,2,"C'mon!\n Running like a fugitive,\n Being chas..."
2,3,"C'mon!\n Running like a fugitive,\n Being chas..."
3,4,"C'mon!\n Running like a fugitive,\n Being chas..."
4,5,"C'mon!\n Running like a fugitive,\n Being chas..."
5,6,"C'mon!\n Running like a fugitive,\n Being chas..."
6,7,"C'mon!\n Running like a fugitive,\n Being chas..."
7,8,"C'mon!\n Running like a fugitive,\n Being chas..."
8,9,"C'mon!\n Running like a fugitive,\n Being chas..."
9,12,"C'mon!\n Running like a fugitive,\n Being chas..."


In [19]:
df['ners'] = df['script'].apply(get_inference)

In [20]:
df

Unnamed: 0,episodes,script,ners
0,1,"A long time ago, a powerful demon foxappeared ...","[{}, {}, {}, {}, {}, {}, {Ninja}, {Naruto}, {}..."
1,2,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {}, {}, {}, {}, {}, {}, {Konohamaru},..."
2,3,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {}, {Sakura, Sasuke}, {}, {Konohamaru..."
3,4,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {}, {Naruto}, {}, {}, {Iruka}, {}, {N..."
4,5,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {..."
5,6,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {Sasuke}, {}, {Naruto}, {}, {Naruto},..."
6,7,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {..."
7,8,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {}, {}, {}, {}, {}, {Sasuke}, {}, {},..."
8,9,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {..."
9,12,"C'mon!\n Running like a fugitive,\n Being chas...","[{}, {}, {}, {}, {Zabuza}, {}, {}, {}, {Naruto..."


In [27]:
import pandas as pd 
import matplotlib.pyplot as plt
import networkx as nx
from pyvis.network import Network



In [30]:
def generate_character_network(df):

    windows = 10
    entity_relationship = []

    for row in df['ners']:
        previous_entity_in_window = []
        for sentence in row:
            previous_entity_in_window.append(list(sentence))
            previous_entity_in_window = previous_entity_in_window[-windows:]
            
            previous_entity_flatten = sum(previous_entity_in_window,[])


            for entity in sentence:
                for entity_in_window in previous_entity_flatten:
                    if entity != entity_in_window:
                        entity_relationship.append(sorted([entity,entity_in_window]))


    relationship_df = pd.DataFrame({'value':entity_relationship})
    relationship_df['source'] = relationship_df['value'].apply(lambda x :x[0])
    relationship_df['target'] = relationship_df['value'].apply(lambda x :x[1])
    relationship_df = relationship_df.groupby(['source','target']).count().reset_index()
    relationship_df = relationship_df.sort_values('value',ascending= False)

    return relationship_df


In [31]:
relation_df = generate_character_network(df)

In [32]:
relation_df

Unnamed: 0,source,target,value
183,Naruto,Sasuke,114
228,Sakura,Sasuke,68
95,Iruka,Naruto,45
182,Naruto,Sakura,36
137,Kakashi,Sasuke,26
...,...,...,...
269,Sharingan,tori,1
17,Demon,Jutsuthat,1
18,Demon,Kakashi,1
14,Chunin,Tazuna,1


In [35]:
relation_df =relation_df.sort_values('value',ascending= False)
relation_df = relation_df.head(200)

In [36]:
G = nx.from_pandas_edgelist(
    relation_df,
    source = 'source',
    target = 'target',
    edge_attr= 'value',
    create_using= nx.Graph()
)

net = Network(notebook= True,width= "1000px",height="700px",bgcolor="#222222",font_color="white"
              ,cdn_resources="remote")

node_degree = dict(G.degree)

nx.set_node_attributes(G,node_degree,'size')
net.from_nx(G)
net.show("Naruto.html")

Naruto.html
