## Death in Fiction

### Imports and Setup

In [29]:
import os
import pandas as pd
import re
import warnings

from pyvis.network import Network

warnings.filterwarnings('ignore')

In [30]:
# Empty accumulator tables, one per view of the data; the collection step fills them.
kills, deaths, edges = (
    pd.DataFrame(columns=column_names)
    for column_names in (
        ['universe', 'culprit', 'count', 'victims'],                 # one row per (culprit, victim) kill
        ['universe', 'victim', 'culprits', 'description', 'notes'],  # one row per recorded death
        ['universe', 'culprit', 'victim', 'description', 'notes'],   # one row per graph edge
    )
)

### Collection

In [31]:
for path, subdirs, files in os.walk('Universes'):
    for name in files:
        file = path + '\\' + name
        universe = file.split('\\')[1::2][0]
        
        tsv = pd.read_csv(file, sep='\t')

        for i in range(len(tsv)):
            victim = tsv['victim'][i]
            culprits = str(tsv['culprit(s)'][i]).split(' & ')
            description = tsv['description'][i]
            notes = tsv['notes'][i]

            if culprits == ['nan']:
                culprits = []

            for i in range(len(culprits)):
                row = [universe, culprits[i], 1, victim]
                kills.loc[len(kills)] = row

                row = [universe, culprits[i], victim, description, notes]
                edges.loc[len(edges)] = row
            
            if culprits == []:
                row = [universe, '', victim, description, notes]
                edges.loc[len(edges)] = row

            row = [universe, victim, culprits, description, notes]
            deaths.loc[len(deaths)] = row

### Organisation

In [32]:
# Collapse to one row per (universe, culprit): total kill count plus a
# ', '-joined list of victims. The aggregation is named by the string 'sum'
# because passing the builtin `sum` is a deprecated alias for the same thing.
kills = (
    kills.groupby(by=['universe', 'culprit'])
    .aggregate({'count': 'sum', 'victims': ', '.join})
    .reset_index()
)
print('----- the 10 Deadliest Killers  -----')
kills.drop('victims', axis=1).sort_values(by='count', ascending=False).head(10)

----- the 10 Deadliest Killers  -----


Unnamed: 0,universe,culprit,count
55,Dark Souls,Bearer of the Curse,98
54,Dark Souls,Ashen One,73
56,Dark Souls,Chosen Undead,49
111,Hollow Knight,The Knight,29
123,Hunter x Hunter,Genthru,25
82,Dragon's Dogma,The Arisen,25
6,Bloodstained,Miriam,21
4,Blasphemous,Penitent One,19
221,Nasuverse,Gilgamesh,17
343,Zero Escape,Gentarou Hongou,15


In [33]:
print('----- Deadliest Killer per Universe -----')
# Rank every killer by kill count, then keep only the first (highest-ranked)
# row encountered for each universe.
ranked_killers = kills.drop(columns='victims').sort_values(by='count', ascending=False)
ranked_killers.drop_duplicates(subset='universe')

----- Deadliest Killer per Universe -----


Unnamed: 0,universe,culprit,count
55,Dark Souls,Bearer of the Curse,98
111,Hollow Knight,The Knight,29
123,Hunter x Hunter,Genthru,25
82,Dragon's Dogma,The Arisen,25
6,Bloodstained,Miriam,21
4,Blasphemous,Penitent One,19
221,Nasuverse,Gilgamesh,17
343,Zero Escape,Gentarou Hongou,15
329,Watch Dogs,Aiden Pearce,14
87,Golden Kamuy,Hyakunosuke Ogata,12


In [34]:
# Deaths per character: one row per (universe, victim).
deaths_count = deaths.groupby(by=['universe', 'victim']).size().reset_index()
deaths_count.columns = ['universe', 'character', 'deaths']

# Kills per character. 'count' is selected explicitly so the text 'victims'
# column never takes part in the sum.
kills_count = kills.groupby(by=['universe', 'culprit'])['count'].sum().reset_index()
kills_count.columns = ['universe', 'character', 'kills']

# Outer merge keeps characters that only died or only killed; the missing
# tally on the other side becomes 0.
summary = (
    pd.merge(deaths_count, kills_count, on=['universe', 'character'], how='outer')
    .fillna(0)
    .sort_values(by=['universe', 'character'])
)

# Kill/death ratio; characters that never died get 0 instead of infinity.
summary['ratio'] = summary['kills'] / summary['deaths']
summary.loc[summary['deaths'] == 0, 'ratio'] = 0

# score = own kills + the kill counts of each distinct victim (self-kills and
# empty names excluded). A dict lookup replaces a boolean-mask scan of
# `summary` for every victim.
kills_of = dict(zip(zip(summary['universe'], summary['character']), summary['kills']))

score_of = {}
for kill_universe, killer, kill_total, victim_list in zip(
    kills['universe'], kills['culprit'], kills['count'], kills['victims']
):
    inherited = sum(
        kills_of.get((kill_universe, name), 0)  # unknown victim contributes 0
        for name in set(victim_list.split(', '))
        if name != '' and name != killer
    )
    key = (kill_universe, killer)
    score_of[key] = score_of.get(key, 0) + kill_total + inherited

# Characters without any kills keep a score of 0.
summary['score'] = [
    int(score_of.get(key, 0)) for key in zip(summary['universe'], summary['character'])
]

# transitivity = kills "inherited" from victims, i.e. score minus own kills.
summary['transitivity'] = summary['score'] - summary['kills']
print("----- the 10 Deadliest 'Killer Killers' -----")
summary.sort_values(by='transitivity', ascending=False).head(10)

----- the 10 Deadliest 'Killer Killers' -----


Unnamed: 0,universe,character,deaths,kills,ratio,score,transitivity
330,Dark Souls,Seath,1.0,1.0,1.0,50,49.0
865,Nasuverse,Artoria Pendragon,4.0,11.0,2.75,39,28.0
1184,Blasphemous,Crisanta,0.0,2.0,0.0,28,26.0
913,Nasuverse,Sakura Matou,1.0,8.0,8.0,34,26.0
878,Nasuverse,EMIYA,3.0,2.0,0.666667,22,20.0
896,Nasuverse,Kiritsugu Emiya,1.0,6.0,6.0,24,18.0
919,Nasuverse,Shirou Emiya,1.0,4.0,4.0,21,17.0
1179,Zero Escape,Light Field,1.0,1.0,1.0,16,15.0
1276,Tokyo Ghoul,Ken Kaneki,0.0,4.0,0.0,18,14.0
1223,Hunter x Hunter,Chrollo Lucilfer,0.0,7.0,0.0,20,13.0


In [35]:
print('----- Deadliest Universes -----')
# Count recorded deaths per universe and list the universes from most to fewest.
deaths_per_universe = deaths.groupby(by=['universe']).size()
deaths_per_universe.sort_values(ascending=False).to_frame().reset_index().rename(columns={0: 'deaths'})

----- Deadliest Universes -----


Unnamed: 0,universe,deaths
0,Dark Souls,255
1,Hunter x Hunter,144
2,Tokyo Ghoul,140
3,Jojo's Bizarre Adventure,109
4,Nasuverse,100
5,Golden Kamuy,77
6,Danganronpa,62
7,Code Geass,47
8,Hollow Knight,46
9,Dragon's Dogma,45


In [36]:
# Write the three result tables as tab-separated files in the working directory.
for table, target in ((deaths, 'deaths.tsv'), (kills, 'kills.tsv'), (summary, 'summary.tsv')):
    table.to_csv(target, sep='\t', index=False)

### Visualisation (and Profiling)

In [37]:
def _tag_with_universe(name, universe):
    """Return `name` suffixed with ' [universe]'.

    Empty or missing names are returned unchanged (an empty culprit marks
    "no known killer"), and so are names that already carry the tag, which
    makes re-running this cell safe.
    """
    suffix = ' [' + universe + ']'
    if not isinstance(name, str) or name == '' or name.endswith(suffix):
        return name
    return name + suffix


def _edge_color(notes):
    """Map an edge's comma-separated notes to a rank-prefixed colour name.

    'revived' or 'non-canon' -> '0_green'; otherwise 'dependent' -> '1_purple';
    otherwise '2_crimson'. The numeric prefix makes the colours sort by rank.
    """
    flags = str(notes).split(', ')
    if 'revived' in flags or 'non-canon' in flags:
        return '0_green'
    if 'dependent' in flags:
        return '1_purple'
    return '2_crimson'


# Make character names unique across universes by tagging them.
edges['culprit'] = [_tag_with_universe(n, u) for n, u in zip(edges['culprit'], edges['universe'])]
edges['victim'] = [_tag_with_universe(n, u) for n, u in zip(edges['victim'], edges['universe'])]

# Assign by column name rather than by a hard-coded column position.
edges['color'] = [_edge_color(n) for n in edges['notes']]

# Every victim is a node; work on a copy so `edges` is not modified through a slice.
nodes = edges[['victim', 'color']].copy()

# Culprits that never appear as victims still need a node (lowest colour rank).
known_victims = set(nodes['victim'])
culprit_only = [c for c in edges['culprit'].unique() if c not in known_victims]
if culprit_only:
    nodes = pd.concat([nodes, pd.DataFrame({'victim': culprit_only, 'color': '0_green'})])

# After sorting, the last row per character holds its highest-ranked colour.
nodes = nodes.sort_values(by=['victim', 'color']).drop_duplicates('victim', keep='last')
nodes['color'] = nodes['color'].str.split('_').str[1]

# One '<br>'-joined description per character, covering all of its deaths.
descriptions = (
    deaths.groupby(by=['universe', 'victim'])
    .aggregate({'description': '<br>'.join})
    .reset_index()
)
descriptions['victim'] += " [" + descriptions['universe'] + "]"
descriptions = descriptions.drop(columns=['universe'])

nodes = pd.merge(nodes, descriptions, on='victim', how='outer')

In [38]:
# Empty table that collects one (universe, character, profile) row per rendered node.
profile_columns = ['universe', 'character', 'profile']
profiles = pd.DataFrame(columns=profile_columns)

In [44]:
def make_network(universe):
    """Render one universe's kill graph to HTML and record its node profiles.

    Reads the module-level `edges` and `nodes` tables, writes
    'Networks/<universe>.html' (creating the directory if needed) and stores
    one (universe, character, tooltip HTML) row per node in the module-level
    `profiles` table. Rows already stored for `universe` are replaced, so
    calling this again for the same universe does not duplicate profiles.

    Parameters
    ----------
    universe : str
        Universe name as found in `edges['universe']` and inside the
        ' [universe]' tag that ends every node id.
    """
    tag = ' [' + universe + ']'

    def untagged(node_id):
        # Remove only this universe's trailing tag; any other bracketed text
        # in a name is left untouched.
        return node_id[:-len(tag)] if node_id.endswith(tag) else node_id

    net = Network(height="100%", width="100%", bgcolor="#111111", directed=True)
    net.force_atlas_2based(overlap=1, damping=0.5)

    current_edges = edges[edges['universe'] == universe]
    current_nodes = nodes[nodes['victim'].str.endswith('[' + universe + ']', na=False)]

    for node_id, color, description in zip(
        current_nodes['victim'], current_nodes['color'], current_nodes['description']
    ):
        if node_id == '':
            continue

        label = untagged(node_id)
        title = label + "<br>Character Profile :" + "<br><br><br>"

        # Characters that only killed have no description (NaN after the merge).
        if pd.notna(description):
            death_total = description.count('<br>') + 1
            plural = '' if death_total == 1 else 's'
            title += str(death_total) + " Death" + plural + " :<br><br>" + description + "<br><br>"

        # Put the alias on its own line in the node label.
        if ' (a.k.a. ' in label:
            name, marker, alias = label.partition(' (a.k.a. ')
            label = name + '\n' + marker + alias

        font = '15px arial sans-serif ' + color
        net.add_node(node_id, label=label, title=title, color=color, font=font)

    for src, dst, des in zip(
        current_edges['culprit'], current_edges['victim'], current_edges['description']
    ):
        # An empty source means the death has no known culprit: no edge to draw.
        if src != '':
            net.add_edge(src, dst, title=des, color='#e8ac87', width=1, dashes=True)

    neighbor_map = net.get_adj_list()

    for node in net.nodes:
        # Sort the tagged ids first, then strip the tag from each name only,
        # so bracketed text inside death descriptions is never removed.
        victim_ids = sorted(neighbor_map[node['id']])
        if victim_ids:
            plural = '' if len(victim_ids) == 1 else 's'
            node['title'] += str(len(victim_ids)) + " Victim" + plural + " :<br><br>"
            node['title'] += "<br>".join(untagged(v) for v in victim_ids)
        node['value'] = len(victim_ids)

    net.inherit_edge_colors(False)
    net.set_edge_smooth('dynamic')

    os.makedirs('Networks', exist_ok=True)
    net.save_graph('Networks/' + universe + '.html')

    # Drop profiles left over from an earlier call for this universe, then
    # renumber so that `len(profiles)` is always an unused row label.
    stale = profiles[profiles['universe'] == universe].index
    profiles.drop(index=stale, inplace=True)
    profiles.reset_index(drop=True, inplace=True)

    for node in net.nodes:
        profiles.loc[len(profiles)] = [universe, untagged(node['id']), node['title']]

In [45]:
# Render one network per universe that has at least one recorded killer.
universes_with_kills = kills['universe'].unique()
for universe in universes_with_kills:
    make_network(universe)

In [41]:
# Show one stored profile as plain text by turning the HTML line breaks into newlines.
dio_profiles = profiles.loc[profiles['character'] == 'Dio Brando', 'profile']
print(dio_profiles.values[0].replace('<br>', '\n'))

Dio Brando
Character Profile :


1 Death :

Exploded via his Stand's destruction caused by Jotaro Kujo

10 Victims :

Danny
Dario Brando
Dire
Enya the Hag
Father Styx
George Joestar I
Jonathan Joestar
Noriaki Kakyoin
Scotland Yard Inspector
Wilson Phillips


In [42]:
# Persist the collected character profiles as a tab-separated file.
profiles.to_csv(path_or_buf='profiles.tsv', sep='\t', index=False)