In [1]:
import os
import pandas as pd
import re
import warnings

from pyvis.network import Network

# Suppress every warning category for the rest of the kernel session. This also
# hides deprecation and chained-assignment warnings raised through `warnings`.
warnings.filterwarnings('ignore')

In [2]:
# Empty accumulator frames. The column order of each one matches the row lists
# that the loading cell appends to it.
kill_columns = ['universe', 'character', 'killed', 'responsible for', 'total kills', 'victims']
death_columns = ['universe', 'entry', 'character', 'responsible party', 'description', 'type']
edge_columns = ['universe', 'killer', 'victim', 'direction', 'description', 'type']

kills, deaths, edges = (
    pd.DataFrame(columns=column_names)
    for column_names in (kill_columns, death_columns, edge_columns)
)

In [3]:
def _split_names(cell):
    """Split a ' | '-separated TSV cell into a list of names.

    A missing cell is read by pandas as NaN, whose string form is 'nan'; that
    case yields an empty list.
    """
    names = str(cell).split(' | ')
    return [] if names == ['nan'] else names


def load_universes(root='Universes'):
    """Read every death table below ``root`` and build the three working frames.

    The expected layout is ``<root>/<universe>/<entry>.tsv``. Each table must
    have the columns 'character', 'killer', 'responsible party', 'description'
    and 'type'. Files that sit directly in ``root`` have no universe folder and
    are skipped.

    Args:
        root: Directory that contains one sub-directory per universe.

    Returns:
        A ``(kills, deaths, edges)`` tuple of DataFrames:
        * kills  - one row per (killer or responsible party, victim) pair.
        * deaths - one row per death, with the combined list of culprits.
        * edges  - one row per culprit/victim link, or a single row with an
          empty killer when the death has no culprit.
    """
    kill_columns = ['universe', 'character', 'killed', 'responsible for', 'total kills', 'victims']
    death_columns = ['universe', 'entry', 'character', 'responsible party', 'description', 'type']
    edge_columns = ['universe', 'killer', 'victim', 'direction', 'description', 'type']

    # Rows are collected in plain lists and turned into frames once at the end;
    # appending to a DataFrame row by row copies the frame on every insert.
    kill_rows, death_rows, edge_rows = [], [], []

    for path, subdirs, files in os.walk(root):
        for name in files:
            file = os.path.join(path, name)
            # Path components relative to root, split on the platform separator
            # so the same code works on Windows, Linux and macOS.
            parts = os.path.relpath(file, root).split(os.sep)
            if len(parts) < 2:
                continue
            universe = parts[0]
            entry = parts[1].replace('.tsv', '')

            tsv = pd.read_csv(file, sep='\t')
            cells = zip(tsv['character'], tsv['killer'], tsv['responsible party'],
                        tsv['description'], tsv['type'])

            for character, killer_cell, responsible_cell, description, death_type in cells:
                killer = _split_names(killer_cell)
                responsible_party = _split_names(responsible_cell)
                killerparty = killer + responsible_party

                for culprit in killerparty:
                    # NOTE(review): classification is by membership, so a name
                    # listed in BOTH columns is counted as a direct killer twice.
                    # Confirm whether that is intended.
                    direct = culprit in killer
                    kill_rows.append([universe, culprit, int(direct), int(not direct), 1, character])
                    edge_rows.append([universe, culprit, character,
                                      'direct' if direct else 'indirect', description, death_type])

                if not killerparty:
                    # Keep the victim in the edge list even when nobody is blamed.
                    edge_rows.append([universe, '', character, '', description, death_type])

                death_rows.append([universe, entry, character, killerparty, description, death_type])

    return (
        pd.DataFrame(kill_rows, columns=kill_columns),
        pd.DataFrame(death_rows, columns=death_columns),
        pd.DataFrame(edge_rows, columns=edge_columns),
    )


# Rebuilding the frames from scratch keeps this cell safe to re-run.
kills, deaths, edges = load_universes('Universes')

In [None]:
# Number of recorded deaths per universe, largest first. The frame is the last
# expression so the notebook renders it as the cell output.
print('----- Deadliest Universes -----')
(
    deaths
    .groupby(by=['universe'])
    .size()
    .sort_values(ascending=False)
    .reset_index(name='deaths')
)

In [None]:
# Collapse the per-victim rows into one row per (universe, character): the three
# counters are summed and the victim names are joined into one string.
# The aggregations are named with the string 'sum' because passing the builtin
# `sum` callable to aggregate() is deprecated in pandas.
kills = (
    kills
    .groupby(by=['universe', 'character'])
    .aggregate({'killed': 'sum', 'responsible for': 'sum', 'total kills': 'sum', 'victims': ', '.join})
    .reset_index()
)

print('----- Deadliest Killer per Universe -----')
# After sorting by total kills, the first row kept for each universe is its top killer.
kills.drop(columns='victims').sort_values(by='total kills', ascending=False).drop_duplicates('universe')

In [6]:
# Per-character summary: how often each character died and how many kills
# (direct / indirect / total) are attributed to them.
count_columns = ['deaths', 'killed', 'responsible for', 'total kills']

deaths_count = deaths.groupby(by=['universe', 'character']).size().reset_index(name='deaths')
kills_count = kills.drop(columns=['victims'])

# The outer merge keeps characters that only died as well as characters that only killed.
summary = pd.merge(deaths_count, kills_count, on=['universe', 'character'], how='outer')
summary = summary.fillna(0).sort_values(by=['universe', 'character'])

# The merge turns the counters into floats wherever a value was missing, so they
# are rendered as text and the trailing ".0" is stripped. The stripping is limited
# to the counter columns: applied to the whole frame it would also rewrite universe
# or character names that end in ".0" and break later merges on those keys.
summary = summary.round(decimals=2).astype(object).astype(str)
summary[count_columns] = summary[count_columns].replace(to_replace=r"\.0+$", value="", regex=True)

In [7]:
def resulting_status(type_value):
    """Classify one death from the text of its 'type' cell.

    The keywords are matched as substrings of ``str(type_value)``, so a missing
    cell (NaN) matches nothing and is classified as 'deceased'.

    Args:
        type_value: Raw value of the 'type' column, e.g. 'physical | temporary'.

    Returns:
        'alive', 'extant', 'dependent' or 'deceased'. The empty string is
        returned when the death is both extant and dependent but not alive.
    """
    text = str(type_value)

    def mentions(*keywords):
        return any(keyword in text for keyword in keywords)

    alive = mentions("temporary", "non-canon")
    extant = mentions("physical")
    # "unavoidable" overrides the markers that would make the death dependent.
    dependent = mentions("variable", "alternate universe") and not mentions("unavoidable")

    if alive:
        return 'alive'
    if extant and not dependent:
        return 'extant'
    if dependent and not extant:
        return 'dependent'
    if not dependent and not extant:
        return 'deceased'
    # NOTE(review): extant AND dependent has no assigned status; the empty
    # string is kept for that case. Confirm the intended label.
    return ''


# Assigning the whole column at once avoids chained assignment
# (`deaths['resulting status'][i] = ...`), which is not guaranteed to write
# through to the frame and is a no-op under pandas Copy-on-Write.
deaths['resulting status'] = deaths['type'].map(resulting_status)

In [8]:
# Lower number = more final outcome. A character's last known status is the
# most final status among all of their recorded deaths.
priority = {"deceased": 1, "dependent": 2, "extant": 3, "alive": 4}

status = deaths.copy()
# Statuses that are not in `priority` (e.g. an empty string) rank last instead
# of becoming NaN, so idxmin always has a row to pick for every group.
status["priority"] = status["resulting status"].map(priority).fillna(len(priority) + 1)
status = status.loc[status.groupby(["universe", "character"])["priority"].idxmin()]
status = (
    status
    .drop(columns=["priority", "responsible party", "description", "type"])
    .reset_index(drop=True)
    .rename(columns={"resulting status": "last known status"})
)

# Remove columns left behind by an earlier run of this cell, so that re-running
# it does not produce "_x"/"_y" duplicates in the merge below.
merge_keys = ['universe', 'character']
stale_columns = [column for column in status.columns if column not in merge_keys]
summary = summary.drop(columns=stale_columns, errors='ignore')

summary = pd.merge(summary, status, on=merge_keys, how='outer')
# Characters with no recorded death (killers only) have no status row: they are alive.
summary["last known status"] = summary["last known status"].fillna("alive")

In [9]:
# Write the three result tables as tab-separated files, without the index column.
for filename, frame in (('deaths.tsv', deaths), ('kills.tsv', kills), ('summary.tsv', summary)):
    frame.to_csv(filename, sep='\t', index=False)

### Generating a Summary of Universes

In [10]:
# One row per (universe, entry) with the number of deaths recorded in it.
deaths_summary = deaths.groupby(["universe", "entry"]).size().reset_index(name="deaths")
deaths_table = deaths_summary.to_markdown(tablefmt='github', index=False)

# The table replaces the whole README. Mode 'w' truncates like the previous
# seek/write/truncate sequence did, but also creates the file when it is missing.
# UTF-8 is set explicitly so names with non-ASCII characters are written the same
# way on every platform.
with open('README.md', 'w', encoding='utf-8') as f:
    f.write(deaths_table)

### Visualizing Death Links using PyVis

In [11]:
# Node styling table: one row per character with the colour of its last known status.
status = (
    summary
    .drop(columns=["deaths", "killed", "responsible for", "total kills"])
    .rename(columns={"last known status": "color"})
)

colors = {"alive" : "#107a06",
          "extant" : "#0990b5",
          "dependent" : "#631da3",
          "deceased" : "#730a1c"}

# Assign the replaced column back to the frame. An in-place replace on
# `status["color"]` acts on a temporary object and is not guaranteed to reach
# `status` (it does nothing under pandas Copy-on-Write).
status["color"] = status["color"].replace(colors)
# Node ids carry the universe so equal names in different universes stay distinct.
status["character"] = status["character"] + " [" + status["universe"] + "]"

In [12]:
# Turn both edge endpoints into node ids of the form "name [universe]".
# NOTE: this rewrites `edges` in place, so running the cell a second time on the
# same kernel appends the suffix again.
universe_suffix = " [" + edges['universe'] + "]"
edges['killer'] = edges['killer'] + universe_suffix
# A death without a killer had an empty name, which is now just the suffix; reset it.
edges.loc[edges['killer'].str.startswith(' ['), 'killer'] = ''
edges['victim'] = edges['victim'] + universe_suffix

# Every distinct endpoint becomes a node.
endpoint_ids = pd.concat([edges['victim'], edges['killer']])
nodes = pd.DataFrame({
    'character': endpoint_ids.drop_duplicates().sort_values().reset_index(drop=True)
})

# Attach the status colour prepared in `status`.
nodes = pd.merge(nodes, status, on='character', how='outer')
nodes = nodes.sort_values(by=['character', 'color'])

# One text block per character listing all of their death descriptions, one per line.
descriptions = (
    deaths
    .groupby(by=['universe', 'character'])['description']
    .agg('\n'.join)
    .reset_index()
)
descriptions['character'] = descriptions['character'] + " [" + descriptions['universe'] + "]"
descriptions = descriptions.drop(columns=['universe'])

nodes = pd.merge(nodes, descriptions, on=['character'], how='outer')

# Filled by make_network with one row per rendered node.
profiles = pd.DataFrame(columns=['universe', 'character', 'profile'])

In [13]:
def make_network(universe):
    """Render the kill network of one universe to ``Networks/<universe>.html``.

    Reads the notebook-level ``edges`` and ``nodes`` frames and appends one row
    per rendered node (universe, character, tooltip text) to the notebook-level
    ``profiles`` frame.

    Args:
        universe: Universe name as stored in ``edges['universe']`` and in the
            " [universe]" suffix of the node ids.
    """
    net = Network(height="100vh", width="100%", bgcolor="#111111", directed=True, notebook=False)
    net.force_atlas_2based(overlap=1, damping=0.5)

    current_edges = edges[edges['universe'] == universe]
    current_nodes = nodes[nodes['character'].str.endswith('[' + universe + ']')]

    for _, node_row in current_nodes.iterrows():
        node_id = node_row['character']
        if node_id == '':
            continue

        color = node_row['color']
        label = node_id.split(" [")[0]
        deaths_text = node_row['description']
        title = label + "\n\nCharacter Profile :" + "\n\n\n"

        # Characters that only appear as killers have no description (NaN).
        if pd.notna(deaths_text):
            death_count = deaths_text.count('\n') + 1
            plural = 's' if death_count != 1 else ''
            title += str(death_count) + " Death" + plural + " :\n\n" + deaths_text + "\n\n"

        font = f"15px baskerville {color} sans-serif"
        net.add_node(node_id, label=label, title=title, color=color, font=font)

    # Direct kills are drawn solid, indirect responsibility dashed.
    edge_styles = {
        'direct': {'color': '#e8ac87', 'dashes': False},
        'indirect': {'color': '#cccccc', 'dashes': True},
    }
    edge_data = zip(current_edges['killer'], current_edges['victim'],
                    current_edges['direction'], current_edges['description'])
    for src, dst, direction, des in edge_data:
        # Deaths without a killer have an empty source and produce no edge.
        if src != '' and direction in edge_styles:
            net.add_edge(src, dst, title=des, width=1, **edge_styles[direction])

    neighbor_map = net.get_adj_list()

    for node in net.nodes:
        victims = sorted(neighbor_map[node['id']])
        value = len(victims)
        if value != 0:
            plural = 's' if value != 1 else ''
            node['title'] += str(value) + " Victim" + plural + " :\n\n"
            # Strip the " [universe]" suffix from the victim names only, so
            # bracketed text inside the death descriptions is left untouched.
            node['title'] += "\n".join(re.sub(r' \[.*?\]', '', victim) for victim in victims)
        # Node size scales with the number of victims.
        node['value'] = value

    net.inherit_edge_colors(False)
    net.set_edge_smooth('dynamic')

    # Create the output folder on first use instead of failing on a fresh checkout.
    os.makedirs('Networks', exist_ok=True)
    net.save_graph('Networks/' + universe + '.html')

    for node in net.nodes:
        character = node['id'].split(' [')[0]
        profiles.loc[len(profiles)] = [universe, character, node['title']]

In [14]:
# Render one network file for every universe that appears in the kills table.
for universe in kills['universe'].drop_duplicates():
    make_network(universe)