### Collecting Deaths from their Lists

In [None]:
import os
import pandas as pd
import re
import warnings

from collections import Counter
from pyvis.network import Network

warnings.filterwarnings('ignore')

In [None]:
# Empty accumulator tables; rows are appended while the universe files are read.
# NOTE(review): later cells read kills['caused'] and kills['responsible for'],
# which are not declared here - confirm where those columns should come from.
kills, deaths, edges = (
	pd.DataFrame(columns=list(names))
	for names in (
		('universe', 'character', 'killed', 'victims'),
		('universe', 'entry', 'character', 'responsible party', 'death', 'type'),
		('universe', 'killer', 'victim', 'direction', 'death', 'type'),
	)
)

In [None]:
# Walk the 'Universes' tree and turn every death listed in a .tsv file into
# rows of the kills, edges and deaths tables.
for path, subdirs, files in os.walk('Universes'):
	for name in files:
		if not name.endswith('.tsv'):
			continue

		file = os.path.join(path, name)
		# Split on the platform separator: a hard-coded '\\' only works on
		# Windows and makes parts[1] raise IndexError everywhere else.
		parts = os.path.normpath(file).split(os.sep)

		universe = parts[1]
		entry = name.replace('.tsv', '')

		# A file nested one directory below its universe belongs to a world.
		world = parts[2] if len(parts) > 3 else ''

		tsv = pd.read_csv(file, sep='\t')
		# NOTE(review): the edges table declares a 'direction' column, but no
		# value was supplied for it, so every edge row had 5 values for 6
		# columns. It is read here from an optional TSV column of the same
		# name and left empty otherwise - confirm the intended source.
		has_direction = 'direction' in tsv.columns

		for i in range(len(tsv)):
			character = tsv['character'][i]
			if world:
				character = f"{character} ({world})"
			killer = str(tsv['killed by'][i]).split(' | ')
			description = tsv['death'][i]
			type_var = tsv['type'][i]
			direction = tsv['direction'][i] if has_direction else None

			# An empty 'killed by' cell is read as NaN: nobody is responsible.
			if killer == ['nan']:
				killer = []

			if world:
				killer = [f"{k} ({world})" for k in killer if k]

			# One kill row and one edge per responsible party.
			for culprit in killer:
				kills.loc[len(kills)] = [universe, culprit, 1, character]
				edges.loc[len(edges)] = [universe, culprit, character, direction, description, type_var]

			# Deaths without a killer still get an edge row with an empty source.
			if not killer:
				edges.loc[len(edges)] = [universe, '', character, direction, description, type_var]

			deaths.loc[len(deaths)] = [universe, entry, character, killer, description, type_var]

### Replacing Groups with Character Names

In [None]:
groups = pd.read_csv('Database/group.tsv', sep='\t')

rows = []
drops = []

# Replace every kill row whose killer or victim is a group name with one row
# per group member.
for i in range(len(kills)):
	universe = kills['universe'][i]
	character = kills['character'][i]
	victim = kills['victims'][i]
	pool = groups[groups['universe'] == universe]

	for group, members in zip(pool['group'], pool['characters']):
		members = str(members).split(' | ')

		if character == group:
			# Copy the whole row instead of naming columns one by one: the
			# hard-coded 'caused' / 'responsible for' lookups raised KeyError
			# because the kills table does not declare those columns.
			rows.extend({**kills.loc[i].to_dict(), 'character': member} for member in members)
			drops.append(i)

		if victim == group:
			rows.extend({**kills.loc[i].to_dict(), 'victims': member} for member in members)
			drops.append(i)

# The group-level rows are superseded by their per-member expansions.
kills.drop(index=drops, inplace=True)
kills = pd.concat([kills, pd.DataFrame(rows)], ignore_index=True)


# Expand group names inside the deaths table: a group that died becomes the
# list of its members, and groups among the responsible party are replaced by
# their members in place.
for i in range(len(deaths)):
	row_universe = deaths['universe'][i]
	deceased = deaths['character'][i]
	culprits = deaths['responsible party'][i]

	candidates = groups[groups['universe'] == row_universe]
	roster = [
		(group_name, str(listed).split(' | '))
		for group_name, listed in zip(candidates['group'], candidates['characters'])
	]

	for group_name, members in roster:
		if deceased == group_name:
			deaths.at[i, 'character'] = members

	expanded = []
	for culprit in culprits:
		matches = [members for group_name, members in roster if culprit == group_name]
		if matches:
			for members in matches:
				expanded.extend(members)
		else:
			# Not a group: keep the name as it is.
			expanded.append(culprit)

	deaths.at[i, 'responsible party'] = expanded


rows = []
drops = []

# Replace every edge whose killer or victim is a group name with one edge per
# group member.
for i in range(len(edges)):
	universe = edges['universe'][i]
	killer = edges['killer'][i]
	victim = edges['victim'][i]
	pool = groups[groups['universe'] == universe]

	for group, members in zip(pool['group'], pool['characters']):
		members = str(members).split(' | ')

		if killer == group:
			rows.extend({**edges.loc[i].to_dict(), 'killer': member} for member in members)
			drops.append(i)

		if victim == group:
			rows.extend({**edges.loc[i].to_dict(), 'victim': member} for member in members)
			drops.append(i)

# The group-level edges are superseded by their per-member expansions.
edges.drop(index=drops, inplace=True)
edges = pd.concat([edges, pd.DataFrame(rows)], ignore_index=True)

# drop=True keeps the old index from being inserted as a stray 'index' column.
kills.reset_index(drop=True, inplace=True)
edges.reset_index(drop=True, inplace=True)

### Checking for Character Variants

In [None]:
variants = pd.read_csv('Database/variant.tsv', sep='\t')
worlds = pd.read_csv('Database/world.tsv', sep='\t')


# Map (universe, name) -> set of base characters, where name is either the
# base character itself or one of its ' / '-separated variants.
variant_lookup = {}
for universe, base, listed in zip(variants['universe'], variants['character'], variants['variants']):
	names = [base]
	names.extend(alias.strip() for alias in listed.split(' / '))
	for name in names:
		variant_lookup.setdefault((universe, name), set()).add(base)


def world_variants(df):
	"""Register world-suffixed character names from *df* in ``variant_lookup``.

	A name of the form ``"Base (World)"`` whose world is listed for the row's
	universe in the module-level ``worlds`` table is recorded as a variant of
	``Base``; ``Base`` itself is recorded as well.

	The 'character' cell may hold a single name or a list of names (group
	deaths are stored as the list of group members); every name is checked.
	Values that are not strings are skipped instead of raising TypeError.

	Args:
		df: table with 'universe' and 'character' columns.

	Returns:
		None. ``variant_lookup`` is updated in place.
	"""
	pattern = re.compile(r'^(?P<base>.+?)\s*\((?P<world>.+?)\)$')

	for _, row in df.iterrows():
		universe = row['universe']
		character = row['character']
		names = character if isinstance(character, list) else [character]

		for name in names:
			if not isinstance(name, str):
				continue

			match = pattern.match(name)
			if not match:
				continue

			base = match.group('base').strip()
			world = match.group('world').strip()

			# Only parenthesised suffixes that name a known world count.
			if ((worlds['universe'] == universe) & (worlds['world'] == world)).any():
				variant_lookup.setdefault((universe, name), set()).add(base)
				variant_lookup.setdefault((universe, base), set()).add(base)


# Register world-suffixed names from both tables, then turn each set of base
# names into a list.
for table in (kills, deaths):
	world_variants(table)

variant_lookup = {key: list(bases) for key, bases in variant_lookup.items()}

def inject_variants(df):
	"""Return *df* with an extra "Variants of <base>" row after each variant row.

	Every input row is kept. For each name in the row's 'character' cell that
	is a key of the module-level ``variant_lookup``, a copy of the row is added
	whose character is ``'Variants of ' + base``. Matches registered under the
	wildcard universe ``'*'`` also get their universe set to ``'*'``.

	The 'character' cell may hold a single name or a list of names (group
	deaths are stored as the list of group members). A list is checked member
	by member; using it directly as a dictionary key raised TypeError.

	Args:
		df: table with 'universe' and 'character' columns.

	Returns:
		A new DataFrame containing the original and the aggregate rows.
	"""
	expanded = []
	for _, row in df.iterrows():
		record = row.to_dict()
		expanded.append(record)

		character = row['character']
		universe = row['universe']
		names = character if isinstance(character, list) else [character]

		for name in names:
			for base in variant_lookup.get((universe, name), ()):
				expanded.append({**record, 'character': 'Variants of ' + base})

			for base in variant_lookup.get(("*", name), ()):
				expanded.append({**record, 'universe': '*', 'character': 'Variants of ' + base})

	return pd.DataFrame(expanded)


# Add the "Variants of ..." aggregate rows to both tables.
kills, deaths = inject_variants(kills), inject_variants(deaths)

### Replacing Character Names with Aliases

In [None]:
aliases = pd.read_csv('Database/alias.tsv', sep='\t')
# Each alias entry lists interchangeable names separated by ' / '.
aliases['listed'] = aliases['character'].str.split(' / ')


# Rewrite killers and victims in the kills table to the full alias entry that
# lists their name.
for i in range(len(kills)):
	row_universe = kills['universe'][i]
	culprit = kills['character'][i]
	target = kills['victims'][i]
	candidates = aliases[aliases['universe'] == row_universe]

	for names, canonical in zip(candidates['listed'], candidates['character']):
		if culprit in names:
			kills.at[i, 'character'] = canonical
		if target in names:
			kills.at[i, 'victims'] = canonical


# Rewrite the deceased and every member of the responsible party in the deaths
# table to the full alias entry that lists their name.
for i in range(len(deaths)):
	row_universe = deaths['universe'][i]
	deceased = deaths['character'][i]
	culprits = deaths['responsible party'][i]
	candidates = aliases[aliases['universe'] == row_universe]

	for names, canonical in zip(candidates['listed'], candidates['character']):
		if deceased in names:
			deaths.at[i, 'character'] = canonical

		# Only write the party back when this alias entry changed something.
		if any(member in names for member in culprits):
			culprits = [canonical if member in names else member for member in culprits]
			deaths.at[i, 'responsible party'] = culprits


# Rewrite both ends of every edge to the full alias entry that lists the name.
for i in range(len(edges)):
	row_universe = edges['universe'][i]
	source = edges['killer'][i]
	target = edges['victim'][i]
	candidates = aliases[aliases['universe'] == row_universe]

	for names, canonical in zip(candidates['listed'], candidates['character']):
		if source in names:
			edges.at[i, 'killer'] = canonical
		if target in names:
			edges.at[i, 'victim'] = canonical

### Pinpointing the Causes and Manner of Deaths [WORK IN PROGRESS]

In [None]:
# Keyword table: cause-of-death label -> lowercase phrases that assign_causes
# looks for in the lowercased death description. A description can receive
# several labels.
# NOTE(review): 'cancer' is listed under both 'Cancer' and 'Disease', so such
# a death is tagged with both labels - confirm that this is intended.
causes = {
	'Asphyxia' : ['asphyxiated', 'suffocated'],
	'Beating' : ['beaten', 'kicked', 'obliterated'],
	'Bisection' : ['bisected', 'bifurcated', 'in half', 'half severed'],
	'Blunt Force Trauma' : ['bludgeoned', 'hit on the head', 'hit in the head', 'bashed in', 'smashed in'],
	'Boiling' : ['boiled', 'fried'],
	'Burning' : ['burned', 'incinerated', 'immolated'],
	'Cancer' : ['cancer'],
	'Cardiac Arrest' : ['heart attack', 'cardiac arrest', 'heart failure'],
	'Cervical Fracture' : ['neck broken', 'vertebrae snapped', 'skull broken', 'skull cracked'],
	'Choking' : ['choked'],
	'Crucifixion' : ['crucified'],
	'Crushing' : ['crushed', 'stomped', 'trampled'],
	'Curse' : ['cursed'],
	'Decapitation' : ['beheaded', 'decapitated', 'head kicked off', 'head bitten off', 'head severed', 'head removed'],
	'Defenestration' : ['defenestrated', 'jumped out of a window'],
	'Dehydration' : ['dehydration', 'dehydrated'],
	'Disease' : ['illness', 'disease', 'fever', 'cancer', 'pneumonia'],
	'Disintegration' : ['disintegrated', 'vaporized'],
	'Dismemberment' : ['dismembered', 'torn apart', 'torn to pieces', 'ripped apart', 'amputated'],
	'Dissolution' : ['dissolved', 'melted'],
	'Drowning' : ['drowned'],
	'Electrocution' : ['electrocuted'],
	'Erasure' : ['erased', 'disappeared', 'vanished'],
	'Exorcism' : ['exorcised'],
	'Explosion' : ['exploded', 'explosion', 'detonate', 'detonated', 'blasted', 'blown up', 'blew up', 'self-destructed', 'bombardment'],
	'Exsanguination' : ['bled out', 'throat', 'neck slashed', 'neck sliced', 'skinned', 'drained'],
	'Falling' : ['thrown off', 'dropped from'],
	'Freezing' : ['frozen', 'froze'],
	'Hanging' : ['hanged'],
	'Immurement' : ['immured'],
	'Impalement' : ['impaled'],
	'Lethal Injection' : ['lethal injection'],
	'Organ Failure' : ['organs destroyed', 'organs burst'],
	'Petrification' : ['petrified', 'crumbled to dust'],
	'Poison' : ['poisoned', 'poison', 'toxic fumes'],
	'Predation' : ['devoured', 'absorbed', 'consumed', 'eaten', 'mauled'],
	'Premature Burial' : ['buried'],
	'Projectile Wound' : ['shot', 'mowed down', 'gunned down'],
	'Senescence' : ['old age'],
	'Stab Wound' : ['stabbed', 'slashed', 'sliced', 'cut down', 'punctured', 'seppuku', 'cleaved', 'chainsawed', 'butchered', 'clawed', 'gutted'],
	'Starvation' : ['starved'],
	'Strangulation' : ['strangled'],
	'Torture' : ['tortured'],
	'Traffic Collision' : ['ran over', 'car accident', 'car crash', 'by a car', 'by a truck', 'by a subway']
}


def assign_causes(description, causes):
	"""Return the cause labels whose keywords occur in *description*.

	Matching is case-insensitive on the description side (keywords are
	expected to be lowercase). A keyword must start at the beginning of a
	word, so 'eaten' no longer matches inside 'beaten' or 'threatened' and
	'hanged' no longer matches inside 'changed'. Suffixes are still allowed,
	so 'detonate' matches 'detonates'.

	Args:
		description: free-text description of a death.
		causes: mapping of cause label -> list of keyword phrases.

	Returns:
		The matching labels in the mapping's iteration order; an empty list
		when nothing matches.
	"""
	desc = description.lower()
	return [
		cause
		for cause, keywords in causes.items()
		if any(re.search(r'(?<!\w)' + re.escape(word), desc) for word in keywords)
	]


# Tag every death with the cause labels detected in its description text.
deaths['causes'] = deaths['death'].map(lambda text: assign_causes(str(text), causes))

In [None]:
# TODO: classify the manner of death (homicide, suicide, natural or accidental)

### Analyzing and Organizing the Deaths

In [None]:
print('----- Deadliest Universes -----')
# Leave out the "Variants of ..." aggregate rows so that no death is counted
# twice. A 'character' cell may hold a list of names (group deaths); its first
# entry is inspected, because calling .startswith on the list itself raised
# AttributeError. The mask is a boolean Series so that an empty table still
# selects rows rather than columns.
is_individual = pd.Series(
	[
		not str(x[0] if isinstance(x, list) and x else x).startswith('Variants of ')
		for x in deaths['character']
	],
	index=deaths.index,
	dtype=bool,
)
death_sum = deaths[is_individual]
death_sum.groupby(by=['universe']).size().sort_values(ascending=False).reset_index(name='deaths')

In [None]:
def join_with_counts(series):
	"""Join the distinct values of *series* with ', ' in first-seen order.

	A value that occurs more than once is written once, followed by its
	number of occurrences as ' (×N)'.
	"""
	tally = Counter(series)
	labelled = []
	for value, times in tally.items():
		labelled.append(value if times == 1 else f"{value} (×{times})")
	return ", ".join(labelled)

In [None]:
# Collapse the kills table to one row per (universe, character).
# The aggregations are named by string: passing the builtin `sum` relies on
# pandas silently mapping it to the groupby sum, which is deprecated.
# NOTE(review): 'caused' and 'responsible for' must already be columns of
# kills at this point - confirm which earlier step is meant to create them.
kills = kills.groupby(by=['universe', 'character']).aggregate({
	'killed': 'sum',
	'caused': 'sum',
	'responsible for': 'sum',
	'victims': join_with_counts,
})
kills.reset_index(inplace=True)
print('----- Deadliest Killer per Universe -----')
# After sorting, the first row kept per universe is its deadliest killer.
kills.drop('victims', axis=1).sort_values(by='responsible for', ascending=False).drop_duplicates('universe').reset_index(drop=True)

In [None]:
# Normalise 'character' to a list per row so group deaths (already lists) and
# single deaths can be exploded into one row per character.
deaths['character'] = deaths['character'].apply(lambda x: x if isinstance(x, list) else [x])
deaths_exploded = deaths.explode('character', ignore_index=True)
deaths_count = deaths_exploded.groupby(['universe', 'character']).size().reset_index(name='deaths')

kills_count = kills.drop(columns=['victims'])
kills_count.columns = ['universe', 'character', 'killed', 'caused', 'responsible for']

# Outer merge: characters that only died or only killed are kept, with the
# missing counts filled with 0.
summary = pd.merge(deaths_count, kills_count, on=['universe', 'character'], how='outer')
summary.fillna(0, inplace=True)
summary.sort_values(by=['universe', 'character'], inplace=True)

# Render every cell as text and strip a trailing '.0' left by float counts.
summary = summary.round(decimals=2).astype(object)
summary = summary.astype(str)
# Raw string: "\." in a normal string literal is an invalid escape sequence.
summary = summary.replace(to_replace=r"\.0+$", value="", regex=True)

In [None]:
deaths['resulting status'] = ''

# Keywords looked for in the free-text 'type' field of each death.
alive_marks = ("revived", "reincarnated", "undone", "non-canon")
extant_marks = ("physical", "brain-dead", "undead")
dependent_marks = ("determinant", "continuant")

for i in range(len(deaths)):
	names = deaths.loc[i, 'character']
	first_name = names[0] if isinstance(names, list) and names else names
	kinds = str(deaths['type'][i])

	variant = str(first_name).startswith('Variants of ')
	alive = any(mark in kinds for mark in alive_marks)
	extant = any(mark in kinds for mark in extant_marks)
	# "unavoidable" overrides the dependent markers.
	dependent = any(mark in kinds for mark in dependent_marks) and "unavoidable" not in kinds

	# Precedence: variants > alive > extant > dependent > deceased.
	if variant:
		result = 'variants'
	elif alive:
		result = 'alive'
	elif extant:
		result = 'extant'
	elif dependent:
		result = 'dependent'
	else:
		result = 'deceased'

	# .at writes into the frame itself; the chained deaths[col][i] = ... form
	# is not guaranteed to update it and does nothing under Copy-on-Write.
	deaths.at[i, 'resulting status'] = result

In [None]:
# When a character has several deaths, the status with the lowest number wins.
priority = {"variants": 0, "deceased": 1, "extant": 2, "dependent": 3, "alive": 4}

# One row per (death, character), with 'character' normalised to a list first.
status = (
	deaths
	.assign(character=deaths["character"].apply(lambda x: x if isinstance(x, list) else [x]))
	.explode("character", ignore_index=True)
)
status["priority"] = status["resulting status"].map(priority)

# Keep, per character, the row holding the lowest-priority number.
strongest = status.groupby(["universe", "character"])["priority"].idxmin()
status = (
	status.loc[strongest]
	.drop(columns=["priority", "responsible party", "death", "type"])
	.reset_index(drop=True)
	.rename(columns={"resulting status": "last known status"})
)

# Characters without any recorded death have no status row: they are alive.
summary = summary.merge(status, on=["universe", "character"], how="outer")
summary["last known status"] = summary["last known status"].fillna("alive")

In [None]:
# Write the three result tables as tab-separated files without the index.
for table, target in ((deaths, 'deaths.tsv'), (kills, 'kills.tsv'), (summary, 'summary.tsv')):
	table.to_csv(target, sep='\t', index=False)

### Generating the README file

In [None]:
# Static head of the README: status glossary, list of incomplete entries and
# the heading under which the per-universe table is appended afterwards.
progress = """### Terminology for Character Statuses

- **deceased** - the character stayed dead.
- **alive** - the character did not die or did not stay dead.
- - **revived**      - this death was nullified when the character was brought back from the dead.
- - **reincarnated** - this death was nullified when the character was reborn with their memories.
- - **undone**       - this death was nullified via time travel.
- - **non-canon**    - this death happened, but is not considered canon when it comes to the character's status.
- **dependent** - the character may or may not be deceased depending on certain factors.
- - **determinant** - this death happening depends upon a player's choice.
- - **continuant**  - this death exists but not in every adaptation of a story, and the character's canonical status is unspecified.
- - **unavoidable** - this death happens differently depending upon a choice but always happens nonetheless.
- **extant** - the character is arguably deceased yet still remains due to specific circumstances.
- - **physical**   - this death concerns only a character's body, leaving their mind or soul alive.
- - **brain-dead** - this death left the character only physically alive, with no brain activity possible.
- - **undead**     - this death turned the character into an undead being such as a zombie or a ghost.
- **variants** - the 'character' refers here to a cumulative summary of all different versions of a character.


### Status of Incomplete Entries

- Dr. Stone - missing characters revived via petrification
- Fate Grand Order - up to and including the **Third Singularity - Okeanos**
- Gantz - up to and including the **Dinosaur Alien Mission Arc**
- Hunter × Hunter - up to and including the **13th Hunter Chairman Election Arc**
- Limbus Company - up to and including **Intervallo VI: Spring Cultivation**
- Re:Zero - up to and including **Arc 3 - Return to the Royal Capital**


### Summary of Universes and Entries

"""

# Start the README from scratch on every run.
# NOTE(review): no encoding is passed, so the platform default is used even
# though the text contains the non-ASCII character '×' - confirm this is fine.
with open('README.md', 'w+') as readme:
	readme.write(progress)

In [None]:
# Count deaths per (universe, entry). "Variants of ..." aggregate rows count
# as 0 so that no death is counted twice; wildcard-universe rows are left out.
deaths_summary = deaths.copy()
deaths_summary['deaths'] = [
	0 if str(x[0] if isinstance(x, list) and x else x).startswith('Variants of ') else 1
	for x in deaths_summary['character']
]
deaths_summary = deaths_summary[deaths_summary['universe'] != '*']
# Only the 'deaths' column is aggregated, so the other columns need no
# explicit drop. 'sum' is named by string because passing the builtin is
# deprecated.
deaths_summary = deaths_summary.groupby(["universe", "entry"]).aggregate({"deaths": "sum"})
deaths_summary = deaths_summary.reset_index()

final_rows = []

# First row: grand total over all universes. The literals are raw strings
# because "\*" is an invalid escape sequence; the emitted text is unchanged.
grand_total = deaths_summary["deaths"].sum()
final_rows.append({
	"universe": r"**\***",
	"entry": r"**\***",
	"deaths": f"**{grand_total}**"
})

# Then, per universe: a bold subtotal row followed by its entries.
for universe, group in deaths_summary.groupby("universe"):
	universe_total = group["deaths"].sum()

	final_rows.append({
		"universe": f"**{universe}**",
		"entry": r"**\***",
		"deaths": f"**{universe_total}**"
	})

	final_rows.extend(group.to_dict(orient='records'))

final_table = pd.DataFrame(final_rows)
deaths_table = final_table.to_markdown(tablefmt='github', index=False)

# Append the table to the README.
with open('README.md', 'a+') as f:
	f.write(deaths_table)

### Visualizing Death Links using PyVis

In [None]:
# Per-character node colour, derived from the last known status.
status = summary.drop(columns=["deaths", "killed", "caused", "responsible for"])
status = status.rename(columns={"last known status": "color"})

colors = {"variants" : "#f0f0f0",
		  "alive" : "#107a06",
		  "extant" : "#0990b5",
		  "dependent" : "#631da3",
		  "deceased" : "#730a1c"}

# Assign the result back: an in-place replace on status["color"] acts on a
# temporary column object and does not update the frame under Copy-on-Write.
status["color"] = status["color"].replace(colors)
# Node ids have the form "<character> [<universe>]".
status["character"] += " [" + status["universe"] + "]"

In [None]:
# Turn both ends of every edge into node ids of the form
# "<character> [<universe>]"; an edge without a killer keeps an empty source.
tag = " [" + edges['universe'] + "]"
edges['killer'] = edges['killer'] + tag
edges.loc[edges['killer'].str.startswith(' ['), 'killer'] = ''
edges['victim'] = edges['victim'] + tag

# Every distinct edge end becomes a node, joined with its colour.
names = pd.concat([edges['victim'], edges['killer']]).drop_duplicates().sort_values().reset_index(drop=True)
nodes = names.to_frame(name='character')

nodes = pd.merge(nodes, status, on='character', how='outer')
nodes = nodes.sort_values(by=['character', 'color'])

# One deaths row per character, so the descriptions can be grouped by name.
deaths["character"] = deaths["character"].apply(lambda x: x if isinstance(x, list) else [x])
deaths = deaths.explode("character", ignore_index=True)

# All death descriptions of a character, one per line, keyed by node id.
descriptions = (
	deaths.groupby(by=['universe', 'character'])
	.aggregate({'death': '\n'.join})
	.reset_index()
)
descriptions['character'] = descriptions['character'] + " [" + descriptions['universe'] + "]"
descriptions = descriptions.drop(columns=['universe'])

nodes = pd.merge(nodes, descriptions, on=['character'], how='outer')

# Filled by make_network with one profile text per drawn node.
profiles = pd.DataFrame(columns=['universe', 'character', 'profile'])

In [None]:
def make_network(universe):
	"""Build and save the PyVis death network of one universe.

	Reads the module-level ``edges`` and ``nodes`` tables, writes the graph to
	``Networks/<universe>.html`` and appends one row per drawn node to the
	module-level ``profiles`` table. Nothing is done for the wildcard
	universe ``'*'``.

	Args:
		universe: name of the universe to draw.

	Returns:
		None.
	"""
	if universe == '*':
		return

	net = Network(height="100vh", width="100%", bgcolor="#111111", directed=True, notebook=False)
	net.force_atlas_2based(overlap=1, damping=0.5)

	# Node ids end with "[<universe>]", which is what selects this universe's nodes.
	current_edges = edges[edges['universe'] == universe]
	current_nodes = nodes[nodes['character'].str.endswith('[' + universe + ']')]

	sources, targets, direction = current_edges['killer'], current_edges['victim'], current_edges['direction']
	description = current_edges['death']
	edge_data = zip(sources, targets, direction, description)

	# Add one node per character; the hover title starts with the profile
	# header followed by the character's deaths.
	for i in range(len(current_nodes)):
		victim = current_nodes.iloc[i]['character']
		color = current_nodes.iloc[i]['color']
		label = victim.split(" [")[0]
		description = current_nodes.iloc[i]['death']
		title = label + "\n\nCharacter Profile :" + "\n\n\n"

		# x == x is False only for NaN, i.e. for characters without a recorded death.
		if description == description:
			# Descriptions are joined with newlines, one line per death.
			value = description.count('\n') + 1
			if value != 1:
				plural = 's'
			else:
				plural = ''

			title += str(value) + " Death" + plural + " :\n\n" + description + "\n\n"
		else:
			title += ''

		# The empty id stands for "no killer" and is not drawn.
		if victim != '':
			font = f"15px baskerville {color} sans-serif"
			net.add_node(victim, label=label, title=title, color=color, font=font)

	# Add the edges: solid for 'direct', dashed for 'indirect'. Edges without
	# a source or with any other direction value are not drawn.
	# NOTE(review): the loop variable `dir` shadows the builtin of that name.
	for src, dst, dir, des in edge_data:
		if src != '':
			if dir == 'direct':
				net.add_edge(src, dst, title=des, color='#e8ac87', width=1, dashes=False)
			elif dir == 'indirect':
				net.add_edge(src, dst, title=des, color='#cccccc', width=1, dashes=True)

	neighbor_map = net.get_adj_list()
	edges_map = net.get_edges()

	# Number of drawn edges per (source, target) pair.
	pair_counts = Counter((e['from'], e['to']) for e in edges_map)

	def format_neighbors_with_counts(node_id):
		"""Return the neighbours of *node_id*, one per line and sorted, with ' (×N)' for repeated pairs."""
		raw = neighbor_map.get(node_id, [])
		seen = []
		counts = {}
		for nbr in raw:
			if nbr not in counts:
				counts[nbr] = pair_counts.get((node_id, nbr), 0)
				seen.append(nbr)
		formatted = []
		for nbr in seen:
			cnt = counts[nbr]
			if cnt > 1:
				formatted.append(f"{nbr} (×{cnt})")
			else:
				formatted.append(nbr)
		return "\n".join(sorted(formatted))

	# Append the victim list to each node's title; the node value is the
	# number of its neighbours in the adjacency list.
	for node in net.nodes:
		nid = node['id']
		count_unique = len(neighbor_map.get(nid, []))
		if count_unique:
			plural = 's' if count_unique != 1 else ''
			node['title'] += f"{count_unique} Victim{plural} :\n\n"
			node['title'] += format_neighbors_with_counts(nid)
			# Strip the " [<universe>]" tags from the displayed names.
			node['title'] = re.sub(r' \[.*?\]', '', node['title'])
		node['value'] = count_unique

	net.inherit_edge_colors(False)
	net.set_edge_smooth('dynamic')

	net.save_graph('Networks/' + universe + '.html')

	# Record every node's final title as its profile. The parameter
	# `universe` is rebound here from the node id; this happens only after
	# the graph has been saved.
	for node in net.nodes:
		universe = node['id'].split(' [')[1].split(']')[0]
		character = node['id'].split(' [')[0]
		profile = node['title']
		row = [universe, character, profile]
		profiles.loc[len(profiles)] = row

In [None]:
# Draw one network per universe present in the kills table.
for name in kills['universe'].unique():
	make_network(name)