In [239]:
import re
from collections import deque
from functools import partial

import numpy as np
import pandas as pd

# Read the sheet with no header row so every cell, including the label row, is data.
df = pd.read_excel('node-edges.xlsx', header=None)

# Row 61 supplies the column labels. Every odd-positioned column is renamed to
# 'NN: Destination', where NN counts the odd columns from 00. The labels are
# built as a fresh list and assigned once instead of writing into
# `df.columns.values`, which mutates the array behind an immutable Index.
header_labels = df.iloc[61]
df.columns = [
    ("%02d" % (position // 2)) + ': Destination' if position % 2 != 0 else label
    for position, label in enumerate(header_labels)
]

# Only the first 30 rows are kept as data.
df = df.iloc[0:30]

# Cut the frame into two-column slices: columns (0, 1), (2, 3), ...
# NOTE(review): range() stops before len(df.columns), so when the column count
# is even the final column pair is never sliced -- confirm this is intended.
slices = []
for i in range(2, len(df.columns), 2):
    s = df.iloc[:, i-2:i].dropna()
    s = s[s.iloc[:, 1] != '-']  # edges are marked at entrance AND destination, so missing values can be removed
    slices.append(s)

# Key each slice by the label of its first column.
slices_dict = {s.iloc[:, 0].name: s for s in slices}

def extract_prefix(index, row):
    """Return the first ``index`` space-separated words of ``row[0]``.

    ``row`` is anything indexable whose element 0 is a string. The words are
    re-joined with single spaces; if the string has fewer than ``index``
    words, all of it is returned.
    """
    label = row[0]
    # Splitting at most `index` times leaves the remainder in one trailing
    # chunk, which the slice then discards.
    leading_words = label.split(" ", index)[:index]
    return " ".join(leading_words)

def extract_prefix_aqua_hideout(row):
    """Prefix extractor passed to process_sub_nodes for '21: Aqua Hideout'.

    Keeps four words when ``row[0]`` contains 'Warps' or 'B1F', otherwise
    three.
    """
    label = row[0]
    word_count = 4 if ('Warps' in label or 'B1F' in label) else 3
    return extract_prefix(word_count, row)
    
def extract_prefix_abandoned_ship(row):
    """Prefix extractor passed to process_sub_nodes for '13: Abandoned Ship'.

    Keeps three words when ``row[0]`` contains 'B1F' or 'R2', otherwise four.
    """
    label = row[0]
    word_count = 3 if ('B1F' in label or 'R2' in label) else 4
    return extract_prefix(word_count, row)
    
def extract_prefix_meteor_falls(row):
    """Prefix extractor passed to process_sub_nodes for '15: Meteor Falls'.

    Keeps four words when ``row[0]`` contains 'B1F' or 'Rooms', otherwise
    three.
    """
    label = row[0]
    word_count = 4 if ('B1F' in label or 'Rooms' in label) else 3
    return extract_prefix(word_count, row)

def split_sub_nodes(node, extract_prefix):
    """Split ``node`` into runs of consecutive rows that share a prefix.

    Parameters
    ----------
    node : pandas.DataFrame
        Frame whose rows are grouped; rows are visited in positional order.
    extract_prefix : callable
        Called with one row (``node.iloc[position]``) and returns the value
        that identifies the row's group.

    Returns
    -------
    list of pandas.DataFrame
        One positional slice of ``node`` per run, in original row order.
        Only adjacent rows are merged, so a prefix that reappears after a
        different one starts a new group. An empty ``node`` yields ``[]``.
    """
    prefixes = [extract_prefix(node.iloc[position]) for position in range(len(node))]

    sub_nodes = []
    start = 0
    # Close the current run whenever the prefix changes or the rows run out.
    # Comparing against the run's own first prefix avoids a sentinel value,
    # so a genuinely empty prefix on the first row cannot be mistaken for
    # "no group yet" and dropped.
    for position in range(1, len(prefixes) + 1):
        if position == len(prefixes) or prefixes[position] != prefixes[start]:
            sub_nodes.append(node.iloc[start:position])
            start = position
    return sub_nodes


def process_sub_nodes(key_str, extract_prefix, slices_dict):
    """Replace one entry of ``slices_dict`` with one entry per sub-node.

    The frame stored under ``key_str`` is removed, split with
    ``split_sub_nodes`` and each piece is stored back under a new key in
    which the text after ': ' in ``key_str`` is replaced by the piece's
    prefix. The piece's ``key_str`` column is renamed to that new key.

    Parameters
    ----------
    key_str : str
        Existing key in ``slices_dict``; ``KeyError`` is raised if absent.
    extract_prefix : callable
        Row -> prefix function, used both for splitting and for naming.
    slices_dict : dict
        Mutated in place; nothing is returned.

    Notes
    -----
    Pieces that produce the same new key overwrite each other, so only the
    last one is kept.
    """
    sub_nodes = split_sub_nodes(slices_dict.pop(key_str), extract_prefix)

    for node in sub_nodes:
        prefix = extract_prefix(node.iloc[0])
        # The regex matches everything after the ': ' separator. A function
        # replacement inserts the prefix literally; a plain string would be
        # treated as a template and have its backslashes interpreted.
        new_key = re.sub('(?<=: ).*', lambda _match: prefix, key_str)
        slices_dict[new_key] = node.rename({key_str: new_key}, axis='columns')

# Columns that get broken up into sub-nodes, each paired with the function
# that extracts a row's sub-node prefix. They are processed in listed order.
sub_node_splits = [
    ('00: Places of Interest', partial(extract_prefix, 2)),
    ('12: Granite Cave', partial(extract_prefix, 3)),
    ('13: Abandoned Ship', extract_prefix_abandoned_ship),
    ('15: Meteor Falls', extract_prefix_meteor_falls),
    ('18: Mt Pyre', partial(extract_prefix, 3)),
    ('20: Magma Hideout', partial(extract_prefix, 3)),
    ('21: Aqua Hideout', extract_prefix_aqua_hideout),
    ('24: Seafloor Cavern', partial(extract_prefix, 3)),
    ('25: Pokecenters', partial(extract_prefix, 2)),
    ('26: Victory Road', partial(extract_prefix, 3)),
    ('28: E4 Pokecenter', partial(extract_prefix, 3)),
    ('31: Sky Pillar', partial(extract_prefix, 3)),
]
for column_key, prefix_extractor in sub_node_splits:
    process_sub_nodes(column_key, prefix_extractor, slices_dict)

slices = slices_dict.values()

# nodes: node name -> record holding its transitions plus search bookkeeping.
# edges: entrance label -> name of the node that owns that entrance.
nodes = {}
edges = {}
for s in slices:
    column_label = s.iloc[:, 0].name
    # Drop the 'NN:' number and the surrounding whitespace from the label.
    node_name = column_label.split(':')[1].strip()

    # Every entrance in the first column belongs to this node.
    edges.update(dict.fromkeys(s.iloc[:, 0], node_name))

    # Each row is an (entrance, destination) pair.
    transitions = [{'entrance': entrance, 'destination': destination}
                   for entrance, destination in s.iloc]
    nodes[node_name] = {'node_name': node_name,
                        'transitions': transitions,
                        'visited': False,
                        'previous': {}}
    
class Graph:
    """Holds the two lookup tables the search functions read.

    ``nodes`` and ``edges`` are stored by reference, not copied. In this
    notebook ``nodes`` maps a node name to its record and ``edges`` maps an
    entrance label to the name of the node that owns it.
    """

    def __init__(self, nodes, edges):
        self.nodes, self.edges = nodes, edges
        
# Bundle the node records and the entrance lookup into a single object.
graph = Graph(nodes, edges)

In [242]:
# List the name of every node in the graph, one per line.
for node_key in graph.nodes:
    print(node_key)

Oldale/Petalburg
Rustboro
Dewford
Slateport/Mauville/Verdanturf
Fallarbor/Lavaridge
Fortree/Lilycove
Mossdeep
Pacifidlog
Sootopolis
Petalburg Woods
Rusturf Tunnel
Jagged Pass
Mirage Tower
Route 119/123
Surf Hub
Lilycove Contest Hall
Lilycove Department Store
Waterfall Hub
Navel Rock
Dive Hub
Rustboro Gym
Mauville Gym
Lavaridge Gym
E4 Sidney
E4 Glacia
E4 Drake
Groudon
Ho-oh
Lugia
Regice
Granite Cave 1F
Granite Cave B1F
Granite Cave B2F
Abandoned Ship 1F Left
Abandoned Ship 1F Right
Abandoned Ship Rooms NE
Abandoned Ship Rooms SE
Abandoned Ship B1F
Meteor Falls 1F
Meteor Falls B1F Left
Meteor Falls B1F Right
Mt Pyre 2F
Mt Pyre 3F
Mt Pyre 4F
Mt Pyre 5F
Magma Hideout 1F
Magma Hideout 3F
Aqua Hideout B1F NE
Aqua Hideout B1F S
Aqua Hideout B2F
Aqua Hideout Warps 1
Aqua Hideout Warps 2
Aqua Hideout Warps 3
Aqua Hideout Warps 4
Seafloor Cavern R1
Seafloor Cavern R2
Seafloor Cavern R3
Seafloor Cavern R5
Seafloor Cavern R6
Petalburg Pokecenter
Rustboro Pokecenter
Slateport Pokecenter
Mauville Po

In [240]:
import copy

def graph_algo(src, dest, graph):
    """Breadth-first traversal from ``src`` that records how nodes were reached.

    Every node reachable from ``src`` is marked ``'visited'`` and gets, in its
    ``'previous'`` dict, the name of the node it was first reached from mapped
    to the list of transitions taken. The traversal does not stop early when
    ``dest`` is found.

    Parameters
    ----------
    src, dest : str
        Node names, used as keys of ``graph.nodes``.
    graph : object with ``nodes`` and ``edges`` mappings
        ``nodes`` maps a name to a record with ``'node_name'``,
        ``'transitions'``, ``'visited'`` and ``'previous'``; ``edges`` maps an
        entrance label to a node name. The node records are mutated, so pass
        a copy if the graph is to be searched again.

    Returns
    -------
    ``True`` when ``src == dest`` (nothing is touched); otherwise the record
    ``graph.nodes[dest]``, whose ``'previous'`` is empty if ``dest`` was not
    reached.

    Raises
    ------
    KeyError
        If ``src`` is not in ``graph.nodes``, or ``dest`` is not (the latter
        is only checked after the traversal).
    """
    if src == dest:
        return True
    src_node = graph.nodes[src]
    src_node['visited'] = True
    # deque pops from the front in O(1); list.pop(0) shifts every remaining item.
    queue = deque([src_node])

    while queue:
        current = queue.popleft()
        for t in current['transitions']:
            node_name = graph.edges.get(t['destination'])
            # Some destinations have no owning node (Strength, one-way exits,
            # and the '32: Marts' entries that are not really present in the
            # sheet); skip those.
            if not node_name:
                continue
            node = graph.nodes[node_name]
            if node['visited']:
                continue
            # Remember which node and which transition led here first.
            node['previous'].setdefault(current['node_name'], []).append(t)
            node['visited'] = True
            queue.append(node)
    return graph.nodes[dest]

In [241]:
# graph_algo marks nodes as visited and fills in their 'previous' links, so
# run it on a deep copy and leave `graph` untouched for later searches.
graph_copy = copy.deepcopy(graph)
dest = graph_algo('Slateport/Mauville/Verdanturf', 'Rustboro', graph_copy)
# Display the record returned for the destination node.
dest

{'node_name': 'Rustboro',
 'transitions': [{'entrance': 'Rustboro Mart',
   'destination': 'Petalburg Woods SE'},
  {'entrance': 'Rustboro Pokecenter',
   'destination': 'Sootopolis Pokecenter S'},
  {'entrance': 'Rustboro SE', 'destination': 'Meteor Falls 1F E'},
  {'entrance': 'Rustboro Cut House', 'destination': 'Slateport Pokecenter S'}],
 'visited': True,
 'previous': {'Meteor Falls 1F': [{'entrance': 'Meteor Falls 1F E',
    'destination': 'Rustboro SE'}]}}

In [238]:
def list_path(dest, graph):
    """Follow the ``'previous'`` links from ``dest`` back to a node without any.

    Returns a list of ``(node_name, previous)`` tuples starting at ``dest``.
    ``previous`` is the node's own ``'previous'`` dict; the final tuple, for
    the node that has no predecessor, carries an empty dict. When a node has
    several predecessors, only the first key is followed.
    """
    steps = []
    current = graph.nodes[dest]
    while True:
        predecessors = current['previous']
        if not predecessors:
            break
        steps.append((current['node_name'], predecessors))
        # Step to the first recorded predecessor.
        current = graph.nodes[next(iter(predecessors))]
    steps.append((current['node_name'], {}))
    return steps

# Walk the 'previous' links stored on graph_copy back from 'Rustboro'.
list_path('Rustboro', graph_copy)

[('Rustboro',
  {'Meteor Falls 1F': [{'entrance': 'Meteor Falls 1F E',
     'destination': 'Rustboro SE'}]}),
 ('Meteor Falls 1F',
  {'Fallarbor/Lavaridge': [{'entrance': 'Fallarbor Fossil Maniac',
     'destination': 'Meteor Falls 1F S'}]}),
 ('Fallarbor/Lavaridge',
  {'Slateport/Mauville/Verdanturf': [{'entrance': 'R109 Seashore House',
     'destination': 'R114 Lanette'}]}),
 ('Slateport/Mauville/Verdanturf', {})]