In [29]:
import pandas as pd


In [30]:
# Load the hero-pair table from the CSV next to the notebook.
# Later cells treat each row as an undirected link between `hero1` and `hero2`.
hero_network_df = pd.read_csv("./hero-network.csv")

# Bare last expression so the notebook renders the frame.
hero_network_df

Unnamed: 0,hero1,hero2
0,"LITTLE, ABNER",PRINCESS ZANDA
1,"LITTLE, ABNER",BLACK PANTHER/T'CHAL
2,BLACK PANTHER/T'CHAL,PRINCESS ZANDA
3,"LITTLE, ABNER",PRINCESS ZANDA
4,"LITTLE, ABNER",BLACK PANTHER/T'CHAL
...,...,...
574462,COLOSSUS II/PETER RA,CALLISTO
574463,CALLISTO,ROGUE /
574464,CALLISTO,CALIBAN/
574465,CALIBAN/,ROGUE /


In [48]:
# Strip leading/trailing whitespace from hero names so the same hero is not
# treated as two different graph nodes in the cells below.
# The vectorized `.str` accessor is the idiomatic pandas form and, unlike
# `.apply(lambda x: x.strip())`, passes missing values through instead of
# raising AttributeError on them. Stripping is idempotent, so re-running this
# cell is safe.
hero_network_df["hero1"] = hero_network_df["hero1"].str.strip()
hero_network_df["hero2"] = hero_network_df["hero2"].str.strip()

hero_network_df

Unnamed: 0,hero1,hero2
0,"LITTLE, ABNER",PRINCESS ZANDA
1,"LITTLE, ABNER",BLACK PANTHER/T'CHAL
2,BLACK PANTHER/T'CHAL,PRINCESS ZANDA
3,"LITTLE, ABNER",PRINCESS ZANDA
4,"LITTLE, ABNER",BLACK PANTHER/T'CHAL
...,...,...
574462,COLOSSUS II/PETER RA,CALLISTO
574463,CALLISTO,ROGUE /
574464,CALLISTO,CALIBAN/
574465,CALIBAN/,ROGUE /


In [49]:
# all_heroes contains each hero name once.
# Built directly from the two name columns instead of a Python-level loop that
# performs a chained `df[col][row]` lookup for every row: same resulting set,
# far less per-row overhead, and no dependence on the index labels being unique.
all_heroes = set(hero_network_df["hero1"]) | set(hero_network_df["hero2"])

print(len(all_heroes))
all_heroes

6421


{'KAMIKAZE',
 'NIGHTCRAWLER | MUTAN',
 'REANDEAU, DR.',
 'WILD CHILD DOPPELGAN',
 'RAM, SIR',
 'CHALMER, JUDGE',
 'BANTAM/',
 'WHIRLWIND/DAVID CANN',
 'JZEMLICO, WARDEN',
 'CRIME-MASTER/NICK LU',
 'MCLAREN, MEGAN',
 'RICTOR/JULIO ESTEBAN',
 'OX IV',
 'TOLLIVER, REV. JACKS',
 'NIKKI/NICHOLETTE GOL',
 'PSI-WOLF/NIKOLAI VON',
 'BALLANTINE, MR.',
 'MADAME MENACE/SUNSET',
 'LETHA',
 'TINKER, SHEA',
 'WARLORD',
 'REESE, MURRAY',
 'JOHNSON, ANNA',
 'RUBINO, STEVE HEADSE',
 'MATADOR/MANUEL ELOGA',
 'HEADLOK',
 'LANDSCAPE/BRETT',
 'RIDER, ROBERT ROBBIE',
 'CAPTAIN U.K./LINDA M',
 'SCREAMING MIMI/MELIS',
 'MEDUSA | MUTANT X-VE',
 'KUROYAMA',
 'SILVER SAMURAI/KENYU',
 'YANDROTH',
 'FIST/',
 'BONEBREAKER/',
 'NELSON, CANDACE',
 'HUNTER IN DARKNESS',
 'WATKINS, STEVIE',
 'HATCHWAY, VERNON',
 'MATHEMANIC/THOMAS SO',
 'CAPRICORN II/',
 'GORGON [INHUMAN]',
 'FAGIN',
 'HAYDEN',
 'MALPHER, KATHY',
 'LEAD-IN',
 'STANFORD, DWIGHT ROL',
 'MYSTERIO II/DANIEL B',
 'MENDOSA, EMILIO',
 'JACOBS, GLENDA',
 'X-MA

In [50]:
from collections import defaultdict

# Create an undirected graph (adjacency list) for use with BFS.
# Edge weights are not kept: neighbours are stored in a set, so repeated
# hero pairs collapse into a single entry.
undir_hero_map = defaultdict(set)

# Iterate over the two columns in lockstep instead of doing a chained
# `df[col][row]` lookup per index label.
for hero1, hero2 in zip(hero_network_df["hero1"], hero_network_df["hero2"]):
    # Record the link in both directions so the graph is undirected.
    undir_hero_map[hero1].add(hero2)
    undir_hero_map[hero2].add(hero1)

print(len(undir_hero_map))
undir_hero_map

6421


defaultdict(set,
            {'LITTLE, ABNER': {'BINARY/CAROL DANVERS',
              "BLACK PANTHER/T'CHAL",
              'CARNIVORE/COUNT ANDR',
              'DECAY II/YOSHIRO HAC',
              'DIXON, GENERAL',
              'FIN FANG FOOM/MIDGAR',
              'FUJIKAWA, RUMIKO',
              'GOLDEN-BLADE',
              'HOGAN, VIRGINIA PEPP',
              'INFERNO III/SAMANTHA',
              'IRON MAN IV/JAMES R.',
              'IRON MAN/TONY STARK',
              'JACOBS, GLENDA',
              'JARVIS, EDWIN',
              'JOCASTA',
              'MADAME MENACE/SUNSET',
              'MANN, DR. J. VERNON',
              'PRINCESS ZANDA',
              'SAPPER',
              'TEMPEST II/NICOLETTE',
              'THOR/DR. DONALD BLAK',
              'WAR MACHINE II/PARNE'},
             'PRINCESS ZANDA': {"BLACK PANTHER/T'CHAL",
              'CARNIVORE/COUNT ANDR',
              'DECAY II/YOSHIRO HAC',
              'HOGAN, VIRGINIA PEPP',
              'INFERNO II

In [94]:
# Get the number of edges/links (not weighted) from the undirected graph.
#
# Every ordinary edge appears in two adjacency sets, but a self-loop (a hero
# listed as its own neighbour) appears in only one. Halving the plain degree
# sum would therefore count each self-loop as half an edge. Adding the
# self-loop count back before halving counts every edge exactly once and keeps
# the result an exact integer.
degree_sum = sum(len(neighbors) for neighbors in undir_hero_map.values())
self_loop_count = sum(hero in neighbors for hero, neighbors in undir_hero_map.items())

total_num_edges = (degree_sum + self_loop_count) // 2

total_num_edges

167106.0

In [54]:
from collections import deque

# Basic connectivity test, possible now that the undirected adjacency list exists:
# count how many separate groups of connected heroes there are.

def basicBFS(hero, graph_map):
    """Return the set of all heroes connected to ``hero``, including ``hero``.

    Performs a breadth-first traversal of ``graph_map``.

    Parameters
    ----------
    hero : hashable
        Starting node.
    graph_map : mapping
        Adjacency list mapping each hero to an iterable of neighbouring heroes.

    Returns
    -------
    set
        Every hero reachable from ``hero``. If ``hero`` has no entry in
        ``graph_map`` the result is ``{hero}``.
    """
    queue = deque([hero])
    seen = {hero}

    while queue:
        curr_hero = queue.popleft()

        # Use .get() rather than [] so a lookup never inserts a new (empty)
        # entry into a defaultdict graph, and a plain dict does not raise for
        # a node without its own adjacency entry.
        for adjacent_hero in graph_map.get(curr_hero, ()):
            if adjacent_hero not in seen:
                seen.add(adjacent_hero)
                queue.append(adjacent_hero)

    return seen

def connectivity(hero_set, graph_map):
    """Return the size of every connected group of heroes in ``graph_map``.

    Also prints the list of group sizes and the total number of distinct heroes
    reached by the traversals, which should equal the total hero count.

    Parameters
    ----------
    hero_set : collection
        Not used by the computation; kept so existing callers keep working.
    graph_map : mapping
        Adjacency list mapping each hero to an iterable of neighbouring heroes.

    Returns
    -------
    list of int
        One entry per group, in the order the groups are first encountered
        while iterating over the keys of ``graph_map``.
    """
    all_groups = []
    all_seen = set()

    # Snapshot the keys so the loop stays safe even if a lookup during a
    # traversal adds entries to a defaultdict graph.
    for hero in list(graph_map):
        if hero not in all_seen:
            hero_group = basicBFS(hero, graph_map)
            all_groups.append(len(hero_group))
            all_seen |= hero_group

    print("Number of Groupings and Hero Count:", all_groups)
    # Report the heroes actually reached. Counting loop iterations instead
    # would always equal len(graph_map) and could never reveal a mismatch.
    print("Total Number of Heroes Seen (should match total hero count):", len(all_seen))

    return all_groups

# Run the connectivity check on the full graph. The function prints its own two
# summary lines; the returned list of group sizes is then printed once more here.
print(connectivity(all_heroes, undir_hero_map))

Number of Groupings and Hero Count: [6403, 9, 7, 2]
Total Number of Heroes Seen (should match total hero count): 6421
[6403, 9, 7, 2]


In [83]:
def hero_BFS(hero1, hero2, graph_map):
    """Find a shortest chain of links from ``hero1`` to ``hero2``.

    Breadth-first search that carries the chain of heroes along with each
    queued node, so the result shows the intermediate links as well as the
    degree of separation (``len(result) - 1``).

    Parameters
    ----------
    hero1 : hashable
        Starting hero.
    hero2 : hashable
        Target hero.
    graph_map : mapping
        Adjacency list mapping each hero to an iterable of neighbouring heroes.

    Returns
    -------
    list
        The heroes on the chain, starting with ``hero1`` and ending with
        ``hero2`` (``[hero1]`` when both are the same). If ``hero2`` cannot be
        reached, the sentinel ``["Not connected!"]`` is returned.
    """
    queue = deque([(hero1, [hero1])])
    seen = {hero1}

    while queue:
        curr_hero, hero_chain = queue.popleft()

        # Reached the target: the chain carried with it is a shortest one.
        if curr_hero == hero2:
            return hero_chain

        # Use .get() rather than [] so looking up an unknown hero never
        # inserts an empty entry into a defaultdict graph.
        for new_hero in graph_map.get(curr_hero, ()):
            if new_hero not in seen:
                seen.add(new_hero)
                # Build a new list so chains in the queue never share state.
                queue.append((new_hero, hero_chain + [new_hero]))

    return ["Not connected!"]
            
# test: chain of links between two heroes in the undirected graph
hero_BFS('IRON MAN/TONY STARK', "EMPRESS S'BYLL [SKRU", undir_hero_map)

['IRON MAN/TONY STARK', 'SUPREME INTELLIGENCE', "EMPRESS S'BYLL [SKRU"]

In [91]:
def maxSeparation(hero, graph_map):
    """Return the maximum degrees of separation between ``hero`` and any hero it can reach.

    Runs a breadth-first search from ``hero`` and reports the largest distance
    found. Only heroes connected to ``hero`` are considered; heroes in other,
    unconnected groups are never visited.

    Parameters
    ----------
    hero : hashable
        Starting hero.
    graph_map : mapping
        Adjacency list mapping each hero to an iterable of neighbouring heroes.

    Returns
    -------
    int
        Largest number of links on a shortest chain from ``hero`` to a
        reachable hero; ``0`` when ``hero`` has no neighbours or no entry in
        ``graph_map``.
    """
    queue = deque([(hero, 0)])
    seen = {hero}
    max_distance = 0

    while queue:
        curr_hero, curr_distance = queue.popleft()
        # BFS dequeues nodes in non-decreasing distance order, so the last
        # value assigned here is the maximum.
        max_distance = curr_distance

        # Use .get() rather than [] so looking up an unknown hero never
        # inserts an empty entry into a defaultdict graph.
        for adjacent_hero in graph_map.get(curr_hero, ()):
            if adjacent_hero not in seen:
                seen.add(adjacent_hero)
                queue.append((adjacent_hero, curr_distance + 1))

    return max_distance

# test: largest degree of separation from "FAITH" to any hero reachable from it
maxSeparation("FAITH", undir_hero_map)

4