In [1]:
import pandas as pd


In [2]:
hero_network_df = pd.read_csv("./hero-network.csv")

hero_network_df

Unnamed: 0,hero1,hero2
0,"LITTLE, ABNER",PRINCESS ZANDA
1,"LITTLE, ABNER",BLACK PANTHER/T'CHAL
2,BLACK PANTHER/T'CHAL,PRINCESS ZANDA
3,"LITTLE, ABNER",PRINCESS ZANDA
4,"LITTLE, ABNER",BLACK PANTHER/T'CHAL
...,...,...
574462,COLOSSUS II/PETER RA,CALLISTO
574463,CALLISTO,ROGUE /
574464,CALLISTO,CALIBAN/
574465,CALIBAN/,ROGUE /


In [3]:
# strip spaces from hero names
hero_network_df["hero1"] = hero_network_df["hero1"].apply(lambda x: x.strip())
hero_network_df["hero2"] = hero_network_df["hero2"].apply(lambda x: x.strip())

hero_network_df

Unnamed: 0,hero1,hero2
0,"LITTLE, ABNER",PRINCESS ZANDA
1,"LITTLE, ABNER",BLACK PANTHER/T'CHAL
2,BLACK PANTHER/T'CHAL,PRINCESS ZANDA
3,"LITTLE, ABNER",PRINCESS ZANDA
4,"LITTLE, ABNER",BLACK PANTHER/T'CHAL
...,...,...
574462,COLOSSUS II/PETER RA,CALLISTO
574463,CALLISTO,ROGUE /
574464,CALLISTO,CALIBAN/
574465,CALIBAN/,ROGUE /


In [4]:
# all_heroes contains each hero name once
all_heroes = set()

for row in hero_network_df.index:
    all_heroes.add(hero_network_df["hero1"][row])
    all_heroes.add(hero_network_df["hero2"][row])
    
print(len(all_heroes))
all_heroes

6421


{'BLAKE, DR. DONALD (C',
 'POSITRON/RONNIE',
 'KANNOR, ORLANDO',
 'BLACK GOLIATH/DR. WI',
 'HYZAKTL',
 'SHIVER MAN',
 'BEAR/',
 'PYRAKH, COMMANDER',
 'GOLEM II',
 'ROCK/SAMUEL JOHN ROC',
 'SHADOW KING | MUTANT',
 'RHAPSODY/RACHEL ARGO',
 'SUAREZ, RAPHAEL',
 'SLEEPER',
 'FACE/COL. EISEN',
 'CHANGELING',
 'FENTON, BARNEY',
 'CHAR, BRON',
 "D'KEN [SHI'AR]",
 'SILVER SABLE',
 'AIREO/AEOLUS [INHUMA',
 'SUMMERS, NATHAN CHRI',
 'POWELL, JASON',
 'BULLET BIKER',
 'EARTHQUAKE',
 'MASTER ZEI',
 'IRON MAIDEN II/',
 'LOBO III',
 'ROSSIN, DR.',
 'WORMWOOD',
 'DEMONICUS/DR. DOUGLA',
 'QUICKSILVER | MUTANT',
 'MALICE V/NAKIA',
 'SLICK',
 'MIFUNE, DR.',
 'FIREFALL III/ARCHIE',
 'DOCTOR, DR. JOSHUA',
 'ECLIPTIC',
 'MUTO, JUNZO',
 'DR. STRANGER YET',
 'BEAST DOPPELGANGER',
 'PHOENIX III/RACHEL S',
 'REAGAN, NANCY',
 'PARKER, MAY II',
 'KOMODO',
 'DONAR',
 'MUTANT MASTER',
 'PISTON/ANDREI SEMYAN',
 'POWELL, AMY',
 'SKINNER',
 'HUDSON, JAMES | MUTA',
 'BAKER',
 'DESTINY II/IRENE ADL',
 'DADDY WRONGLEGS',


In [5]:
from collections import defaultdict

# create an undirected graph (adjacency list) for use with BFS
# this does not show edge weights (no count of edges between characters)
undir_hero_map = defaultdict(set)

for row in hero_network_df.index:
    hero1 = hero_network_df["hero1"][row]
    hero2 = hero_network_df["hero2"][row]
    
    undir_hero_map[hero1].add(hero2)
    undir_hero_map[hero2].add(hero1)
    
print(len(undir_hero_map.keys()))
undir_hero_map

6421


defaultdict(set,
            {'LITTLE, ABNER': {'BINARY/CAROL DANVERS',
              "BLACK PANTHER/T'CHAL",
              'CARNIVORE/COUNT ANDR',
              'DECAY II/YOSHIRO HAC',
              'DIXON, GENERAL',
              'FIN FANG FOOM/MIDGAR',
              'FUJIKAWA, RUMIKO',
              'GOLDEN-BLADE',
              'HOGAN, VIRGINIA PEPP',
              'INFERNO III/SAMANTHA',
              'IRON MAN IV/JAMES R.',
              'IRON MAN/TONY STARK',
              'JACOBS, GLENDA',
              'JARVIS, EDWIN',
              'JOCASTA',
              'MADAME MENACE/SUNSET',
              'MANN, DR. J. VERNON',
              'PRINCESS ZANDA',
              'SAPPER',
              'TEMPEST II/NICOLETTE',
              'THOR/DR. DONALD BLAK',
              'WAR MACHINE II/PARNE'},
             'PRINCESS ZANDA': {"BLACK PANTHER/T'CHAL",
              'CARNIVORE/COUNT ANDR',
              'DECAY II/YOSHIRO HAC',
              'HOGAN, VIRGINIA PEPP',
              'INFERNO II

In [6]:
# get the number of edges/links (not weighted)
# from undirected graph
total_num_edges = 0

for hero in undir_hero_map.keys():
    edge_length = len(undir_hero_map[hero])
    total_num_edges += edge_length
    
total_num_edges /= 2

total_num_edges

167106.0

In [12]:
# export basic csv with hero name to edge count (first-degree relations)

ordered_heroes = list(all_heroes)

first_deg_df = pd.DataFrame(data={"hero":[hero for hero in ordered_heroes], "count":[len(undir_hero_map[hero]) for hero in ordered_heroes]})

first_deg_df.to_csv("./first_degree.csv", index=False)

first_deg_df

Unnamed: 0,hero,count
0,"BLAKE, DR. DONALD (C",41
1,POSITRON/RONNIE,108
2,"KANNOR, ORLANDO",21
3,BLACK GOLIATH/DR. WI,185
4,HYZAKTL,15
...,...,...
6416,"GAULTHING, ANDERSON",8
6417,BUCKY II/FRED DAVIS,43
6418,"ALLEN, TERRY",11
6419,"LANDERS, RAY",14


In [7]:
from collections import deque

# basic connectivity test - can do now that we have 
# the undirected adjacency list 

# test how many groups of heroes there are (connectivity)

# helper function that gives all heroes connected to a given hero
def basicBFS(hero, graph_map):
    queue = deque([hero])
    seen = set([hero])
    
    while(len(queue) > 0):
        curr_hero = queue.popleft()
        
        # add all first-degree heroes not in seen
        for adjacent_hero in graph_map[curr_hero]:
            if(adjacent_hero not in seen):
                queue.append(adjacent_hero)
                seen.add(adjacent_hero)
            
    
    return seen

# connectivity function - returns the number of heroes in each group and number of unconnected groups
def connectivity(hero_set, graph_map):
    all_groups = []
    all_seen = set()
    count = 0
    
    for hero in graph_map.keys():
        count += 1
        if(hero not in all_seen):
            hero_group = basicBFS(hero, graph_map)
            all_groups.append(len(hero_group))
            for connected_hero in hero_group:
                all_seen.add(connected_hero)
                
    print("Number of Groupings and Hero Count:", all_groups)
    print("Total Number of Heroes Seen (should match total hero count):", count)
                
    return all_groups

print(connectivity(all_heroes, undir_hero_map))

Number of Groupings and Hero Count: [6403, 9, 7, 2]
Total Number of Heroes Seen (should match total hero count): 6421
[6403, 9, 7, 2]


In [8]:
# basic BFS for getting hero degree of separation
# includes information about the links between the heroes specified

def hero_BFS(hero1, hero2, graph_map):    
    queue = deque()
    queue.append((hero1, [hero1]))
    seen = set([hero1])
    
    while(len(queue) > 0):
        curr_hero, hero_chain = queue.popleft()
        
        # if curr_hero is hero2, end loop
        if(curr_hero == hero2):
            return hero_chain
        
        # otherwise, add all unseen heroes to queue, with chain
        for new_hero in graph_map[curr_hero]:
            if(new_hero not in seen):
                new_hero_chain = hero_chain.copy()
                new_hero_chain.append(new_hero)
                
                queue.append((new_hero, new_hero_chain))
                
                seen.add(new_hero)
#     print(seen)
    return ["Not connected!"]
            
# test
hero_BFS('IRON MAN/TONY STARK', "EMPRESS S'BYLL [SKRU", undir_hero_map)

['IRON MAN/TONY STARK', 'NOVA II/FRANKIE RAYE', "EMPRESS S'BYLL [SKRU"]

In [9]:
# max degrees of separation for a character

def maxSeparation(hero, graph_map):
    queue = deque()
    queue.append((hero, 0))
    seen = set([hero])
    
    while(len(queue) > 0):
        curr_hero, curr_distance = queue.popleft()
        
        # add all first-degree heroes not in seen
        for adjacent_hero in graph_map[curr_hero]:
            if(adjacent_hero not in seen):
                queue.append((adjacent_hero, curr_distance+1))
                seen.add(adjacent_hero)                
    
    return curr_distance

# test
maxSeparation("FAITH", undir_hero_map)

4