In [1]:

from itertools import combinations
import pandas as pd
import numpy as np
import networkx as nx 
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Load the hero/comic appearance table; the preview below shows one row per
# (hero, comic) pair with columns `hero` and `comic`.
edge_df = pd.read_csv('edges.csv')
edge_df.head()

Unnamed: 0,hero,comic
0,24-HOUR MAN/EMMANUEL,AA2 35
1,3-D MAN/CHARLES CHAN,AVF 4
2,3-D MAN/CHARLES CHAN,AVF 5
3,3-D MAN/CHARLES CHAN,COC 1
4,3-D MAN/CHARLES CHAN,H2 251


In [3]:
# Load the node table; in the preview below each row names a node and tags it
# with a `type` of either 'comic' or 'hero'.
node_df = pd.read_csv('nodes.csv')
node_df.head()

Unnamed: 0,node,type
0,2001 10,comic
1,2001 8,comic
2,2001 9,comic
3,24-HOUR MAN/EMMANUEL,hero
4,3-D MAN/CHARLES CHAN,hero


In [4]:
# Load the hero-to-hero pair table (columns `hero1`, `hero2`). The preview below
# shows that the same pair can occur on more than one row.
hero_net_df = pd.read_csv('hero-network.csv')
hero_net_df.head()

Unnamed: 0,hero1,hero2
0,"LITTLE, ABNER",PRINCESS ZANDA
1,"LITTLE, ABNER",BLACK PANTHER/T'CHAL
2,BLACK PANTHER/T'CHAL,PRINCESS ZANDA
3,"LITTLE, ABNER",PRINCESS ZANDA
4,"LITTLE, ABNER",BLACK PANTHER/T'CHAL


In [5]:
# Compare the raw SPIDER-related hero names of both files side by side.
# Each column is wrapped in a Series so the frame can still be built
# (NaN-padded) when the two files do not yield the same number of matches;
# building it from two plain lists would raise ValueError in that case.
temp = pd.DataFrame({
    source: pd.Series(sorted(h for h in names.dropna().unique() if 'SPIDER' in h), dtype=object)
    for source, names in (('edges.csv', edge_df['hero']),
                          ('hero-network.csv', hero_net_df['hero1']))
})

display(temp)

# Longest hero name per column of hero-network.csv (vectorised string length).
for c in ['hero1', 'hero2']:
    print(f"{c} name max in hero-network.csv : {hero_net_df[c].str.len().max()}")


# Name Preprocessing
# Cut every name to its first 20 characters and keep only the part before the
# first "/", so that both files use the same key for a hero (e.g. the
# 'SPIDER-MAN/...' variants shown in the comparison table both become
# 'SPIDER-MAN').
# NOTE: this overwrites the name columns in place. The transform itself is
# idempotent, but re-running this cell afterwards makes the comparison table
# above show the already-cleaned names.
for c in ['hero1', 'hero2']:
    hero_net_df[c] = hero_net_df[c].str[:20].str.split("/").str[0]
edge_df['hero'] = edge_df['hero'].str[:20].str.split("/").str[0]

Unnamed: 0,edges.csv,hero-network.csv
0,"BEACH, SPIDER","BEACH, SPIDER"
1,BLOOD SPIDER/,BLOOD SPIDER/
2,MAN-SPIDER CLONE | M,MAN-SPIDER CLONE | M
3,MAN-SPIDER | MUTANT,MAN-SPIDER | MUTANT
4,SPIDER-MAN CLONE/BEN,SPIDER-MAN CLONE/BEN
5,SPIDER-MAN III/MARTH,SPIDER-MAN III/MARTH
6,SPIDER-MAN/PETER PARKER,SPIDER-MAN/PETER PAR
7,SPIDER-WOMAN DOPPELG,SPIDER-WOMAN DOPPELG
8,SPIDER-WOMAN II/JULI,SPIDER-WOMAN II/JULI
9,SPIDER-WOMAN IV/CHAR,SPIDER-WOMAN IV/CHAR


hero1 name max in hero-network.csv : 20
hero2 name max in hero-network.csv : 20


In [6]:
# Sanity check on one pair: count the SPIDER-MAN/HULK rows of hero-network.csv
# in both column orders and compare with the number of comics the two heroes
# share according to edges.csv.
print("SPIDER & HULK in hero-network.csv")
spider_then_hulk = (hero_net_df['hero1'] == 'SPIDER-MAN') & (hero_net_df['hero2'] == 'HULK')
hulk_then_spider = (hero_net_df['hero2'] == 'SPIDER-MAN') & (hero_net_df['hero1'] == 'HULK')
print(f"hero1=SPIDER-MAN, hero2=HULK : {spider_then_hulk.sum()}")
print(f"hero1=HULK, hero2=SPIDER-MAN : {hulk_then_spider.sum()}")

spider_comics = set(edge_df.loc[edge_df['hero'] == 'SPIDER-MAN', 'comic'])
hulk_comics = set(edge_df.loc[edge_df['hero'] == 'HULK', 'comic'])
print(f"Intersection in edges.csv : {len(spider_comics & hulk_comics)}")

SPIDER & HULK in hero-network.csv
hero1=SPIDER-MAN, hero2=HULK : 43
hero1=HULK, hero2=SPIDER-MAN : 50
Intersection in edges.csv : 93


In [12]:
# Number of heroes kept for the network.
topn = 25

# The `topn` heroes with the most rows in edges.csv, most frequent first.
topn_hero = (
    edge_df.groupby(['hero'])[['comic']].count()
    .sort_values(by=['comic'], ascending=False)
    .head(topn)
    .index
)

# Build each hero's comic set once. Filtering the full frame inside the pair
# loop repeated the same scan twice for every one of the C(topn, 2) pairs.
comics_by_hero = {
    hero: set(edge_df.loc[edge_df['hero'] == hero, 'comic'])
    for hero in topn_hero
}

# One row per unordered hero pair; CNT is the number of comics in which both
# heroes appear together.
appto_df = pd.DataFrame(
    [
        (h1, h2, len(comics_by_hero[h1] & comics_by_hero[h2]))
        for h1, h2 in combinations(topn_hero, 2)
    ],
    columns=['H1', 'H2', 'CNT'],
)

display(appto_df.head(10))

Unnamed: 0,H1,H2,CNT
0,SPIDER-MAN,CAPTAIN AMERICA,145
1,SPIDER-MAN,IRON MAN,95
2,SPIDER-MAN,THING,125
3,SPIDER-MAN,THOR,96
4,SPIDER-MAN,HUMAN TORCH,147
5,SPIDER-MAN,MR. FANTASTIC,120
6,SPIDER-MAN,HULK,93
7,SPIDER-MAN,WOLVERINE,63
8,SPIDER-MAN,INVISIBLE WOMAN,97
9,SPIDER-MAN,SCARLET WITCH,67


In [8]:
# Fixed marker colours for a handful of heroes, looked up by node name.
HERO_COLOR = {
    'CAPTAIN AMERICA': 'darkblue',
    'IRON MAN': 'gold',
    'SPIDER-MAN': 'darkred',
    'HULK': 'forestgreen',
    'THOR': 'lightblue',
    'DR. STRANGE': 'purple',
}

## Build the undirected graph: one edge per row of appto_df, with the
## co-appearance count stored under the 'weight' edge attribute
marvel_net = nx.Graph()
marvel_net.add_weighted_edges_from(
    zip(appto_df['H1'], appto_df['H2'], appto_df['CNT']),
    weight='weight',
)

## Node coordinates (seeded spring layout) and weighted PageRank per node
pos_ = nx.spring_layout(marvel_net, seed=11)
cent_ = nx.pagerank(marvel_net, weight='weight')

## Keep only the single highest-ranked (node, score) pair
ranked_nodes = sorted(cent_.items(), key=lambda pair: pair[1], reverse=True)
cent_top = ranked_nodes[:1]


In [9]:

## create an edge between node x and node y, with a given text and width
def make_edge(x, y, text, width):
    """Return a light-gray plotly line trace for a single edge.

    x, y  -- coordinate sequences handed straight to ``go.Scatter``.
    text  -- hover text for the edge; wrapped in a one-element list.
    width -- line width of the trace.
    """
    line_style = dict(width=width, color='lightgray')
    return go.Scatter(
        x=x,
        y=y,
        line=line_style,
        hoverinfo='text',
        text=[text],
        mode='lines',
    )

## Build one line trace per edge whose weight is positive
edge_trace = []
max_cnt = appto_df['CNT'].max()  # largest pair count; scales every line width
for char_1, char_2, weight in marvel_net.edges(data='weight'):
    if not (weight > 0):
        continue  # nothing is drawn for edges without a positive weight
    x0, y0 = pos_[char_1]
    x1, y1 = pos_[char_2]
    # Line width grows linearly with the edge weight, up to 5 for the maximum.
    edge_trace.append(
        make_edge([x0, x1, None], [y0, y1, None], None, width=5*(weight/max_cnt))
    )
                
## Collect the per-node attributes first, then build the node trace in one go
top_ranked = {name for name, _ in cent_top}  # nodes whose label is set in bold

node_x, node_y, node_text = [], [], []
marker_color, marker_size = [], []
marker_line_width, marker_line_color = [], []

for node in marvel_net.nodes():
    x, y = pos_[node]
    node_x.append(x)
    node_y.append(y)

    # Heroes listed in HERO_COLOR get their own fill and a black outline;
    # every other node is gray with a dark-gray outline.
    has_own_color = node in HERO_COLOR
    marker_color.append(HERO_COLOR[node] if has_own_color else 'gray')
    marker_line_color.append('black' if has_own_color else 'darkgray')
    marker_line_width.append(2)

    # node size is proportional to page rank
    marker_size.append(int(400*cent_[node]))

    node_text.append(('<b>' + node + '</b>') if node in top_ranked else node)

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    text=node_text,
    textposition="top center",
    textfont_size=10,
    mode='markers+text',
    hoverinfo='none',
    marker=dict(
        color=marker_color,
        size=marker_size,
        line_width=marker_line_width,
        line_color=marker_line_color,
    ),
)
    
    
## Layout: transparent paper/plot backgrounds, no grid lines or zero lines
bare_axis = {'showgrid': False, 'zeroline': False}
layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis=dict(bare_axis),
    yaxis=dict(bare_axis),
)

## Figure: all edge traces first, then the node trace so it is added last
fig = go.Figure(data=[*edge_trace, node_trace], layout=layout)
fig.update_layout(showlegend=False, title=f"<b>Top {topn} Heroes Network</b>")
fig.update_xaxes(showticklabels=False)
fig.update_yaxes(showticklabels=False)
fig.show()