# Marvel Network

Now that we have a clean dataset, we can actually build the network.

In [1]:
import pandas as pd

import networkx as nx

import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

import ast

In [2]:
# Register tqdm with pandas so that `progress_apply` (used in the cells below)
# is available and shows a progress bar.
tqdm.pandas()

In [3]:
# Load the cleaned character table and, in the same step, turn the textual
# "links" column back into real Python objects. ast.literal_eval only accepts
# literals, so it is a safe way to parse the stored values.
characters_df = pd.read_csv("../data/marvel_characters.csv").assign(
    links=lambda frame: frame["links"].progress_apply(ast.literal_eval)
)

characters_df

  0%|          | 0/1295 [00:00<?, ?it/s]

Unnamed: 0,name,links,number_links
0,Clown_(Franklin)_(Earth-616),[],0
1,Nicos_Pelletier_(Earth-616),[Wolfgang_von_Strucker_(Earth-616)],1
2,Marduk_Kurios_(Earth-616),"[Cul_Borson_(Earth-616), Satannish_(Earth-616)...",7
3,Deltite_(Earth-616),"[Arnim_Zola_(Earth-616), Nicholas_Fury_(Earth-...",9
4,Medusalith_Amaquelin_(Earth-616),"[Maximus_Boltagon_(Earth-616), Ororo_Munroe_(E...",35
...,...,...,...
1290,Bond_(Earth-616),[Candra_(Earth-616)],1
1291,Anthony_Stark_(Earth-616),"[Veranke_(Earth-616), Mandarin_(Earth-616), Le...",71
1292,Johnny_Watts_(Earth-616),"[Noh-Varr_(Earth-200080), Clinton_Barton_(Eart...",4
1293,Karl_Lykos_(Earth-616),"[Ororo_Munroe_(Earth-616), Mortimer_Toynbee_(E...",20


In [4]:
def connect_node(row, g):
  """Add one character and its links to the graph ``g``.

  ``row`` must support ``row["name"]`` (the node label) and ``row["links"]``
  (an iterable of target node labels). The node itself is always added, so a
  character without any links still shows up in the graph. Returns ``None``;
  the function is used only for its effect on ``g``.
  """
  source = row["name"]
  g.add_node(source)
  for target in row["links"]:
    g.add_edge(source, target)

def createGraph(df, directed=False):
  """Build a networkx graph from a table of characters.

  Each row of ``df`` is handed to ``connect_node``, which adds the row's
  ``name`` as a node and one edge per entry of its ``links``.

  Parameters
  ----------
  df : DataFrame with ``name`` and ``links`` columns. It must expose
    ``progress_apply``, i.e. ``tqdm.pandas()`` has to be called beforehand.
  directed : when truthy an ``nx.DiGraph`` is built, otherwise an ``nx.Graph``.

  Returns
  -------
  The populated graph.
  """
  graph_type = nx.DiGraph if directed else nx.Graph
  graph = graph_type()

  # The apply is run purely for its side effect on `graph`; the Series of
  # None values it returns is discarded.
  df.progress_apply(connect_node, axis=1, g=graph)

  return graph

In [5]:
# Build the character graph; `directed` is left at its default (False), so
# this is an undirected nx.Graph.
g = createGraph(characters_df)

  0%|          | 0/1295 [00:00<?, ?it/s]

Now that we have a preliminary graph, we can look at the degree distribution and the number of nodes and edges to start understanding its structure.

In [6]:
# Summarise the graph: its size, the mean degree and the best-connected node.
# The (node, degree) pair with the highest degree is looked up once and reused.
average_degree = sum(degree for _, degree in g.degree) / len(g.degree)
most_connected, highest_degree = max(g.degree, key=lambda pair: pair[1])

print("Graph basic stats:")
print(f"\tNumber of nodes: {g.number_of_nodes()}")
print(f"\tNumber of edges: {g.number_of_edges()}")
print(f"\tAverage degree: {average_degree:.2f}")
print()
print(f"\tMost connected node: {most_connected} with a degree of {highest_degree}")

Graph basic stats:
	Number of nodes: 1295
	Number of edges: 15992
	Average degree: 24.70

	Most connected node: Steven_Rogers_(Earth-616) with a degree of 301


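Another relevant piece of information that is easy to obtain is the top 5 characters with the largest number of links on their wiki pages. Note that this ranking counts only the links listed on each character's own page, whereas the degree reported above also includes links coming from other characters' pages, so the two rankings need not agree.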

In [7]:
# Sort by the `number_links` column in descending order; head() with no
# argument keeps the first five rows.
characters_df.sort_values(by=["number_links"], ascending=False).head()

Unnamed: 0,name,links,number_links
618,Bruce_Banner_(Earth-616),"[Cul_Borson_(Earth-616), Void_(Dark_Sentry)_(E...",145
307,James_Howlett_(Earth-616),"[Katherine_Power_(Earth-616), Evan_Sabahnur_(E...",120
716,William_Baker_(Earth-616),"[Natalia_Romanova_(Earth-616), Alicia_Masters_...",117
881,Max_Eisenhardt_(Earth-616),"[Ororo_Munroe_(Earth-616), Illyana_Rasputina_(...",103
545,Charles_Xavier_(Earth-616),"[Void_(Dark_Sentry)_(Earth-616), Ororo_Munroe_...",101
