# Marvel Network

Now that we have a clean dataset, we can actually build the network.

In [None]:
import pandas as pd

import networkx as nx

import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

import ast

In [None]:
# Register tqdm's pandas integration; this is what makes the
# `progress_apply` method used in the cells below available.
tqdm.pandas()

In [None]:
# Load the character dataset from a relative path.
characters_df = pd.read_csv("../data/marvel_characters.csv")

# The "links" cells come out of the CSV as text; ast.literal_eval parses each
# one back into a Python object (it accepts literals only, not arbitrary code).
# Presumably every cell holds a list of linked page names -- confirm against the data.
characters_df["links"] = characters_df["links"].progress_apply(ast.literal_eval)

# Bare expression: shown as the output of this notebook cell.
characters_df

In [None]:
def connect_node(row, g):
    """Add one row's node and its links to the graph ``g`` in place.

    ``row`` must provide a ``"name"`` entry, which becomes the node, and a
    ``"links"`` iterable; one edge is added from the name to every link.
    Nothing is returned.
    """
    source = row["name"]

    # Add the node explicitly so that a row with no links still shows up.
    g.add_node(source)
    g.add_edges_from((source, target) for target in row["links"])

def createGraph(df, directed=False):
    """Build a networkx graph from the rows of ``df``.

    Every row is handed to ``connect_node``, which adds that row's node and
    edges to the graph.

    Args:
        df: DataFrame whose rows ``connect_node`` can read.
        directed: If truthy, an ``nx.DiGraph`` is built; otherwise an
            ``nx.Graph``.

    Returns:
        The populated graph.
    """
    graph_type = nx.DiGraph if directed else nx.Graph
    graph = graph_type()

    # The per-row return values are not needed: the apply call is only a way
    # to visit every row (via tqdm's `progress_apply`) while connect_node
    # mutates the graph.
    df.progress_apply(connect_node, axis=1, g=graph)

    return graph

In [None]:
# Build the graph from the whole character table. `directed` keeps its
# default (False), so createGraph returns an undirected nx.Graph.
g = createGraph(characters_df)

Now that we have a preliminary graph, we can look at the number of nodes and edges and at the degrees of the nodes to start understanding the graph.

In [None]:
# Print a short summary of the graph: size, average degree and biggest hub.
# The degrees are materialised once (node -> degree) so the graph is not
# re-scanned for every statistic.
degrees = dict(g.degree)
n_nodes = g.number_of_nodes()

print("Graph basic stats:")
print(f"\tNumber of nodes: {n_nodes}")
print(f"\tNumber of edges: {g.number_of_edges()}")

if degrees:
    print(f"\tAverage degree: {sum(degrees.values()) / n_nodes:.2f}")
    print()
    # On ties, max() keeps the first node in the graph's iteration order.
    top_node, top_degree = max(degrees.items(), key=lambda item: item[1])
    print(f"\tMost connected node: {top_node} with a degree of {top_degree}")
else:
    # An empty graph has no degrees to average or rank; avoid dividing by
    # zero or calling max() on an empty sequence.
    print("\tAverage degree: n/a (graph has no nodes)")

Another relevant piece of information that is easy to obtain is the top 5 characters with the largest number of links on their wiki pages.

In [None]:
# Sort by the "number_links" column, largest first, and display the first
# rows (head() returns 5 rows by default).
characters_df.sort_values(by=["number_links"], ascending=False).head()