In [1]:
!pip install wikipedia

You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [2]:
from operator import itemgetter
import networkx as nx
import wikipedia
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import pandas as pd

In [3]:
class Pipeline:
    def __init__(self):
        self.tasks = []
        
    def task(self, depends_on=None):
        idx = 0
        if depends_on:
            idx = self.tasks.index(depends_on) + 1
        def inner(f):
            self.tasks.insert(idx, f)
            return f
        return inner
    
    def run(self, input_):
        output = input_
        for task in self.tasks:
            output = task(output)
        return output

In [4]:
pipeline = Pipeline()

In [5]:
@pipeline.task()
def get(seed):
  SEED = seed.title()
  STOPS = ("DC",
        "Amazon",
        "Movie",
        "Tv show",
        "Netflix",
        "screenwriting",
        "Marvel",
        "Story",
        "Film")
  todo_lst = [(0, SEED)] 
  todo_set = set(SEED) 
  done_set = set()
  g = nx.DiGraph()
  layer, page = todo_lst[0]

  while layer < 2:
    del todo_lst[0]
    done_set.add(page)
  
    # Attempt to download the selected page.
    try:
      wiki = wikipedia.page(page)
    except:
      print("Could not load", page)
      layer, page = todo_lst[0]
      continue
  
    for link in wiki.links:
      link = link.title()
      if link not in STOPS and not link.startswith("List Of") and np.all([word.lower() not in link.lower() for word in STOPS]):
        if link not in todo_set and link not in done_set:
          todo_lst.append((layer + 1, link))
          todo_set.add(link)
        g.add_edge(page, link)
    layer, page = todo_lst[0]
  print("{} nodes, {} edges".format(len(g), nx.number_of_edges(g)))

  return g

In [6]:
@pipeline.task(depends_on=get)
def remove(g):

  g.remove_edges_from(nx.selfloop_edges(g))

  duplicates = [(node, node + "s") 
                for node in g if node + "s" in g
                ]

  for dup in duplicates:
    g = nx.contracted_nodes(g, *dup, self_loops=False)

  print(duplicates)

  duplicates = []
  for x, y in [(node, node.replace("-", " ")) for node in g]:
    if x != y and y in g and y not in [y for x,y in duplicates]:
        duplicates.append((x,y))
  print(duplicates)

  for dup in duplicates:
    g = nx.contracted_nodes(g, *dup, self_loops=False)

  nx.set_node_attributes(g, 0,"contraction")
  nx.set_edge_attributes(g, 0,"contraction")

  return g

In [7]:
@pipeline.task(depends_on=remove)
def filters(g):
  core = [node for node, deg in dict(g.degree()).items() if deg >= 2]

  sub_g = nx.subgraph(g, core)
  return sub_g

In [8]:
@pipeline.task(depends_on=filters)
def plot_graphic(sub_g, pos):
  fig, ax = plt.subplots(1,1,figsize=(10,8))

  # color of nodes
  color_degree = list(dict(nx.degree_centrality(sub_g)).values())
  color_closeness = list(dict(nx.closeness_centrality(sub_g)).values())
  color_betweenness = list(dict(nx.betweenness_centrality(sub_g)).values())
  color_eigenvector = list(dict(nx.eigenvector_centrality(sub_g)).values())

  # draw 
  # degree centrality 
  # edges
  nx.draw_networkx_edges(sub_g,
                        pos=pos,
                        alpha=0.4, ax=ax)
  # nodes
  nodes = nx.draw_networkx_nodes(sub_g,
                  pos=pos,
                  node_color=color,
                  cmap=plt.cm.jet,
                  ax=ax)
  # labels
  nx.draw_networkx_labels(sub_g, pos=pos,
                          font_color='white',
                          font_size = 4, 
                          ax=ax)

  # closeness centrality
  # draw edges
  nx.draw_networkx_edges(sub_g,
                        pos=pos,
                        alpha=0.4, ax=ax)
  # nodes
  nodes = nx.draw_networkx_nodes(sub_g,
                  pos=pos,
                  node_color=color,
                  cmap=plt.cm.jet,
                  ax=ax)
  # labels
  nx.draw_networkx_labels(sub_g, pos=pos,
                          font_color='white',
                          font_size = 4, 
                          ax=ax)

  # betweenness centrality
  # edges
  nx.draw_networkx_edges(sub_g,
                        pos=pos,
                        alpha=0.4, ax=ax)
  # nodes
  nodes = nx.draw_networkx_nodes(sub_g,
                  pos=pos,
                  node_color=color,
                  cmap=plt.cm.jet,
                  ax=ax)
  # labels
  nx.draw_networkx_labels(sub_g, pos=pos,
                          font_color='white',
                          font_size = 4, 
                          ax=ax)

  # eigenvector centrality
  # edges
  nx.draw_networkx_edges(sub_g,
                        pos=pos,
                        alpha=0.4, ax=ax)
  # nodes
  nodes = nx.draw_networkx_nodes(sub_g,
                  pos=pos,
                  node_color=color,
                  cmap=plt.cm.jet,
                  ax=ax)
  # labels
  nx.draw_networkx_labels(sub_g, pos=pos,
                          font_color='white',
                          font_size = 4, 
                          ax=ax)

  # eliminate axis
  ax[0,0].axis("off")
  ax[1,0].axis("off")
  ax[0,1].axis("off")
  ax[1,1].axis("off")

  ax[0,0].set_title("Degree Centrality")
  ax[0,1].set_title("Closeness Centrality")
  ax[1,0].set_title("Betweenness Centrality")
  ax[1,1].set_title("Eigenvector Centrality")

  plt.subplots_adjust(bottom=0., right=0.9, top=1.)
  cax = plt.axes([0.95, 0.3, 0.025, 0.4])
  sm = plt.cm.ScalarMappable(cmap=plt.cm.jet, norm=plt.Normalize(vmin=0, vmax=max_centrality))
  cbar=plt.colorbar(sm,cax)

  plt.savefig('graphics.png', transparent=True,dpi=600,bbox_inches="tight")
  plt.show()

In [9]:
@pipeline.task(depends_on=plot_graphic)
def make_pdf(sub_g):
  degree_sequence = sorted([d for n, d in g.degree()], reverse=True)
  plt.style.use("fivethirtyeight")

  fig, ax = plt.subplots(1,1,figsize=(10,8))

  sns.histplot(degree_sequence,bins=7,label="Count",ax=ax)
  ax2 = ax.twinx()
  sns.kdeplot(degree_sequence,color='r',label="Probability Density Function (PDF)",ax=ax2)

  lines, labels = ax.get_legend_handles_labels()
  lines2, labels2 = ax2.get_legend_handles_labels()
  ax2.legend(lines + lines2, labels + labels2, loc=0)

  ax.grid(False)
  ax2.grid(False)
  ax.set_xlabel("Degree")
  ax2.set_ylabel("Probability")

  plt.savefig('probability_density_function.png', transparent=True,dpi=600,bbox_inches="tight")
  plt.show()

In [10]:
@pipeline.task(depends_on=filters)
def results(sub_g):
  
  make_pdf(sub_g)
  pos = nx.spring_layout(subgraf_g,seed=8375,k=0.2)
  
  plot_graphic(subgraf_g,pos)

  bc = pd.Series(nx.betweenness_centrality(sub_g))
  dc = pd.Series(nx.degree_centrality(sub_g))
  ec = pd.Series(nx.eigenvector_centrality(sub_g))
  cc = pd.Series(nx.closeness_centrality(sub_g))

  df = pd.DataFrame.from_dict({"Betweenness": bc,
                              "Degree": dc,
                              "EigenVector": ec,
                              "Closeness": cc})
  df.reset_index(inplace=True,drop=True)
  df.head()

  fig = sns.PairGrid(df)
  fig.map_upper(sns.scatterplot)
  fig.map_lower(sns.kdeplot, cmap="Reds_r")
  fig.map_diag(sns.kdeplot, lw=2, legend=False)


  plt.savefig('all.png', transparent=True,dpi=800,bbox_inches="tight")
  plt.show()

  fig, ax = plt.subplots(1,1,figsize=(10,8))

  g2_core_9 = nx.k_shell(subgraf_g, 9)
  g2_core_10 = nx.k_core(subgraf_g, 10)

  pos = nx.spring_layout(subgraf_g,seed=123456789,k=0.3)

  # draw edges
  nx.draw_networkx_edges(subgraf_g,
                        pos=pos,
                        alpha=0.4, ax=ax)

  # draw nodes
  nodes = nx.draw_networkx_nodes(subgraf_g,
                  pos=pos,
                  node_color="#333333")

  # draw nodes
  nodes = nx.draw_networkx_nodes(g2_core_9,
                  pos=pos,
                  node_color="blue")

  nodes = nx.draw_networkx_nodes(g2_core_10,
                  pos=pos,
                  node_color="red")

  # static legend
  red_patch = mpatches.Patch(color='red', label='10-core')
  blue_patch = mpatches.Patch(color='blue', label='9-shell')
  plt.legend(handles=[red_patch,blue_patch])

  plt.axis("off")
  plt.savefig('k-core.png', transparent=True,dpi=600)
  plt.show()

In [11]:
/pipeline.run('NeilGaiman')

NameError: name 'np' is not defined

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=84cb924b-f123-4e15-a431-a93b26bbb4e7' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>