In [1]:
import networkx as nx
import matplotlib as plt
import pandas as pd
import numpy as np

In [2]:
# initiate an empty list to store wiki entries
entries = []
# initiate an empty list to store edges between wikinodes
edges = []

# parse the txt database
# @line each line of the txt file
# @word each work in each line
with open('testData_100entries.txt') as f:
    lines = f.readlines()
    for line in lines:
        words = line.split(',') 
        print(words[0])
        entries.append(words[0])
        # for every word in every line
        for word in words:
            # words[1] is the link number
            if word is not words[1] and word is not word[0]:
                # there are definitely repetitive edges
                edges.append([words[0], word])

Cheese
Prehistory
Milking_pipeline
Langres_cheese
Curd
Mayonnaise
Common_Era
Toned_milk
Crema_(dairy_product)
Breastfeeding
West_Germanic_languages
Enzyme
Tyrosemiophilia
Pistou
Cheese_ripening
Sodium
Colostrum
Charlemagne
Mayo_Clinic
Vinegar
Tonkatsu
List_of_Pakistani_condiments
Goat_cheese
Crushed_red_pepper
Acid
Bhutan
Domestication
Cocktail_sauce
FAOSTAT
Amine
Breast_milk
Islam
A2_milk
Duqqa
Samuel_Butler_(novelist)
Organic_milk
Milk_crate
Chili_paste
Ancient_Rome
Guacamole
Cheesemaker
Soybean
Dabu-dabu
Macronutrient
Palapa_(condiment)
Yogurt
Cheeses_of_Switzerland
Barbecue_sauce
XO_sauce
USDA_National_Nutrient_Database
Fatty_acid
Philippine_condiments
Kaong_palm_vinegar
Toyomansi
Tonne
Chimichurri
Milk_sugar
Paneer
Say_cheese
Keen%27s
Occitan
Queso_fresco
Germany
Vitamin
Thermophilic
Agrodolce
Ymer_(dairy_product)
Curry_ketchup
Yunnan
Apennines
Remoulade
Yak
Malted_milk
Herb
Emmental_(cheese)
Tacuinum_sanitatis
Kosher_foods
Bagoong
Dairy_product
Colo-colo_(condiment)
Kumis
Cottage

In [3]:
# tranform the list of graph edges to a dataframe
df = pd.DataFrame(edges, columns=['source', 'target'])
df

Unnamed: 0,source,target
0,Cheese,Cheese
1,Cheese,Prehistory
2,Cheese,Milking_pipeline
3,Cheese,Langres_cheese
4,Cheese,Curd
...,...,...
38614,Chhurpi,Buttermilk
38615,Chhurpi,Cow
38616,Chhurpi,Himalayas
38617,Chhurpi,Nepal


In [4]:
G = nx.from_pandas_edgelist(df, 'source', 'target')

This viualization implementation is similiar to https://melaniewalsh.github.io/Intro-Cultural-Analytics/Network-Analysis/Making-Network-Viz-with-Bokeh.html

In [5]:
from bokeh.io import output_file, output_notebook, show, save
from bokeh.models import Range1d, Circle, ColumnDataSource, MultiLine, EdgesAndLinkedNodes, NodesAndLinkedEdges
from bokeh.plotting import figure
from bokeh.plotting import from_networkx
from bokeh.palettes import Blues8, Reds8, Purples8, Oranges8, Viridis8, Spectral8
from bokeh.transform import linear_cmap
from networkx.algorithms import community

In [10]:

from bokeh.models import (BoxSelectTool, Circle, EdgesAndLinkedNodes, HoverTool, MultiLine, NodesAndLinkedEdges, Plot, Range1d, TapTool,)
from bokeh.palettes import Spectral4

In [7]:
# Calculate children Wiki Articles (degree) for each node
degrees = dict(nx.degree(G))
nx.set_node_attributes(G, name='LinkNum', values=degrees)

In [8]:
# adjust degree so that the nodes with very small degrees are still visible
number_to_adjust_by = 5
adjusted_node_size = dict([(node, (degree/10)+number_to_adjust_by) for node, degree in nx.degree(G)])
nx.set_node_attributes(G, name='adjusted_node_size', values=adjusted_node_size)

In [9]:
#Choose attributes from G network to size and color
size_by_this_attribute = 'adjusted_node_size'
color_by_this_attribute = 'adjusted_node_size'

#Choose colors for node and edge highlighting
node_highlight_color = Spectral4[2]
edge_highlight_color = Spectral4[2]

title = 'Wikipedia Network'

#Categories will appear when hovering over each node
HOVER_TOOLTIPS = [
       ("WikiEntry", "@index"),
        ("LinkNum", "@degree")
]

#Create a plot — set dimensions, toolbar, and title
plot = figure(tooltips = HOVER_TOOLTIPS,
              tools="pan,wheel_zoom,save,reset,tap", active_scroll='wheel_zoom',
            x_range=Range1d(-10.1, 10.1), y_range=Range1d(-10.1, 10.1), title=title, plot_width=2000,plot_height=2000)

network_graph = from_networkx(G, nx.spring_layout, scale=10, center=(0, 0))

# for nodes, set node size and color
network_graph.node_renderer.glyph = Circle(size=size_by_this_attribute, fill_color=Spectral4[1], fill_alpha=0.7,
          line_color=None)
network_graph.node_renderer.hover_glyph = Circle(size=size_by_this_attribute, fill_color=node_highlight_color, line_width=2)
network_graph.node_renderer.selection_glyph = Circle(size=size_by_this_attribute, fill_color=node_highlight_color, line_width=2)

# for edges, set edge weight and color
#Set edge opacity and width
network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=0.3)
#Set edge highlight colors
network_graph.edge_renderer.selection_glyph = MultiLine(line_color=edge_highlight_color, line_width=0.5)
network_graph.edge_renderer.hover_glyph = MultiLine(line_color=edge_highlight_color, line_width=0.5)


# INTERACTIVE
network_graph.selection_policy = NodesAndLinkedEdges()
network_graph.inspection_policy = NodesAndLinkedEdges()
# network_graph.selection_policy = EdgesAndLinkedNodes()
# this inspection cause error
# network_graph.inspection_policy = EdgesAndLinkedNodes()

plot.renderers.append(network_graph)
save(plot, filename=f"{title}.html")
show(plot)


  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
  warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")
