In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Centrality Measures

In [None]:
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-02-26/small_trains.csv')

In [None]:
df.head()

In [None]:
df.shape

In [None]:
data = df[['departure_station','arrival_station']]
data

In [None]:
import networkx as nx

In [None]:
data.head()

In [None]:
# Attach a synthetic integer 'distance' in [1, 100) to every row so the graph
# has an edge attribute to work with. `.assign` returns a new frame instead of
# writing into a slice of `df` (which triggers SettingWithCopyWarning), and the
# fixed seed makes the weights reproducible across kernel restarts.
data = data.assign(
    distance=np.random.default_rng(42).integers(1, 100, size=len(data))
)

In [None]:
data

In [None]:
nx.from_pandas_edgelist

In [None]:
G = nx.from_pandas_edgelist(data, 'departure_station', 'arrival_station', edge_attr='distance')

In [None]:
plt.figure(figsize=(15,10))

nx.draw(G)

In [None]:
sorted(dict(G.degree).items(), key=lambda x : x[1], reverse=True)

In [None]:
dict(G.degree()).values()

In [None]:
G.order()

In [None]:
# Headline size statistics for the graph: edge count, mean degree and density.
edge_total = G.size()
mean_degree = sum(deg for _, deg in G.degree()) / G.order()
print('Number of edges:', edge_total)
print('Average degree:', mean_degree)
print('Density:', nx.density(G))

# Centrality Measures

## Degree

In [None]:
# Degree centrality per node as a two-column table, highest values first.
degree = (
    pd.Series(nx.degree_centrality(G), name='Degree Centrality')
    .rename_axis('Node')
    .reset_index()
)
degree.sort_values('Degree Centrality', ascending=False).head()

# Betweenness Centrality

In [None]:
nx.draw(G, node_size=50)

In [None]:
G.edges['PARIS LYON','LYON PART DIEU']

In [None]:
nx.betweenness_centrality(G)

In [None]:
# The node with the highest betweenness is the one whose removal affects the
# graph the most.
betweenness = nx.betweenness_centrality(G)

# Reshape the {node: score} mapping into a two-column table for sorting.
betweenness = pd.DataFrame.from_dict(betweenness, orient='index').reset_index()
betweenness.columns = ['Node', 'Betweenness Centrality']
betweenness.sort_values('Betweenness Centrality', ascending=False).head()

# Pagerank

O PageRank mede a relevância de um nó com base não só em quantos vizinhos ele tem, mas também em quão importantes esses vizinhos são.

In [None]:
# PageRank score per node as a two-column table, highest values first.
pagerank = pd.DataFrame(
    list(nx.pagerank(G).items()),
    columns=['Node', 'PageRank Centrality'],
)
pagerank.sort_values('PageRank Centrality', ascending=False).head()

In [None]:
nx.draw_kamada_kawai(G, node_size=20,)

# Ego graph

In [None]:
nx.ego_graph(G, 'PARIS LYON', radius=1)

In [None]:
ego = nx.ego_graph(G, 'PARIS EST', radius=3)
nx.draw_kamada_kawai(ego, node_size=20, node_color='cyan', with_labels=True)

sorted(nx.betweenness_centrality(ego).items(), key=lambda x : x[1], reverse=True)

# Converting non-graph dataframes to edge lists

## Simpsons Network

In [None]:
df = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-08-27/simpsons-guests.csv', sep='|')

In [None]:
df.head()

In [None]:
def df_to_graph(df, node, edge):
    """
    Turn a long-format dataframe into an edge list linking nodes that share
    the same `edge` value(s).

    The frame is self-joined on `edge`, so every pair of rows with equal
    `edge` values yields one (source, destination) pair. Pairs are then
    grouped and counted, and self-pairs (same node on both sides) are dropped.
    Both directions of each pair, (a, b) and (b, a), appear in the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data containing the `node` column and the `edge` column(s).
    node : str
        Name of the column whose values become the graph nodes.
    edge : str or list of str
        Column name, or list of column names, that two rows must share for
        their nodes to be connected.

    Returns
    -------
    pandas.DataFrame
        Columns `{node}_src`, `{node}_dest` and one `{col}_count` column per
        edge column, holding the number of non-null shared values for the pair.
    """
    edge_cols = list(edge) if isinstance(edge, list) else [edge]
    src_col, dest_col = f'{node}_x', f'{node}_y'

    # Self-join: rows sharing the edge value(s) are paired with each other.
    pairs = pd.merge(df, df, how='inner', on=edge_cols)
    counts = pairs.groupby([src_col, dest_col]).count().reset_index()

    # Boolean mask rather than DataFrame.query, so column names that are not
    # valid Python identifiers (e.g. containing spaces) still work.
    not_self_pair = counts[src_col] != counts[dest_col]
    counts = counts.loc[not_self_pair, [src_col, dest_col] + edge_cols]

    # Rename each edge column individually so the `_count` suffix is also
    # applied when `edge` is given as a list.
    renames = {src_col: f'{node}_src', dest_col: f'{node}_dest'}
    renames.update({col: f'{col}_count' for col in edge_cols})
    return counts.rename(columns=renames)


In [None]:
df_to_graph(df = df, node='guest_star', edge='episode_title')

In [None]:
graph_df = df_to_graph(df, 'guest_star', 'episode_title')
graph_df.sort_values(by='episode_title_count')

In [None]:
# Undirected co-appearance graph: one edge per pair of guest stars listed in
# graph_df. No edge attributes are attached.
G = nx.from_pandas_edgelist(graph_df, 'guest_star_src', 'guest_star_dest')
print('Number of edges:', G.size())
print('Average degree:', sum(dict(G.degree()).values())/G.order())
print('Density:', nx.density(G))

# G has no edge attribute named 'edge', so the former weight='edge' argument
# matched nothing and every edge was treated as weight 1. Compute the
# unweighted betweenness explicitly instead of implying a weighting.
betweenness = nx.betweenness_centrality(G)
betweenness = pd.DataFrame.from_dict(betweenness, orient='index').reset_index()
betweenness.columns = ['Node', 'Betweenness Centrality']
betweenness.sort_values('Betweenness Centrality', ascending=False).head()

In [None]:
# The graph built from graph_df carries no edge attribute named 'edge', so the
# former weight='edge' argument had no effect (all edges counted as 1).
# Call PageRank without it to make the unweighted ranking explicit.
pagerank = nx.pagerank(G)
pagerank = pd.DataFrame.from_dict(pagerank, orient='index').reset_index()
pagerank.columns = ['Node', 'Pagerank Centrality']
pagerank.sort_values('Pagerank Centrality', ascending=False).head()

In [None]:
bb = nx.betweenness_centrality(G)
nx.set_node_attributes(G, bb, 'betweenness')

In [None]:
# Draw every node at a tiny size. `edge_size` is not a networkx drawing
# keyword (recent versions reject unknown keywords); edge line thickness is
# controlled by `width`.
nx.draw(G, nodelist=list(bb), node_size=2, width=2)

In [None]:
nx.draw_kamada_kawai(G, node_size=20, node_color='cyan')

In [None]:
df

In [None]:
len(list(G.neighbors('Marcia Wallace')))

# Interactive Visualization

In [None]:
# !pip install pyvis

In [None]:
graph_df.head()

In [None]:
G

In [None]:
from pyvis.network import Network

In [None]:
g = Network(notebook=True)

In [None]:
G.nodes()

In [None]:
g.from_nx(G)

In [None]:
g

In [None]:

g.show_buttons()
g.show('test.html', )

# Game of Thrones Graph

In [None]:
url = 'https://www.macalester.edu/~abeverid/data/stormofswords.csv'
got_data = pd.read_csv(url)
G = nx.from_pandas_edgelist(got_data, 'Source','Target', edge_attr='Weight')

In [None]:
# The edge list was loaded with edge_attr='Weight', so that is the attribute
# PageRank must read. The previous weight='edge' matched no attribute and
# produced an unweighted ranking.
pagerank = nx.pagerank(G, weight='Weight')
pagerank = pd.DataFrame.from_dict(pagerank, orient='index').reset_index()
pagerank.columns = ['Node', 'Pagerank Centrality']
pagerank.sort_values('Pagerank Centrality', ascending=False).head(10)

In [None]:
degree = nx.degree_centrality(G)
degree = pd.DataFrame.from_dict(degree, orient='index').reset_index()
degree.columns = ['Node', 'Degree Centrality']
degree.sort_values('Degree Centrality', ascending=False).head(10)

In [None]:
betweenness = nx.betweenness_centrality(G)
betweenness = pd.DataFrame.from_dict(betweenness, orient='index').reset_index()
betweenness.columns = ['Node', 'Betweenness Centrality']
betweenness.sort_values('Betweenness Centrality', ascending=False).head(10)

In [None]:
from pyvis.network import Network
import pandas as pd
url = 'https://www.macalester.edu/~abeverid/data/stormofswords.csv'
# Alternative edge list (not used here):
# https://raw.githubusercontent.com/mathbeveridge/asoiaf/master/data/asoiaf-all-edges.csv

got_net = Network(height="750px", width="100%", notebook=True)

# Use the Barnes-Hut physics layout for the network.
got_net.barnes_hut()
got_data = pd.read_csv(url)

# Register both endpoints of every row, then connect them; the row's weight
# is passed as the edge value.
for src, dst, w in zip(got_data['Source'], got_data['Target'], got_data['Weight']):
    got_net.add_node(src, src, title=src)
    got_net.add_node(dst, dst, title=dst)
    got_net.add_edge(src, dst, value=w)

# Adjacency lookup: node id -> neighbouring node ids.
neighbor_map = got_net.get_adj_list()

# Extend each node's hover text with its neighbours and set its value to
# the number of neighbours.
for node in got_net.nodes:
    neighbors = neighbor_map[node["id"]]
    node["title"] += " Neighbors:<br>" + "<br>".join(neighbors)
    node["value"] = len(neighbors)

got_net.show_buttons()
got_net.show("gameofthrones.html", )

In [None]:
got_net.save_graph('got.html')

In [None]:
# https://pyvis.readthedocs.io/en/latest/tutorial.html#visualization