# First visualization

This is the first visualization, created by Jelte and Jesse (with a lot of help from Victor). To run it, run the code blocks from top to bottom. The first time you run this notebook, uncomment the `pip install` lines in the first code block so that all required libraries are installed in your current Jupyter kernel.

In [1]:
# Install Plotly in the current Jupyter kernel
# import sys
# !{sys.executable} -m pip install plotly
# !{sys.executable} -m pip install networkx

# Imports
import pandas as pd # General data handling
import networkx as nx # Handling network graphs
import plotly.graph_objs as go # Graph drawing imports
import random
import math

## General functions

These are general helper functions that are used later in this notebook.

In [2]:
# Functions
def _generate_geometric_graph(graph_cls, n, dim=2, pos=None, seed=None):
    """Build an edgeless graph of type ``graph_cls`` whose nodes carry a "pos" attribute.

    Shared implementation behind generate_geometric_digraph and
    generate_geometric_multidigraph; see those functions for the parameters.
    """
    # Materialize the nodes once: ``n`` may be a one-shot iterator and it is
    # consumed twice below (adding the nodes and drawing their positions).
    # An integer is accepted as shorthand for the nodes 0..n-1.
    nodes = list(range(n)) if isinstance(n, int) else list(n)

    G = graph_cls()
    G.add_nodes_from(nodes)

    if pos is None:
        # A private generator produces the same sequence as random.seed(seed)
        # followed by random.random(), but leaves the global RNG untouched so
        # a seed set earlier by the caller is not silently overwritten.
        rng = random.Random(seed)
        pos = {v: [rng.random() for _ in range(dim)] for v in nodes}

    nx.set_node_attributes(G, pos, "pos")

    return G


def generate_geometric_digraph(n, dim=2, pos=None, seed=None):
    """Return an edgeless ``nx.DiGraph`` whose nodes have a "pos" attribute.

    Parameters:
        n:    iterable of node identifiers, or an int meaning the nodes 0..n-1.
        dim:  number of coordinates per node when positions are generated.
        pos:  optional dict mapping node -> position; when given it is stored
              as-is and ``dim``/``seed`` are ignored.
        seed: seed for the generated positions (only used when ``pos`` is None);
              None gives a different layout on every call.

    Returns:
        The new graph, with no edges. Generated positions are lists of ``dim``
        floats in [0, 1).
    """
    return _generate_geometric_graph(nx.DiGraph, n, dim=dim, pos=pos, seed=seed)


def generate_geometric_multidigraph(n, dim=2, pos=None, seed=None):
    """Return an edgeless ``nx.MultiDiGraph`` whose nodes have a "pos" attribute.

    Parameters:
        n:    iterable of node identifiers, or an int meaning the nodes 0..n-1.
        dim:  number of coordinates per node when positions are generated.
        pos:  optional dict mapping node -> position; when given it is stored
              as-is and ``dim``/``seed`` are ignored.
        seed: seed for the generated positions (only used when ``pos`` is None);
              None gives a different layout on every call.

    Returns:
        The new graph, with no edges. Generated positions are lists of ``dim``
        floats in [0, 1).
    """
    return _generate_geometric_graph(nx.MultiDiGraph, n, dim=dim, pos=pos, seed=seed)

## Visualisation 1

This is the code for visualization 1. It generates a graph with the nodes placed on a circle (random layouts are available as commented-out alternatives in the code). Hovering over a node shows a tooltip with its ID, email, job and number of neighbors.

In [3]:
# Read CSV and setup NX graph: one edge per mail (row), keyed so that repeated
# sender/recipient pairs are kept as parallel edges
mailSet = pd.read_csv("enron-v1.csv")
mailGraph = nx.from_pandas_edgelist(
    mailSet, 'fromId', 'toId',
    ['fromEmail', 'fromJobtitle', 'toEmail', 'toJobtitle', 'messageType', 'sentiment', 'date'],
    create_using = nx.MultiDiGraph())

# Generate positions for the network nodes
# The seed only matters for the random layouts, which are kept here as commented-out alternatives:
random.seed(10)
# pos = {v: [random.random(), random.random()] for v in mailGraph.nodes}         # Standard random position distribution
# pos = {v: [random.gauss(0, 2), random.gauss(0, 2)] for v in mailGraph.nodes}      # Gaussian random position distribution

nf = len(mailGraph.nodes) / (2 * math.pi) # Normalization factor for normalizing the nodes to 2*pi (full circle)
# Circular position distribution. The angle is derived from each node's rank in sorted order
# rather than from the raw ID, so the nodes are evenly spaced around the full circle even
# when the IDs have gaps or do not start at 0.
pos = {v: [math.sin(i / nf), math.cos(i / nf)] for i, v in enumerate(sorted(mailGraph.nodes))}

# This "circular" distribution looks oval because of the automatic rescaling of Plotly's graph viewer. To make this look actually circular, set the width and height to the same value.

# Create a graph with the given nodes at the positions computed above
# (seed is only used by generate_geometric_digraph when no positions are passed)
G = generate_geometric_digraph(mailGraph.nodes, pos=pos, seed=10)



# Copy every mail edge into the rendered graph G (a DiGraph, so parallel mails
# collapse into a single edge) and label the nodes with email address and job title.
for sender, recipient, key in mailGraph.edges:
    G.add_edge(sender, recipient)

    # Only the first mail (key 0) between a pair is used for labelling
    if key != 0:
        continue

    mailData = mailGraph.get_edge_data(sender, recipient, key)
    endpoints = (
        (sender, 'fromEmail', 'fromJobtitle'),
        (recipient, 'toEmail', 'toJobtitle'),
    )
    for nodeId, emailColumn, jobColumn in endpoints:
        nodeData = G.nodes[nodeId]
        # Keep the first label a node received; never overwrite it
        if nodeData.get('Email') is None:
            nodeData['Email'] = mailData[emailColumn]
            nodeData['Job'] = mailData[jobColumn]

# Collect the line segments for all edges. Each edge contributes its start
# point, its end point and a None, which breaks the line so that consecutive
# edges are not joined together.
edge_x, edge_y = [], []
for source, target in G.edges:
    x0, y0 = G.nodes[source]['pos']
    x1, y1 = G.nodes[target]['pos']
    edge_x.extend((x0, x1, None))
    edge_y.extend((y0, y1, None))

# Drawing edge lines (thin, translucent and without hover information)
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    hoverinfo='none',
    line={'width': 0.5, 'color': 'rgba(100, 100, 100, 0.2)'},
)



# Initializing node positions (in G.nodes() order)
node_x = [G.nodes[node]['pos'][0] for node in G.nodes()]
node_y = [G.nodes[node]['pos'][1] for node in G.nodes()]

# Drawing nodes
node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale='YlGnBu',
        reversescale=True,
        color=[],  # placeholder; the per-node colors are assigned after the trace is created
        size=10,
        colorbar=dict(
            thickness=15,
            # The standalone `titleside` attribute is deprecated and rejected by
            # recent Plotly releases; the side is set on the title object instead.
            title=dict(text='Node Connections', side='right'),
            xanchor='left'
        ),
        line_width=1))



# Coloring nodes by the amount of neighbors and adding tooltips
# NOTE: G is a DiGraph, so each adjacency dict holds only the outgoing
# neighbours; "Connections" is therefore the number of distinct recipients.
# The colors/texts line up with the markers as long as G.adjacency() and
# G.nodes() iterate the nodes in the same order.
node_adjacencies = []
node_text = []
for node, nbrdict in G.adjacency():
    nodeData = G.nodes[node]
    node_adjacencies.append(len(nbrdict))
    # str() and the fallback keep the tooltip from crashing on nodes whose
    # email/job is missing or not a string (e.g. NaN from an empty CSV field)
    node_text.append(
        'ID: ' + str(node) +
        '<br>Email: ' + str(nodeData.get('Email', 'unknown')) +
        '<br>Job: ' + str(nodeData.get('Job', 'unknown')) +
        '<br>Connections: ' + str(len(nbrdict))
    )

node_trace.marker.color = node_adjacencies
node_trace.text = node_text



# Drawing the graph as a figure (edges first so the node markers are drawn on top)
fig = go.Figure(data=[edge_trace, node_trace],
    layout=go.Layout(
    width=1500,
    height=850,
    # `titlefont_size` (layout.titlefont) is deprecated and rejected by recent
    # Plotly releases; the font is set on the title object instead.
    title=dict(text='Network graph testing using Plotly', font=dict(size=16)),
    showlegend=False,
    hovermode='closest',
    margin=dict(b=20,l=5,r=5,t=40),
    # Hide grid, zero lines and tick labels: the coordinates only encode the layout
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
)
fig.show()