In [1]:
import dash
import dash_cytoscape as cyto
import dash_html_components as html
import dash_core_components as dcc
import pandas as pd
import json
import datetime

In [2]:
# Load the raw email headers; the source file is encoded as Windows-1252.
EMAILS_CSV = 'email headers.csv'
emails = pd.read_csv(EMAILS_CSV, encoding='cp1252')
emails.head()

Unnamed: 0,From,To,Date,Subject
0,Sven.Flecha@gastech.com.kronos,"Isak.Baza@gastech.com.kronos, Lucas.Alcazar@ga...",1/6/2014 8:39,GT-SeismicProcessorPro Bug Report
1,Kanon.Herrero@gastech.com.kronos,"Felix.Resumir@gastech.com.kronos, Hideki.Cocin...",1/6/2014 8:58,Inspection request for site
2,Bertrand.Ovan@gastech.com.kronos,"Emile.Arpa@gastech.com.kronos, Varro.Awelon@ga...",1/6/2014 9:28,New refueling policies - Effective February 1
3,Valeria.Morlun@gastech.com.kronos,"Dante.Coginian@gastech.com.kronos, Albina.Hafo...",1/6/2014 9:38,Route suggestion for next shift
4,Mat.Bramar@gastech.com.kronos,"Rachel.Pantanal@gastech.com.kronos, Lars.Azada...",1/6/2014 9:49,Upcoming birthdays


In [3]:
# Give every receiver its own row: turn the ', '-separated "To" string into a
# list of addresses, then explode that list into one row per address.
recipient_lists = emails['To'].str.split(', ')
emails = emails.assign(To=recipient_lists).explode('To')
emails

Unnamed: 0,From,To,Date,Subject
0,Sven.Flecha@gastech.com.kronos,Isak.Baza@gastech.com.kronos,1/6/2014 8:39,GT-SeismicProcessorPro Bug Report
0,Sven.Flecha@gastech.com.kronos,Lucas.Alcazar@gastech.com.kronos,1/6/2014 8:39,GT-SeismicProcessorPro Bug Report
1,Kanon.Herrero@gastech.com.kronos,Felix.Resumir@gastech.com.kronos,1/6/2014 8:58,Inspection request for site
1,Kanon.Herrero@gastech.com.kronos,Hideki.Cocinaro@gastech.com.kronos,1/6/2014 8:58,Inspection request for site
1,Kanon.Herrero@gastech.com.kronos,Inga.Ferro@gastech.com.kronos,1/6/2014 8:58,Inspection request for site
...,...,...,...,...
1169,Albina.Hafon@gastech.com.kronos,Valeria.Morlun@gastech.com.kronos,1/17/2014 20:46,RE: Traffic advisory for today
1169,Albina.Hafon@gastech.com.kronos,Cecilia.Morluniau@gastech.com.kronos,1/17/2014 20:46,RE: Traffic advisory for today
1169,Albina.Hafon@gastech.com.kronos,Henk.Mies@gastech.com.kronos,1/17/2014 20:46,RE: Traffic advisory for today
1169,Albina.Hafon@gastech.com.kronos,Dylan.Scozzese@gastech.com.kronos,1/17/2014 20:46,RE: Traffic advisory for today


In [4]:
# Separate the date and time in different columns.
# The timestamp strings are parsed once and both columns are derived from that
# single result. The guard keeps this cell safe to re-run: once 'Date' holds
# plain dates, parsing it again would silently reset every 'Time' to 00:00:00.
if 'Time' not in emails.columns:
    timestamps = pd.to_datetime(emails['Date'])
    emails = emails.assign(Date=timestamps.dt.date, Time=timestamps.dt.time)
emails

Unnamed: 0,From,To,Date,Subject,Time
0,Sven.Flecha@gastech.com.kronos,Isak.Baza@gastech.com.kronos,2014-01-06,GT-SeismicProcessorPro Bug Report,08:39:00
0,Sven.Flecha@gastech.com.kronos,Lucas.Alcazar@gastech.com.kronos,2014-01-06,GT-SeismicProcessorPro Bug Report,08:39:00
1,Kanon.Herrero@gastech.com.kronos,Felix.Resumir@gastech.com.kronos,2014-01-06,Inspection request for site,08:58:00
1,Kanon.Herrero@gastech.com.kronos,Hideki.Cocinaro@gastech.com.kronos,2014-01-06,Inspection request for site,08:58:00
1,Kanon.Herrero@gastech.com.kronos,Inga.Ferro@gastech.com.kronos,2014-01-06,Inspection request for site,08:58:00
...,...,...,...,...,...
1169,Albina.Hafon@gastech.com.kronos,Valeria.Morlun@gastech.com.kronos,2014-01-17,RE: Traffic advisory for today,20:46:00
1169,Albina.Hafon@gastech.com.kronos,Cecilia.Morluniau@gastech.com.kronos,2014-01-17,RE: Traffic advisory for today,20:46:00
1169,Albina.Hafon@gastech.com.kronos,Henk.Mies@gastech.com.kronos,2014-01-17,RE: Traffic advisory for today,20:46:00
1169,Albina.Hafon@gastech.com.kronos,Dylan.Scozzese@gastech.com.kronos,2014-01-17,RE: Traffic advisory for today,20:46:00


In [5]:
def create_json(emails):
    """Write the Cytoscape elements for a frame of emails to ./network_data.json.

    Every distinct address found in the ``From`` or ``To`` column becomes a node
    whose ``id`` is the address and whose ``label`` is the first two
    dot-separated parts of the local part joined by a space
    ("First.Last@domain" -> "First Last"). Every distinct (From, To) pair
    becomes a link whose ``weight`` is the number of rows with that pair.
    Nodes are written first, then links, both in order of first appearance.

    Parameters
    ----------
    emails : pandas.DataFrame
        Frame with a ``From`` and a ``To`` column holding one address string
        per row. An empty frame is allowed and produces an empty JSON list.

    Returns
    -------
    None
        The result is only written to ``./network_data.json``, which is
        overwritten on every call.
    """
    # Extract the from and to emails
    emails_from = emails.From.tolist()
    emails_to = emails.To.tolist()

    # All appearing addresses; dict.fromkeys drops duplicates but keeps order.
    nodes = list(dict.fromkeys(emails_from + emails_to))

    elements = []
    for node in nodes:
        # Slicing (instead of indexing [0] and [1]) keeps an address without a
        # dot in its local part from raising IndexError.
        name_parts = node.split('@')[0].split('.')
        elements.append({'data': {'id': node, 'label': ' '.join(name_parts[:2])}})

    # Count every (from, to) pair in a single pass; the dict keeps the pairs
    # in order of first appearance, so no separate de-duplication is needed.
    weights = {}
    for pair in zip(emails_from, emails_to):
        weights[pair] = weights.get(pair, 0) + 1

    for (source, target), weight in weights.items():
        elements.append({'data': {'source': source, 'target': target, 'weight': weight}})

    # Let the json module serialize: it escapes special characters and emits
    # a valid "[]" when there are no elements at all.
    with open('./network_data.json', 'w') as network_data:
        json.dump(elements, network_data)

In [6]:
# Keep only the emails that were sent on the first day (2014-01-06)
first_day = datetime.date(2014, 1, 6)
emails_day1 = emails[emails['Date'] == first_day]

In [7]:
# Write the first-day network to network_data.json, which is loaded below as
# the initial content of the graph.
create_json(emails_day1)

In [8]:
# The Dash application that hosts the layout and the callback defined below.
app = dash.Dash(__name__)

# Initial graph elements: the network file produced by create_json.
with open('network_data.json', mode='r') as network_file:
    data = json.load(network_file)

In [9]:
# Page layout: the controls (date slider and part-of-day dropdown) sit above
# the Cytoscape network graph.
heading_style = {'font-weight': 'bold', 'text-align': 'center', 'font-size': '26px'}

# Slider positions 0..9 are labelled with these dates, in this order.
slider_dates = [
    '2014-01-06',
    '2014-01-07',
    '2014-01-08',
    '2014-01-09',
    '2014-01-10',
    '2014-01-13',
    '2014-01-14',
    '2014-01-15',
    '2014-01-16',
    '2014-01-17',
]

date_label = html.Label(['Visualize network on date...'], style=dict(heading_style))
date_slider = dcc.Slider(
    min=0, max=9, step=1,
    value=0,
    marks=dict(enumerate(slider_dates)),
    id='my-slider',
)

day_part_label = html.Label(['Select a part of the day to filter'], style=dict(heading_style))
day_part_choices = [('Morning', 'morning'), ('Afternoon', 'afternoon'), ('Evening', 'evening')]
day_part_dropdown = dcc.Dropdown(
    id='my-dropdown',
    options=[{'label': label, 'value': value} for label, value in day_part_choices],
    placeholder='Select...',
)

network_graph = cyto.Cytoscape(
    id='cytoscape-v1',
    layout={'name': 'circle'},
    style={'width': '100%', 'height': '1000px'},
    elements=data,
)

app.layout = html.Div([
    html.Div([date_label, date_slider, day_part_label, day_part_dropdown]),
    html.Div([network_graph]),
])

In [10]:
@app.callback(
    dash.dependencies.Output(component_id='cytoscape-v1', component_property='elements'),
    [dash.dependencies.Input(component_id='my-slider', component_property='value'),
     dash.dependencies.Input(component_id='my-dropdown', component_property='value')])
def update_output(slider_value, dropdown_value):
    """Rebuild the graph elements for the selected day and part of the day.

    Parameters
    ----------
    slider_value : int
        Position of the date slider; used as an index into the distinct values
        of ``emails['Date']``.
    dropdown_value : str or None
        'morning' (time <= 12:00), 'afternoon' (12:00 < time <= 18:00) or
        'evening' (time > 18:00). Any other value applies no time filter.

    Returns
    -------
    list
        The Cytoscape elements for the selection; an empty list when no email
        matches it.
    """
    # NOTE(review): assumes the first-appearance order of the dates in `emails`
    # matches the order of the slider marks - confirm against the data.
    dates = emails['Date'].unique()
    emails_day = emails[emails['Date'] == dates[slider_value]]

    noon = datetime.time(12, 0, 0)
    evening_start = datetime.time(18, 0, 0)

    # The three choices are mutually exclusive, so at most one filter applies.
    if dropdown_value == 'morning':
        emails_day = emails_day[emails_day['Time'] <= noon]
    elif dropdown_value == 'afternoon':
        emails_day = emails_day[(emails_day['Time'] > noon) & (emails_day['Time'] <= evening_start)]
    elif dropdown_value == 'evening':
        emails_day = emails_day[emails_day['Time'] > evening_start]

    # Nothing matched: show an empty graph rather than asking create_json to
    # serialize an empty selection.
    if emails_day.empty:
        return []

    create_json(emails_day)
    with open('network_data.json', 'r') as f:
        elements = json.load(f)

    return elements

In [11]:
# Start the Dash server only when this code runs as the main module.
if __name__ == '__main__':
    app.run_server()

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [04/Apr/2022 09:17:53] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2022 09:17:53] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2022 09:17:53] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2022 09:17:53] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2022 09:17:56] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2022 09:17:58] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2022 09:18:00] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2022 09:18:02] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2022 09:18:05] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2022 09:18:10] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2022 09:18:14] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2022 09:18:17] "POST /_dash-update-component