In [12]:
#imports
import nest_asyncio
nest_asyncio.apply()  # needed in Jupyter to run separate asyncio loops, such as slackclient's

import os
from collections import defaultdict, Counter
from itertools import count
import re

from graphviz import Digraph
import networkx as nx
import slack

In [16]:
#slack interaction
# Page size passed as `count` to every search request made by fetch_user_mentions.
MAX_PER_PAGE = 100

# Client shared by the whole notebook. The token is read from the
# SLACK_ADMIN_TOKEN environment variable instead of being hard-coded;
# the lookup raises KeyError if the variable is not set.
SLACK_CLIENT = slack.WebClient(os.environ["SLACK_ADMIN_TOKEN"])

def fetch_user_mentions():
    """Lazily yield every Slack search match for "@* during:week".

    This is a generator: no request is sent until it is iterated. Pages are
    requested one at a time, starting at page 1 and sorted ascending, with
    MAX_PER_PAGE results each. Every entry of the response's
    ``messages.matches`` list is yielded in order. Iteration ends once the
    current page number reaches the ``page_count`` reported in the response's
    pagination block.

    A "Fetching page N" progress line is printed before each request.
    """
    page_number = 1
    while True:
        print("Fetching page", page_number)
        response = SLACK_CLIENT.search_messages(
            query="@* during:week",
            page=page_number,
            count=MAX_PER_PAGE,
            sort_dir="asc",
        )
        found = response.data["messages"]
        for match in found["matches"]:
            yield match
        # The response itself tells us how many pages exist; stop after the last one.
        if page_number >= found["pagination"]["page_count"]:
            return
        page_number += 1

In [23]:
# Calling a generator function only builds the generator object; no page is
# fetched until it is iterated, which is why this reports ~0 ns.
%time it = fetch_user_mentions()

Wall time: 0 ns


In [30]:
# Demo of the id anonymiser: looking up a key that is not present yet stores
# the next value of a fresh itertools.count (0, 1, 2, ...) under that key.
anonymiser = defaultdict(count().__next__)

In [36]:
# NOTE(review): the output below contains the keys 'foo', 'bar' and 'baz', but
# no visible cell looks them up. Presumably lookups such as anonymiser['foo']
# were run in cells that are no longer in the notebook, so this output cannot
# be reproduced by running the visible cells in order.
anonymiser

defaultdict(<method-wrapper '__next__' of itertools.count object at 0x0000020310298E88>,
            {'foo': 0, 'bar': 1, 'baz': 2})

In [14]:
#extract the mention tuples

# Tagged users appear in message text as e.g. `<@UF123D5G7>`, optionally with a
# display label as in `<@UF123D5G7|name>`. The single capturing group picks out
# the id only. The id length is deliberately not fixed: the earlier pattern
# required exactly 9 characters and therefore silently dropped mentions of
# users with longer ids.
USER_ID_RE = re.compile(r"<@([A-Z0-9]+)(?:\|[^>]*)?>")

def extract_mentions(messages):
    """Transform a sequence of messages into a sequence of mentions tuples.

    Parameters
    ----------
    messages : iterable of mapping
        Each message must provide 'user' (the author's id) and 'text'
        (the raw message text, in which mentions are searched).

    Yields
    ------
    tuple of (int, int)
        One ``(author, mentioned)`` pair per mention found, in message order.
        Both members are anonymous integer ids, not the original user ids.

    Notes
    -----
    Ids are handed out on first use, starting at 0, and the author's id is
    assigned before the mentioned user's. An author whose message contains no
    mention is never assigned an id. The mapping lives only for the duration
    of one call, so ids are not comparable between separate calls.
    """
    # An autoincrementing integer id anonymiser
    anonymiser = defaultdict(count().__next__)

    for message in messages:
        author = message['user']
        for mentioned in USER_ID_RE.findall(message['text']):
            # Tuple items evaluate left to right, so the author gets its id first.
            yield anonymiser[author], anonymiser[mentioned]

In [39]:
# Demo: Counter tallies the items of any iterable, here the last decimal
# digit of each square below 1000**2.
Counter(i**2 % 10 for i in range(1000))

Counter({0: 100, 1: 200, 4: 200, 9: 200, 6: 200, 5: 100})

In [15]:
# Building the pipeline is instant because both stages are generators;
# the Slack requests only happen while Counter consumes the iterator.
%time mention_iterator = extract_mentions(fetch_user_mentions())
# Tally identical (author, mentioned) pairs into mention counts.
%time mentions = Counter(mention_iterator)

Wall time: 0 ns
Wall time: 34.1 s


In [43]:
# Show the tally: keys are (author, mentioned) pairs of anonymised integer ids,
# values are how many times that author mentioned that user.
mentions

Counter({(0, 1): 1,
         (2, 3): 7,
         (4, 5): 2,
         (6, 1): 1,
         (7, 8): 2,
         (9, 8): 1,
         (0, 9): 1,
         (10, 11): 2,
         (12, 13): 1,
         (14, 15): 1,
         (14, 5): 1,
         (14, 16): 1,
         (1, 17): 5,
         (7, 18): 1,
         (19, 20): 1,
         (21, 22): 1,
         (23, 7): 2,
         (24, 14): 1,
         (25, 21): 1,
         (25, 26): 1,
         (10, 27): 6,
         (16, 14): 1,
         (1, 28): 13,
         (29, 15): 1,
         (7, 30): 2,
         (1, 31): 2,
         (9, 1): 2,
         (9, 9): 1,
         (32, 33): 1,
         (12, 20): 2,
         (29, 9): 1,
         (1, 0): 1,
         (1, 34): 3,
         (9, 28): 1,
         (35, 36): 2,
         (7, 26): 2,
         (37, 1): 5,
         (21, 28): 1,
         (15, 1): 3,
         (38, 30): 1,
         (39, 37): 2,
         (40, 13): 4,
         (1, 41): 3,
         (26, 1): 1,
         (42, 30): 2,
         (42, 38): 2,
         (7, 1): 1,
  

In [41]:
# For those using Anaconda on Windows: put the Graphviz executables on PATH.
# The change is guarded in two ways:
#   * it only runs on Windows, because appending a ';'-separated Windows
#     directory on another OS would garble the last PATH entry;
#   * it is skipped when the directory is already listed, so re-running the
#     cell does not keep growing PATH.
GRAPHVIZ_BIN_DIR = r'C:\Program Files (x86)\Graphviz2.38\bin'
if os.name == "nt":
    if GRAPHVIZ_BIN_DIR not in os.environ.get("PATH", "").split(os.pathsep):
        os.environ["PATH"] = os.environ.get("PATH", "") + os.pathsep + GRAPHVIZ_BIN_DIR

def render_graph(mentions, filename="output"):
    """Draw a mentions counter as an SVG Graphviz digraph.

    Parameters
    ----------
    mentions : mapping of (author, mentioned) -> int
        Number of times `author` mentioned `mentioned`.
    filename : str
        Passed unchanged to ``Digraph.render``.

    Every user that was mentioned at least once gets an explicitly sized node
    labelled ``S<id>``. Each (author, mentioned) pair becomes one edge whose
    pen width equals its mention count. Users who only appear as authors get
    no explicit node call; they are referenced by the edges alone.
    """
    graph = Digraph(format="svg", graph_attr={"tooltip": " ", "overlap": "false"})

    # Total number of mentions each user received, over all authors.
    received = {}
    for (_, target), n in mentions.items():
        received[target] = received.get(target, 0) + n

    # Nodes: side length grows with the square root of the total, so the
    # node area follows the mention count (truncated to whole units).
    for user, total in received.items():
        size = int(0.5 * total**0.5)
        graph.node(
            f"S{user}",
            width=str(size),
            height=str(size),
            fontsize=str(max(25*size, 10)),
            tooltip=f"S{user} was mentioned a total of {total} time(s)",
        )

    # Edges: one per (author, mentioned) pair.
    for (source, target), n in mentions.items():
        graph.edge(
            f"S{source}",
            f"S{target}",
            penwidth=str(n),
            tooltip=f"S{source} mentioned S{target} {n} time(s)",
        )

    graph.render(filename)
    
# Render the collected mentions using the default filename ("output").
render_graph(mentions)

In [42]:
# A slightly more advanced version, with a pagerank calculation for color
def calculate_normalized_pagerank(mentions):
    """Calculate Pagerank values for mentions, scaled so the largest is 1.

    Parameters
    ----------
    mentions : mapping of (author, mentioned) -> int
        Number of times `author` mentioned `mentioned`; used as edge weight.

    Returns
    -------
    dict
        Maps every user that appears as author or as mentioned to its
        pagerank divided by the largest pagerank in the graph. Empty when
        `mentions` is empty.
    """
    # With no edges there is nothing to rank. Returning early also avoids
    # calling max() on an empty sequence below, which raises ValueError.
    if not mentions:
        return {}

    G = nx.DiGraph()

    for (author, mentioned), mention_count in mentions.items():
        G.add_edge(author, mentioned, weight=mention_count)

    pr = nx.pagerank(G)
    if not pr:
        return {}
    largest = max(pr.values())
    return {k: v/largest for k, v in pr.items()}


def render_pagerank_graph(mentions, filename="output"):
    """Draw a mentions counter as an SVG digraph, shaded by pagerank.

    Parameters
    ----------
    mentions : mapping of (author, mentioned) -> int
        Number of times `author` mentioned `mentioned`.
    filename : str
        Passed unchanged to ``Digraph.render``.

    Node size follows the number of mentions a user received. In addition
    each mentioned user's node is filled with an HSV colour of hue 0 (red)
    whose saturation is that user's normalized pagerank, formatted to three
    decimals. Each (author, mentioned) pair becomes one edge whose pen width
    equals its mention count.
    """
    graph = Digraph(format="svg", graph_attr={"tooltip": " ", "overlap": "false"})

    # Total mentions received per user, then the pagerank of every user.
    received = {}
    for (_, target), n in mentions.items():
        received[target] = received.get(target, 0) + n
    ranks = calculate_normalized_pagerank(mentions)

    # Nodes: area follows the mention count, saturation follows pagerank.
    for user, total in received.items():
        size = int(0.5 * total**0.5)
        saturation = f"{ranks[user]:.3f}"
        graph.node(
            f"S{user}",
            width=str(size),
            height=str(size),
            fontsize=str(max(25*size, 10)),
            style="filled",
            fillcolor=f"0.000 {saturation} 1.000",
            tooltip=f"S{user} was mentioned a total of {total} time(s), with normalized pagerank of {saturation}",
        )

    # Edges: one per (author, mentioned) pair.
    for (source, target), n in mentions.items():
        graph.edge(
            f"S{source}",
            f"S{target}",
            penwidth=str(n),
            tooltip=f"S{source} mentioned S{target} {n} time(s)",
        )

    graph.render(filename)
    
# NOTE(review): this uses the same default filename ("output") as the earlier
# render_graph(mentions) call, so it presumably overwrites that rendering.
# Pass a different filename if both pictures should be kept.
render_pagerank_graph(mentions)