# Imports and Data

In [12]:
from typing import NamedTuple
import pandas as pd
from pathlib import Path
import networkx as nx
import matplotlib.pyplot as plt
from tqdm import tqdm
from functools import lru_cache


class CollegeMessage(NamedTuple):
    """One message record: who wrote to whom, and when."""

    source: int  # id of the sending node
    target: int  # id of the receiving node
    unix_timestamp: int  # seconds since the Unix epoch


def read_message_data(datapath: str) -> list[CollegeMessage]:
    """
    Loads whitespace-separated message records from a text file.

    Every non-blank line must hold exactly three integer fields, in the order
    source id, target id, Unix timestamp. Blank lines are skipped.

    Parameters:
    - datapath (str): Path to the data file. A relative path is resolved against the
      current working directory; an absolute path is used unchanged.

    Returns:
    - list[CollegeMessage]: One entry per non-blank line, in file order.

    Raises:
    - FileNotFoundError: If the resolved path does not exist.
    - ValueError: If a non-blank line does not consist of exactly three integers.
    """
    filepath = Path.cwd() / Path(datapath)

    if not filepath.exists():
        raise FileNotFoundError(
            f"File not found: {filepath}; "
            f"Current working directory: {Path.cwd()}; "
            f"Relative path to data file: {Path(datapath)}"
        )

    print(f"Using file: {filepath.absolute()}")

    messages = []
    with open(filepath, 'r', encoding="UTF-8") as f:
        for line_number, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline at the end of the file) carry no record.
                continue
            try:
                # Unpacking fails for a wrong field count, int() for a non-numeric field;
                # both surface as ValueError.
                source, target, timestamp = (int(field) for field in fields)
            except ValueError as err:
                raise ValueError(
                    f"Malformed record on line {line_number} of {filepath}: {line.rstrip()!r}"
                ) from err
            messages.append(CollegeMessage(source, target, timestamp))
    print("done")
    return messages


def remove_low_degree_nodes(data: pd.DataFrame, min_degree: int) -> pd.DataFrame:
    """
    Keeps only the rows whose sender and receiver are both sufficiently active.

    A node is kept when it appears at least `min_degree` times in the 'target' column
    AND at least `min_degree` times in the 'source' column. The counts are row counts,
    so repeated messages between the same pair each count once. The filter is applied
    a single time; counts are not recomputed on the filtered result.

    Parameters:
    - data (pd.DataFrame): Table with 'source' and 'target' columns.
    - min_degree (int): Minimum number of appearances required in each column.

    Returns:
    - pd.DataFrame: The rows of `data` where both endpoints are kept nodes.
    """
    received = data['target'].value_counts()
    sent = data['source'].value_counts()

    busy_receivers = set(received[received >= min_degree].index)
    busy_senders = set(sent[sent >= min_degree].index)
    kept_nodes = busy_receivers.intersection(busy_senders)

    sender_kept = data['source'].isin(kept_nodes)
    receiver_kept = data['target'].isin(kept_nodes)
    return data[sender_kept & receiver_kept]


def create_graph(data: pd.DataFrame) -> nx.DiGraph:
    """
    Builds a directed graph with one edge per (source, target) pair in `data`.

    Each edge carries a 'timestamp' attribute taken from the row's 'unix_timestamp'.
    A DiGraph holds a single edge per ordered pair, so when a pair occurs in several
    rows the timestamp of the last such row is the one that remains.

    Parameters:
    - data (pd.DataFrame): Table with 'source', 'target' and 'unix_timestamp' columns.

    Returns:
    - nx.DiGraph: The directed message graph.
    """
    timestamped_edges = (
        (record.source, record.target, {"timestamp": record.unix_timestamp})
        for record in data.itertuples(index=False)
    )

    message_graph = nx.DiGraph()
    message_graph.add_edges_from(timestamped_edges)
    return message_graph


def graph_figure(graph: nx.DiGraph):
    """Draws `graph` with an ARF layout, using small unlabeled nodes."""
    nx.draw(
        graph,
        pos=nx.arf_layout(graph),
        with_labels=False,
        node_size=10,
    )


# Memoised layout computation: a repeated call with the same arguments returns
# the stored positions instead of running the layout algorithm again.
@lru_cache(maxsize=None)
def cache_layout(graph: nx.DiGraph, layout_func: callable):
    """
    Computes `layout_func(graph)` once per distinct argument pair and caches the result.

    The cache is unbounded (maxsize=None) and keeps a reference to every graph and
    layout function it has seen. Both arguments must be hashable.
    NOTE(review): networkx graphs presumably hash by identity, so mutating a graph
    after its first call would still return the old layout - confirm.

    Parameters:
    - graph (nx.DiGraph): Graph to lay out.
    - layout_func (callable): Function taking the graph and returning its layout.

    Returns:
    - Whatever `layout_func` returns for `graph`.
    """
    print("Calculating layout ...")
    positions = layout_func(graph)
    print("... done calculating layout")
    return positions

In [13]:
# Location of the raw message file; resolved against the current working directory.
_DATA_PATH = "./dataset/CollegeMsg.txt"

# Every record parsed from the data file.
_MESSAGES = read_message_data(_DATA_PATH)

# Message table restricted to nodes passing the min_degree=2 filter.
DF_MESSAGES = remove_low_degree_nodes(pd.DataFrame(_MESSAGES), 2)

# Directed message graph built from the filtered table.
GRAPH_MESSAGES = create_graph(DF_MESSAGES)

Using file: C:\Users\majerm\projects\local\rug-sna-pizza-giving\college_msg_files\dataset\CollegeMsg.txt
done


In [14]:
# Preview the first rows of the filtered message table.
DF_MESSAGES.head()

Unnamed: 0,source,target,unix_timestamp
39,41,42,1082574211
48,36,50,1082597715
49,44,51,1082597751
50,32,52,1082598056
51,36,32,1082598122


# Time window

In [15]:
def time_window(graph: nx.DiGraph, unix_start: int, unix_end: int) -> nx.DiGraph:
    """
    Restricts `graph` to the edges whose 'timestamp' lies in [unix_start, unix_end].

    Parameters:
    - graph (nx.DiGraph): Directed graph with a 'timestamp' attribute on its edges.
    - unix_start (int): Start of the time window (inclusive).
    - unix_end (int): End of the time window (inclusive).

    Returns:
    - nx.DiGraph: The result of `graph.edge_subgraph` for the selected edges
      (per the networkx documentation this is a view onto `graph`, not a copy).
    """
    edges_in_window = [
        (sender, receiver)
        for sender, receiver, sent_at in graph.edges(data='timestamp')
        if unix_start <= sent_at <= unix_end
    ]
    return graph.edge_subgraph(edges_in_window)


import networkx as nx
from pathlib import Path
from matplotlib.colors import Normalize

def _degree_node_colors(graph, degrees, node_color_map, default_node_color):
    """Colors each node by its degree via the named colormap; degree-0 nodes get the default color."""
    non_zero_degrees = [deg for deg in degrees.values() if deg > 0]
    if not non_zero_degrees:
        return [default_node_color for _ in graph.nodes()]

    # Normalize over the connected nodes only, so isolated nodes do not stretch the scale.
    norm = Normalize(vmin=min(non_zero_degrees), vmax=max(non_zero_degrees))
    cmap = plt.colormaps.get_cmap(node_color_map)
    return [
        cmap(norm(degrees[node])) if degrees[node] > 0 else default_node_color
        for node in graph.nodes()
    ]


def _scale_edge_widths(edge_weights, min_width=0.5, max_width=2):
    """Maps edge weights linearly onto line widths in [min_width, max_width]."""
    if not edge_weights:
        return []

    lightest = min(edge_weights)
    heaviest = max(edge_weights)
    if heaviest == lightest:
        # No spread to scale over: every edge gets the middle width.
        return [(max_width + min_width) / 2 for _ in edge_weights]

    return [
        min_width + (weight - lightest) / (heaviest - lightest) * (max_width - min_width)
        for weight in edge_weights
    ]


def export_graph_undirected(
        graph: nx.Graph,
        pos: dict,
        export_path: Path = None,
        fig_size=(20, 20),
        base_node_size=0,
        node_size_factor=10,
        edge_alpha=0.2,
        edge_color='gray',
        node_color_map='viridis',
        default_node_color='lightgray',
        dpi=300,
        xlim=None,
        ylim=None
):
    """
    Renders a NetworkX undirected graph with consistent styling and optionally saves it.

    Node size and node color both follow the node's degree in `graph`; edge width
    follows the edge's 'weight' attribute, scaled linearly between 0.5 and 2.
    The figure is always closed before returning, including when drawing or saving fails.

    Parameters:
    - graph (nx.Graph): The undirected graph to export. Edges without a 'weight'
      attribute are treated as weight 1.
    - pos (dict): A dictionary specifying node positions.
    - export_path (Path or str, optional): Path to save the exported image. Missing parent
      directories are created. If None (or empty), the image is not saved.
    - fig_size (tuple, optional): Size of the figure in inches. Defaults to (20, 20).
    - base_node_size (int, optional): Base size for nodes. Defaults to 0.
    - node_size_factor (int, optional): Factor to scale node sizes based on degree. Defaults to 10.
    - edge_alpha (float, optional): Transparency level for edges (0 to 1). Defaults to 0.2.
    - edge_color (str or list, optional): Color of the edges. Defaults to 'gray'.
    - node_color_map (str, optional): Matplotlib colormap name for node colors. Defaults to 'viridis'.
    - default_node_color (str, optional): Color for nodes with degree zero. Defaults to 'lightgray'.
    - dpi (int, optional): Dots per inch for the saved image. Defaults to 300.
    - xlim (tuple, optional): Limits for the x-axis. Applied whenever it is given.
    - ylim (tuple, optional): Limits for the y-axis. Applied whenever it is given.
    """
    # Derive all styling before opening a figure, so a failure here cannot leak one.
    degrees = dict(graph.degree())
    node_sizes = [base_node_size + degrees[node] * node_size_factor for node in graph.nodes()]
    node_colors = _degree_node_colors(graph, degrees, node_color_map, default_node_color)

    # Same iteration order as graph.edges(), which is what draw_networkx_edges draws by default.
    edge_weights = [weight for _, _, weight in graph.edges(data='weight', default=1)]
    edge_widths = _scale_edge_widths(edge_weights)

    figure = plt.figure(figsize=fig_size)
    try:
        nx.draw_networkx_edges(
            graph,
            pos,
            alpha=edge_alpha,
            edge_color=edge_color,
            width=edge_widths
        )

        nx.draw_networkx_nodes(
            graph,
            pos,
            node_size=node_sizes,
            node_color=node_colors,
            linewidths=0.5
        )

        # Fixed limits keep frames of a sequence aligned with each other.
        if xlim is not None:
            plt.xlim(xlim)
        if ylim is not None:
            plt.ylim(ylim)

        # Hide the axes for a cleaner look.
        plt.axis('off')

        if export_path:
            target = Path(export_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            plt.savefig(target, bbox_inches='tight', dpi=dpi, pad_inches=0.1)
    finally:
        # Close this specific figure to free its memory, even after an error.
        plt.close(figure)


def time_window_undirected(graph: nx.DiGraph, unix_start: int, unix_end: int) -> nx.Graph:
    """
    Extracts the edges inside a time window as an undirected, weighted graph.

    Every node of `graph` is copied into the result (with its attributes), so nodes
    without any edge in the window appear as isolated nodes. Each directed edge whose
    'timestamp' lies in the window adds 1 to the 'weight' of the matching undirected
    edge. A plain nx.DiGraph stores one edge per ordered pair, so for such input the
    weight is 1 when one direction falls in the window and 2 when both do.

    Parameters:
    - graph (nx.DiGraph): The original directed graph with 'timestamp' on edges.
    - unix_start (int): Start of the time window (inclusive).
    - unix_end (int): End of the time window (inclusive).

    Returns:
    - nx.Graph: An undirected graph with 'weight' attributes on edges.
    """
    undirected_graph = nx.Graph()
    undirected_graph.add_nodes_from(graph.nodes(data=True))

    for source, target, timestamp in graph.edges(data='timestamp'):
        if not unix_start <= timestamp <= unix_end:
            continue

        # An undirected graph treats (source, target) and (target, source) as the
        # same edge, so both directions accumulate on one weight.
        if undirected_graph.has_edge(source, target):
            undirected_graph[source][target]['weight'] += 1
        else:
            undirected_graph.add_edge(source, target, weight=1)

    return undirected_graph


from pathlib import Path
from tqdm import tqdm
import math


def export_time_windows_undirected(pos: dict, window_size_days, window_step_days, max_windows=None):
    """
    Exports one graph image per sliding time window into a folder named after the
    window settings ("./export/overview - window <size> every <step>").

    Windows start at the earliest timestamp in DF_MESSAGES and advance by
    `window_step_days`; each covers `window_size_days` (both ends inclusive) of
    GRAPH_MESSAGES. Each graph is undirected with edge widths based on edge weights.
    All images share the same axis limits, derived from `pos`. If the target folder
    already exists, nothing is exported.

    Parameters:
    - pos (dict): A dictionary specifying node positions.
    - window_size_days (int or float): Length of each window in days. Must not be negative.
    - window_step_days (int or float): Offset between consecutive window starts in days.
      Must be positive.
    - max_windows (int, optional): Upper bound on the number of windows exported. If None,
      all windows needed to reach the latest timestamp are exported.

    Raises:
    - ValueError: If `window_step_days` is not positive or `window_size_days` is negative.
    """
    if window_step_days <= 0:
        raise ValueError(f"window_step_days must be positive, got {window_step_days}")
    if window_size_days < 0:
        raise ValueError(f"window_size_days must not be negative, got {window_size_days}")

    new_folder_name = f"overview - window {window_size_days} every {window_step_days}"
    new_folder = Path("./export") / new_folder_name

    if new_folder.exists():
        print(f"Skipping folder: '{new_folder_name}' as it exists already")
        return

    start_unix_timestamp = DF_MESSAGES['unix_timestamp'].min()
    end_unix_timestamp = DF_MESSAGES['unix_timestamp'].max()

    day_in_seconds = 60 * 60 * 24
    window_in_seconds = window_size_days * day_in_seconds
    window_step_in_seconds = window_step_days * day_in_seconds
    total_seconds = end_unix_timestamp - start_unix_timestamp

    # Number of windows until a window's end reaches the latest timestamp (at least one).
    available_windows = max(
        1,
        math.ceil((total_seconds - window_in_seconds) / window_step_in_seconds) + 1
    )
    # max_windows is an upper bound: never render windows that start beyond the data.
    total_windows = available_windows if max_windows is None else min(max_windows, available_windows)

    # Shared axis limits with 5% padding, so every frame is drawn at the same scale.
    x_values = [point[0] for point in pos.values()]
    y_values = [point[1] for point in pos.values()]
    x_min, x_max = min(x_values), max(x_values)
    y_min, y_max = min(y_values), max(y_values)
    x_padding = (x_max - x_min) * 0.05
    y_padding = (y_max - y_min) * 0.05
    xlim = (x_min - x_padding, x_max + x_padding)
    ylim = (y_min - y_padding, y_max + y_padding)

    # Create the folder only after everything above succeeded: an existing folder
    # makes later calls skip the export entirely.
    new_folder.mkdir(parents=True)

    for i in tqdm(range(total_windows), total=total_windows, desc="Exporting graphs"):
        window_start = start_unix_timestamp + window_step_in_seconds * i
        graph = time_window_undirected(GRAPH_MESSAGES, window_start, window_start + window_in_seconds)
        export_graph_undirected(
            graph,
            pos,
            new_folder / f"graph_{i:03}.png",
            node_color_map='plasma',
            default_node_color='lightgray',
            xlim=xlim,
            ylim=ylim
        )


In [16]:
# (window_size_days, window_step_days) pairs: steps of 1 to 7 days, each paired
# with a window seven times as long as its step.
variations = (
    (7 * step, step) 
    for step in range(1, 8)
)

# cache_layout is memoised with lru_cache, so repeated calls with the same graph
# and layout function reuse one layout and every export shares the same positions.
for variation in variations:
    export_time_windows_undirected(cache_layout(GRAPH_MESSAGES, nx.arf_layout), *variation)

Calculating layout ...
... done calculating layout
Exporting graphs to folder: .export\window 2 every 1


Exporting graphs: 100%|██████████| 187/187 [03:55<00:00,  1.26s/it]


All graphs have been exported to .export\window 2 every 1
Exporting graphs to folder: .export\window 3 every 1


Exporting graphs: 100%|██████████| 186/186 [04:00<00:00,  1.29s/it]


All graphs have been exported to .export\window 3 every 1
Exporting graphs to folder: .export\window 7 every 1


Exporting graphs: 100%|██████████| 182/182 [04:52<00:00,  1.61s/it]


All graphs have been exported to .export\window 7 every 1
Exporting graphs to folder: .export\window 7 every 2


Exporting graphs: 100%|██████████| 92/92 [02:22<00:00,  1.55s/it]


All graphs have been exported to .export\window 7 every 2
Exporting graphs to folder: .export\window 14 every 1


Exporting graphs: 100%|██████████| 175/175 [05:38<00:00,  1.93s/it]


All graphs have been exported to .export\window 14 every 1
Exporting graphs to folder: .export\window 14 every 2


Exporting graphs: 100%|██████████| 88/88 [02:54<00:00,  1.98s/it]


All graphs have been exported to .export\window 14 every 2
Exporting graphs to folder: .export\window 14 every 7


Exporting graphs: 100%|██████████| 26/26 [00:46<00:00,  1.80s/it]


All graphs have been exported to .export\window 14 every 7
Exporting graphs to folder: .export\window 21 every 1


Exporting graphs: 100%|██████████| 168/168 [05:43<00:00,  2.05s/it]


All graphs have been exported to .export\window 21 every 1
Exporting graphs to folder: .export\window 21 every 2


Exporting graphs: 100%|██████████| 85/85 [02:57<00:00,  2.09s/it]


All graphs have been exported to .export\window 21 every 2
Exporting graphs to folder: .export\window 21 every 3


Exporting graphs: 100%|██████████| 57/57 [02:02<00:00,  2.14s/it]


All graphs have been exported to .export\window 21 every 3
Exporting graphs to folder: .export\window 21 every 7


Exporting graphs: 100%|██████████| 25/25 [00:51<00:00,  2.06s/it]


All graphs have been exported to .export\window 21 every 7
Exporting graphs to folder: .export\window 28 every 2


Exporting graphs: 100%|██████████| 81/81 [03:39<00:00,  2.70s/it]


All graphs have been exported to .export\window 28 every 2
Exporting graphs to folder: .export\window 28 every 4


Exporting graphs: 100%|██████████| 41/41 [01:32<00:00,  2.26s/it]


All graphs have been exported to .export\window 28 every 4
Exporting graphs to folder: .export\window 28 every 7


Exporting graphs: 100%|██████████| 24/24 [00:54<00:00,  2.29s/it]


All graphs have been exported to .export\window 28 every 7
Exporting graphs to folder: .export\window 28 every 14


Exporting graphs: 100%|██████████| 13/13 [00:29<00:00,  2.25s/it]

All graphs have been exported to .export\window 28 every 14



