# Imports and Data

In [1]:
from typing import NamedTuple
import pandas as pd
from pathlib import Path
import networkx as nx
import matplotlib.pyplot as plt
from tqdm import tqdm
from functools import lru_cache


class CollegeMessage(NamedTuple):
    """One parsed record of the message data file: sender, receiver, time."""

    # Id of the node that sent the message
    source: int
    # Id of the node that received the message
    target: int
    # Time of the message as a unix timestamp (seconds since the epoch)
    unix_timestamp: int


def read_message_data(datapath: str) -> list[CollegeMessage]:
    """Load the message list stored in the text file at *datapath*.

    Every non-blank line must hold three whitespace-separated integers:
    source id, target id and unix timestamp. Blank or whitespace-only lines
    (for example a trailing empty line at the end of the file) are skipped
    instead of aborting the whole load.

    Args:
        datapath: Path of the data file, joined onto the current working
            directory.

    Returns:
        One ``CollegeMessage`` per data line, in file order.

    Raises:
        FileNotFoundError: If the resolved path does not exist.
        ValueError: If a non-blank line does not consist of exactly three
            integer fields.
    """
    filepath = Path.cwd() / Path(datapath)

    if not filepath.exists():
        raise FileNotFoundError(
            f"File not found: {filepath}; "
            f"Current working directory: {Path.cwd()}; "
            f"Relative path to data file: {Path(datapath)}"
        )

    print(f"Using file: {filepath.absolute()}")

    messages = []
    with open(filepath, 'r', encoding="UTF-8") as f:
        for line in f:
            # split() without arguments already ignores leading/trailing
            # whitespace and returns [] for a blank line.
            fields = line.split()
            if not fields:
                continue
            source, target, timestamp = fields
            messages.append(CollegeMessage(int(source), int(target), int(timestamp)))
    print("done")
    return messages


def remove_low_degree_nodes(data: pd.DataFrame, min_degree: int) -> pd.DataFrame:
    """Keep only the rows whose source and target are both well-connected.

    A node counts as well-connected when it occurs at least *min_degree*
    times in the 'target' column and at least *min_degree* times in the
    'source' column of *data*. Occurrences are counted per row, and the
    counts are taken once on the unfiltered input (the filter is not
    re-applied to its own result).

    Args:
        data: Frame with 'source' and 'target' columns.
        min_degree: Minimum number of occurrences required in each column.

    Returns:
        The rows of *data* where both endpoints pass the threshold.
    """
    received = data['target'].value_counts()
    sent = data['source'].value_counts()

    frequent_receivers = set(received[received >= min_degree].index)
    frequent_senders = set(sent[sent >= min_degree].index)
    kept_nodes = frequent_receivers.intersection(frequent_senders)

    source_kept = data['source'].isin(kept_nodes)
    target_kept = data['target'].isin(kept_nodes)
    return data[source_kept & target_kept]


def create_graph(data: pd.DataFrame) -> nx.DiGraph:
    """Build a directed graph with one edge per (source, target) pair in *data*.

    Each edge carries a 'timestamp' attribute taken from the row's
    'unix_timestamp' column. Rows are processed in order, so when the same
    (source, target) pair appears several times the last row's timestamp is
    the one left on the edge.

    Args:
        data: Frame with 'source', 'target' and 'unix_timestamp' columns.

    Returns:
        The populated ``nx.DiGraph``.
    """
    edges = (
        (message.source, message.target, {"timestamp": message.unix_timestamp})
        for message in data.itertuples(index=False)
    )

    message_graph = nx.DiGraph()
    message_graph.add_edges_from(edges)
    return message_graph


def graph_figure(graph: nx.DiGraph):
    """Draw *graph* with ARF-layout positions, unlabeled, with node size 10."""
    nx.draw(graph, pos=nx.arf_layout(graph), with_labels=False, node_size=10)


# Memoised layout computation: lru_cache remembers the result per
# (graph, layout_func) argument pair, so a layout is only calculated once.
@lru_cache(maxsize=None)
def cache_layout(graph: nx.DiGraph, layout_func: callable):
    """Return ``layout_func(graph)``, reusing a cached result when available.

    Progress messages are printed only when the layout is actually computed;
    a cache hit returns silently. The cache is unbounded (``maxsize=None``).

    Args:
        graph: Graph handed to *layout_func*; must be hashable to act as a
            cache key.
        layout_func: Callable that takes the graph and returns its layout.

    Returns:
        Whatever *layout_func* returned for *graph*.
    """
    print("Calculating layout ...")
    positions = layout_func(graph)
    print("... done calculating layout")
    return positions

In [2]:
# Location of the message data file; read_message_data joins it onto the
# current working directory.
_DATA_PATH = "./dataset/CollegeMsg.txt"

# Parse the raw file into a list of CollegeMessage records.
_MESSAGES = read_message_data(_DATA_PATH)

# Tabular view of the messages, filtered with min_degree=2 so that only rows
# whose source and target both pass the degree threshold remain.
DF_MESSAGES = remove_low_degree_nodes(pd.DataFrame(_MESSAGES), 2)

# Directed graph built from the filtered messages.
GRAPH_MESSAGES = create_graph(DF_MESSAGES)

Using file: C:\Users\majerm\projects\local\rug-sna-pizza-giving\college_msg_files\dataset\CollegeMsg.txt
done


# Important Nodes

In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Relies on GRAPH_MESSAGES (the directed message graph) and cache_layout
# having been defined by the earlier cells.

# Betweenness centrality of every node in the message graph
betweenness = nx.betweenness_centrality(GRAPH_MESSAGES)

# Node whose betweenness centrality value is the largest
max_betweenness_node = max(betweenness, key=betweenness.get)

# Open a 12x8 figure for the drawing
plt.figure(figsize=(12, 8))

# Node positions from the ARF layout; cache_layout is wrapped in lru_cache,
# so repeated calls with the same arguments reuse the stored result
pos = cache_layout(GRAPH_MESSAGES, nx.arf_layout)

# Draw every node small, light blue and slightly transparent
nx.draw_networkx_nodes(GRAPH_MESSAGES, pos, node_color='lightblue', node_size=50, alpha=0.7)

# Redraw the highest-betweenness node larger, red and fully opaque
nx.draw_networkx_nodes(GRAPH_MESSAGES, pos, nodelist=[max_betweenness_node], node_color='red', node_size=300, alpha=1.0)

# Draw the edges faintly so the nodes stay visible
nx.draw_networkx_edges(GRAPH_MESSAGES, pos, alpha=0.3)

# Label only the highlighted node, using its own id as the label text
nx.draw_networkx_labels(GRAPH_MESSAGES, pos, labels={max_betweenness_node: max_betweenness_node}, font_size=10, font_color='black')

# Title for the plot
plt.title('Network Visualization Highlighting Node with Highest Betweenness Centrality')

# Render the figure
plt.show()


Calculating layout ...
