In [18]:
import os
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import collections
from itertools import combinations

# Tasks in Part 1

## Select datasets

In [6]:
def get_file_size(filepath):
    """Return the size, in bytes, of the file at ``filepath``."""
    file_stat = os.stat(filepath)
    return file_stat.st_size

In [7]:
def select_networks(directory):
    """
    Select 3 networks (small, medium, large) based on file size.

    Only entries of ``directory`` whose name ends with ``.csv`` are
    considered. They are ordered by size on disk, with the path as a
    tie-breaker so the result does not depend on the arbitrary order in
    which ``os.listdir`` returns entries.

    Args:
        directory: Directory to scan (not recursive).

    Returns:
        A list ``[smallest, middle, largest]`` of paths, each joined with
        ``directory``. "Middle" is the element at index ``len // 2`` of the
        size-ordered list. With fewer than three CSV files the same path
        appears more than once.

    Raises:
        IndexError: If ``directory`` contains no ``.csv`` files.
    """
    csv_files = [
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.endswith('.csv')
    ]
    if not csv_files:
        # Same exception type that indexing an empty list would raise,
        # but with a message that says what is actually wrong.
        raise IndexError(f"no .csv files found in {directory!r}")

    # (size, path) key: equal-sized files are ordered by path, making the
    # selection reproducible across machines and file systems.
    sorted_files = sorted(csv_files, key=lambda path: (get_file_size(path), path))

    return [
        sorted_files[0],                       # smallest network
        sorted_files[len(sorted_files) // 2],  # medium network
        sorted_files[-1],                      # largest network
    ]


In [8]:
# Dataset location. Set the NETWORK_DATA_DIR environment variable to run the
# notebook on another machine; when it is unset the original local path is used.
dataset_dir = os.environ.get(
    'NETWORK_DATA_DIR',
    '/Users/wangfeifei/Desktop/network_data/datasets',
)
selected_networks = select_networks(dataset_dir)
print("Selected network datasets:")
for network in selected_networks:
    print(os.path.basename(network))

Selected network datasets:
BOT_REQUESTS.csv
ITEMS.csv
REQUEST_FOR_DELETION.csv


## Task A (network construction):

In [17]:

def build_network(filepath):
    """Build a weighted, undirected co-editing network from a CSV file.

    The file is read with ``pandas.read_csv`` and must provide the columns
    ``page_name``, ``thread_subject`` and ``username``. Rows are grouped by
    (page, thread); every pair of distinct users appearing in the same group
    is connected by an edge. The edge ``weight`` is the number of
    (page, thread) groups the two users share.

    Missing usernames are ignored. Users who never share a thread with
    anyone else produce no edge and therefore do not appear as nodes.

    Args:
        filepath: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        networkx.Graph: the graph with a ``weight`` attribute on every edge.
    """
    df = pd.read_csv(filepath)

    # Count, per unordered user pair, how many threads they co-occur in.
    # (The pairs must be counted per thread; de-duplicating them across
    # threads first would leave every weight at 1.)
    pair_counts = collections.Counter()
    for _, group in df.groupby(['page_name', 'thread_subject']):
        # unique() guarantees distinct users; sorting once per thread makes
        # combinations() emit each pair in a canonical (smaller, larger)
        # order so (u, v) and (v, u) are counted as the same edge.
        users = sorted(group['username'].dropna().unique())
        pair_counts.update(combinations(users, 2))

    G = nx.Graph()
    G.add_weighted_edges_from((u, v, w) for (u, v), w in pair_counts.items())
    return G

# The three selected datasets, keyed by relative size. Paths are derived from
# dataset_dir (defined in the dataset-selection cell) instead of repeating the
# absolute directory for every file.
files = {
    "small": os.path.join(dataset_dir, "BOT_REQUESTS.csv"),
    "medium": os.path.join(dataset_dir, "ITEMS.csv"),
    "large": os.path.join(dataset_dir, "REQUEST_FOR_DELETION.csv"),
}

# Build the three networks and report their node / edge counts
networks = {}
for size, path in files.items():
    print(f"正在构建 {size} 网络...")
    networks[size] = build_network(path)
    print(f"{size}网构建完成：节点数={networks[size].number_of_nodes()}, 边数={networks[size].number_of_edges()}\n")


正在构建 small 网络...
small网构建完成：节点数=527, 边数=2425

正在构建 medium 网络...
medium网构建完成：节点数=3539, 边数=7252

正在构建 large 网络...
large网构建完成：节点数=9887, 边数=33488



## Task B (network metrics):

## Task C (epidemic models):

## Task D (comparing networks and social issues):

# Tasks in Part 2

## Task A (spatial networks and planarity):

## Task B (road accidents):

## Task C (Voronoi diagrams):

## Task D (TransE, PROV, PageRank):