In [164]:
import pandas as pd

def read_graph(file_path):
    graph = {}
    all_nodes = set()  # 用于记录所有出现过的节点
    
    with open(file_path, 'r') as file:
        for line in file:
            # 跳过注释行（# 或其他符号）
            if line.startswith('#'):
                continue
            
            # 读取边的起点和终点
            source, target = map(int, line.strip().split())
            
            # 记录所有节点
            all_nodes.update([source, target])
            
            # 构建邻接列表（有向图逻辑）
            if source not in graph:
                graph[source] = []
            
            graph[source].append(target)
    
    # 添加孤立节点（没有出边的节点）
    for node in all_nodes:
        if node not in graph:
            graph[node] = []
    
    return graph

def get_graph_info(graph):
    # 节点数：邻接表中的所有键值对数量
    num_nodes = len(graph)
    
    # 边数：计算所有出边的数量总和
    num_edges = sum(len(targets) for targets in graph.values())

    # 收集所有度数信息
    degrees = {node: len(targets) for node, targets in graph.items()}
    
    # 构建表格数据
    graph_info = {
        "Metric": ["Number of Nodes", "Number of Edges", "Average Degree"],
        "Value": [num_nodes, num_edges, round(num_edges / num_nodes, 2) if num_nodes > 0 else 0]
    }

    return graph_info, degrees

def print_graph_table(graph_info):
    df = pd.DataFrame(graph_info)
    print(df)




In [165]:
file_paths = [
    'dataset/amazon0601.txt',
    'dataset/web-Google.txt',
    'dataset/cit-Patents.txt',
    'dataset/soc-Epinions1.txt',
    'dataset/email-Eu-core.txt',
    'dataset/soc-Slashdot0811.txt',
    'dataset/wiki-Vote.txt',
    'dataset/as-skitter.txt'
]


In [166]:
import heapq

def degree_discount_IC(graph, k,p):
    """
    IC模型下使用度折扣启发式算法，并利用堆优化，选择种子节点以最大化影响力传播。

    参数:
    - graph: dict，图的邻接列表，key 是节点，value 是邻居列表。
    - k: int，要选择的种子节点数量。
    - p: float，传播概率。

    返回:
    - seeds: list，包含选出的种子节点。
    """
    # 初始化
    degree = {v: len(neighbors) for v, neighbors in graph.items()}  # 每个节点的度
    t = {v: 0 for v in graph}  # 每个节点已选为种子的邻居数
    dd = degree.copy()  # 度折扣值
    heap = []  # 使用最小堆（存储负值模拟最大堆）
    seeds = []  # 最终的种子节点集合
    
    # 构建初始堆
    for v, val in dd.items():
        heapq.heappush(heap, (-val, v))  # 堆存储 (-度折扣值, 节点)
    
    # 贪婪选择种子节点
    for _ in range(k):
        while True:
            # 弹出堆顶，获取最大度折扣值节点
            _, u = heapq.heappop(heap)
            # 检查是否是最新的度折扣值
            if -dd[u] == _:
                break  # 最新的值，继续执行
            # 否则跳过，因为它是旧值
        
        # 添加种子节点
        seeds.append(u)
        # print(f"选出的第{i}个种子节点: {u}")
        # 更新邻居节点的度折扣值
        for v in graph[u]:
            if v not in seeds:  # 如果邻居节点尚未被选为种子
                t[v] += 1  # 更新该节点已选种子邻居数
                # 计算新的度折扣值
                dd[v] = degree[v] - 2 * t[v] - (degree[v] - t[v]) * t[v] * p
                # 将更新后的节点重新加入堆
                heapq.heappush(heap, (-dd[v], v))  # 存储负值
    
    return seeds


In [167]:
import random

def influence_spread(graph, seeds, p):
    """
    Simulate influence propagation and calculate the spread.

    Parameters:
    - graph: dict, adjacency list representation of the graph.
    - seeds: list, initial seed nodes.
    - p: float, propagation probability.

    Returns:
    - int: Total number of activated nodes.
    """
    active_nodes = set(seeds)  # Set of nodes already activated.
    newly_active = set(seeds)  # Nodes activated in the current step.

    while newly_active:
        next_newly_active = set()
        for node in newly_active:
            for neighbor in graph[node]:
                # Activate the neighbor with probability p if it is not already active
                if neighbor not in active_nodes and random.random() < p:
                    active_nodes.add(neighbor)
                    next_newly_active.add(neighbor)
        newly_active = next_newly_active  # Update for the next round

    return len(active_nodes)

In [168]:
import heapq
import random

def degree_discount_LT(graph, k):
    """
    Implements a degree discount heuristic for the Linear Threshold (LT) model.
    
    Parameters:
    - graph: dict, adjacency list where key is a node, and value is a list of its neighbors.
    - k: int, number of seed nodes to select.
    
    Returns:
    - seeds: list, selected seed nodes.
    """
    # Step 1: Initialize thresholds for all nodes (randomly chosen from [0, 1])
    thresholds = {v: random.uniform(0, 1) for v in graph}

    # Step 2: Initialize data structures
    influence_received = {v: 0 for v in graph}  # Tracks total influence received by each node
    degree = {v: len(graph[v]) for v in graph}  # Degree of each node
    dd = degree.copy()  # Discounted degree values
    seeds = []  # Selected seed nodes
    heap = []  # Max-heap for selecting nodes (negative value for max-heap behavior)
    
    # Step 3: Build initial heap based on node degrees
    for v, val in dd.items():
        heapq.heappush(heap, (-val, v))  # Push (-degree, node)
    
    # Step 4: Greedy seed selection
    for _ in range(k):
        while True:
            # Pop the node with the highest discounted degree
            _, u = heapq.heappop(heap)
            
            # Validate if the value is up-to-date
            if -dd[u] == _:
                break  # Up-to-date, continue
            
        # Add the selected node to the seed set
        seeds.append(u)
         
        # Update influence for neighbors of the selected node
        for neighbor in graph[u]:
            if neighbor not in seeds:  # Skip already selected nodes
                # Update influence received by the neighbor
                influence_received[neighbor] += 1 / degree[u]  # Assuming uniform weight
                
                # Check if the neighbor is already activated
                if influence_received[neighbor] >= thresholds[neighbor]:
                    continue  # Skip, as it's already activated
                
                # Update discounted degree for the neighbor
                dd[neighbor] = degree[neighbor] - influence_received[neighbor]
                heapq.heappush(heap, (-dd[neighbor], neighbor))  # Push updated value to heap
    
    return seeds


In [169]:
import random

def influence_spread_LT(graph, seeds):
    """
    Simulate influence propagation in the Linear Threshold (LT) model.

    Parameters:
    - graph: dict, adjacency list representation of the graph.
    - seeds: list, initial seed nodes.

    Returns:
    - int: Total number of activated nodes.
    """
    # 初始化：激活的节点集合
    active_nodes = set(seeds)
    # 初始化：每个节点的阈值 theta_v
    thresholds = {node: random.random() for node in graph.keys()}
    # 初始化：当前轮激活的节点集合
    newly_active = set(seeds)

    while newly_active:
        next_newly_active = set()
        for node in graph.keys():
            if node not in active_nodes:  # 仅检查未激活的节点
                # 计算该节点的影响力
                activated_neighbors = [neighbor for neighbor in graph[node] if neighbor in active_nodes]
                influence = len(activated_neighbors) / len(graph[node]) if graph[node] else 0

                # 如果累计影响力超过阈值，则激活节点
                if influence >= thresholds[node]:
                    active_nodes.add(node)
                    next_newly_active.add(node)

        # 更新：当前轮激活节点集合
        newly_active = next_newly_active

    return len(active_nodes)


In [170]:
k=50
for file_path in file_paths:
    print(f"\nProcessing file: {file_path}")
    
    # Step 1: Read graph
    graph = read_graph(file_path)
    
    # Step 2: Get and display graph info
    graph_info, degrees = get_graph_info(graph)
    print_graph_table(graph_info)
    
    # Step 3: Run Degree Discount IC
    p = 0.1  # Influence spread propagation probability for IC model
    print("\n--- Degree Discount IC Results ---\n")
    start_time = time.time()
    seeds = degree_discount_IC(graph, k, p)  # Initial propagation probability for seed selection
    end_time = time.time()
    print(f"Selected seeds: {seeds}")
    print(f"运行时间: {end_time - start_time:.4f} 秒")
    
    spread = influence_spread(graph, seeds, p)
    print("最终传播影响范围的节点数 (IC模型):", spread)




    # Step 4: Run Degree Discount LT
    
    print("\n--- Degree Discount LT Results ---\n")
    start_time = time.time()
    seeds = degree_discount_LT(graph, k)  # LT model typically doesn't use p for seed selection
    end_time = time.time()
    print(f"Selected seeds: {seeds}")
    print(f"运行时间: {end_time - start_time:.4f} 秒")
    
    
    spread = influence_spread(graph, seeds,p)
    print("最终传播影响范围的节点数 (LT模型):", spread)



Processing file: dataset/amazon0601.txt
            Metric      Value
0  Number of Nodes   403394.0
1  Number of Edges  3387388.0
2   Average Degree        8.4

--- Degree Discount IC Results ---

Selected seeds: [0, 11, 22, 32, 43, 44, 45, 46, 49, 53, 54, 56, 59, 60, 64, 65, 66, 68, 69, 72, 74, 79, 82, 83, 85, 86, 92, 94, 95, 125, 135, 144, 145, 148, 157, 158, 160, 170, 171, 172, 177, 182, 184, 185, 186, 187, 188, 189, 190, 197]
运行时间: 0.1844 秒
最终传播影响范围的节点数 (IC模型): 161

--- Degree Discount LT Results ---

Selected seeds: [0, 6, 11, 16, 22, 32, 40, 43, 44, 45, 49, 53, 59, 60, 61, 62, 64, 65, 66, 68, 70, 72, 73, 75, 76, 79, 81, 82, 83, 85, 110, 112, 114, 119, 121, 122, 123, 124, 125, 133, 134, 136, 144, 145, 148, 156, 157, 158, 160, 161]
运行时间: 0.3163 秒
最终传播影响范围的节点数 (LT模型): 174

Processing file: dataset/web-Google.txt
            Metric       Value
0  Number of Nodes   875713.00
1  Number of Edges  5105039.00
2   Average Degree        5.83

--- Degree Discount IC Results ---

Selected se