In [144]:
def read_graph(file_path):
    """Load a directed graph from a whitespace-separated edge-list file.

    Each data line must contain exactly two integers, ``source target``.
    Blank lines and lines whose first non-blank character is ``#`` are
    skipped. Repeated edges are stored only once.

    Parameters:
    - file_path: path of the edge-list text file.

    Returns:
    - dict mapping every node that appears in the file to the list of its
      out-neighbours, in order of first appearance. Nodes that only occur
      as edge targets get an empty list.

    Raises:
    - ValueError: if a data line does not consist of exactly two integers.
    """
    graph = {}
    seen_targets = {}  # source -> set of targets already stored (O(1) duplicate check)
    all_nodes = set()  # every node id that appears in the file
    with open(file_path, 'r') as file:
        for line in file:
            stripped = line.strip()
            # Skip blank lines and comment lines instead of failing on the unpack below.
            if not stripped or stripped.startswith('#'):
                continue

            # Read the edge's start and end node.
            source, target = map(int, stripped.split())

            # Record every node seen.
            all_nodes.update([source, target])

            # Build the adjacency list (directed: only source -> target is stored).
            if source not in graph:
                graph[source] = []
                seen_targets[source] = set()
            if target not in seen_targets[source]:  # avoid duplicate edges
                seen_targets[source].add(target)
                graph[source].append(target)

    # Add nodes that have no outgoing edges so every node is a key.
    for node in all_nodes:
        if node not in graph:
            graph[node] = []

    return graph

def print_graph_info(graph):
    """Print the node count and the edge count of an adjacency-list graph.

    Parameters:
    - graph: dict mapping each node to the list of its neighbours.

    The node count is the number of keys; the edge count is the total
    length of all neighbour lists.
    """
    node_total = len(graph)

    # Accumulate the length of every adjacency list.
    edge_total = 0
    for neighbours in graph.values():
        edge_total += len(neighbours)

    print(f"Number of nodes: {node_total}")
    print(f"Number of edges: {edge_total}")


# Dataset selection: the second assignment overrides the first, so only
# amazon0601.txt is actually loaded below.
file_path = 'dataset/email-Enron.txt'
file_path ='dataset/amazon0601.txt'
graph = read_graph(file_path)

# Print the node and edge counts of the loaded graph.
print_graph_info(graph)


Number of nodes: 403394
Number of edges: 3387388


In [145]:
# def degree_discount_ic(graph, k, p):
#     """
#     使用度折扣启发式算法选择种子节点以最大化影响力传播。

#     参数:
#     - graph: dict，表示图的邻接列表，key 是节点，value 是该节点的邻居列表。
#     - k: int，要选择的种子节点数量。
#     - p: float，传播概率。

#     返回:
#     - seeds: list，包含选出的种子节点。
#     """
#     # 初始化
#     degree = {v: len(neighbors) for v, neighbors in graph.items()}  # 每个节点的度
#     t = {v: 0 for v in graph}  # 每个节点已选为种子的邻居数
#     dd = degree.copy()  # 初始化度折扣值
#     seeds = []  # 存放种子节点

#     for _ in range(k):
#         # 选择度折扣值最大的节点作为新种子
#         u = max(dd, key=dd.get)
#         seeds.append(u)
#         print(f"选出的种子节点: {u}")  # 打印每次选出的种子节点

#         # 更新新种子节点的邻居的度折扣值
#         for v in graph[u]:
#             if v not in seeds:  # 如果邻居节点尚未被选为种子
#                 t[v] += 1  # 增加该节点的已选种子邻居数
#                 dd[v] = degree[v] - 2 * t[v] - (degree[v] - t[v]) * t[v] * p  # 更新度折扣值

#         # 移除已经选为种子的节点的度折扣值
#         dd.pop(u)

#     return seeds




# # 参数
# k = 50  
# p = 0.01  # 传播概率

# seeds = degree_discount_ic(graph, k, p)
# # print("选择的种子节点:", seeds)


In [146]:
import heapq

def degree_discount_ic_heap(graph, k, p):
    """Select seed nodes with the degree-discount heuristic, using a heap.

    The heap uses lazy deletion: whenever a node's discounted degree changes,
    a new entry is pushed and the outdated one is discarded when popped.

    Parameters:
    - graph: dict, adjacency list; key is a node, value is its neighbour list.
      Every neighbour must itself be a key of ``graph``.
    - k: int, number of seed nodes to select.
    - p: float, propagation probability.

    Returns:
    - seeds: list of selected seed nodes in selection order. Contains fewer
      than ``k`` nodes when the graph has fewer than ``k`` nodes.
    """
    # Initialisation
    degree = {v: len(neighbors) for v, neighbors in graph.items()}  # adjacency-list length per node
    t = {v: 0 for v in graph}  # number of already selected seeds that list v as a neighbour
    dd = degree.copy()  # current discounted degree per node

    # heapq is a min-heap, so negated scores are stored to pop the largest first.
    # heapify builds the heap in O(n); ties are broken by node id via tuple order.
    heap = [(-val, v) for v, val in dd.items()]
    heapq.heapify(heap)

    seeds = []        # selected seeds, in selection order
    selected = set()  # same content as `seeds`, for O(1) membership tests

    # Greedy seed selection
    for _ in range(k):
        while heap:
            neg_score, u = heapq.heappop(heap)
            # Accept the entry only if it carries the node's latest score and
            # the node has not been chosen already; otherwise it is stale.
            if u not in selected and -dd[u] == neg_score:
                break
        else:
            # Heap exhausted without a valid candidate: no nodes left to pick.
            break

        seeds.append(u)
        selected.add(u)
        print(f"选出的种子节点: {u}")

        # Discount the neighbours of the new seed.
        for v in graph[u]:
            if v not in selected:
                t[v] += 1
                dd[v] = degree[v] - 2 * t[v] - (degree[v] - t[v]) * t[v] * p
                # Push the refreshed score; the older entry becomes stale.
                heapq.heappush(heap, (-dd[v], v))

    return seeds
# Parameters for degree-discount seed selection
k = 50  # number of seed nodes to select
p = 0.01  # propagation probability

seeds1 = degree_discount_ic_heap(graph, k, p)


选出的种子节点: 0
选出的种子节点: 11
选出的种子节点: 22
选出的种子节点: 32
选出的种子节点: 43
选出的种子节点: 44
选出的种子节点: 45
选出的种子节点: 46
选出的种子节点: 49
选出的种子节点: 53
选出的种子节点: 54
选出的种子节点: 56
选出的种子节点: 59
选出的种子节点: 60
选出的种子节点: 64
选出的种子节点: 65
选出的种子节点: 66
选出的种子节点: 68
选出的种子节点: 69
选出的种子节点: 72
选出的种子节点: 74
选出的种子节点: 79
选出的种子节点: 82
选出的种子节点: 83
选出的种子节点: 85
选出的种子节点: 86
选出的种子节点: 92
选出的种子节点: 94
选出的种子节点: 95
选出的种子节点: 125
选出的种子节点: 135
选出的种子节点: 144
选出的种子节点: 145
选出的种子节点: 148
选出的种子节点: 157
选出的种子节点: 158
选出的种子节点: 160
选出的种子节点: 170
选出的种子节点: 171
选出的种子节点: 172
选出的种子节点: 177
选出的种子节点: 182
选出的种子节点: 184
选出的种子节点: 185
选出的种子节点: 186
选出的种子节点: 187
选出的种子节点: 188
选出的种子节点: 189
选出的种子节点: 190
选出的种子节点: 197


In [147]:
import heapq
import random

def linear_threshold_degree_discount(graph, k, rng=None):
    """
    Implements a degree discount heuristic for the Linear Threshold (LT) model.

    Parameters:
    - graph: dict, adjacency list where key is a node, and value is a list of its neighbors.
      Every neighbor must itself be a key of ``graph``.
    - k: int, number of seed nodes to select.
    - rng: optional object with a ``uniform(a, b)`` method (e.g. ``random.Random(seed)``)
      used to draw the node thresholds. Defaults to the global ``random`` module,
      which makes the thresholds differ between runs unless it is seeded.

    Returns:
    - seeds: list, selected seed nodes in selection order. Contains fewer than
      ``k`` nodes when the graph has fewer than ``k`` nodes.
    """
    # Step 1: Draw a threshold for every node from [0, 1]
    threshold_source = random if rng is None else rng
    thresholds = {v: threshold_source.uniform(0, 1) for v in graph}

    # Step 2: Initialize data structures
    influence_received = {v: 0 for v in graph}  # Total influence received by each node
    degree = {v: len(graph[v]) for v in graph}  # Adjacency-list length of each node
    dd = degree.copy()  # Discounted degree values
    seeds = []          # Selected seed nodes, in selection order
    selected = set()    # Same content as `seeds`, for O(1) membership tests

    # Step 3: Build the initial heap; scores are negated because heapq is a min-heap.
    heap = [(-val, v) for v, val in dd.items()]
    heapq.heapify(heap)

    # Step 4: Greedy seed selection
    for _ in range(k):
        while heap:
            neg_score, u = heapq.heappop(heap)
            # Accept the entry only if it carries the node's latest score and
            # the node has not been chosen already; otherwise it is stale.
            if u not in selected and -dd[u] == neg_score:
                break
        else:
            # Heap exhausted without a valid candidate: no nodes left to pick.
            break

        seeds.append(u)
        selected.add(u)

        # Update influence for neighbors of the selected node
        for neighbor in graph[u]:
            if neighbor in selected:
                continue  # Skip already selected nodes

            # Each listed neighbor receives an equal share 1/degree[u].
            # degree[u] >= 1 here because graph[u] is non-empty inside this loop.
            influence_received[neighbor] += 1 / degree[u]

            # NOTE(review): a neighbor that reached its threshold keeps its last
            # score and stays eligible as a future seed; confirm this is intended.
            if influence_received[neighbor] >= thresholds[neighbor]:
                continue

            # Push the refreshed score; the older heap entry becomes stale.
            dd[neighbor] = degree[neighbor] - influence_received[neighbor]
            heapq.heappush(heap, (-dd[neighbor], neighbor))

    return seeds


    
# Select seeds with the LT-style heuristic. The function draws node thresholds
# from the global `random` module, so results can vary between runs unless
# the generator is seeded beforehand.
k = 50 # Number of seeds to select
seeds2 = linear_threshold_degree_discount(graph, k)
print("Selected seed nodes:", seeds2)


Selected seed nodes: [0, 11, 13, 24, 25, 26, 30, 32, 35, 38, 43, 44, 45, 46, 49, 53, 54, 56, 59, 60, 72, 86, 92, 94, 95, 99, 109, 112, 113, 116, 121, 125, 126, 127, 129, 131, 134, 135, 137, 138, 141, 144, 145, 148, 149, 150, 152, 154, 157, 158]


In [148]:
import random

def influence_spread(graph, seeds, p):
    """
    Run one randomized cascade from the seeds and report how many nodes end up active.

    Parameters:
    - graph: dict, adjacency list (node -> list of neighbors).
    - seeds: list, nodes that are active at the start.
    - p: float, probability that a newly active node activates each inactive neighbor.

    Returns:
    - int: number of active nodes once no new node gets activated.
    """
    activated = set(seeds)  # Every node activated so far.
    frontier = set(seeds)   # Nodes activated in the previous round.

    while frontier:
        upcoming = set()
        for origin in frontier:
            for candidate in graph[origin]:
                if candidate in activated:
                    continue  # Already active: no activation attempt, no random draw.
                if random.random() < p:
                    activated.add(candidate)
                    upcoming.add(candidate)
        # Only nodes activated in this round spread further in the next one.
        frontier = upcoming

    return len(activated)

In [154]:
# NOTE(review): p = 0.2 here differs from the p = 0.01 used when seeds1 was selected.
p = 0.2  # propagation probability



    # Simulate the spread with influence_spread (a single randomized run)
spread = influence_spread(graph, seeds1, p)
print("最终传播影响范围的节点数:", spread)


最终传播影响范围的节点数: 62193


In [153]:
p = 0.2  # propagation probability



    # Simulate the spread with influence_spread (a single randomized run)
spread = influence_spread(graph, seeds2, p)
print("最终传播影响范围的节点数:", spread)

最终传播影响范围的节点数: 59033
