In [6]:
import networkx as nx
import pandas as pd
import random

In [7]:
def estimate_influence_spread(G, seed_set, num_simulations=10, activation_prob=0.1, rng=None):
    """Estimate the expected number of activated nodes by Monte Carlo simulation.

    Each simulation runs an independent-cascade style diffusion: every node
    activated in one round gets a single chance, in the next round, to activate
    each of its still-inactive neighbors with probability ``activation_prob``.
    The process stops when a round activates nobody.

    Parameters
    ----------
    G : graph-like
        Any object exposing ``neighbors(node)`` (e.g. a ``networkx.Graph``).
    seed_set : iterable
        Initially active nodes.
    num_simulations : int, default 10
        Number of independent diffusion runs to average over. Must be >= 1.
    activation_prob : float, default 0.1
        Probability that an active node activates a given inactive neighbor.
        Must lie in ``[0, 1]``.
    rng : random.Random or None, default None
        Source of randomness. ``None`` uses the global ``random`` module, so
        ``random.seed(...)`` still controls the result.

    Returns
    -------
    float
        Mean number of active nodes (seeds included) over all simulations.

    Raises
    ------
    ValueError
        If ``num_simulations`` < 1 or ``activation_prob`` is outside ``[0, 1]``.
    """
    if num_simulations < 1:
        raise ValueError("num_simulations must be a positive integer")
    if not 0.0 <= activation_prob <= 1.0:
        raise ValueError("activation_prob must be between 0 and 1")

    # Resolve the random source once; looked up at call time so a prior
    # random.seed() in the notebook takes effect.
    draw = random.random if rng is None else rng.random

    def simulate_diffusion(G, seed_set):
        """Run one diffusion and return the final number of active nodes."""
        active_nodes = set(seed_set)
        newly_active_nodes = set(seed_set)
        while newly_active_nodes:
            next_newly_active_nodes = set()
            for node in newly_active_nodes:
                neighbors = set(G.neighbors(node))
                # active_nodes is only updated after the round, so a neighbor
                # reachable from several newly active nodes gets one
                # independent activation attempt per such node.
                for neighbor in neighbors - active_nodes:
                    if draw() < activation_prob:
                        next_newly_active_nodes.add(neighbor)
            active_nodes.update(next_newly_active_nodes)
            newly_active_nodes = next_newly_active_nodes
        return len(active_nodes)

    total_spread = 0
    for _ in range(num_simulations):
        total_spread += simulate_diffusion(G, seed_set)
    return total_spread / num_simulations

In [8]:
def influence_maximization(G, k, num_simulations=10):
    """Greedily select up to ``k`` seed nodes with the largest estimated spread.

    In each of ``k`` rounds, every node not yet selected is tentatively added
    to the current seed set, its spread is estimated with
    ``estimate_influence_spread``, and the node with the strictly highest
    estimate is kept (ties go to the node seen first in ``G.nodes``).

    Parameters
    ----------
    G : graph
        Object exposing ``nodes`` and whatever ``estimate_influence_spread``
        requires of it.
    k : int
        Maximum number of seed nodes to select.
    num_simulations : int, default 10
        Monte Carlo runs per candidate evaluation, forwarded to
        ``estimate_influence_spread``. Larger values reduce the noise in the
        comparison at proportionally higher cost.

    Returns
    -------
    set
        The selected seed nodes; smaller than ``k`` if the graph has fewer
        than ``k`` nodes.
    """
    seed_set = set()
    for _ in range(k):
        best_node = None
        best_spread = 0
        for node in G.nodes:
            if node in seed_set:
                continue
            spread = estimate_influence_spread(
                G, seed_set | {node}, num_simulations=num_simulations
            )
            if spread > best_spread:
                best_spread = spread
                best_node = node
        if best_node is None:
            # No candidate left (every node is already a seed, or the graph is
            # empty); further rounds could not add anything.
            break
        seed_set.add(best_node)
    return seed_set

In [9]:
# Seed the global RNG so the Monte Carlo estimates (and therefore the selected
# nodes) are repeatable on a fresh Restart & Run All.
SEED = 42
random.seed(SEED)

# Load the edge list; each row links the nodes in columns node_1 and node_2.
df = pd.read_csv('../../facebook_clean_data/tvshow_edges.csv')
# Build the graph from the edge list
G = nx.from_pandas_edgelist(df, 'node_1', 'node_2')

# Run greedy influence maximization
k = 3  # Number of nodes to select
selected_nodes = influence_maximization(G, k)

print(f"Selected nodes for influence maximization: {selected_nodes}")

Selected nodes for influence maximization: {672, 2434, 111}
