In [9]:
import sys
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter 
import powerlaw
import os
import random
print(os.getcwd())
import importlib
from utils import *


c:\Priyanka\dynamic-infmax-gnn-lstm\code


In [11]:
# `import importlib` alone does not guarantee that the `importlib.util`
# submodule is loaded, so import it explicitly before using it below.
import importlib.util

# Specify the full path to utils.py
path_to_utils = 'c:/Priyanka/dynamic-infmax-gnn-lstm/code/utils.py'

# Load the module from its file path and register it in sys.modules under the
# name "utils", so that `utils.<name>` refers to the freshly executed file.
spec = importlib.util.spec_from_file_location("utils", path_to_utils)
utils = importlib.util.module_from_spec(spec)
sys.modules["utils"] = utils
spec.loader.exec_module(utils)

In [2]:
def graph_details(graph_dir):
    """Print the node and edge counts of every ``.gpickle`` graph in a directory.

    Parameters:
    - graph_dir (str): Directory to scan; only entries whose name ends with
      ``.gpickle`` are loaded, in the order returned by ``os.listdir``.

    Returns:
    - int: Always 1.
    """
    gpickle_names = (entry for entry in os.listdir(graph_dir) if entry.endswith('.gpickle'))

    for file_name in gpickle_names:
        # Read the pickled graph stored under graph_dir/file_name
        graph = nx.read_gpickle(os.path.join(graph_dir, file_name))

        # Report its size
        node_count, edge_count = graph.number_of_nodes(), graph.number_of_edges()
        print(f"Graph: {file_name}", "Number of nodes & edges :", node_count, edge_count)

    return 1

# Directory that holds the graph snapshot files (*.gpickle).
# This is an absolute, machine-specific path; adjust it when running elsewhere.

data_dir = "C:\\Priyanka\\dynamic-infmax-gnn-lstm\\data\\Oregon-data\\graphs\\"
# Print the node/edge counts of every snapshot found in data_dir.
graph_details(data_dir)

Graph: snapshot_0.gpickle Number of nodes & edges : 11492 22002
Graph: snapshot_1.gpickle Number of nodes & edges : 11492 21999
Graph: snapshot_2.gpickle Number of nodes & edges : 11492 22469
Graph: snapshot_3.gpickle Number of nodes & edges : 11492 22747
Graph: snapshot_4.gpickle Number of nodes & edges : 11492 22493
Graph: snapshot_5.gpickle Number of nodes & edges : 11492 22607
Graph: snapshot_6.gpickle Number of nodes & edges : 11492 22677
Graph: snapshot_7.gpickle Number of nodes & edges : 11492 22724
Graph: snapshot_8.gpickle Number of nodes & edges : 11492 23409


1

In [3]:
# get IFC centrality score for each node
def get_inflcapapcity(g, uniinfweight):
    """Compute the influence-capacity (IFC) score of every node in ``g``.

    For each node ``v``:

    * local score   ``il(v) = 1 + weighted_degree(v) + sum(uniinfweight**2 * deg(u))``
      over the neighbours ``u`` of ``v`` (weighted degree uses the ``'weight'``
      edge attribute);
    * global score  ``ig(v) = core_number(v) * (1 + deg(v) / max_degree)``;
    * overall score ``ic(v) = (il(v) / max(il)) * (ig(v) / max(ig))``.

    Parameters:
    - g: networkx graph.
    - uniinfweight (float): Uniform influence weight applied (squared) to the
      degrees of a node's neighbours.

    Returns:
    - numpy.ndarray of shape ``(n, 1)``: row ``i`` is the score of
      ``list(g.nodes)[i]``. An empty graph yields an array of shape ``(0, 1)``.
      If a normalising maximum is zero (e.g. a graph without edges, where every
      core number is 0), the corresponding factor is taken as 0 instead of
      dividing by zero.
    """
    nodelist = list(g.nodes)
    if not nodelist:
        return np.zeros((0, 1))

    # Compute the per-node quantities once. In particular the k-core
    # decomposition covers the whole graph, so it must not be redone per node.
    degree = dict(nx.degree(g))
    weighted_degree = dict(nx.degree(g, weight='weight'))
    core = nx.core_number(g)
    degn = max(degree.values())

    # Arrays are indexed by position in nodelist (not by node label), so node
    # labels do not have to be the integers 0..n-1.
    il = np.zeros((len(nodelist), 1))
    ig = np.zeros((len(nodelist), 1))

    for pos, node in enumerate(nodelist):
        tempw = 0
        for neighbnode in g.neighbors(node):
            tempw = tempw + uniinfweight * uniinfweight * degree[neighbnode]

        # local score
        il[pos] = 1 + weighted_degree[node] + tempw

        # global score (degn == 0 only when the graph has no edges)
        degree_ratio = degree[node] / degn if degn else 0.0
        ig[pos] = core[node] * (1 + degree_ratio)

    # overall score: product of the max-normalised local and global scores
    max_il = np.max(il)
    max_ig = np.max(ig)
    local_norm = il / max_il if max_il else np.zeros_like(il)
    global_norm = ig / max_ig if max_ig else np.zeros_like(ig)

    return local_norm * global_norm

In [4]:
def top_influence_capacity_score(ifc_score, percentile=40):
    """Return the indices of the nodes whose IFC score reaches a percentile cut-off.

    Parameters:
    - ifc_score (array-like): One IFC score per node, either 1-D or of shape
      ``(n, 1)``; it is flattened before use.
    - percentile (float): Percentile (0-100) of the scores used as threshold.
      The default of 40 keeps the scores at or above the 40th percentile.

    Returns:
    - list of int: Positions ``i`` with ``ifc_score[i] >= threshold``, in
      increasing order. An empty input yields an empty list.
    """
    scores = np.asarray(ifc_score, dtype=float).ravel()
    if scores.size == 0:
        # np.percentile is undefined on empty input; there is nothing to select.
        print("len", 0)
        return []

    threshold = np.percentile(scores, percentile)
    # Report the percentile that is actually applied (the message used to
    # claim "90%" while the code used 40).
    print(f"The threshold at percentile {percentile} for IFC scores is: {threshold}")

    top_influencers_indices = np.flatnonzero(scores >= threshold).tolist()
    print("len", len(top_influencers_indices))
    return top_influencers_indices


In [12]:
def compute_spread_for_all_graphs(data_dir, graphs, k=10, p=0.2, mc=100, uniinfweight=1):
    """
    Computes the spread and IFC scores for a list of graph files.

    For every file the graph is loaded, seeds are selected with
    ``utils.greedy`` over the whole graph, IFC scores are computed, and
    ``utils.greedy_mod`` is run on the candidate nodes selected from those scores.

    Parameters:
    - data_dir (str): Directory that contains the graph files.
    - graphs (list): File names (relative to ``data_dir``) of the ``.gpickle``
      graphs to process, in processing order.
    - k (int): Number of seed nodes to select.
    - p (float): Propagation probability.
    - mc (int): Number of Monte-Carlo simulations.
    - uniinfweight (int): Uniform influence weight for IFC score computation.

    Returns:
    - results (dict): Maps ``"Graph_<i>"`` (1-based position in ``graphs``) to a
      dict with the keys ``greedy_spread``, ``greedy_time``, ``ifc_time`` and
      ``greedy_mod_spread``, holding the values returned by ``utils.greedy`` and
      ``utils.greedy_mod``.
    - ifc_scores_dict (dict): Maps ``"Graph_<i>"`` to the IFC scores of that graph.
    """
    results = {}
    ifc_scores_dict = {}

    for i, graph_file in enumerate(graphs):
        graph_key = f"Graph_{i + 1}"

        # os.path.join works whether or not data_dir ends with a path separator.
        # NOTE(review): the file is unpickled; only load graph files from a
        # trusted source.
        g = nx.read_gpickle(os.path.join(data_dir, graph_file))
        print(graph_file)
        print(f"Computing spread for Graph {i + 1} with {len(g.nodes())} nodes and {len(g.edges())} edges...")

        # Compute spread with the greedy algorithm
        seeds_greedy, spread_greedy, greedy_time = utils.greedy(g, k, p, mc)

        # Compute IFC scores
        ifc_scores = get_inflcapapcity(g, uniinfweight)
        ifc_scores_dict[graph_key] = ifc_scores

        # Candidate nodes for greedy_mod are the nodes selected from the IFC scores
        candidate_nodes = top_influence_capacity_score(ifc_scores)

        # Compute spread with the greedy_mod algorithm restricted to those candidates
        seeds_greedy_mod, spread_greedy_mod, ifc_time = utils.greedy_mod(g, k, candidate_nodes, p, mc)

        # Store results in a structured dictionary
        results[graph_key] = {
            "greedy_spread": spread_greedy,
            "greedy_time": greedy_time,
            "ifc_time": ifc_time,
            "greedy_mod_spread": spread_greedy_mod
        }

    return results, ifc_scores_dict

# Note: Replace `utils.greedy` and `utils.greedy_mod` with just `greedy` and `greedy_mod`
# if you are defining these functions in the same script or have imported them correctly.


In [13]:
# Collect the .gpickle file names found in the data directory
graph_paths = [entry for entry in os.listdir(data_dir) if entry.endswith(".gpickle")]

# Order them by the integer between the last '_' and the following '.'
# (e.g. "snapshot_3.gpickle" -> 3)
sorted_graph_paths = sorted(
    graph_paths,
    key=lambda name: int(name.rsplit('_', 1)[-1].split('.')[0]),
)


# # Iterate through subsequent graphs
# for i in range(0, len(sorted_graph_paths)):
#     g1 =nx.read_gpickle(data_dir + sorted_graph_paths[i])
#     nodes_g1, edges_g1 = g1.number_of_nodes(), g1.number_of_edges()

In [None]:
# Run the greedy / greedy_mod comparison over all snapshots in index order,
# with 5 seed nodes, propagation probability 0.5 and a single Monte-Carlo
# simulation (mc=1).
# NOTE(review): the recorded run of this cell ended with a KeyboardInterrupt,
# so `ouput` may not be defined for the cells below.
ouput = compute_spread_for_all_graphs(data_dir, sorted_graph_paths , k= 5 , p=0.5, mc=1, uniinfweight=1)

snapshot_0.gpickle
Computing spread for Graph 1 with 11492 nodes and 22002 edges...


KeyboardInterrupt: 

: 

In [None]:
# Unpack the (results, ifc_scores_dict) pair returned by compute_spread_for_all_graphs.
results, infc_all_graphs = ouput

# # Convert the dictionary to a DataFrame
# df = pd.DataFrame(results)

# # Export the DataFrame to an Excel file
# excel_path = 'spread_100_mc.xlsx'  # Specify your path and file name
# df.to_excel(excel_path, index=False, engine='openpyxl')