ModuleNotFoundError: No module named 'src'

In [1]:
# --- File: influence_analysis.ipynb ---
# Cell 1: Setup Paths and Imports using the ABSOLUTE PATH

import sys
import os

# Define the absolute path to your project's SRC folder.
# NOTE: YOU MUST PASTE YOUR ACTUAL PATH HERE, replacing the example below.
# Use double backslashes (\\) or forward slashes (/) for Windows paths.
ABSOLUTE_SRC_PATH = "C:/Users/hari2/Documents/DM/src"

# --- Register the path and verify it ---
# The folder is added to sys.path only when it really exists, and only once:
#   * a mistyped path is reported instead of being left behind on sys.path;
#   * re-running this cell does not pile up duplicate sys.path entries.
if os.path.isdir(ABSOLUTE_SRC_PATH):
    if ABSOLUTE_SRC_PATH not in sys.path:
        sys.path.append(ABSOLUTE_SRC_PATH)
    print("SUCCESS: SRC path added to system.")
else:
    print(f"ERROR: Path {ABSOLUTE_SRC_PATH} does not exist. Please correct the ABSOLUTE_SRC_PATH.")


import networkx as nx
import pandas as pd
# This import should now work!
from influence_metrics import compute_influence_metrics, get_top_k_influencers

SUCCESS: SRC path added to system.


In [2]:
# Step 2: Read the preprocessed Facebook edge list into a NetworkX graph.
# NOTE: A FileNotFoundError here means the preprocessing script has not been run yet.
fb_file = "../data/processed/facebook_cleaned.edgelist"
try:
    G_fb = nx.read_edgelist(fb_file, nodetype=int)
except FileNotFoundError:
    # Leave a None sentinel so the analysis below can be skipped gracefully.
    G_fb = None
    print(f"ERROR: Graph file not found at {fb_file}. Please ensure your data preprocessing notebook was run successfully.")
else:
    # Only reached when the edge list was read successfully.
    print(f"Graph loaded for influence analysis: Nodes={G_fb.number_of_nodes()}, Edges={G_fb.number_of_edges()}")


# Run the influence analysis only when a graph was actually loaded.
# The check is an explicit `is not None`: a NetworkX graph is falsy when it has
# zero nodes, so a plain truthiness test would misreport an empty graph as a
# missing file.
if G_fb is not None:
    # Step 3: Compute all influence metrics (PageRank, Eigenvector, Betweenness)
    df_influence_scores = compute_influence_metrics(
        G_fb,
        save_path="../results/influence_scores.csv"
    )

    # Step 4: Identify and display Top-20 Influencers
    top_influencers = get_top_k_influencers(df_influence_scores, k=20)

    # Step 5: Validation - Check correlation between metrics (as per project plan)
    # The degree Series is keyed by node id and is aligned on the frame's index
    # when assigned (assumes the scores frame is indexed by node id -- TODO confirm).
    df_influence_scores['Degree'] = pd.Series(dict(G_fb.degree()))

    # Calculate and display the correlation matrix
    correlation_matrix = df_influence_scores.corr()
    print("\n--- Correlation Matrix of Influence Metrics ---")
    print(correlation_matrix)

    # Create the output folder first so a missing directory cannot throw away
    # the (slow) metric computation with an OSError at the very last step.
    os.makedirs("../reports", exist_ok=True)
    correlation_matrix.to_csv("../reports/influence_correlation_matrix.csv")

else:
    print("Execution halted due to missing graph file.")

Graph loaded for influence analysis: Nodes=4039, Edges=88234
Computing PageRank...
Computing Eigenvector Centrality...
Computing Betweenness Centrality...
Metrics computed. Runtimes: PageRank=0.58s, Eigenvector=1.26s, Betweenness=146.32s
Influence scores saved to ../results/influence_scores.csv

--- Top 20 by PageRank ---
3437    0.007615
107     0.006936
1684    0.006367
0       0.006290
1912    0.003877
348     0.002348
686     0.002219
3980    0.002170
414     0.001800
698     0.001317
483     0.001297
3830    0.001184
376     0.000901
2047    0.000841
56      0.000804
25      0.000800
828     0.000789
322     0.000787
475     0.000785
428     0.000780
Name: PageRank, dtype: float64

--- Top 20 by Eigenvector_Centrality ---
1912    0.095407
2266    0.086983
2206    0.086052
2233    0.085173
2464    0.084279
2142    0.084193
2218    0.084156
2078    0.084136
2123    0.083671
1993    0.083532
2410    0.083518
2244    0.083342
2507    0.083273
2240    0.083057
2340    0.083053
2229    