# In [None]:
# Import packages - Modernized imports with type hints and additional libraries
import pandas as pd
import networkx as nx
import nx_parallel as nxp
from networkx.algorithms import community
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import math
import json
import os
import hashlib
import datetime
from pyvis.network import Network
import time
from multiprocessing import Pool
from collections import defaultdict
from typing import Optional, Dict, List, Tuple

# Enable the nx-parallel backend globally for NetworkX.
# n_jobs is the number of worker processes the backend may use. We leave one
# core free for the rest of the system, but clamp the result to at least 1:
# on a single-core machine the plain subtraction would give 0, which is not a
# usable worker count. If os.cpu_count() cannot determine the core count
# (returns None) we assume 4 cores.
nx.config.backends.parallel.active = True
nx.config.backends.parallel.n_jobs = max(1, (os.cpu_count() or 4) - 1)

# =================================== CENTRALIZED CONFIGURATION SYSTEM ===================================
# CONFIG gathers the tunable parameters of the analysis in one nested dictionary,
# grouped by concern (I/O, pipeline, analysis, fame analysis, pruning, visualization).
# NOTE(review): in the code visible in this cell only CONFIG['input_file'] and the
# summary printout read from CONFIG; the pruning and Pyvis steps further down still
# use their own hard-coded options - confirm whether they should be wired to CONFIG.

CONFIG = {
    # --- INPUT/OUTPUT CONFIGURATION ---
    # NOTE(review): despite the .json extension, the loader in this cell reads the file
    # as plain text in 3-line records (profile / comma-separated followers /
    # comma-separated followees) - confirm the intended format.
    "input_file": "followers_following.json",  # Path to the input data file
    "output_file_prefix": "Output/FollowWeb",   # Base path for all output files
    
    # --- PIPELINE CONFIGURATION ---
    "pipeline": {
        # Analysis strategy selection:
        # 1. "k-core": (Default) Prunes the full L1+L2 graph. Good for a general overview.
        # 2. "reciprocal_k-core": Keeps only mutual follows (A follows B AND B follows A), then prunes. Good for finding "real" friend groups.
        # 3. "ego_alter_k-core": Builds a graph of only your L1 contacts, connected if they follow each other. Good for analyzing your immediate circle.
        "strategy": "k-core",    # Options: "k-core", "reciprocal_k-core", "ego_alter_k-core"
        
        # Set to True to skip ALL computationally expensive structural analysis
        # (community detection, centrality).
        "skip_analysis": False,
        
        # The central node (you). Must be set when using the "ego_alter_k-core" strategy.
        "ego_username": "_alexs.life"
    },

    # --- ANALYSIS CONFIGURATION ---
    "analysis": {
        # Username to find a contact path to (must appear in the input data file).
        # Set to "" or None to disable manual path finding.
        "contact_path_target": None,
    },

    # --- FAME ANALYSIS CONFIGURATION ---
    "fame_analysis": {
        # Find contact paths to every famous account identified
        "find_paths_to_all_famous": True, 
        
        # Minimum number of followers inside your L1/L2 network for an account to be considered
        "min_followers_in_network": 5, 
        
        # Minimum ratio of (followers / following) for an account to be considered famous
        # (e.g., 5.0 means 5 followers for every 1 account they follow)
        "min_fame_ratio": 5.0 
    },

    # --- PRUNING CONFIGURATION ---
    "pruning": {
        # Per-strategy k-values (minimum connections required).
        # Nodes with fewer connections than this are meant to be removed.
        "k_values": {
            "k-core": 1,              # Conservative pruning for the full network
            "reciprocal_k-core": 6,   # More aggressive pruning for mutual connections
            "ego_alter_k-core": 3,    # Moderate pruning for the ego network
        },
        "default_k_value": 2  # Fallback used when the strategy name has no entry in k_values
    },
    
    # --- VISUALIZATION CONFIGURATION ---
    "visualization": {
        # --- Shared Settings for Both HTML and PNG ---
        "node_size_metric": "degree",        # Options: "degree", "betweenness", "eigenvector"
        "base_node_size": 6,                 # Base size for nodes
        "node_size_multiplier": 5,           # Multiplier for node size scaling
        "scaling_algorithm": "logarithmic",  # Options: "logarithmic", "linear"
        
        "base_edge_width": 0.5,              # Base width for edges
        "edge_width_multiplier": 2,          # Multiplier for edge width scaling
        "edge_width_scaling": "logarithmic", # Options: "logarithmic", "linear"
        "intra_community_color": "#c0c0c0",  # Gray for within-community edges
        "bridge_color": "#6e6e6e",           # Darker gray for between-community edges

        # --- Interactive HTML Visualization (Pyvis) Configuration ---
        "pyvis_interactive": { 
            "width": "100%",                 # Canvas width
            "height": "90vh",                # Canvas height
            "notebook": False,               # Set to True for Jupyter notebook display
            "show_labels": True,             # Set to False to hide node names for faster rendering
            "show_tooltips": True,           # Set to False to disable hover tooltips for faster loading
            "physics_solver": "forceAtlas2Based",  # Physics simulation algorithm
        },

        # --- Static PNG Image Configuration ---
        "static_image": {
            "generate": True,                # Set to True to generate static PNG images
            "layout": "spring",             # Options: "spring", "kamada_kawai", "circular", "shell"
            "with_labels": False,            # Labels are often too cluttered on static graphs
            "font_size": 8,                 # Font size for labels (if enabled)
            "image_size_inches": (25, 25),   # Image dimensions (width, height) in inches
            "dpi": 300,                     # Dots per inch - higher for better quality
            "spring_k": 0.3,               # Spring layout parameter (adjusts node repulsion)
            "spring_iterations": 50,        # Number of iterations for the spring layout
            "edge_alpha": 0.3,              # Edge transparency (0.0 to 1.0)
            "node_alpha": 0.8,              # Node transparency (0.0 to 1.0)
            "edge_arrow_size": 8,            # Size of arrow heads on edges
            "show_legend": True              # Include a legend in static images
        }
    }
}

# Echo the settings that matter most for this run. The k-value shown is the
# entry for the active strategy, or the pruning default when the strategy has
# no entry of its own. The final banner line carries an extra newline so the
# summary is followed by a blank line.
print(
    "\n".join(
        [
            "=== CONFIGURATION LOADED ===",
            f"Strategy: {CONFIG['pipeline']['strategy']}",
            f"Input file: {CONFIG['input_file']}",
            f"Output prefix: {CONFIG['output_file_prefix']}",
            f"Skip analysis: {CONFIG['pipeline']['skip_analysis']}",
            f"K-value for {CONFIG['pipeline']['strategy']}: {CONFIG['pruning']['k_values'].get(CONFIG['pipeline']['strategy'], CONFIG['pruning']['default_k_value'])}",
            "============================\n",
        ]
    )
)

#=================================== CONSTRUCT GRAPH FROM DATA FILE =====================================
# The data file path is taken from CONFIG['input_file'].
DATA_FILE = CONFIG['input_file']

# G is the directed follow graph: an edge A -> B means "A follows B".
# net is the Pyvis canvas that G is exported to at the end of the cell.
G = nx.DiGraph()
net = Network(700, 700, directed=True, notebook=False) # For jupyter notebook = True

# Read the whole data file into memory, one list entry per line.
try:
    with open(DATA_FILE, "r") as f:
        lines = f.readlines()
except FileNotFoundError:
    print(f"Error: Data file '{DATA_FILE}' not found. Please create this file with your network data.")
    # Stop with a non-zero status so callers can tell the run failed. The
    # interactive exit() helper is avoided: it reports success (status 0) and
    # is only provided by the site module.
    raise SystemExit(1)

# The file is a sequence of 3-line records:
#   line 1: profile name
#   line 2: comma-separated followers of that profile
#   line 3: comma-separated accounts that profile follows
# Only complete records are processed; a trailing partial record is ignored.
for recordStart in range(0, len(lines) - 2, 3):
    curProfile = lines[recordStart].strip()

    # Strip whitespace around every name so "a, b" and "a,b" produce the same
    # nodes, and drop the empty strings produced by empty lists or stray commas.
    curFollowerList = [name.strip() for name in lines[recordStart + 1].split(",") if name.strip()]
    curFolloweeList = [name.strip() for name in lines[recordStart + 2].split(",") if name.strip()]

    # Each follower points at the profile ...
    for follower in curFollowerList:
        G.add_edge(follower, curProfile)

    # ... and the profile points at everyone it follows.
    for followee in curFolloweeList:
        G.add_edge(curProfile, followee)

print("Graph constructed from data file.")

#============================================ PROCESS NETWORK ==============================================
# Options for the pruning steps below. Every step is skipped while its option holds
# the "off" value (0 for the numeric options, '' for the username options).
PrintStats = True #Intended to print stats about the network before and after processing. NOTE(review): not read anywhere in the visible code
MinimumNumConnections = 0 #Single pass: removes nodes whose in+out edge count is <= this value (threshold is inclusive), 0 for off
MinimumNumConnectionsAggressive = 6 #Same test, but repeated until a full pass removes nothing, 0 for off
DeleteAccountConnections = '' #Username whose connected nodes (and the user itself) are deleted, '' for off
DeleteAccountConnectionsExFirst = '' #As above, but connected nodes listed in recScanned are kept, '' for off
RemoveUser = '' #Username of a single node to remove, '' for off
RemovePopular = 0 #Removes accounts that follow fewer than this many accounts in the graph (celebrity/meme accounts), 0 for off (should be less than min connections)

def numEdges(nodeID):
    """Return how many edges touch ``nodeID`` in the module-level graph ``G``.

    The result is the number of incoming edges plus the number of outgoing
    edges, so a mutual follow between two accounts contributes 2.
    """
    incoming = G.in_edges(nodeID)
    outgoing = G.out_edges(nodeID)
    return len(incoming) + len(outgoing)
def listEdges(nodeID):
    """Return every edge touching ``nodeID`` in ``G`` as a list of tuples.

    Each element is a ``(source, target)`` pair. Incoming edges
    ``(other, nodeID)`` come first, followed by the outgoing edges
    ``(nodeID, other)`` that are not already in the incoming part (only a
    self-loop can appear in both). Callers that want the neighbouring node
    must therefore pick whichever endpoint is not ``nodeID``.
    """
    # G.in_edges(...) returns an edge view, not a list, and a view cannot be
    # concatenated with "+"; materialise it first.
    incoming = list(G.in_edges(nodeID))
    alreadyListed = set(incoming)
    # Filter in graph order (instead of a set difference) so the result is
    # deterministic.
    outgoingOnly = [edge for edge in G.out_edges(nodeID) if edge not in alreadyListed]
    return incoming + outgoingOnly
    
# Drop accounts that follow fewer than RemovePopular other accounts in the
# graph (typically celebrity and meme accounts that do not follow back).
if RemovePopular != 0:
    popRemoved = 0
    # Iterate over a snapshot: the graph is modified inside the loop, and each
    # removal can lower the out-degree of nodes visited later.
    allNodes = list(G.nodes)
    for account in allNodes:
        followingCount = len(G.out_edges(account))
        if followingCount >= RemovePopular:
            continue
        G.remove_node(account)
        popRemoved += 1
    print ("(RemovePopular) Number of celebs removed: ", popRemoved)

# Single-pass pruning: remove every node whose total edge count (in + out) is
# at or below MinimumNumConnections. The threshold is inclusive.
deletedNodesTot = 0
if MinimumNumConnections != 0:
    # Snapshot of the node names, because nodes are deleted while looping.
    allNodes = list(G.nodes)
    for candidate in allNodes:
        if numEdges(candidate) > MinimumNumConnections:
            continue
        G.remove_node(candidate)
        deletedNodesTot += 1
    print ("(MinConnections) Number of nodes removed: ", deletedNodesTot)
                
# Iterative pruning: keep sweeping the graph, removing nodes whose total edge
# count is at or below MinimumNumConnectionsAggressive, until a full sweep
# removes nothing. Removing a node lowers its neighbours' counts, so nodes that
# survived one sweep may fall under the threshold in the next.
deletedNodes = 1
deletedNodesTot = 0
if MinimumNumConnectionsAggressive != 0:
    while deletedNodes != 0:
        deletedNodes = 0
        # Fresh snapshot per sweep, since the graph shrinks as we go.
        allNodes = list(G.nodes)
        for candidate in allNodes:
            if numEdges(candidate) <= MinimumNumConnectionsAggressive:
                G.remove_node(candidate)
                deletedNodes += 1
        # Fold this sweep's removals into the running total.
        deletedNodesTot += deletedNodes
    print ("(MinConnectionsAggressive) Number of nodes removed: ", deletedNodesTot)
        
# Remove the selected user together with every node directly connected to it
# (followers and followees), then drop any nodes left without connections.
# The counter starts at 1 so the reported total includes the user's own node.
deletedNodesTot = 1
if (DeleteAccountConnections != ''):
    if DeleteAccountConnections not in G:
        # The user may never have been in the data, or may already have been
        # pruned by an earlier step; removing a missing node would raise.
        print ("(DelteAccountConnections) User ", DeleteAccountConnections, " is not in the graph, nothing removed")
    else:
        # Collect neighbours from both edge directions. Taking a fixed tuple
        # index from an edge list would pick the user itself for incoming
        # edges, so the other endpoint is gathered explicitly instead.
        connectedNodes = set(G.predecessors(DeleteAccountConnections)) | set(G.successors(DeleteAccountConnections))
        connectedNodes.discard(DeleteAccountConnections) # a self-loop must not remove the user early
        for connected in connectedNodes:
            G.remove_node(connected)
            deletedNodesTot += 1

        G.remove_node(DeleteAccountConnections) # Remove the user's own node

        # Remove nodes that were left without any connection.
        allNodes = list(G.nodes) # snapshot, the graph is modified in the loop
        for orphan in allNodes:
            if (numEdges(orphan) == 0):
                G.remove_node(orphan)
                deletedNodesTot += 1
        print ("(DelteAccountConnections) Number of nodes connected to ", DeleteAccountConnections, " removed: ", deletedNodesTot)
    
# Remove the selected user and the nodes directly connected to it, but keep
# connected nodes that are "original" (scanned) profiles. Afterwards drop any
# nodes left without connections.
# The counter starts at 1 so the reported total includes the user's own node.
deletedNodesTot = 1
if (DeleteAccountConnectionsExFirst != ''):
    # recScanned is not defined anywhere in the visible part of this file.
    # Use it when some other cell provides it; otherwise fall back to the
    # profile names that head each 3-line record of the data file.
    # NOTE(review): assumes "originals" means the profiles listed in the data
    # file - confirm against the scraper that produced recScanned.
    try:
        originalProfiles = set(recScanned)
    except NameError:
        originalProfiles = {lines[recordStart].strip() for recordStart in range(0, len(lines) - 2, 3)}

    if DeleteAccountConnectionsExFirst not in G:
        # The user may never have been in the data, or may already have been
        # pruned by an earlier step; removing a missing node would raise.
        print ("(DelteAccountConnectionsExFirst) User ", DeleteAccountConnectionsExFirst, " is not in the graph, nothing removed")
    else:
        # Collect neighbours from both edge directions. Taking a fixed tuple
        # index from an edge list would pick the user itself for incoming
        # edges, so the other endpoint is gathered explicitly instead.
        connectedNodes = set(G.predecessors(DeleteAccountConnectionsExFirst)) | set(G.successors(DeleteAccountConnectionsExFirst))
        connectedNodes.discard(DeleteAccountConnectionsExFirst) # a self-loop must not remove the user early
        for connected in connectedNodes:
            if connected not in originalProfiles:
                G.remove_node(connected)
                deletedNodesTot += 1

        G.remove_node(DeleteAccountConnectionsExFirst) # Remove the user's own node

        # Remove nodes that were left without any connection.
        allNodes = list(G.nodes) # snapshot, the graph is modified in the loop
        for orphan in allNodes:
            if (numEdges(orphan) == 0):
                G.remove_node(orphan)
                deletedNodesTot += 1
        print ("(DelteAccountConnectionsExFirst) Number of nodes connected to ", DeleteAccountConnectionsExFirst, " removed: ", deletedNodesTot)
    
# Remove one named user, then clean up any nodes that no longer have a single
# incoming or outgoing edge.
if RemoveUser != '':
    G.remove_node(RemoveUser)

    # Walk a snapshot of the node names, since nodes are deleted on the way.
    allNodes = list(G.nodes)
    for leftover in allNodes:
        if numEdges(leftover) != 0:
            continue
        G.remove_node(leftover)

    print ("(RemoveUser) Removed node: ", RemoveUser)

#======================================== NETWORKX TO PYVIS =============================================
# Aesthetic option: when truthy, a node's drawn size grows with its number of connections.
sizeByConnections = 1

# Copy the processed NetworkX graph (nodes and edges) into the Pyvis network.
net.from_nx(G)

# Size scaling: every node starts at 9 and gains 1 unit per 50 connections.
if sizeByConnections:
    for pyvisNode in net.nodes:
        connectionCount = numEdges(pyvisNode['id'])
        pyvisNode['size'] = (connectionCount / 50) + 9

# Physics settings for the layout, plus the physics control panel in the generated page.
net.force_atlas_2based(
    gravity=-50,
    central_gravity=0.01,
    spring_length=100,
    spring_strength=0.07,
    damping=0.8,
    overlap=1,
)
net.show_buttons(filter_=['physics'])

# Write the interactive visualization to an HTML file.
net.save_graph("FollowWeb.html")
print("Visualization complete. Check 'FollowWeb.html'.")
