In [None]:
#Import packages
import networkx as nx #Network packages
from pyvis.network import Network 

#=================================== CONSTRUCT GRAPH FROM DATA FILE =====================================
# Name of the data file. It is read as consecutive three-line records:
#   line 1: a profile name
#   line 2: comma-separated followers of that profile
#   line 3: comma-separated accounts that profile follows
DATA_FILE = "followers_following.txt"

# Initialise packages
G = nx.DiGraph() # An edge A -> B is added when A follows B
net = Network(700, 700, directed=True, notebook=False) # For jupyter notebook = True

# Open and read the data file
try:
    with open(DATA_FILE, "r") as f: # open file in read mode
        lines = f.readlines() # One list entry per line of the file
except FileNotFoundError:
    print(f"Error: Data file '{DATA_FILE}' not found. Please create this file with your network data.")
    # Stop with a non-zero status so the failure is visible to whatever launched the script
    raise SystemExit(1)


def _parseNames(line):
    """Split a comma-separated line into names, trimming whitespace and dropping empty entries."""
    return [name.strip() for name in line.split(",") if name.strip()]


# Process the file one complete three-line record at a time
for i in range(0, len(lines) - 2, 3):
    curProfile = lines[i].strip()
    if not curProfile:
        # A blank profile line would otherwise create a node named ''
        continue

    curFollowerList = _parseNames(lines[i + 1])
    curFolloweeList = _parseNames(lines[i + 2])

    # Followers point at the profile; the profile points at the accounts it follows
    G.add_edges_from((follower, curProfile) for follower in curFollowerList)
    G.add_edges_from((curProfile, followee) for followee in curFolloweeList)

# Lines left over after the last complete record are not used; say so unless they are all blank
leftoverLines = lines[len(lines) - len(lines) % 3:]
if any(line.strip() for line in leftoverLines):
    print(f"Warning: ignored {len(leftoverLines)} trailing line(s) in '{DATA_FILE}' that do not form a complete 3-line record.")

print("Graph constructed from data file.")

#============================================ PROCESS NETWORK ==============================================
# Settings read by the pruning steps below. A value of 0 (numbers) or '' (names) switches a step off.
PrintStats = True # Intended to print stats about the network before and after processing. NOTE(review): not referenced in the visible part of this script
MinimumNumConnections = 0 # Single pass: remove nodes whose numEdges() is <= this value, 0 for off
MinimumNumConnectionsAggressive = 6 # Same test, repeated until a full pass removes nothing, 0 for off
DeleteAccountConnections = '' # Account name: remove the nodes connected to it, then the account itself, '' for off
DeleteAccountConnectionsExFirst = '' # As above, but connected nodes that appear in recScanned are kept, '' for off
RemoveUser = '' # Account name of a single node to remove, '' for off
RemovePopular = 0 # Remove celebrity and meme accounts that follow fewer than x accounts, 0 for off (should be less than min connections)

def numEdges(nodeID):
    """Return how many edges touch ``nodeID`` in ``G``: incoming plus outgoing."""
    incoming = G.in_edges(nodeID)
    outgoing = G.out_edges(nodeID)
    return len(incoming) + len(outgoing)
def listEdges(nodeID):
    """Return one ``(nodeID, neighbour)`` pair for each distinct neighbour of ``nodeID`` in ``G``.

    A neighbour is any node with an edge to or from ``nodeID``. Nodes
    connected in both directions appear once, and ``nodeID`` itself is never
    listed. Index 1 of every pair is the neighbour, which is the element the
    callers in this script read.

    Returns an empty list when ``nodeID`` is not in the graph.
    """
    if nodeID not in G:
        return []

    # G.predecessors / G.successors return iterators, so materialise them
    # before concatenating. dict.fromkeys de-duplicates while keeping
    # first-seen order (predecessors first, then successors).
    neighbours = dict.fromkeys(list(G.predecessors(nodeID)) + list(G.successors(nodeID)))
    neighbours.pop(nodeID, None) # A self-loop must not list the node as its own neighbour
    return [(nodeID, neighbour) for neighbour in neighbours]
    
# Drop accounts that follow fewer than RemovePopular others (typically celebrity and meme accounts)
if RemovePopular != 0:
    popRemoved = 0
    # Walk a snapshot of the node list so nodes can be deleted inside the loop;
    # each node's outgoing-edge count is read at the moment it is visited.
    for candidate in list(G.nodes):
        followingCount = len(G.out_edges(candidate))
        if followingCount < RemovePopular:
            G.remove_node(candidate)
            popRemoved += 1
    print("(RemovePopular) Number of celebs removed: ", popRemoved)

# Single pass: drop every node whose edge count is at or below MinimumNumConnections
deletedNodesTot = 0
if MinimumNumConnections != 0:
    # Snapshot the node list so nodes can be deleted inside the loop; edge
    # counts are read at the moment each node is visited.
    for candidate in list(G.nodes):
        if numEdges(candidate) <= MinimumNumConnections:
            G.remove_node(candidate)
            deletedNodesTot += 1
    print("(MinConnections) Number of nodes removed: ", deletedNodesTot)
                
# Repeated passes: keep dropping nodes whose edge count is at or below
# MinimumNumConnectionsAggressive until a whole pass removes nothing
deletedNodes = 1
deletedNodesTot = 0
if MinimumNumConnectionsAggressive != 0:
    while True:
        deletedNodes = 0 # Nodes removed during the current pass
        for candidate in list(G.nodes): # Snapshot, because the graph shrinks inside the loop
            if numEdges(candidate) <= MinimumNumConnectionsAggressive:
                G.remove_node(candidate)
                deletedNodes += 1
        deletedNodesTot += deletedNodes
        if deletedNodes == 0:
            break
    print("(MinConnectionsAggressive) Number of nodes removed: ", deletedNodesTot)
        
#Remove nodes connected to selected user
deletedNodesTot = 1 # Starts at 1 so the reported total also counts the account's own node
if (DeleteAccountConnections != ''):
    if DeleteAccountConnections in G:
        # Every distinct node with an edge to or from the account. Reading the
        # neighbours straight from the graph as a set means a mutual connection
        # is removed once and the account itself is never in the list.
        connectionsToMain = set(G.predecessors(DeleteAccountConnections))
        connectionsToMain |= set(G.successors(DeleteAccountConnections))
        connectionsToMain.discard(DeleteAccountConnections)

        G.remove_nodes_from(connectionsToMain) #Remove nodes connected to user
        deletedNodesTot += len(connectionsToMain)

        G.remove_node(DeleteAccountConnections) #Remove users node

        #Remove connectionless nodes
        isolatedNodes = [node for node in G.nodes if numEdges(node) == 0]
        G.remove_nodes_from(isolatedNodes)
        deletedNodesTot += len(isolatedNodes)
        print ("(DelteAccountConnections) Number of nodes connected to ", DeleteAccountConnections, " removed: ", deletedNodesTot)
    else:
        # An earlier pruning step may already have removed the account
        print ("Account ", DeleteAccountConnections, " is not in the graph; DeleteAccountConnections skipped.")
    
#Remove nodes connected to selected user that arent one of originals
deletedNodesTot = 1 # Starts at 1 so the reported total also counts the account's own node
if (DeleteAccountConnectionsExFirst != ''):
    if DeleteAccountConnectionsExFirst in G:
        # Every distinct node with an edge to or from the account. Reading the
        # neighbours straight from the graph as a set means a mutual connection
        # is handled once and the account itself is never in the list.
        connectionsToMain = set(G.predecessors(DeleteAccountConnectionsExFirst))
        connectionsToMain |= set(G.successors(DeleteAccountConnectionsExFirst))
        connectionsToMain.discard(DeleteAccountConnectionsExFirst)

        # NOTE(review): recScanned is not defined in the visible part of this
        # script; it must exist before this step is enabled. Neighbours found
        # in it are kept.
        nodesToRemove = [node for node in connectionsToMain if not (node in recScanned)]
        G.remove_nodes_from(nodesToRemove) #Remove nodes connected to user
        deletedNodesTot += len(nodesToRemove)

        G.remove_node(DeleteAccountConnectionsExFirst) #Remove users node

        #Remove connectionless nodes
        isolatedNodes = [node for node in G.nodes if numEdges(node) == 0]
        G.remove_nodes_from(isolatedNodes)
        deletedNodesTot += len(isolatedNodes)
        print ("(DelteAccountConnectionsExFirst) Number of nodes connected to ", DeleteAccountConnectionsExFirst, " removed: ", deletedNodesTot)
    else:
        # An earlier pruning step may already have removed the account
        print ("Account ", DeleteAccountConnectionsExFirst, " is not in the graph; DeleteAccountConnectionsExFirst skipped.")
    
#Remove a selected user
if (RemoveUser != ''):
    if RemoveUser in G:
        G.remove_node(RemoveUser) #Remove users node

        #Remove connectionless nodes
        isolatedNodes = [node for node in G.nodes if numEdges(node) == 0]
        G.remove_nodes_from(isolatedNodes)

        print ("(RemoveUser) Removed node: ", RemoveUser)
    else:
        # G.remove_node raises for a node that is not in the graph, and an
        # earlier pruning step may already have removed this one
        print ("(RemoveUser) Node not in graph, nothing removed: ", RemoveUser)

#======================================== NETWORKX TO PYVIS =============================================
#Aesthetic Options
sizeByConnections = 1 # Truthy: scale each node's size by its number of connections

# Copy the processed NetworkX graph (nodes and edges) into the Pyvis network
net.from_nx(G)

# Optional size scaling: base size 9 plus 1/50 per connection
if sizeByConnections:
    for pyvisNode in net.nodes:
        connectionCount = numEdges(pyvisNode['id'])
        pyvisNode['size'] = (connectionCount / 50) + 9

# Physics layout options for the visualization
net.force_atlas_2based(
    gravity=-50,
    central_gravity=0.01,
    spring_length=100,
    spring_strength=0.07,
    damping=0.8,
    overlap=1,
)
net.show_buttons(filter_=['physics']) # Show only the physics controls in the page

# Write the HTML file and tell the user where it is
net.save_graph("FollowWeb.html")
print("Visualization complete. Check 'FollowWeb.html'.")
