In [1]:
import pandas as pd
from pyvis.network import Network
import uuid

# Read the spreadsheet into a DataFrame.
df = pd.read_excel('network_no self-join.xlsx')

# Coerce Count to a numeric dtype (values that cannot be parsed become NaN),
# then discard every row that is missing a value in any required column.
required_columns = ['school name', 'target', 'Count', 'source']
numeric_count = pd.to_numeric(df['Count'], errors='coerce')
df['Count'] = numeric_count
df = df.dropna(subset=required_columns)

# Aggregating data to calculate node totals (sum of Count per destination).
# The raw totals are kept in their own dict so the tooltips can report them;
# node_sizes below holds only the scaled display sizes.
node_totals = df.groupby('target')['Count'].sum().to_dict()

# Scaling node sizes for visualization: 20 is the base size and the
# destination with the largest total gets 100. If the largest total is not
# positive, every node falls back to the base size instead of dividing by zero
# (same guard as the edge-width scaling).
max_node_size = max(node_totals.values(), default=1)
node_sizes = {
    k: 20 + 80 * (v / max_node_size) if max_node_size > 0 else 20
    for k, v in node_totals.items()
}

# Creating a pyvis network
net = Network(height="800px", width="100%", directed=True, notebook=False)

# Adding nodes (destinations): drawn at the scaled size, while the tooltip
# shows the real total rather than the scaled display value
for node, size in node_sizes.items():
    net.add_node(node, label=node, size=size, title=f"Total Alumni: {int(node_totals[node])}")

# Adding nodes for sources (if not already added as destinations)
sources = df['source'].unique()
for source in sources:
    if source not in node_sizes:
        net.add_node(source, label=source, size=20, title=f"Source: {source}")

# Adding edges with thickness based on Count.
# The largest Count is identical for every row, so it is computed once here
# instead of on each loop iteration.
max_count = df['Count'].max()

# Only three columns are needed per edge, so walk them in lockstep rather
# than materialising a full row Series for every record.
for source, target, count in zip(df['source'], df['target'], df['Count']):
    # Scaling edge width relative to the largest count (largest edge gets 10);
    # fall back to 1 when the maximum is not positive to avoid dividing by zero
    width = 1 + 9 * (count / max_count) if max_count > 0 else 1
    net.add_edge(source, target, value=width, title=f"Alumni: {int(count)}")

# Display settings passed to pyvis as an options string:
#   - nodes: label font size 12
#   - edges: arrowheads on the "to" end at half scale, continuous smoothing
#   - physics: forceAtlas2Based solver with the constants listed below
network_options = """
var options = {
  "nodes": {
    "font": {
      "size": 12
    }
  },
  "edges": {
    "arrows": {
      "to": {
        "enabled": true,
        "scaleFactor": 0.5
      }
    },
    "smooth": {
      "type": "continuous"
    }
  },
  "physics": {
    "forceAtlas2Based": {
      "gravitationalConstant": -50,
      "centralGravity": 0.01,
      "springLength": 100
    },
    "minVelocity": 0.75,
    "solver": "forceAtlas2Based"
  }
}
"""
net.set_options(network_options)

# Build the output name from a random UUID (hex form) so that repeated runs
# do not overwrite each other's files.
unique_suffix = uuid.uuid4().hex
output_file = f"network_graph_{unique_suffix}.html"

# Write the graph to the HTML file and report where it went.
net.save_graph(output_file)
print(f"Network graph saved as {output_file}")

Network graph saved as network_graph_8601ed8d8b144836ac415a00bf3439ff.html
