# Graph visualization
## Step 1: Graph creation
Similarly to lab 1, I create the graph from the chosen dataset and clean it.

In [1]:
import pandas as pd
from social_media_analytics.configs import DATA_DIR
import networkx as nx

# Load the organisation/member pairs from the GitHub organisations dataset.
df = pd.read_csv(DATA_DIR / "github-organizations.csv")

# Drop rows where either endpoint is the literal string "None", then drop any
# remaining rows that contain missing values.
df = df[df["Organisation"] != "None"]
df = df[df["member"] != "None"].dropna()

# Build the undirected graph: one edge per (organisation, member) row.
# The edges are added in a single call from the two columns instead of looping
# with DataFrame.iterrows(), which builds a Series per row and is very slow on
# a dataset of this size. Edge order (and therefore node order) is unchanged.
G = nx.Graph()
G.add_edges_from(zip(df["Organisation"], df["member"]))

Because the graph is so large, I create a custom script that lets the physics simulation run and saves the node positions at the end.

Graphs of this size are usually displayed with specialized software that makes better use of hardware optimization, but for simplicity's sake a simple pyvis visualization was used and the physics simulation was given more time to stabilize.

In [2]:
from pyvis.network import Network

# Convert node IDs to strings so every node has a uniform, JSON-friendly ID.
G_str = nx.relabel_nodes(G, lambda x: str(x))

# Build the pyvis network once, with the Barnes-Hut physics layout enabled.
# (Previously a first Network was created and populated, then immediately
# discarded and rebuilt, doubling the conversion cost for a large graph.)
nt = Network(notebook=True)
nt.barnes_hut()
nt.from_nx(G_str)

# Save the graph to an HTML file
html_path = "graph.html"
nt.save_graph(html_path)

# Read the HTML content
with open(html_path, 'r') as file:
    content = file.read()

# Custom JavaScript: once the stabilization iterations are done, print the
# final node positions to the browser's console so they can be copied out.
js_code = """
<script>
function extractPositions(){
    var positions = network.getPositions();
    console.log(positions);  // This will print positions to the browser's console
}
network.once("stabilizationIterationsDone", function() {
    extractPositions();
});
</script>
"""

# Insert the JavaScript before the closing body tag, i.e. after the markup
# that pyvis generated for the page.
content = content.replace("</body>", js_code + "</body>")

# Write the modified content back to the HTML file
with open(html_path, 'w') as file:
    file.write(content)



The network is then reconstructed using the saved positions. Some coloring and sizing is also applied to make the visualization nicer.

In [8]:
from pyvis.network import Network
import json
import math

org_nodes = df["Organisation"].unique().tolist()
mem_nodes = df["member"].unique().tolist()

# Set of organisation IDs as strings: O(1) membership tests inside the node
# loop instead of scanning the list once per node.
org_node_ids = {str(org) for org in org_nodes}

# Get degrees for sizing
degrees = dict(G.degree())
# The saved positions are keyed by string IDs (they come from JSON), so index
# the degrees by the string form of each node as well.
degree_by_id = {str(node): degree for node, degree in degrees.items()}

# Initialize the network
net = Network(notebook=True, width='100%', height='1000px')

# Load positions
with open('graph_positions_final.json', 'r') as file:
    positions = json.load(file)

# Add nodes with specific colors and sizes
for node, pos in positions.items():
    # Organisations are blue, everything else (members) is green.
    color = "blue" if node in org_node_ids else "green"

    # Logarithmic scaling keeps high-degree hubs from dwarfing the rest while
    # still giving low-degree nodes a visible base size.
    size = 10 + 30 * math.log(degree_by_id[node] + 1)  # Adjust the base size and scaling factor as needed

    net.add_node(node, x=pos['x'], y=pos['y'], color=color, size=size, title=node)  # title is displayed on hover

# Add edges, using the same string IDs the nodes were registered under.
for source, target in G.edges():
    net.add_edge(str(source), str(target), color="rgba(128, 128, 128, 0.5)")  # gray and semi-transparent

# Disable physics (positions are already fixed) and adjust some aesthetics
net.toggle_physics(False)
net.set_edge_smooth('dynamic')
net.show('network.html')

network.html
