In [1]:
# Import Required Libraries
import os
from collections import Counter

import community  # Louvain Community Detection
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from arango import ArangoClient
from dotenv import load_dotenv
from networkx.algorithms.community import girvan_newman

In [2]:
# Load environment variables from the .env file.
# override=True lets values from .env take precedence over variables that
# already exist in the process environment. This matters because "USER" (and
# on some shells "HOST") is normally pre-set by the operating system; with the
# default override=False the .env value would be ignored and the OS login name
# would silently be used as the database username.
load_dotenv(override=True)

# Connection settings for the ArangoDB client, read from the environment.
# Each value is None when the corresponding variable is not defined.
host = os.getenv("HOST")
username = os.getenv("USER")
password = os.getenv("PASSWORD")
db_name = "_system"

In [3]:
# Create the ArangoDB client for the configured host, then open a handle to
# the target database with the credentials loaded from the environment.
# NOTE(review): verify=True presumably makes the driver check the connection
# immediately - confirm against the python-arango documentation.
client = ArangoClient(hosts=host)
db = client.db(
    db_name,
    username=username,
    password=password,
    verify=True,
)

In [4]:
# Name of the edge collection to analyse. Swap in another edge collection
# (for example "hasLocation") to study a different relationship.
edge_collection = "eventActor"

# Fetch every document of that collection and hold them in a plain list.
edges = [edge_doc for edge_doc in db.collection(edge_collection).all()]

In [5]:
# Build the edge table in one expression: one row per edge document, with the
# "_from"/"_to" endpoint columns renamed to "start_node"/"end_node".
df = pd.DataFrame(edges).rename(
    columns={"_from": "start_node", "_to": "end_node"}
)

In [6]:
# Build the NetworkX graph from the edge table. No create_using argument is
# given, so NetworkX produces its default graph type (an undirected Graph);
# edge direction from the database is therefore not preserved in G.
G = nx.from_pandas_edgelist(df, source="start_node", target="end_node")

In [None]:
# Overview plot of the whole graph. Labels are disabled and nodes/edges are
# drawn small and thin so the overall structure stays readable.
draw_options = {"with_labels": False, "node_size": 10, "width": 0.15}

plt.figure(figsize=(15, 9))
nx.draw(G, **draw_options)
plt.title("Geopolitical Influence Graph")
plt.show()

# 1. Compute Centrality Measures

In [None]:
# Degree centrality: how many edges a node has, as computed by
# nx.degree_centrality.
degree_centrality = nx.degree_centrality(G)

# Rank node ids by score (highest first; ties keep graph order) and keep the
# ten best as (node, score) pairs.
ranked_by_degree = sorted(degree_centrality, key=degree_centrality.get, reverse=True)
top_degree = [(node, degree_centrality[node]) for node in ranked_by_degree[:10]]
print("Top 10 Nodes by Degree Centrality:", top_degree)

In [None]:
# Betweenness centrality: how often a node lies on shortest paths between
# pairs of other nodes.
betweenness_centrality = nx.betweenness_centrality(G)

# Rank node ids by score (highest first; ties keep graph order) and keep the
# ten best as (node, score) pairs.
ranked_by_betweenness = sorted(
    betweenness_centrality, key=betweenness_centrality.get, reverse=True
)
top_betweenness = [
    (node, betweenness_centrality[node]) for node in ranked_by_betweenness[:10]
]
print("Top 10 Nodes by Betweenness Centrality:", top_betweenness)

In [None]:
# PageRank (influence score): each node's score as computed by nx.pagerank
# with its default settings.
pagerank = nx.pagerank(G)

# Rank node ids by score (highest first; ties keep graph order) and keep the
# ten best as (node, score) pairs.
ranked_by_pagerank = sorted(pagerank, key=pagerank.get, reverse=True)
top_pagerank = [(node, pagerank[node]) for node in ranked_by_pagerank[:10]]
print("Top 10 Nodes by PageRank:", top_pagerank)

# 2. Identify Key Political Figures

In [None]:
# Select every node whose id contains one of the names of interest, then
# report the degree centrality and PageRank computed earlier for each match.
figure_names = ("Barack_Obama", "Vladimir_Putin")
political_figures = [
    node for node in G.nodes if any(name in node for name in figure_names)
]

for figure in political_figures:
    # .get(..., 0) falls back to 0 for a node missing from either score dict.
    print(f"{figure}: Degree = {degree_centrality.get(figure, 0)}, PageRank = {pagerank.get(figure, 0)}")

# 3. Find Paths of Influence

In [None]:
# Shortest Path Between Two Figures
source_node = "Actor/Barack_Obama"
target_node = "Actor/Vladimir_Putin"

# nx.has_path raises NodeNotFound when either endpoint is absent from the
# graph, so check membership before asking for a path.
missing_nodes = [node for node in (source_node, target_node) if node not in G]

if missing_nodes:
    print("Node(s) not found in graph:", missing_nodes)
elif nx.has_path(G, source_node, target_node):
    path = nx.shortest_path(G, source=source_node, target=target_node)
    print(f"Shortest Path between {source_node} and {target_node}:", path)
else:
    print("No direct path found.")

# NOTE(review): G is created with nx.from_pandas_edgelist without
# create_using, i.e. as an undirected graph. On an undirected graph both
# nx.descendants and nx.ancestors return every node reachable from the given
# node, so the two sets below carry no direction-of-influence information.

# Nodes Influenced by a Political Figure
if source_node in G:
    descendants = nx.descendants(G, source_node)
    print(f"Nodes influenced by {source_node}:", descendants)

# Nodes Influencing a Political Figure
if target_node in G:
    predecessors = nx.ancestors(G, target_node)
    print(f"Nodes that influenced {target_node}:", predecessors)

# 4. Detect Communities (Clusters of Influence)

In [None]:
# Louvain Community Detection
# Louvain is randomised; a fixed random_state makes the partition (and hence
# the community sizes printed below) reproducible across kernel restarts.
partition = community.best_partition(G, random_state=42)

# partition maps node -> community id; count how many nodes fall in each one.
# dict(...) keeps the printed output a plain dict, ordered by first occurrence.
community_counts = dict(Counter(partition.values()))
print("Community Distribution:", community_counts)

# Girvan-Newman Community Detection
# girvan_newman returns an iterator over successively finer partitions; only
# the first one (the coarsest split) is taken here.
communities = girvan_newman(G)
top_communities = next(communities)  # First level of hierarchy
print("Communities:", [list(c) for c in top_communities])

# 5. Influence Over Time

In [None]:
# Fetch (date, influence_score) for every Event document of one actor.
# - The actor is passed as a bind variable (@actor) instead of being spliced
#   into the query text, so it can be changed without editing the AQL.
# - SORT doc.date returns the events in date order so the line plot of
#   influence over time is drawn left to right.
#   NOTE(review): assumes doc.date sorts chronologically (e.g. ISO-8601
#   strings or numeric timestamps) - confirm against the Event schema.
query = """
FOR doc IN Event
    FILTER doc.actor == @actor
    SORT doc.date
    RETURN {date: doc.date, influence: doc.influence_score}
"""

# Materialise the cursor into a list: a cursor can only be consumed once, so
# re-running a later cell that iterates over `events` would otherwise see no data.
events = list(db.aql.execute(query, bind_vars={"actor": "Barack_Obama"}))

In [None]:
# Extract Date and Influence Score
# Collect the pairs first so the empty case can be handled: unpacking
# zip(*[]) into two names raises ValueError when the query returned nothing.
records = [(event["date"], event["influence"]) for event in events]

if records:
    dates, influence_scores = zip(*records)

    # Plot Influence Over Time
    plt.figure(figsize=(12, 6))
    plt.plot(dates, influence_scores, marker="o")
    plt.xlabel("Time")
    plt.ylabel("Influence Score")
    plt.title("Influence Over Time for Barack Obama")
    plt.xticks(rotation=45)
    plt.show()
else:
    print("No events returned by the query; nothing to plot.")

# 6. Visualize the Influence of a Political Figure

In [None]:
# Extract a Subgraph for Barack Obama
# nx.descendants raises when the node is not in the graph, so only build and
# draw the subgraph when source_node is actually present.
if source_node in G:
    # The subgraph holds source_node plus every node reachable from it.
    sub_nodes = list(nx.descendants(G, source_node)) + [source_node]
    sub_G = G.subgraph(sub_nodes)

    plt.figure(figsize=(12, 8))
    # A fixed seed makes the spring layout (and so the figure) reproducible.
    pos = nx.spring_layout(sub_G, seed=42)
    nx.draw(sub_G, pos, with_labels=True, node_color="skyblue", edge_color="gray", node_size=100, font_size=8)
    plt.title("Influence Network of Barack Obama")
    plt.show()
else:
    print(f"{source_node} is not in the graph; nothing to draw.")