In [6]:
import pandas as pd
import networkx as nx
import numpy as np

# --- 1. Simulate Twitter Interaction Data (100 users) ---
# Population of hypothetical accounts, labelled User_1 .. User_<num_users>.
num_users = 100
users = [f"User_{idx}" for idx in range(1, num_users + 1)]

# Seed NumPy's global RNG so every run draws the same interactions.
np.random.seed(42)

# Five draws per user are attempted. Each draw picks the source first and the
# target second, so the random stream is consumed in a fixed order.
num_interactions = 5 * num_users
candidate_pairs = (
    (np.random.choice(users), np.random.choice(users))
    for _ in range(num_interactions)
)

# A user can't interact with themselves: such draws are discarded rather than
# redrawn, so the final list may hold fewer than num_interactions pairs.
interactions_data = [(src, dst) for src, dst in candidate_pairs if src != dst]

# Edge list as a two-column table: who interacted (Source) with whom (Target).
df_interactions = pd.DataFrame(interactions_data, columns=['Source', 'Target'])

# --- 2. Build NetworkX Graph ---
# Directed graph: an edge Source -> Target means Source interacted with Target.
# A DiGraph keeps at most one edge per ordered pair, so repeated interactions
# between the same two users collapse into a single edge.
G = nx.DiGraph()
G.add_edges_from(df_interactions.values)

# Register every simulated user, including any that never appear in an
# interaction. Without this, isolated users would be missing from the graph and
# every size-dependent measure (density, normalized centralities, the CSV rows)
# would be computed on a smaller population than num_users.
# Done after the edges so nodes already present keep their existing order.
G.add_nodes_from(users)

print(f"Network created with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")

# --- 3. Compute Node-Level and Graph-Level Measures ---

# Degree-based measures: normalized centralities plus the raw counts.
in_degree_centrality = nx.in_degree_centrality(G)
out_degree_centrality = nx.out_degree_centrality(G)
in_degree = dict(G.in_degree())
out_degree = dict(G.out_degree())
total_degree = dict(G.degree())

# Closeness on a DiGraph: NetworkX uses distances *into* each node and, with its
# default Wasserman-Faust scaling, weights the score by the share of nodes that
# can reach it, so the graph does not need to be strongly connected.
closeness_centrality = nx.closeness_centrality(G)

betweenness_centrality = nx.betweenness_centrality(G)

# Eigenvector centrality is computed by power iteration, which may not converge
# within max_iter; in that case the whole measure is reported as NaN instead of
# aborting the analysis.
try:
    eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=1000, tol=1e-06)
except nx.PowerIterationFailedConvergence:
    print("Eigenvector centrality did not converge. Setting to NaN for all nodes.")
    eigenvector_centrality = dict.fromkeys(G.nodes(), np.nan)
except ValueError as e:
    print(f"ValueError during eigenvector centrality: {e}. Setting to NaN for all nodes.")
    eigenvector_centrality = dict.fromkeys(G.nodes(), np.nan)

# Clustering and transitivity use the standard undirected definitions.
# to_undirected() builds a full copy of the graph, so create it once and share it.
G_undirected = G.to_undirected()
node_clustering = nx.clustering(G_undirected)
transitivity = nx.transitivity(G_undirected)

# Remaining graph-level measures are taken on the directed graph itself.
reciprocity = nx.reciprocity(G)
density = nx.density(G)

# --- Calculate Degree Centralization Manually ---
def _degree_centralization(centrality):
    """Return the degree centralization for normalized degree centrality scores.

    The result is the sum of gaps between the highest score and every node's
    score, divided by (number of nodes - 1). With scores already normalized by
    (n - 1), that divisor is the largest possible gap sum in a directed graph
    (a star where one node receives/sends every edge), so the result lies in
    [0, 1].

    Args:
        centrality: Mapping of node -> normalized in- or out-degree centrality,
            with one entry per node of the graph.

    Returns:
        The centralization as a float; 0.0 when there are fewer than two nodes,
        where the measure is not defined.
    """
    n_nodes = len(centrality)
    if n_nodes <= 1:
        return 0.0
    peak = max(centrality.values())
    return sum(peak - score for score in centrality.values()) / (n_nodes - 1)


N = G.number_of_nodes()

# The same formula is applied to both directions.
in_degree_centralization = _degree_centralization(in_degree_centrality)
out_degree_centralization = _degree_centralization(out_degree_centrality)

# --- 4. Organize Data into Pandas DataFrame ---
# One row per node, in the graph's node iteration order.
nodes = list(G.nodes())

# Column name -> (per-node mapping, value to use if a node is absent from it).
node_measures = {
    'In_Degree': (in_degree, 0),
    'Out_Degree': (out_degree, 0),
    'Total_Degree': (total_degree, 0),
    'In_Degree_Centrality': (in_degree_centrality, 0),
    'Out_Degree_Centrality': (out_degree_centrality, 0),
    'Closeness_Centrality': (closeness_centrality, 0),
    'Betweenness_Centrality': (betweenness_centrality, 0),
    'Eigenvector_Centrality': (eigenvector_centrality, np.nan),
    'Clustering_Coefficient': (node_clustering, np.nan),
}
df_nodes = pd.DataFrame({
    'Node': nodes,
    **{
        column: [mapping.get(node, default) for node in nodes]
        for column, (mapping, default) in node_measures.items()
    },
})

# Graph-level measures are stored once, on the first row only.
graph_measures = {
    'Graph_Density': density,
    'Graph_Reciprocity': reciprocity,
    'Graph_Transitivity': transitivity,
    'Graph_In_Degree_Centralization': in_degree_centralization,
    'Graph_Out_Degree_Centralization': out_degree_centralization,
}
df_nodes_summary = df_nodes.copy()
for column, value in graph_measures.items():
    # Every other row stays NaN. The columns are deliberately NOT filled with
    # '' here: that would turn them into mixed float/str object columns, and
    # DataFrame.to_csv already writes NaN as an empty field by default, so the
    # exported CSV is just as clean.
    df_nodes_summary[column] = np.nan
    if not df_nodes_summary.empty:  # an empty graph has no first row to hold the value
        df_nodes_summary.loc[df_nodes_summary.index[0], column] = value

# --- 5. Save to CSV ---
# Derive the file name from num_users so it stays accurate if the simulated
# population size is changed; for 100 users the path is the same as before.
output_csv_path = f'twitter_influencer_network_{num_users}_users_measures.csv'
# index=False: the row index carries no information, nodes are in the 'Node' column.
df_nodes_summary.to_csv(output_csv_path, index=False)

print(f"\nNode-level and graph-level measures for {num_users} users saved to '{output_csv_path}'")
print("\n--- Analysis Complete ---")

Network created with 100 nodes and 483 edges.

Node-level and graph-level measures for 100 users saved to 'twitter_influencer_network_100_users_measures.csv'

--- Analysis Complete ---
