In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from community import best_partition

In [3]:
# Load one networkx graph per congress number (95 through 118) from its edge-list CSV.
# Keys use a zero-padded, three-digit congress number: "G_095", ..., "G_118".
congress_graph = {}

for congress in range(95, 119):
    # File names and dictionary keys share the same zero-padded identifier.
    congress_id = f"{congress:03d}"
    edges = pd.read_csv(f"data/USA/edgelist/H{congress_id}_USA_edgelist.csv")
    # Keep the key and the graph in separate names: the key string must not be
    # rebound to the Graph object, and the loop variable stays an int.
    congress_graph[f"G_{congress_id}"] = nx.from_pandas_edgelist(
        edges, source="Source", target="Target"
    )


In [4]:
# Names of the graphs singled out for closer analysis ("G_<congress number>").
inter = [f"G_{congress}" for congress in (102, 103, 104, 105, 106, 107, 116)]


In [None]:
def analyze_graphs(graph_dict, interesting_graphs, random_state=None):
    """Compute structural and community metrics for selected graphs.

    Parameters
    ----------
    graph_dict : dict
        Maps graph names to undirected networkx graphs.
    interesting_graphs : iterable of str
        Names of the graphs to analyze. Names missing from ``graph_dict`` and
        graphs without any nodes are reported with a warning and skipped.
    random_state : int, numpy.random.RandomState or None, optional
        Forwarded to ``best_partition``. Louvain community detection is
        randomized, so pass a fixed seed to make ``community_count``,
        ``modularity`` and ``largest_community_size`` reproducible. The
        default ``None`` keeps the library's default behaviour.

    Returns
    -------
    dict
        ``{graph_name: metrics}``, where ``metrics`` is a dict with the keys
        ``nodes``, ``edges``, ``density``, ``avg_clustering``, ``diameter``,
        ``avg_shortest_path``, ``connected_components``,
        ``largest_component_size``, ``assortativity``, ``community_count``,
        ``modularity`` and ``largest_community_size``. ``diameter`` and
        ``avg_shortest_path`` are ``float('inf')`` for disconnected graphs.
    """
    results = {}

    for graph_name in interesting_graphs:
        if graph_name not in graph_dict:
            print(f"Warning: {graph_name} not found in graph dictionary")
            continue

        G = graph_dict[graph_name]
        if G.number_of_nodes() == 0:
            # Connectivity, clustering and community metrics are undefined on
            # an empty graph (networkx raises), so skip instead of crashing.
            print(f"Warning: {graph_name} has no nodes; skipping")
            continue

        # Compute the components once and derive connectivity, component
        # count and largest component from the same traversal.
        components = list(nx.connected_components(G))
        is_connected = len(components) == 1

        analysis = {
            'nodes': G.number_of_nodes(),
            'edges': G.number_of_edges(),
            'density': nx.density(G),
            'avg_clustering': nx.average_clustering(G),
            # Path-based metrics are only defined on connected graphs.
            'diameter': nx.diameter(G) if is_connected else float('inf'),
            'avg_shortest_path': (
                nx.average_shortest_path_length(G) if is_connected else float('inf')
            ),
            'connected_components': len(components),
            'largest_component_size': max(len(component) for component in components),
            'assortativity': nx.degree_assortativity_coefficient(G),
        }

        # Community detection (Louvain): best_partition returns {node: community_id};
        # invert it into {community_id: [nodes]}.
        partition = best_partition(G, random_state=random_state)
        communities = {}
        for node, community_id in partition.items():
            communities.setdefault(community_id, []).append(node)
        community_nodes = list(communities.values())

        analysis['community_count'] = len(community_nodes)
        analysis['modularity'] = nx.community.modularity(G, community_nodes)
        analysis['largest_community_size'] = max(len(nodes) for nodes in community_nodes)

        results[graph_name] = analysis

    return results


In [6]:
# Compute the metrics for the graphs named in `inter`.
results = analyze_graphs(congress_graph, inter)

In [7]:
# Show the metrics dictionary as this cell's output.
results

{'G_102': {'nodes': 442,
  'edges': 30879,
  'density': 0.31683442607812357,
  'avg_clustering': 0.8406380419608419,
  'diameter': 5,
  'avg_shortest_path': 2.0933501605770513,
  'connected_components': 1,
  'largest_component_size': 442,
  'assortativity': 0.6864467871159042,
  'community_count': 3,
  'modularity': 0.3003476641900126,
  'largest_community_size': 217},
 'G_103': {'nodes': 441,
  'edges': 29967,
  'density': 0.30887445887445886,
  'avg_clustering': 0.8511068230877277,
  'diameter': 9,
  'avg_shortest_path': 3.0153370439084726,
  'connected_components': 1,
  'largest_component_size': 441,
  'assortativity': 0.5520445231862126,
  'community_count': 3,
  'modularity': 0.39668436171881494,
  'largest_community_size': 232},
 'G_104': {'nodes': 435,
  'edges': 32768,
  'density': 0.3471370305630595,
  'avg_clustering': 0.8590738984694473,
  'diameter': 13,
  'avg_shortest_path': 5.2385295831347,
  'connected_components': 1,
  'largest_component_size': 435,
  'assortativity': 