In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

In [None]:
# Maps 'G_<congress>' (three-digit, zero-padded, e.g. 'G_095') to the graph
# built from that congress's edge-list CSV.
congress_graph = {}

for congress in range(95, 119):
    # File names and dictionary keys both use the zero-padded congress number.
    congress_id = f"{congress:03d}"
    edge_frame = pd.read_csv(f"data/USA/edgelist/H{congress_id}_USA_edgelist.csv")
    congress_graph[f"G_{congress_id}"] = nx.from_pandas_edgelist(
        edge_frame, source="Source", target="Target"
    )


In [31]:
# Keys of the graphs to analyze: congresses 103-107 plus 116.
inter = [f"G_{number}" for number in (*range(103, 108), 116)]


In [None]:
def analyze_graphs(graph_dict, interesting_graphs):
    """
    Perform basic graph analysis on selected graphs.

    Parameters:
    graph_dict (dict): Dictionary containing all graphs with keys like 'G_095'
    interesting_graphs (list): List of graph names to analyze

    Returns:
    dict: Analysis results for each graph, keyed by graph name. Each value is a
        dict with the keys 'nodes', 'edges', 'density', 'avg_clustering',
        'diameter', 'avg_shortest_path', 'connected_components',
        'largest_component_size', 'assortativity' and 'top_5_degree_centrality'.
        'diameter' and 'avg_shortest_path' are float('inf') when the graph is
        not connected. Names missing from graph_dict, and graphs with no nodes,
        are reported with a printed warning and left out of the result.
    """
    results = {}

    for graph_name in interesting_graphs:
        if graph_name not in graph_dict:
            print(f"Warning: {graph_name} not found in graph dictionary")
            continue

        G = graph_dict[graph_name]

        # Several metrics below are undefined for a graph without nodes, so
        # report it and move on instead of aborting the whole batch.
        node_count = G.number_of_nodes()
        if node_count == 0:
            print(f"Warning: {graph_name} has no nodes; skipping")
            continue

        # Enumerate the components once and reuse them for the component
        # count, the largest-component size and the connectivity check.
        components = list(nx.connected_components(G))
        is_connected = len(components) == 1

        # Path-based metrics are only computed for connected graphs.
        if is_connected:
            diameter = nx.diameter(G)
            avg_shortest_path = nx.average_shortest_path_length(G)
        else:
            diameter = float('inf')
            avg_shortest_path = float('inf')

        # Rank nodes by (rounded) degree centrality, highest first.
        ranked_centrality = sorted(
            ((node, round(cent, 4)) for node, cent in nx.degree_centrality(G).items()),
            key=lambda pair: pair[1],
            reverse=True,
        )

        results[graph_name] = {
            'nodes': node_count,
            'edges': G.number_of_edges(),
            'density': nx.density(G),
            'avg_clustering': nx.average_clustering(G),
            'diameter': diameter,
            'avg_shortest_path': avg_shortest_path,
            'connected_components': len(components),
            'largest_component_size': len(max(components, key=len)),
            'assortativity': nx.degree_assortativity_coefficient(G),
            'top_5_degree_centrality': ranked_centrality[:5],
        }

    return results


In [32]:
# Compute the metrics for the graphs named in `inter`.
results = analyze_graphs(congress_graph, inter)

In [33]:
# Show the per-graph metrics dictionary as this cell's output.
results

{'G_103': {'nodes': 441,
  'edges': 29967,
  'density': 0.30887445887445886,
  'avg_clustering': 0.8511068230877277,
  'diameter': 9,
  'avg_shortest_path': 3.0153370439084726,
  'connected_components': 1,
  'largest_component_size': 441,
  'assortativity': 0.5520445231862126,
  'top_5_degree_centrality': [(15077, 0.5318),
   (14058, 0.5205),
   (15438, 0.5182),
   (29351, 0.5182),
   (14233, 0.5182)]},
 'G_104': {'nodes': 435,
  'edges': 32768,
  'density': 0.3471370305630595,
  'avg_clustering': 0.8590738984694473,
  'diameter': 13,
  'avg_shortest_path': 5.2385295831347,
  'connected_components': 1,
  'largest_component_size': 435,
  'assortativity': 0.7890825903060621,
  'top_5_degree_centrality': [(14288, 0.5253),
   (29329, 0.5253),
   (39312, 0.5253),
   (29130, 0.5253),
   (15006, 0.5253)]},
 'G_105': {'nodes': 441,
  'edges': 29126,
  'density': 0.3002061430632859,
  'avg_clustering': 0.8377498064890021,
  'diameter': 7,
  'avg_shortest_path': 2.764873222016079,
  'connected_c