# Committee Collaboration Network Construction and Visualization

This notebook demonstrates the process to construct Committee Collaboration Network and generate interactive visualizations saved as HTML files. 

## Import Libraries

First, we import the necessary libraries for data manipulation and visualization.

In [180]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import re
from pyvis.network import Network
import numpy as np
from itertools import combinations
from collections import Counter

## Load Data

In [181]:
congression_num = 108
bills_df = pd.read_csv("../../data/preprocessed/preprocessed_dataset.csv")
bills_df = bills_df[bills_df['congress'] == congression_num]

## Convert Committee String to List Format

In [182]:
def convert_string_to_list(array_string):
    array_string = str(array_string)
    if isinstance(array_string, float) and pd.isna(array_string):
        return array_string
    # Use regex to find patterns between single or double quotes
    pattern = re.compile(r'["\'](.*?)["\']')
    elements = pattern.findall(array_string)
    # Strip any extraneous spaces or characters
    elements = [element.strip() for element in elements]
    return elements

# Extract subjects
bills_df['committees'] = bills_df['committees'].apply(lambda x: convert_string_to_list(x))

bills_df.head(100)


Unnamed: 0,id,committees,congress,bill_type,bill_number,bill_version,title,text_length,policy_area,legislative_subjects
0,108hconres530ih,[Foreign Affairs Committee],108.0,House Concurrent Resolution,530.0,Introduced (House),Encouraging The Former Yugoslav Republic of Ma...,538.0,International Affairs,"['Europe', 'Greece', 'Macedonia', 'Names', 'Tr..."
1,108hconres407ih,"[Judiciary Committee, Armed Services Committee]",108.0,House Concurrent Resolution,407.0,Introduced (House),Saluting the life and courage of the late Comm...,617.0,Commemorations,"['Aggression', 'Armed Forces and National Secu..."
2,108hconres441ih,[Energy and Commerce Committee],108.0,House Concurrent Resolution,441.0,Introduced (House),Recognizing the essential role of nuclear powe...,354.0,Energy,"['Energy policy', 'Energy security', 'Environm..."
3,108hconres428ih,"[Armed Services Committee, Transportation and ...",108.0,House Concurrent Resolution,428.0,Introduced (House),Recommending that Congress not provide funds f...,790.0,Armed Forces and National Security,"['Appropriations', 'Ballistic missile defenses..."
4,108hconres426ih,[Foreign Affairs Committee],108.0,House Concurrent Resolution,426.0,Introduced (House),Supporting the goals and ideals of Taiwanese-A...,308.0,International Affairs,"['Asian American ethnic groups', 'Commemoratio..."
...,...,...,...,...,...,...,...,...,...,...
95,108hconres457ih,[Judiciary Committee],108.0,House Concurrent Resolution,457.0,Introduced (House),Expressing the sense of Congress with respect ...,532.0,Crime and Law Enforcement,"['Civil Rights and Liberties, Minority Issues'..."
96,108hconres416ih,[Agriculture Committee],108.0,House Concurrent Resolution,416.0,Introduced (House),Designating a week as Extension Living Well Week.,713.0,Commemorations,"['Agricultural extension work', 'Agriculture a..."
97,108hconres501ih,[Education and the Workforce Committee],108.0,House Concurrent Resolution,501.0,Introduced (House),"Honoring the life and work of Duke Ellington, ...",344.0,Commemorations,"['Anniversaries', 'Arts, Culture, Religion', '..."
98,108hconres460ih,"[Foreign Relations Committee, Foreign Affairs ...",108.0,House Concurrent Resolution,460.0,Introduced (House),Regarding the security of Israel and the princ...,525.0,International Affairs,"['Arab-Israeli conflict', 'Armed Forces and Na..."


In [183]:
# Flatten the list of subjects, ignoring NaN values
all_committees = [committee for sublist in bills_df['committees'] if isinstance(sublist, list) for committee in sublist]
all_committees = list(filter(None, all_committees))

print(set(all_committees))

{'Armed Services Committee', 'Small Business and Entrepreneurship Committee', 'Ethics Committee', 'Finance Committee', 'Appropriations Committee', 'Aging (Special) Committee', 'Oversight and Accountability Committee', 'Transportation and Infrastructure Committee', 'Judiciary Committee', 'Energy and Natural Resources Committee', 'Financial Services Committee', 'Budget Committee', 'Homeland Security Committee', 'United States Senate Caucus on International Narcotics Control', 'Education and the Workforce Committee', 'Intelligence (Permanent Select) Committee', 'Ethics (Select) Committee', 'Ways and Means Committee', 'Natural Resources Committee', 'Veterans', 'Affairs Committee', 'Commerce, Science, and Transportation Committee', 'Banking, Housing, and Urban Affairs Committee', 'Energy and Commerce Committee', 'Foreign Relations Committee', 'Indian Affairs Committee', 'Environment and Public Works Committee', 'Small Business Committee', 'Foreign Affairs Committee', 'Intelligence (Select) 

In [184]:
# Count the frequency of each subject
committee_df = pd.Series(all_committees).value_counts().reset_index()
committee_df.columns = ['committee', 'count']
committee_df['committee'] = committee_df['committee'].str.strip()
committee_df = committee_df[committee_df['committee'].astype(bool)]
committee_df.reset_index(drop=True, inplace=True)
committee_df.head(100)

committee_df.to_csv(f"../../data/network/committee_collaboration_network/committee_counts.csv", index = False)

In [185]:
# Extract pairs of committees for each bill
committee_pairs = []
for committees in bills_df['committees']:
    if len(committees) > 1:
        committee_pairs.extend(combinations(sorted(committees), 2))

In [186]:
# Count the occurrences of each pair
pair_counts = Counter(committee_pairs)

# Construct the edgelist DataFrame
edgelist_data = {'source': [], 'target': [], 'weight': []}
for (source, target), weight in pair_counts.items():
    if source != target:
        edgelist_data['source'].append(source)
        edgelist_data['target'].append(target)
        edgelist_data['weight'].append(weight)

edgelist_df = pd.DataFrame(edgelist_data)

edgelist_df.to_csv(f"../../data/network/committee_collaboration_network/edgelist.csv", index = False)

In [187]:
# Create the graph
G = nx.Graph()
for index, row in edgelist_df.iterrows():
    G.add_edge(row['source'], row['target'], weight=(row['weight']))  # Apply log transform

# Map committee counts to node sizes
node_size_map = {}
for index, row in committee_df.iterrows():
    node_size_map[row['committee']] = np.log1p(row['count']) * 5

# Create and configure Pyvis network
net = Network(notebook=True, height='900px', width='100%', bgcolor='#222222', font_color='white')

# Add nodes with sizes based on log-transformed counts
for node in G.nodes:
    size = node_size_map.get(node, 2)  # Default smaller size if not found in committee_df
    net.add_node(node, label=node, title=node, color='#00c0a3', size=size)

# Add edges with fixed width
for source, target, data in G.edges(data=True):
    net.add_edge(source, target, title=f'Weight: {np.exp(data["weight"])-1:.0f}', color='rgba(255, 255, 255, 1)', width=1)



  net.add_edge(source, target, title=f'Weight: {np.exp(data["weight"])-1:.0f}', color='rgba(255, 255, 255, 1)', width=1)


In [188]:
adjacency_matrix = nx.adjacency_matrix(G).todense()
adjacency_df = pd.DataFrame(adjacency_matrix, index=G.nodes(), columns=G.nodes())
adjacency_df.to_csv(f'../../data/network/committee_collaboration_network/adjacency_matrix.csv')

../../plots/committee_collaboration_network/committee_collaboration_network.html
