In [15]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import re
from pyvis.network import Network
import numpy as np
from itertools import combinations
from collections import Counter

In [16]:
# Congress session analysed by the rest of this notebook; also used in the
# names of the CSV files written further down.
congression_num = 109

# Read the preprocessed bill table, then keep only the rows whose `congress`
# column equals the chosen session. The name is rebound in place so the
# unfiltered frame is not kept alive.
bills_df = pd.read_csv("../data/preprocessed/preprocessed_dataset.csv")
bills_df = bills_df.loc[bills_df['congress'] == congression_num]

In [17]:
def convert_string_to_list(array_string):
    """Parse the text form of a list of names into a list of stripped strings.

    Every substring enclosed in a matching pair of single or double quotes is
    taken as one element, so both ``"['A', 'B']"`` and ``"['A' 'B']"`` give
    ``['A', 'B']``. The closing quote must be the same character as the
    opening one, which keeps a name such as ``"House Veterans' Affairs"``
    (apostrophe inside double quotes) in one piece.

    Parameters
    ----------
    array_string : object
        Value to parse. Anything that is not already a string is converted
        with ``str()`` first.

    Returns
    -------
    list of str
        The quoted elements in order of appearance, each with surrounding
        whitespace removed. An empty list is returned for a missing (NaN)
        value or when no quoted text is found.
    """
    # A missing cell yields an empty list, so callers can always take len()
    # of the result. This must be checked before the value is stringified.
    if isinstance(array_string, float) and pd.isna(array_string):
        return []
    # Group 1 captures the opening quote and \1 requires the same character to
    # close the element; group 2 is the element text itself.
    quoted = re.findall(r'(["\'])(.*?)\1', str(array_string))
    return [text.strip() for _quote, text in quoted]

# Parse each bill's raw `committees` value into a list of committee names.
bills_df['committees'] = bills_df['committees'].apply(convert_string_to_list)

# Flatten the per-bill lists into a single list of names. Whitespace is
# normalised and blank names are dropped *before* counting, so variants of the
# same name are tallied together rather than cleaned up after the fact.
all_committees = [
    name
    for committee_list in bills_df['committees']
    if isinstance(committee_list, list)
    for name in (raw_name.strip() for raw_name in committee_list)
    if name
]

# Count how many times each committee appears across all bills. value_counts
# orders the result from most to least frequent; the index (committee name)
# becomes the `committee` column and the tally becomes `count`.
committee_df = (
    pd.Series(all_committees)
    .value_counts()
    .rename_axis('committee')
    .reset_index(name='count')
)

committee_df.to_csv(f"../data/network/committee_collaboration_network/committee_counts_{congression_num}.csv", index = False)

# Preview the ten most frequent committees. This must be the last expression
# in the cell for the notebook to render it.
committee_df.head(10)

In [18]:
# Step 1: For every bill, list each unordered pair of distinct committees.
# Names are de-duplicated and blanks dropped first, so a committee repeated on
# one bill neither pairs with itself nor double-counts an edge. Sorting makes
# (A, B) and (B, A) collapse onto the same key.
committee_pairs = []
for committees in bills_df['committees']:
    if not isinstance(committees, list):
        continue  # missing / unparsed value: contributes no pairs
    distinct_committees = sorted({name for name in committees if name})
    committee_pairs.extend(combinations(distinct_committees, 2))

# Step 2: Count on how many bills each pair appears together.
pair_counts = Counter(committee_pairs)

# Step 3: Build the edgelist, one row per pair. The explicit column list keeps
# the schema intact even when there are no pairs at all.
edgelist_df = pd.DataFrame(
    [(first, second, shared_bills) for (first, second), shared_bills in pair_counts.items()],
    columns=['source', 'target', 'weight'],
)

edgelist_df.to_csv(f"../data/network/committee_collaboration_network/edgelist_{congression_num}.csv", index = False)

In [22]:
# Step 4: Build an undirected graph with one edge per committee pair. The
# stored `weight` attribute is log1p of the pair's co-occurrence count.
G = nx.Graph()
G.add_weighted_edges_from(
    (src, dst, np.log1p(pair_count))
    for src, dst, pair_count in zip(
        edgelist_df['source'], edgelist_df['target'], edgelist_df['weight']
    )
)

# Node size per committee: log1p of its bill count, scaled by 5.
node_size_map = {
    name: np.log1p(bill_count) * 5
    for name, bill_count in zip(committee_df['committee'], committee_df['count'])
}

# Step 5: Create the Pyvis network that will be rendered to HTML.
net = Network(notebook=True, height='900px', width='100%', bgcolor='#222222', font_color='white')

# One Pyvis node per graph node; committees missing from committee_df fall
# back to a size of 2.
for name in G.nodes:
    net.add_node(name, label=name, title=name, color='#00c0a3', size=node_size_map.get(name, 2))

# One Pyvis edge per graph edge, all drawn at the same width. The tooltip
# shows the original count, recovered by inverting the log1p transform.
for src, dst, attrs in G.edges(data=True):
    original_count = np.exp(attrs["weight"]) - 1
    net.add_edge(src, dst, title=f'Weight: {original_count:.0f}', color='rgba(255, 255, 255, 1)', width=1)

# Display options handed to Pyvis as text: node/edge colours and fonts,
# hover + navigation-button interaction, and the "repulsion" physics solver.
vis_options = """
var options = {
  "nodes": {
    "borderWidth": 1,
    "borderWidthSelected": 2,
    "color": {
      "border": "#ffffff",
      "background": "#00c0a3",
      "highlight": {
        "border": "#ffffff",
        "background": "#ff7f50"
      },
      "hover": {
        "border": "#ffffff",
        "background": "#ff7f50"
      }
    },
    "font": {
      "color": "#ffffff",
      "size": 24
    }
  },
  "edges": {
    "color": {
      "color": "rgba(255, 255, 255, 0.2)",
      "highlight": "#ff7f50",
      "hover": "#ff7f50"
    },
    "smooth": {
      "type": "continuous"
    },
    "width": 0.5,
    "hoverWidth": 0.5
  },
  "interaction": {
    "hover": true,
    "navigationButtons": true,
    "tooltipDelay": 200
  },
  "physics": {
    "enabled": true,
    "repulsion": {
      "centralGravity": 0.2,
      "springLength": 500,
      "springConstant": 0.05,
      "nodeDistance": 200,
      "damping": 0.09
    },
    "minVelocity": 0.75,
    "solver": "repulsion",
    "timestep": 0.35
  }
}
"""
net.set_options(vis_options)

# Write the network to an HTML file and show it in the notebook.
net.show("committee_collaboration_network.html")

committee_collaboration_network.html
