In [None]:
import pandas as pd
import networkx as nx
from modules import ps
import sys

import functions

import utils
import os
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse import csr_matrix
from sklearn.decomposition import PCA
from sklearn.manifold import MDS
import umap
import numpy as np
from itertools import combinations
from scipy.stats import gaussian_kde
from matplotlib.lines import Line2D

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def make_all_edges(df):
    df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value
    df.columns = ("src", "trg")
    df = df.groupby(by = ["src", "trg"]).size().reset_index().rename(columns = {0: "nij"})                        # Counts how many times a pair of congressmen appears in df (i.e. they co-voted)
    return df

def make_pdfs(edges, nodes):
    party_lookup = nodes.set_index("icpsr")["party_code"].to_dict()
    edges["party_src"] = edges["src"].map(party_lookup)
    edges["party_trg"] = edges["trg"].map(party_lookup)
    edges["same_party"] = edges["party_src"] == edges["party_trg"]
    edges["nij"] /= edges["nij"].max()  # Normalize co-vote counts

    sp_pdf = gaussian_kde(edges[edges["same_party"]]["nij"])
    cp_pdf = gaussian_kde(edges[~edges["same_party"]]["nij"])
    return edges, sp_pdf, cp_pdf
def find_intersection(kde1, kde2, init_interval=0.01, scope=[0.3,1], convergence=0.0001):
    x_left, x_right = scope[0], scope[0] + init_interval
    while x_right < scope[1]:
        left, right = kde1(x_left)[0] - kde2(x_left)[0], kde1(x_right)[0] - kde2(x_right)[0]
        if left * right < 0:
            if init_interval <= convergence:
                return x_right
            return find_intersection(kde1, kde2, init_interval / 10, [x_left, x_right])
        x_left, x_right = x_right, x_right + init_interval
    return scope[0]

In [3]:

congresses = ['095', '096', '097','098', '099', '100', '101', '102', '103','104',
              '105', '106', '107','108', '109', '110', '111', '112','113', '114',
              '115', '116','117','118']

# Create Images directory if it doesn't exist
os.makedirs("Images", exist_ok=True)

for congress in congresses:
    input_votes = f"Data/USA/Filtered/H{congress}_filtered_USA_votes.csv"
    try:
        votes_df = pd.read_csv(input_votes)
        edges_df = make_all_edges(votes_df)
        edges_df, sp_pdf, cp_pdf = make_pdfs(edges_df, votes_df)

        x_values = np.linspace(0, 1, 1000)
        y_sp = sp_pdf(x_values)
        y_cp = cp_pdf(x_values)
        y_intersection = np.minimum(y_sp, y_cp)

        plt.figure(figsize=(8, 5))
        plt.plot(x_values, y_sp, label="Same-party", color="black")
        plt.plot(x_values, y_cp, label="Cross-party", color="gray")
        plt.xlabel("Normalized co-vote count")
        plt.ylabel("Density")
        plt.title(f"Density distributions of co-vote counts by party type for {congress}th Congress")
        plt.legend()
        plt.tight_layout()

        output_path = f"Images/Congress_{congress}_density_plot.png"
        plt.savefig(output_path)
        plt.close()

        print(f"Saved: {output_path}")
    except Exception as e:
        print(f"Error processing Congress {congress}: {e}")

  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_095_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_096_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_097_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_098_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_099_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_100_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_101_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_102_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_103_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_104_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_105_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_106_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_107_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_108_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_109_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_110_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_111_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_112_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_113_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_114_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_115_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_116_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_117_density_plot.png


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Congress_118_density_plot.png


In [3]:
def save_network(edges, congress, threshold, output_folder):
    edges = edges[edges["nij"] > threshold]
    edges = edges[["src", "trg"]].astype(int)
    edges.columns = ["Source", "Target"]
    edges_output = os.path.join(output_folder, f"congress{congress}_edges.csv")
    edges.to_csv(edges_output, sep=",", index=False, header=True)
    
    print(f"Network saved: {edges_output}")

def process_congresses(congress_list, input_folder, output_folder):
    os.makedirs(output_folder, exist_ok=True)

    for congress in congress_list:
        print(f"Processing Congress {congress}...")
        input_votes = os.path.join(input_folder, f"H{congress}_filtered_USA_votes.csv")
        
        if not os.path.exists(input_votes):
            print(f"Warning: Data file for Congress {congress} not found, skipping.")
            continue

        # Load data
        votes_df = pd.read_csv(input_votes)

        # Create edgelist
        edges_df = make_all_edges(votes_df)

        # Generate PDFs
        edges_df, sp_pdf, cp_pdf = make_pdfs(edges_df, votes_df)

        # Compute intersection (threshold)
        threshold = find_intersection(sp_pdf, cp_pdf)

        # Save network
        save_network(edges_df, congress, threshold, output_folder)

# List of congress numbers
congresses = ['095', '096', '097','098', '099', '100', '101', '102', '103','104',
              '105', '106', '107','108', '109', '110', '111', '112','113', '114',
              '115', '116','117','118']

# Run the process
input_folder = "Data/USA/Filtered/"
output_folder = "Data/USA/Micheles/"

process_congresses(congresses, input_folder, output_folder)

Processing Congress 095...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress095_edges.csv
Processing Congress 096...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress096_edges.csv
Processing Congress 097...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress097_edges.csv
Processing Congress 098...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress098_edges.csv
Processing Congress 099...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress099_edges.csv
Processing Congress 100...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress100_edges.csv
Processing Congress 101...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress101_edges.csv
Processing Congress 102...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress102_edges.csv
Processing Congress 103...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress103_edges.csv
Processing Congress 104...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress104_edges.csv
Processing Congress 105...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress105_edges.csv
Processing Congress 106...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress106_edges.csv
Processing Congress 107...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress107_edges.csv
Processing Congress 108...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress108_edges.csv
Processing Congress 109...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress109_edges.csv
Processing Congress 110...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress110_edges.csv
Processing Congress 111...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress111_edges.csv
Processing Congress 112...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress112_edges.csv
Processing Congress 113...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress113_edges.csv
Processing Congress 114...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress114_edges.csv
Processing Congress 115...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress115_edges.csv
Processing Congress 116...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress116_edges.csv
Processing Congress 117...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress117_edges.csv
Processing Congress 118...


  df = df.groupby(by = ["rollnumber", "cast_code"]).apply(lambda x: pd.DataFrame(list(combinations(x["icpsr"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/USA/Micheles/congress118_edges.csv


In [3]:
def calc_pol(congress, data_path="data/USA/Raw/", edge_list_folder="Data/USA/Micheles/"):
    edge_list_file = os.path.join(edge_list_folder, f"congress{congress}_edges.csv")
    
    if not os.path.exists(edge_list_file):
        print(f"Warning: Edge list for Congress {congress} not found, skipping.")
        return None  # Skip if no edge list exists

    edge_df = pd.read_csv(edge_list_file)  # No need to assign column names as it's in the first row

    # Ensure the 'Source' and 'Target' are integers
    edge_df['Source'] = pd.to_numeric(edge_df['Source'], errors='coerce')
    edge_df['Target'] = pd.to_numeric(edge_df['Target'], errors='coerce')

    # Create the graph
    G = nx.from_pandas_edgelist(edge_df, 'Source', 'Target')

    # Load members' data
    members_file = os.path.join(data_path, f"H{congress}_members.csv")
    if not os.path.exists(members_file):
        print(f"Warning: Members file for Congress {congress} not found, skipping.")
        return None

    members_df = pd.read_csv(members_file).dropna(subset=["nominate_dim1"])
    members_df["icpsr"] = members_df["icpsr"].astype(int)

    # Create dictionary of opinions
    opinions_x = dict(zip(members_df["icpsr"], members_df["nominate_dim1"]))

    # Filter only existing nodes in the graph
    opinions = {node: opinions_x[node] for node in G.nodes if node in opinions_x}

    if not opinions:  # If no valid opinions exist, skip
        print(f"Warning: No valid opinions found for Congress {congress}, skipping.")
        return None

    # Normalize opinions between -1 and 1
    min_opinion, max_opinion = min(opinions.values()), max(opinions.values())
    opinions = {k: 2 * (v - min_opinion) / (max_opinion - min_opinion) - 1 for k, v in opinions.items()}

    # Compute polarization score
    pol_score = ps.ge(opinions, {}, G)
    
    return pol_score

# List of Congresses
congresses =  ['095', '096', '097','098', '099', '100', '101', '102', '103','104', '105', '106', '107','108', '109', '110', '111', '112','113', '114', '115', '116','117','118']

# Calculate polarization scores
pol_scores = {}
for congress in congresses:
    pol_score = calc_pol(congress)  # Run function
    if pol_score is not None:
        pol_scores[int(congress)] = pol_score  
        print(f"Congress {int(congress)}: Polarization Score = {pol_score}")


Congress 95: Polarization Score = 1.353654438679736
Congress 96: Polarization Score = 1.5003671745374347
Congress 97: Polarization Score = 1.4436064337827585
Congress 98: Polarization Score = 1.6961993573975824
Congress 99: Polarization Score = 2.4015813509771498
Congress 100: Polarization Score = 2.3365137361417174
Congress 101: Polarization Score = 2.024371232136686
Congress 102: Polarization Score = 2.4106411095814666
Congress 103: Polarization Score = 3.2636098914506704
Congress 104: Polarization Score = 4.12551220038706
Congress 105: Polarization Score = 4.791900821678918
Congress 106: Polarization Score = 3.424084005100225
Congress 107: Polarization Score = 3.982219346009936
Congress 108: Polarization Score = 8.399382169821628
Congress 109: Polarization Score = 8.170568849126683
Congress 110: Polarization Score = 8.871014515901612
Congress 111: Polarization Score = 3.809497581334263
Congress 112: Polarization Score = 4.022919027871767
Congress 113: Polarization Score = 10.4672238

In [3]:
import matplotlib.patches as mpatches

party_colors = {
    100: "blue",   # Democrat
    200: "red"     # Republican
}

congresses = ['095', '096', '097','098', '099', '100', '101', '102', '103','104',
              '105', '106', '107','108', '109', '110', '111', '112','113', '114',
              '115', '116','117','118']

for congress in congresses:
    # Load edge list
    edge_path = f"Data/USA/Micheles/congress{congress}_edges.csv"
    edges_df = pd.read_csv(edge_path)

    # Load node attributes (party_code)
    node_path = f"Data/USA/Filtered/H{congress}_filtered_USA_votes.csv"
    nodes_df = pd.read_csv(node_path)

    # Build graph — no edge_attr needed
    G = nx.from_pandas_edgelist(edges_df, source='Source', target='Target')

    # Map party codes to colors using 'icpsr'
    party_dict = nodes_df.set_index("icpsr")["party_code"].to_dict()
    color_map = [party_colors.get(party_dict.get(node, None), "gray") for node in G.nodes()]

    # Generate layout
    pos = nx.spring_layout(G, seed=42)

    # Draw the network
    plt.figure(figsize=(10, 10))
    nx.draw_networkx_nodes(G, pos, node_color=color_map, node_size=30)
    nx.draw_networkx_edges(G, pos, alpha=0.3, width=0.5)
    plt.title(f"Congress {congress} Network Graph")
    plt.axis("off")

    # Add legend
    legend_handles = [
        mpatches.Patch(color="blue", label="Democrat"),
        mpatches.Patch(color="red", label="Republican"),
        mpatches.Patch(color="gray", label="Other/Unknown")
    ]
    plt.legend(handles=legend_handles, loc="lower left", fontsize="small")

    # Save plot
    out_path = f"Images/USA_Networks_paper/congress_{congress}_network.png"
    plt.savefig(out_path, dpi=300, bbox_inches='tight')
    plt.close()


Saving networks using our method


In [8]:
import matplotlib.patches as mpatches

party_colors = {
    100: "blue",   # Democrat
    200: "red"     # Republican
}

congresses = ['095', '096', '097','098', '099', '100', '101', '102', '103','104',
              '105', '106', '107','108', '109', '110', '111', '112','113', '114',
              '115', '116','117','118']

for congress in congresses:
    # Load edge list
    edge_path = f"Data/USA/edgelist/H{congress}_USA_edgelist.csv"
    edges_df = pd.read_csv(edge_path)

    # Load node attributes (party_code)
    node_path = f"Data/USA/Filtered/H{congress}_filtered_USA_votes.csv"
    nodes_df = pd.read_csv(node_path)

    # Build graph — no edge_attr needed
    G = nx.from_pandas_edgelist(edges_df, source='Source', target='Target')

    # Map party codes to colors using 'icpsr'
    party_dict = nodes_df.set_index("icpsr")["party_code"].to_dict()
    color_map = [party_colors.get(party_dict.get(node, None), "gray") for node in G.nodes()]

    # Generate layout
    pos = nx.spring_layout(G, seed=42)

    # Draw the network
    plt.figure(figsize=(10, 10))
    nx.draw_networkx_nodes(G, pos, node_color=color_map, node_size=30)
    nx.draw_networkx_edges(G, pos, alpha=0.3, width=0.5)
    plt.title(f"Congress {congress} Network Graph")
    plt.axis("off")

    # Add legend
    legend_handles = [
        mpatches.Patch(color="blue", label="Democrat"),
        mpatches.Patch(color="red", label="Republican"),
        mpatches.Patch(color="gray", label="Other/Unknown")
    ]
    plt.legend(handles=legend_handles, loc="lower left", fontsize="small")

    # Save plot
    out_path = f"Images/USA_Networks_our/congress_{congress}_network.png"
    plt.savefig(out_path, dpi=300, bbox_inches='tight')
    plt.close()


## Denmark

In [4]:
def make_all_edges(df):
    df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value
    df.columns = ("src", "trg")
    df = df.groupby(by = ["src", "trg"]).size().reset_index().rename(columns = {0: "nij"})                        # Counts how many times a pair of congressmen appears in df (i.e. they co-voted)
    return df

def make_pdfs(edges, nodes):
    party_lookup = nodes.set_index("aktørid")["party"].to_dict()
    edges["party_src"] = edges["src"].map(party_lookup)
    edges["party_trg"] = edges["trg"].map(party_lookup)
    edges["same_party"] = edges["party_src"] == edges["party_trg"]
    edges["nij"] /= edges["nij"].max()  # Normalize co-vote counts

    sp_pdf = gaussian_kde(edges[edges["same_party"]]["nij"])
    cp_pdf = gaussian_kde(edges[~edges["same_party"]]["nij"])
    return edges, sp_pdf, cp_pdf

def find_intersection(kde1, kde2, init_interval=0.01, scope=[0.3,1], convergence=0.0001):
    x_left, x_right = scope[0], scope[0] + init_interval
    while x_right < scope[1]:
        left, right = kde1(x_left)[0] - kde2(x_left)[0], kde1(x_right)[0] - kde2(x_right)[0]
        if left * right < 0:
            if init_interval <= convergence:
                return x_right
            return find_intersection(kde1, kde2, init_interval / 10, [x_left, x_right])
        x_left, x_right = x_right, x_right + init_interval
    return scope[0]

In [5]:
congresses = ['01_05','05_07','07_11','11_15','15_19','19_22']
for congress in congresses:
    input_votes = f"Data/Denmark/Raw/P{congress}_DK.csv"
    try:
        votes_df = pd.read_csv(input_votes)
        edges_df = make_all_edges(votes_df)
        edges_df, sp_pdf, cp_pdf = make_pdfs(edges_df, votes_df)

        x_values = np.linspace(0, 1, 1000)
        y_sp = sp_pdf(x_values)
        y_cp = cp_pdf(x_values)
        y_intersection = np.minimum(y_sp, y_cp)

        plt.figure(figsize=(8, 5))
        plt.plot(x_values, y_sp, label="Same-party", color="black")
        plt.plot(x_values, y_cp, label="Cross-party", color="gray")
        plt.xlabel("Normalized co-vote count")
        plt.ylabel("Density")
        plt.title(f"Density distributions of co-vote counts by party type for {congress}th Congress Denmark")
        plt.legend()
        plt.tight_layout()

        output_path = f"Images/Denmark_Congress_{congress}_density_plot.png"
        plt.savefig(output_path)
        plt.close()

        print(f"Saved: {output_path}")
    except Exception as e:
        print(f"Error processing Congress {congress}: {e}")

  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Denmark_Congress_01_05_density_plot.png


  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Denmark_Congress_05_07_density_plot.png


  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Denmark_Congress_07_11_density_plot.png


  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Denmark_Congress_11_15_density_plot.png


  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Denmark_Congress_15_19_density_plot.png


  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Saved: Images/Denmark_Congress_19_22_density_plot.png


In [6]:
def save_network(edges, congress, threshold, output_folder):
    edges = edges[edges["nij"] > threshold]
    edges = edges[["src", "trg"]].astype(int)
    edges.columns = ["Source", "Target"]
    edges_output = os.path.join(output_folder, f"congress_{congress}_edges.csv")
    edges.to_csv(edges_output, sep=",", index=False, header=True)
    
    print(f"Network saved: {edges_output}")

def process_congresses(congress_list, input_folder, output_folder):
    os.makedirs(output_folder, exist_ok=True)

    for congress in congress_list:
        print(f"Processing Congress {congress}...")
        input_votes = os.path.join(input_folder, f"P{congress}_DK.csv")
        
        if not os.path.exists(input_votes):
            print(f"Warning: Data file for Congress {congress} not found, skipping.")
            continue

        # Load data
        votes_df = pd.read_csv(input_votes)

        # Create edgelist
        edges_df = make_all_edges(votes_df)

        # Generate PDFs
        edges_df, sp_pdf, cp_pdf = make_pdfs(edges_df, votes_df)

        # Compute intersection (threshold)
        threshold = find_intersection(sp_pdf, cp_pdf)

        # Save network
        save_network(edges_df, congress, threshold, output_folder)

# List of congress numbers
congresses = ['01_05','05_07','07_11','11_15','15_19','19_22','22_present']

# Run the process
input_folder = "Data/Denmark/Raw"
output_folder = "Data/Denmark/Micheles/"

process_congresses(congresses, input_folder, output_folder)

Processing Congress 01_05...


  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/Denmark/Micheles/congress_01_05_edges.csv
Processing Congress 05_07...


  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/Denmark/Micheles/congress_05_07_edges.csv
Processing Congress 07_11...


  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/Denmark/Micheles/congress_07_11_edges.csv
Processing Congress 11_15...


  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/Denmark/Micheles/congress_11_15_edges.csv
Processing Congress 15_19...


  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/Denmark/Micheles/congress_15_19_edges.csv
Processing Congress 19_22...


  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/Denmark/Micheles/congress_19_22_edges.csv
Processing Congress 22_present...


  df = df.groupby(by = ["afstemningid", "typeid_x"]).apply(lambda x: pd.DataFrame(list(combinations(x["aktørid"], 2)))) # "combinations" makes all possible pairs of icpsr codes for every vote value


Network saved: Data/Denmark/Micheles/congress_22_present_edges.csv


In [7]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

congresses = ['01_05', '05_07', '07_11', '11_15', '15_19', '19_22', '22_present']

# Fixed color mapping for all unique parties
party_colors = {
    "Alternativet": "#66c2a5",
    "Borgernes Parti": "#d53e4f",
    "Danmarksdemokraterne": "#1f78b4",
    "Dansk Folkeparti": "#a6cee3",
    "Det Konservative Folkeparti": "#33a02c",
    "Enhedslisten": "#e31a1c",
    "Folkaflokkurin": "#fb9a99",
    "Frie Grønne": "#b2df8a",
    "Inuit Ataqatigiit": "#fdbf6f",
    "Javnaðarflokkurin": "#ff7f00",
    "Kristeligt Folkeparti": "#cab2d6",
    "Kristendemokraterne": "#6a3d9a",
    "Liberal Alliance": "#ffff99",
    "Moderaterne": "#b15928",
    "Nunatta Qitornai": "#8dd3c7",
    "Ny Alliance": "#ffffb3",
    "Radikale Venstre": "#bebada",
    "Sambandsflokkurin": "#fb8072",
    "Siumut": "#80b1d3",
    "Socialdemokratiet": "#e41a1c",
    "Socialistisk Folkeparti": "#4daf4a",
    "Tjóðveldi": "#ff69b4",
    "Tjóðveldisflokkurin": "#f781bf",
    "Uden for folketingsgrupperne": "#999999",
    "Venstre": "#377eb8"
}

for congress in congresses:
    # Load edge and node data
    edge_path = f"data/Denmark/Micheles/congress_{congress}_edges.csv"
    node_path = f"data/Denmark/Raw/P{congress}_DK.csv"

    edges_df = pd.read_csv(edge_path)
    nodes_df = pd.read_csv(node_path)

    # Create graph from edgelist
    G = nx.from_pandas_edgelist(edges_df, source='Source', target='Target')

    # Map node IDs to their party
    party_dict = nodes_df.set_index("aktørid")["party"].to_dict()

    # Generate color map for nodes
    color_map = [party_colors.get(party_dict.get(node, None), "#cccccc") for node in G.nodes()]

    # Determine which parties are present in this graph
    present_parties = set(party_dict.get(node, None) for node in G.nodes())
    present_parties = {party for party in present_parties if party in party_colors}

    # Spring layout for visualization
    pos = nx.spring_layout(G, seed=42)

    # Draw graph
    plt.figure(figsize=(12, 12))
    nx.draw_networkx_nodes(G, pos, node_color=color_map, node_size=30)
    nx.draw_networkx_edges(G, pos, alpha=0.3, width=0.5)
    plt.title(f"Danish Parliament {congress} Network", fontsize=14)
    plt.axis("off")

    # Build legend dynamically for only the present parties
    legend_elements = [
        Line2D([0], [0], marker='o', color='w', label=party,
               markerfacecolor=party_colors[party], markersize=10)
        for party in sorted(present_parties)
    ]
    plt.legend(handles=legend_elements, title="Party", loc='lower left', fontsize='small', frameon=True)

    # Save figure
    out_path = f"Images/Denmark_Networks_paper/dk_congress_{congress}_network.png"
    plt.savefig(out_path, dpi=300, bbox_inches='tight')
    plt.close()

