In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import os
from matplotlib.colors import ListedColormap, Normalize
import numpy as np

## USA Statistics Per Party

In [18]:
# Congress numbers 95-118, zero-padded to three digits to match the input file names.
congresses = [f"{number:03d}" for number in range(95, 119)]

input_folder = "Data/USA/Filtered"
output_folder = "Images/heatmaps_party_agreement_USA"
os.makedirs(output_folder, exist_ok=True)

# Party codes and cast codes kept for the analysis.
# NOTE(review): cast codes 1 and 2 are labelled "no" and "yes" below, following the
# original column labelling -- confirm against the encoding used in the filtered files.
# The labelling does not influence the agreement rate, which is symmetric in both counts.
main_parties = [100, 200]
cast_codes = [1, 2]

# The colormap and normalisation are the same for every congress, so build them once.
# A party that cast at least one kept vote always has an agreement rate >= 0.5, so a 0
# in the pivot can only be the fill value for "no kept votes on this roll call".
# The lowest colormap entry is set to black so those cells stand out.
coolwarm_colors = sns.color_palette("coolwarm", as_cmap=True)(np.linspace(0, 1, 256))
coolwarm_colors[0] = [0, 0, 0, 1]
custom_cmap = ListedColormap(coolwarm_colors)
norm = Normalize(vmin=0, vmax=1)

for congress in congresses:
    file_path = f"{input_folder}/H{congress}_filtered_USA_votes.csv"
    df = pd.read_csv(file_path)

    # Only keep the two retained cast codes from the two main parties
    df = df[df["cast_code"].isin(cast_codes) & df["party_code"].isin(main_parties)]

    # Count votes per (party, roll call, cast code). The reindex guarantees both
    # cast-code columns exist even when a file contains only one of them; without it
    # the column rename below raises a length-mismatch ValueError.
    vote_counts = (
        df.groupby(["party_code", "rollnumber", "cast_code"])
        .size()
        .unstack(fill_value=0)
        .reindex(columns=cast_codes, fill_value=0)
    )
    vote_counts.columns = ["no_votes", "yes_votes"]
    vote_counts["total_votes"] = vote_counts["no_votes"] + vote_counts["yes_votes"]

    # Agreement rate = share of the party's kept votes that went with its majority side.
    vote_counts["agreement_rate"] = (
        vote_counts[["no_votes", "yes_votes"]].max(axis=1) / vote_counts["total_votes"]
    )

    # One row per roll call, one column per party; missing combinations become 0.
    pivot_data = vote_counts.reset_index().pivot_table(
        index="rollnumber", columns="party_code", values="agreement_rate", fill_value=0
    )

    # Plot and save
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.heatmap(pivot_data, cmap=custom_cmap, norm=norm, cbar=True, ax=ax)
    ax.set_title(f"Party Agreement Rate by Roll Call – {congress}th Congress")
    ax.set_xlabel("Party Code")
    ax.set_ylabel("Roll Number")

    output_path = os.path.join(output_folder, f"agreement_heatmap_congress{congress}.png")
    fig.savefig(output_path, bbox_inches='tight')
    # Close explicitly so figures do not accumulate in memory across the loop.
    plt.close(fig)

## Danish Statistics Per Party

In [17]:
# Period identifiers used in the Danish input file names.
congresses = ['01_05','05_07','07_11','11_15','15_19','19_22']
input_folder = "Data/Denmark/Raw"
output_folder = "Images/heatmaps_party_agreement_danish"
os.makedirs(output_folder, exist_ok=True)

# Vote type ids kept for the analysis: Yes (1) and No (2).
vote_type_ids = [1, 2]

# Custom colormap: 0 values -> black, rest -> coolwarm. Built once because it is the
# same for every period. A party that cast at least one yes/no vote always has an
# agreement rate >= 0.5, so a 0 in the pivot can only be the fill value for
# "party cast no yes/no vote in this division".
base_cmap = sns.color_palette("coolwarm", as_cmap=True)
base_array = base_cmap(np.linspace(0, 1, 256))
base_array[0] = [0, 0, 0, 1]  # Set first color (0) to black
custom_cmap = ListedColormap(base_array)
norm = Normalize(vmin=0, vmax=1)

for congress in congresses:
    input_votes = f"{input_folder}/P{congress}_DK.csv"
    df = pd.read_csv(input_votes)

    # Filter only Yes (1) and No (2) votes
    df = df[df["typeid_x"].isin(vote_type_ids)]

    # Group and count votes. The reindex guarantees both vote-type columns exist even
    # when a file contains only one of them; without it the column rename below raises
    # a length-mismatch ValueError.
    vote_counts = (
        df.groupby(["party", "afstemningid", "typeid_x"])
        .size()
        .unstack(fill_value=0)
        .reindex(columns=vote_type_ids, fill_value=0)
    )

    # Rename columns in the same order as vote_type_ids: 1 -> yes, 2 -> no.
    # (Previously the two labels were swapped relative to the Yes=1 / No=2 encoding.)
    vote_counts.columns = ["yes_votes", "no_votes"]

    # Calculate totals and agreement rates (share of votes on the party's majority side)
    vote_counts["total_votes"] = vote_counts["yes_votes"] + vote_counts["no_votes"]
    vote_counts["agreement_rate"] = (
        vote_counts[["yes_votes", "no_votes"]].max(axis=1) / vote_counts["total_votes"]
    )

    # Pivot data for heatmap: one row per division, one column per party
    pivot_data = vote_counts.reset_index().pivot_table(
        index="afstemningid", columns="party", values="agreement_rate", fill_value=0
    )

    # Plot and save
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.heatmap(pivot_data, cmap=custom_cmap, norm=norm, cbar=True, ax=ax)
    ax.set_title(f"Party Agreement Rate by Roll Call – Denmark {congress}")
    ax.set_xlabel("Party")
    ax.set_ylabel("Roll Number")

    output_path = os.path.join(output_folder, f"agreement_heatmap_dk_{congress}.png")
    fig.savefig(output_path, bbox_inches='tight')
    # Close explicitly so figures do not accumulate in memory across the loop.
    plt.close(fig)


## Network Statistics

In [38]:
# Read both edge lists, then build one graph per country from the
# "Source" / "Target" columns using networkx's default graph type.
USA_edgelist = pd.read_csv("USA_edgelist.csv")
Denmark_edgelist = pd.read_csv("Denmark_edgelist.csv")
G_USA, G_Denmark = (
    nx.from_pandas_edgelist(edge_table, source="Source", target="Target")
    for edge_table in (USA_edgelist, Denmark_edgelist)
)

In [40]:

# Function to compute network statistics
def _top_nodes(scores, count):
    """Return the `count` highest-scoring (node, score) pairs, best first."""
    return sorted(scores.items(), key=lambda item: item[1], reverse=True)[:count]


def network_stats(G, name, top_n=5):
    """Print summary statistics and centrality rankings for a graph.

    Prints the node count, edge count and density, followed by the `top_n`
    nodes by degree, betweenness and eigenvector centrality, and finally the
    average clustering coefficient.

    Parameters
    ----------
    G : networkx graph
        The graph to summarise.
    name : str
        Label used in the report header.
    top_n : int, default 5
        How many top-ranked nodes to list for each centrality measure.

    Returns
    -------
    None
        The report is written to stdout only.
    """
    print(f"\n=== {name} Network Statistics ===")
    print(f"Number of Nodes: {G.number_of_nodes()}")
    print(f"Number of Edges: {G.number_of_edges()}")
    print(f"Density: {nx.density(G):.4f}")

    # Eigenvector centrality and average clustering both raise on a graph with no
    # nodes, so stop after the basic counts instead of crashing mid-report.
    if G.number_of_nodes() == 0:
        print("Graph has no nodes; centrality and clustering statistics skipped.")
        return

    # Degree Centrality
    top_degrees = _top_nodes(nx.degree_centrality(G), top_n)
    print(f"Top {top_n} Most Connected Nodes (Degree Centrality): {top_degrees}")

    # Betweenness Centrality
    top_betweenness = _top_nodes(nx.betweenness_centrality(G), top_n)
    print(f"Top {top_n} Most Influential Nodes (Betweenness Centrality): {top_betweenness}")

    # Eigenvector Centrality (Who is connected to important nodes?)
    # The power iteration may fail to converge; report that and carry on so the
    # remaining statistics are still printed.
    try:
        eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=1000)
    except nx.PowerIterationFailedConvergence:
        print("Eigenvector Centrality: power iteration did not converge within 1000 iterations")
    else:
        top_eigenvector = _top_nodes(eigenvector_centrality, top_n)
        print(f"Top {top_n} Most Influential Nodes (Eigenvector Centrality): {top_eigenvector}")

    # Clustering Coefficient (Measures local connectedness)
    avg_clustering = nx.average_clustering(G)
    print(f"Average Clustering Coefficient: {avg_clustering:.4f}")

# Compute statistics for both networks
# Report the statistics for each country's network in turn.
for country_graph, country_label in ((G_USA, "USA"), (G_Denmark, "Denmark")):
    network_stats(country_graph, country_label)



=== USA Network Statistics ===
Number of Nodes: 442
Number of Edges: 27113
Density: 0.2782
Top 5 Most Connected Nodes (Degree Centrality): [(21993, 0.4557823129251701), (21948, 0.4557823129251701), (21375, 0.45351473922902497), (20519, 0.45351473922902497), (22161, 0.45124716553287986)]
Top 5 Most Influential Nodes (Betweenness Centrality): [(21993, 0.4991629862342669), (21718, 0.4965380768296815), (22383, 0.3934476466575691), (22334, 0.05156348527223958), (21367, 0.039756511082800726)]
Top 5 Most Influential Nodes (Eigenvector Centrality): [(21948, 0.0775347192328533), (22368, 0.07736423889976839), (21375, 0.07736084091397669), (31101, 0.07733350883193064), (21508, 0.07726795923561838)]
Average Clustering Coefficient: 0.8423

=== Denmark Network Statistics ===
Number of Nodes: 220
Number of Edges: 6657
Density: 0.2763
Top 5 Most Connected Nodes (Degree Centrality): [(12, 0.5525114155251141), (217, 0.5114155251141552), (20384, 0.5068493150684932), (20382, 0.502283105022831), (172, 0.5