In [34]:
import pandas as pd
import networkx as nx
from modules import ps

In [35]:
# Suppress warnings
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

In [36]:
G = nx.read_edgelist("USA_95_edge_list.csv", delimiter=",")

num_components = nx.number_connected_components(G)

# Get the connected components (as a list of sets of nodes)
connected_components = list(nx.connected_components(G))

# Print the nodes in each connected component
for i, component in enumerate(connected_components, 1):
    print(f"Component {i}: {component}")

Component 1: {'8672', '13013', '14271', '14222', '10606', '14265', '3078', '10739', '10553', '10713', '14229', '13032', '10726', '10573', '14287', '14016', '14415', '10749', '94454', '10625', '14439', '2525', '14239', '14243', '13008', '10599', '11063', '14242', '14268', '12037', '14039', '10710', '14270', '6845', '14257', '267', '10756', '14237', '13004', '9551', '14060', '10760', '14062', '14407', '14204', '14285', '14001', '14403', '13031', '5495', '14225', '14430', '11051', '3553', '14255', '10700', '14003', '14035', '14208', '14201', '6756', '14269', '14250', '2513', '14246', '14002', '10768', '14432', '10732', '14045', '10621', '14028', '12025', '13045', '10574', '10545', '10567', '10706', '13024', '14235', '14447', '6936', '14216', '13026', '14245', '14015', '13035', '14233', '11055', '14253', '14071', '8866', '14463', '14202', '14241', '1265', '12035', '99906', '14275', '12019', '14260', '14288', '7343', '14061', '12027', '14291', '14280', '11058', '12003', '14211', '14212', '1

In [37]:
from scipy.sparse import csr_matrix
edge_list_file = "USA_95_edge_list.csv"  # Replace with your actual file
vote_matrix_file = "sparse_matrix_us_95.csv"  # Replace with your actual file
sparse_df = pd.read_csv(vote_matrix_file, index_col=0)  # Assuming first column is an index
sparse_matrix_us = csr_matrix(sparse_df.values)  # Convert to sparse matrix

In [38]:
from sklearn.metrics import pairwise_distances
from sklearn.manifold import MDS
# Compute pairwise distances using cosine similarity
dist_matrix = pairwise_distances(sparse_matrix_us.toarray(), metric='cosine')

# Apply MDS with 2 components
n_components = 2
mds = MDS(n_components=n_components, dissimilarity="precomputed", normalized_stress='auto', random_state=42)
mds_transformed = mds.fit_transform(dist_matrix)

# Extract the first component
raw_values = mds_transformed[:, 0]

# Standardize to range [-1, 1]
min_val, max_val = raw_values.min(), raw_values.max()
standardized_values = 2 * (raw_values - min_val) / (max_val - min_val) - 1

# Store as dictionary with row indices as keys
opinions_x = {index: value for index, value in zip(sparse_df.index, mds_transformed[:, 0])}
opinions_x

{267: 0.02477234846428479,
 633: -0.02633667594888119,
 830: 0.08474330180554274,
 835: 0.2804496694738713,
 948: 0.12806642373132116,
 1077: 0.13503944261745682,
 1087: -0.22204523307484136,
 1265: 0.18506938655507113,
 1282: -0.06039532198226656,
 1610: -0.16662715907506948,
 2009: -0.05228096983489422,
 2476: 0.13559215444119102,
 2513: 0.442644368782808,
 2525: -0.20613112771615955,
 2533: -0.24313932468194438,
 2594: 0.16192686568315467,
 2605: 0.26174470820473816,
 3078: 0.1375438026379007,
 3224: 0.13615460204095803,
 3242: -0.01377073072910687,
 3321: -0.04606777549699499,
 3553: 0.27161098616561713,
 4957: 0.184326159295119,
 5101: 0.09410755492307488,
 5495: -0.20759069100206606,
 5924: 0.06090409030920346,
 6240: 0.14511657865182104,
 6455: -0.14578161958671534,
 6584: 0.10309252946965333,
 6661: 0.13491157508446786,
 6756: 0.33373655712758116,
 6845: 0.04923356478290302,
 6936: 0.296747582536401,
 7081: 0.8241737949731133,
 7333: 0.00447736258980321,
 7343: 0.08184779157745

In [None]:
def add_weights(edge_list_path, opinions):
    # Load the graph from the edge list
    G = nx.read_edgelist(edge_list_path, delimiter=",")
    # Compute edge weights based on the average opinions of connected nodes
    edge_weights = {
    e: (opinions[int(e[0])] + opinions[int(e[1])]) / 2
    for e in G.edges if int(e[0]) in opinions and int(e[1]) in opinions
}
    
    nx.set_edge_attributes(G, edge_weights, name="pol")

    # Save updated edgelist with weights
    weighted_edge_list_path = edge_list_path.replace(".csv", "_wpol.csv")
    nx.write_edgelist(G, weighted_edge_list_path, delimiter=",", data=["pol"])
    print(f"Weighted edgelist saved to {weighted_edge_list_path}")
    return weighted_edge_list_path
    
add_weights(edge_list_file, opinions_x)

Weighted edgelist saved to USA_95_edge_list_wpol.csv


## Polarization of pca on sparse matrix

In [40]:
# Function to compute polarization score
def calc_pol(edge_list_path, opinions):
    G = nx.read_edgelist(edge_list_path, delimiter=",")
    G = nx.relabel_nodes(G, lambda x: int(x))  # Convert all nodes to integers
    o = {int(node): opinions[int(node)] for node in G.nodes if int(node) in opinions}
    print("Filtered opinions:", o)  # Check if it's empty
    print("Graph has edges:", G.edges)  # Ensure the graph isn't empty
    import numpy as np

    opinion_values = list(o.values())
    print("Opinion Variance:", np.var(opinion_values))
    adjusted_opinions = {k: (v - np.mean(list(o.values()))) * 4 for k, v in o.items()}

    # Scale more extremely
    print("New Variance:", np.var(list(adjusted_opinions.values())))
    missing_opinions = [node for node in G.nodes if node not in o]
    print(f"Missing opinions for nodes: {missing_opinions}")

    return ps.ge(adjusted_opinions, {}, G)  

pol_score = calc_pol(edge_list_file, opinions_x)

print(f"Polarization score: {pol_score}")


Filtered opinions: {14404: 0.07290477990162574, 11023: 0.0445469015991457, 11007: -0.09274833575192301, 11037: -0.02895186545418769, 14219: -0.0460338149534036, 10788: 0.09046338825210933, 14276: -0.14813397026342764, 14410: -0.1560261584503136, 12014: -0.2568593053048343, 14467: 0.1993660384019166, 14026: 0.04036574995918072, 13048: 0.12234695027269502, 12038: 0.10199996863032958, 14016: 0.021851404533973818, 8032: -0.004172248994966094, 10446: -0.2608757935118161, 14451: 0.060065310666913584, 14429: 0.1323534694413563, 14227: -0.1758952486763769, 10587: 0.06454834548359409, 14455: 0.6983453440973869, 14000: -0.12745096169577344, 14259: -0.1316733384221094, 14004: -0.22309626751825948, 14234: 0.009103654315949816, 14061: -0.11611747794465173, 10625: 0.050806128598398906, 13027: 0.11894501153750286, 8507: 0.03431564890109319, 10075: 0.08770629859255412, 8564: 0.11302379581918622, 11005: -0.037308158186893176, 11036: -0.1515588476998805, 5924: 0.06090409030920346, 14419: 0.1715662678315

## Polarization on the nominate score

In [53]:
edge_list_path = "USA_95_edge_list.csv"
G = nx.read_edgelist(edge_list_path, delimiter=",")
G = nx.relabel_nodes(G, lambda x: int(x))  # Convert nodes to integers

# Load the member data
members_df = pd.read_csv("data/USA/H095_members.csv")

# Drop rows where nominate_dim1 is missing (NaN)
members_df = members_df.dropna(subset=["nominate_dim2"])

# Convert ICPSR to integer
members_df["icpsr"] = members_df["icpsr"].astype(int)

# Create opinions_x dictionary {icpsr: nominate_dim1}
opinions_x = dict(zip(members_df["icpsr"], members_df["nominate_dim2"]))

# Ensure only nodes present in the graph are included
opinions = {node: opinions_x[node] for node in G.nodes if node in opinions_x}
# Standardize opinions between -1 and 1 using Min-Max Scaling
min_opinion = min(opinions.values())
max_opinion = max(opinions.values())

opinions = {
    k: 2 * (v - min_opinion) / (max_opinion - min_opinion) - 1
    for k, v in opinions.items()
}


In [54]:
import numpy as np
# Check how many nodes are missing
missing_nodes = set(G.nodes) - set(opinions.keys())
print(f"Missing opinions for {len(missing_nodes)} nodes: {missing_nodes}")

# Standardize opinions to mean 0
mean_opinion = np.mean(list(opinions.values()))
adjusted_opinions = {k: (v - mean_opinion) * 4 for k, v in opinions.items()}

# Compute polarization score
polarization_score = ps.ge(adjusted_opinions, {}, G)
print(f"Polarization Score: {polarization_score}")

Missing opinions for 0 nodes: set()
Polarization Score: 5.595624379678832
