In [72]:
import pandas as pd
import numpy as np
import utils
from scipy.sparse import csr_matrix

In [73]:
# Dataset identifier: selects the raw input files and names every output file.
name = "H113"

# Raw member and roll-call vote tables for the selected dataset.
members = pd.read_csv(f"data/USA/Raw/{name}_members.csv")
votes = pd.read_csv(f"data/USA/Raw/{name}_votes.csv")

# Cast the join key to pandas' nullable integer dtype in both tables so the
# keys compare consistently (and missing ids stay <NA> instead of forcing floats).
members['icpsr'] = members['icpsr'].astype('Int64')
votes['icpsr'] = votes['icpsr'].astype('Int64')

# Inner join (the pandas default) of every vote row with its member record,
# keeping only the columns used downstream. The explicit .copy() makes the
# column subset an independent frame, so the assignment below does not raise
# SettingWithCopyWarning.
# NOTE(review): the variable is called `merged_118` although the data loaded is
# whatever `name` selects; the name is kept so later cells keep working.
keep_columns = ["icpsr", "state_abbrev", "party_code", "cast_code", "rollnumber", "nominate_dim1", "nominate_dim2"]
merged_118 = votes.merge(members, on='icpsr')[keep_columns].copy()

# Collapse cast codes: 1-3 -> 1 and 4-6 -> 2; every other value is left as is.
# Both conditions are evaluated on the original codes (between() is inclusive).
# NOTE(review): presumably these are Voteview cast codes (1-3 yea variants,
# 4-6 nay variants) - confirm against the codebook.
raw_cast_code = merged_118["cast_code"]
merged_118["cast_code"] = (
    raw_cast_code
    .mask(raw_cast_code.between(1, 3), 1)
    .mask(raw_cast_code.between(4, 6), 2)
)

# Drop rows whose cast code is 7 or 9. Index labels of the surviving rows are
# preserved (not reset), exactly as with DataFrame.drop.
# NOTE(review): any other code outside 1-6 (e.g. 0 or 8) is NOT removed here.
merged_118 = merged_118[~merged_118["cast_code"].isin([7, 9])].copy()

# Persist the filtered table; the (gappy) index is written as the first,
# unnamed column because to_csv writes the index by default.
merged_118.to_csv(f"data/USA/Filtered/{name}_filtered.csv")

# Alias used by the following cells.
USA_data = merged_118

In [74]:
# Build the lookup dictionaries from the filtered vote table using the
# project-local helper. Only US_PAV is used later in this notebook, where it is
# iterated as {person: [(bill, vote), ...]}.
# NOTE(review): the exact contents of US_PA, US_PAVP and US_PP are defined by
# utils.dict_create - check that helper for their layout.
US_PA, US_PAV, US_PAVP, US_PP = utils.dict_create(
    USA_data,
    'icpsr',
    'party_code',
    'rollnumber',
    'cast_code',
)

In [75]:


# Build a people x bills matrix of vote codes from US_PAV, which is iterated
# here as {person: iterable of (bill, vote) pairs}.

# Step 1: Extract unique people (rows) and bills (columns)
people = list(US_PAV.keys())  # Row order == iteration order of US_PAV

# NOTE: `votes` is rebound here from the raw votes DataFrame to the sorted list
# of distinct bill ids; sorting gives a deterministic column order.
votes = sorted({bill for voted_bills in US_PAV.values() for bill, _ in voted_bills})
vote_to_index = {vote: idx for idx, vote in enumerate(votes)}  # bill id -> column index

# Step 2: Construct the coordinate data for the sparse matrix
data = []
row_indices = []
col_indices = []

# enumerate() yields the same row index as people.index(person) (both follow
# the dict's key order) without a linear search of `people` for every vote.
for row_idx, voted_bills in enumerate(US_PAV.values()):
    for bill, vote in voted_bills:  # Iterate over (bill, vote) pairs
        # Codes are stored unchanged (1 stays 1, 2 stays 2); 0 is left free to
        # mean "no recorded vote", the implicit value of unset sparse entries.
        if vote == 1:
            value = 1
        elif vote == 2:
            value = 2
        else:
            continue  # Skip any other vote code

        row_indices.append(row_idx)  # Person's row index
        col_indices.append(vote_to_index[bill])  # Bill's column index
        data.append(value)  # Stored vote code

# Step 3: Create the sparse matrix (CSR format)
# NOTE(review): csr_matrix sums duplicate (row, col) entries, so this assumes
# US_PAV holds at most one vote per person per bill - otherwise the summed
# cell would no longer be a valid code.
num_people_us = len(US_PAV)  # Number of people (rows)
num_votes_us = len(votes)  # Number of bills (columns)
sparse_matrix_us = csr_matrix((data, (row_indices, col_indices)), shape=(num_people_us, num_votes_us))


In [76]:
# Export the vote matrix as a dense, comma-separated table of integers
# (one row per person, one column per bill; unset entries are written as 0).
np.savetxt(
    f"data/USA/Sparsematrix/{name}_sparse_matrix.csv",
    sparse_matrix_us.toarray(),
    fmt="%d",
    delimiter=",",
)