In [1]:
import os, datetime, json, pandas as pd, pprint 

# Date stamp (YYMMDD) used to tag each generated version of the data.
today = datetime.date.today().strftime("%y%m%d")

# Raw S3 directory -- CSV files, one sub-folder per simulation UID.
cdir = "/media/jsaxon/brobdingnag/data/s3/res/"

# Intermediate json directory.
jdir = "data/c4_redux/"

# Make sure today's dated output folder exists under the json directory.
os.makedirs(os.path.join(jdir, today), exist_ok = True)


# This function calculates/updates election outcomes
# by merging tract-to-district assignments from simulation
# with tract-level voting data saved to voting/votes/*.csv
def update_election_json(s, vote_mod = "", cut_mod = ""):
    """Recompute election outcomes for every simulated map of one state.

    Reads the state's tract-level returns from
    ``voting/votes/{s}[_{vote_mod}].csv`` and walks the line-delimited json
    file ``jdir/{s}[_{cut_mod}]_redux.json``.  For each line, the tract to
    district assignment in ``cdir/{UID}/final.csv`` is joined to the votes
    and summed by district, and one json record is appended to
    ``jdir/{today}/{s}[_{cut_mod}][_{vote_mod}]_redux.json``.

    Parameters
    ----------
    s : str
        State label used in the file names (e.g. "nc").
    vote_mod : str, optional
        Suffix selecting an alternate votes file (e.g. "senate").
    cut_mod : str, optional
        Suffix selecting an alternate simulation file (e.g. "race").

    Returns
    -------
    None
        Results are written to disk; progress is printed to stdout.

    Notes
    -----
    Relies on the module-level names ``jdir``, ``cdir`` and ``today``.
    Each output record holds ``UID``, ``Score`` and ``PopulationDeviation``
    (the latter two rounded to 3 places), ``DemSeats`` (year -> number of
    districts where D votes exceed R votes) and ``RepFrac`` (R share of
    the two-party vote for every district and year, as one flat list).
    """

    # Options for modified files -- NC race and TX senate.
    if vote_mod: vote_mod = "_" + vote_mod
    if cut_mod:  cut_mod  = "_" + cut_mod

    print (s + vote_mod, "::", end = " ")

    # Pre-load all voting data for this state.
    votes = pd.read_csv("voting/votes/{}{}.csv".format(s, vote_mod), index_col = "rn")

    # Use that file to figure out the available years: e.g., D08 --> 2008.
    years = set(votes.columns.str.replace("R", "").str.replace("D", "").astype(int) + 2000)

    # Two-digit column suffixes ("08", "12", ...), computed once rather than
    # for every simulated map.  Iteration order is that of the set, which
    # also fixes the order in which years are appended to RepFrac.
    suffixes = ["{:02d}".format(y % 100) for y in years]

    sim_path = jdir + "/{}{}_redux.json".format(s, cut_mod)
    out_path = jdir + "/{}/{}{}{}_redux.json".format(today, s, cut_mod, vote_mod)

    # Both handles live in one `with`, so the output is flushed and closed
    # even if a line fails.  The input is opened first: a missing simulation
    # file then no longer leaves an empty output file behind.
    with open(sim_path) as fi, open(out_path, "w") as json_output:
        for li, line in enumerate(fi):

            # Progress marker every 1000 lines.
            if not li % 1000: print(li//1000, end = " ", flush = True)
            j = json.loads(line)

            # For each line, use the UID to locate the matching CSV file from S3.
            fcsv = cdir + j["UID"] + "/final.csv"

            # Read the tract --> district assignment and join it with votes --
            #   the state's voting returns, at the tract level.
            # Aggregate by district to get vote and seat shares.
            df = pd.read_csv(fcsv, names = ["rn", "cd"], index_col = "rn")
            df = df.join(votes).groupby("cd").sum()

            DemSeats, RepFrac = {}, []
            for y in suffixes:

                # Democratic Seats -- where Dems got more votes than Reps.
                # int() turns the numpy scalar into a json-serialisable value.
                DemSeats["20" + y] = int((df["D" + y] > df["R" + y]).sum())

                # Total 2-party votes cast, per district...
                # This is for the N seats tables.
                df["T" + y] = df["D" + y] + df["R" + y]

                # Vote shares per race, over all seats (and years).
                # This is for the competitiveness table.
                # .tolist() yields native Python floats.
                RepFrac.extend((df["R" + y] / df["T" + y]).round(3).tolist())

            # Prepare the dictionary
            jout = {"UID"                 : j["UID"],
                    "Score"               : round(j["Score"], 3),
                    "PopulationDeviation" : round(j["PopulationDeviation"], 3),
                    "DemSeats" : DemSeats, "RepFrac": RepFrac}

            # Write to file as json.  json.dumps handles quoting/escaping,
            # and a district with no two-party votes (0/0) is written as NaN,
            # which json.loads reads back; the previous str()-based output
            # produced a bare `nan` that could not be parsed.
            json_output.write(json.dumps(jout) + "\n")

    print()
    

In [2]:
# States to process with the default vote and simulation files.
pol_states = "fl il la md mn nc pa tn tx va wi".split()

# Rebuild the election json for each state in turn.
for s in pol_states:
    update_election_json(s)

fl :: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 
il :: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
la :: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 
md :: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 
mn :: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 
nc :: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
pa :: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
tn :: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 
tx :: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 
va :: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 
wi :: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22

In [3]:
# Modified-input reruns: NC reads the "_race" simulation file,
# TX reads the "_senate" votes file.
update_election_json(s = "nc", cut_mod = "race")
update_election_json(s = "tx", vote_mod = "senate")

nc :: 0 1 2 3 4 5 6 7 8 9 10 11 12 
tx_senate :: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 
