In [11]:
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 20 10:57:47 2023

@author: robert
"""


import json
import pandas as pd
import os
import helper_methods as hm



# Columns accumulated per district, grouped by the CSV file they are read from.
_VOTE_COLS_2016 = ('R16', 'D16', 'L16')
_VOTE_COLS_2020 = ('R20', 'D20', 'L20')
_DEMOGRAPHIC_COLS = ('POP', 'BPOP', 'VAP', 'BVAP', 'BVAP_TOT', 'HVAP')


def _read_geoid_table(csv_path, float_cols=()):
    """Read ``csv_path`` once and index it by its ``GEOID`` column."""
    # An empty mapping is turned into None so pandas infers every dtype itself.
    dtype = {col: float for col in float_cols} or None
    return pd.read_csv(csv_path, dtype=dtype).set_index('GEOID')


def _empty_district_totals():
    """Return a fresh zeroed accumulator for one district (key order matters for output)."""
    return {
        'POP': 0,
        'BPOP': 0,
        'VAP': 0,
        'BVAP': 0,
        'BVAP_TOT': 0,
        'HVAP': 0,
        'ratio_BVAP': 0,
        'ratio_HVAP': 0,
        'R16': 0,
        'D16': 0,
        'L16': 0,
        'R20': 0,
        'D20': 0,
        'L20': 0,
        'Competiveness': 0,
        'Compactness': 0
    }


def summarize_soln(folder_path, json_path, data_dir='../data/county/PartisanData'):
    """Aggregate demographic and vote totals per district for one solution file.

    The solution JSON at ``json_path`` must contain a ``'nodes'`` list whose
    entries carry a ``'district'`` and a ``'GEOID'``. Each node's GEOID is
    looked up in three per-state CSV files (2016 votes, 2020 votes and the
    summarized demographics) and the matching row is added to the totals of
    the node's district. GEOIDs missing from a CSV are skipped for that CSV.

    After accumulation, each district gets ``ratio_BVAP`` / ``ratio_HVAP``
    (share of VAP) plus the values returned by the ``helper_methods``
    functions (``BVAP_reps``, ``Dem_PVI``, ``Dem_reps``, ``Rep_reps``), and a
    ``'Totals'`` entry sums the three ``*_reps`` values over all districts.
    The result is also written next to the input as
    ``<json_path without extension>_summarized_data.json``.

    Parameters
    ----------
    folder_path : str
        Not used by this function; kept so existing callers keep working.
    json_path : str
        Path to the solution JSON. The first two characters of its file name
        are lower-cased and used as the state prefix of the CSV file names.
    data_dir : str, optional
        Directory holding the ``{state}_cnty_census_2020_voter_data_*.csv``
        files. Defaults to the location the notebook has always used.

    Returns
    -------
    dict
        Mapping of district -> totals dict, plus the ``'Totals'`` entry.
    """
    with open(json_path, 'r') as f:
        data = json.load(f)

    # Get the state abbreviation from the file name
    state = os.path.basename(json_path)[:2].lower()

    # Construct the paths to the CSV files
    voter_data_2016_path = f'{data_dir}/{state}_cnty_census_2020_voter_data_2016.csv'
    voter_data_2020_path = f'{data_dir}/{state}_cnty_census_2020_voter_data_2020.csv'
    demographics_path = f'{data_dir}/{state}_cnty_census_2020_voter_data_2020_summarized.csv'

    # Load and index every lookup table exactly once. Doing this inside the
    # node loop re-parsed three CSV files per node, which dominated runtime.
    sources = [
        (_read_geoid_table(voter_data_2016_path, _VOTE_COLS_2016), _VOTE_COLS_2016),
        (_read_geoid_table(voter_data_2020_path, _VOTE_COLS_2020), _VOTE_COLS_2020),
        (_read_geoid_table(demographics_path), _DEMOGRAPHIC_COLS),
    ]

    # Total demographics and voter data for each district
    district_totals = {}

    for node in data['nodes']:
        district = node['district']
        geoid = node['GEOID']

        if district not in district_totals:
            district_totals[district] = _empty_district_totals()
        totals = district_totals[district]

        for table, columns in sources:
            if geoid in table.index:
                row = table.loc[geoid]
                for col in columns:
                    totals[col] += row[col]

    # Snapshot the district keys before 'Totals' is added to the same dict.
    districts = list(district_totals)

    for district in districts:
        totals = district_totals[district]

        # A district whose nodes matched no demographic rows still has VAP == 0;
        # report a 0.0 share instead of failing on the division.
        vap = totals['VAP']
        totals['ratio_BVAP'] = totals['BVAP'] / vap if vap else 0.0
        totals['ratio_HVAP'] = totals['HVAP'] / vap if vap else 0.0

        totals['BVAP_reps'] = hm.calculate_black_rep(totals['ratio_BVAP'])
        totals['Dem_PVI'] = hm.pvi(totals)
        totals['Dem_reps'] = hm.calculate_dem_prob_value(totals['Dem_PVI'])
        totals['Rep_reps'] = hm.calculate_rep_prob_value(totals['Dem_PVI'])

    district_totals['Totals'] = {
        key: sum(district_totals[district][key] for district in districts)
        for key in ('BVAP_reps', 'Dem_reps', 'Rep_reps')
    }

    # Write the district_totals dictionary to a JSON file next to the input.
    # splitext drops the extension without assuming it is exactly 5 characters.
    summary_path = os.path.splitext(json_path)[0] + '_summarized_data.json'
    with open(summary_path, 'w') as f:
        json.dump(district_totals, f, indent=4)

    return district_totals
# NOTE(review): `folder_path` and `json_path` are not defined in any cell shown
# here — presumably left over from earlier kernel state; define them before this
# call so the notebook survives Restart & Run All.
district_totals = summarize_soln(folder_path, json_path)

In [16]:
# Tabulate the summary: one row per key of district_totals (including 'Totals').
pd.DataFrame.from_dict(district_totals, orient='index')

Unnamed: 0,POP,BPOP,VAP,BVAP,BVAP_TOT,HVAP,ratio_BVAP,ratio_HVAP,R16,D16,L16,R20,D20,L20,Competiveness,Compactness,BVAP_reps,Dem_PVI,Dem_reps,Rep_reps
0,803084.0,117926.0,629679.0,89262.0,184227.0,48202.0,0.141758,0.07655,205079.556853,101727.907512,8803.548139,242630.0,137248.0,5653.0,0.0,0.0,0.031488,-17.046793,0.000192,0.999808
1,672586.0,123159.0,537662.0,92355.0,190013.0,30922.0,0.171771,0.057512,156978.507443,89325.256076,5325.394202,203300.778904,126679.419261,3015.417476,0.0,0.0,0.049014,-14.361848,0.001386,0.998614
2,765446.0,144620.0,595591.0,108679.0,222985.0,30955.0,0.182473,0.051974,208944.932854,99714.821632,7494.190756,243037.12026,128183.24052,4429.134345,0.0,0.0,0.056888,-18.272015,7e-05,0.99993
3,762319.0,146770.0,583282.0,109767.0,225246.0,35751.0,0.188189,0.061293,181913.584927,101492.614173,6952.969066,224638.433352,137852.642013,4186.96288,0.0,0.0,0.061485,-14.769518,0.001046,0.998954
4,795496.0,172771.0,631354.0,131180.0,268745.0,43160.0,0.207776,0.068361,162365.492557,153052.743924,11597.605798,198830.221096,207311.580739,5514.582524,0.0,0.0,0.079459,-1.906058,0.345649,0.654351
5,659423.0,285939.0,517860.0,217520.0,442929.0,26643.0,0.420036,0.051448,104774.510293,157734.270856,5642.261105,117492.87974,188271.75948,2980.865655,0.0,0.0,0.51602,9.140657,0.971565,0.028435
6,660081.0,289348.0,519040.0,222581.0,450661.0,15491.0,0.428832,0.029845,135332.415073,152325.385827,3388.030934,155173.566648,165994.357987,2136.03712,0.0,0.0,0.53991,0.629139,0.55214,0.44786
Totals,,,,,,,,,,,,,,,,,1.334265,,1.872047,5.127953


In [12]:
import os
import fnmatch

def run_summarize(folder_path=".."):
    """Run ``summarize_soln`` on every solution JSON found under ``folder_path``.

    A file is processed when all of the following hold:

    * its name matches ``*.json`` and does not contain ``'summarize'``
      (so previously written ``*_summarized_data.json`` outputs are skipped);
    * its path contains ``'results-'`` or ``'results_'``;
    * its path does not contain a ``results`` directory nested directly in
      another directory whose name ends in ``results``.

    Each processed path is printed before it is summarized.

    Parameters
    ----------
    folder_path : str, optional
        Root of the directory tree to walk. Defaults to the parent of the
        current directory.
    """
    for root, _dirnames, filenames in os.walk(folder_path):
        for filename in filenames:
            if not fnmatch.fnmatch(filename, '*.json') or 'summarize' in filename:
                continue

            json_path = os.path.join(root, filename)
            if 'results-' not in json_path and 'results_' not in json_path:
                continue

            # Compare on a separator-normalised copy: the nested-folder check
            # used a literal backslash, so it never matched on POSIX paths.
            if 'results/results' in json_path.replace('\\', '/'):
                continue

            print(json_path)
            summarize_soln(folder_path, json_path)
# Summarize every matching results JSON found under the parent directory.
run_summarize()                

..\results-ALMSLASC_county_step-ordered\AL-county-step-ordered-25.json
..\results-ALMSLASC_county_step-ordered\AL-county-step-ordered-40.json
..\results-ALMSLASC_county_step-ordered\AL-county-step-ordered-90.json
..\results-ALMSLASC_county_step-ordered\LA-county-step-ordered-25.json
..\results-ALMSLASC_county_step-ordered\LA-county-step-ordered-40.json
..\results-ALMSLASC_county_step-ordered\LA-county-step-ordered-90.json
..\results-ALMSLASC_county_step-ordered\MS-county-step-ordered-25.json
..\results-ALMSLASC_county_step-ordered\MS-county-step-ordered-40.json
..\results-ALMSLASC_county_step-ordered\MS-county-step-ordered-90.json
..\results-ALMSLASC_county_step-ordered\SC-county-step-ordered-25.json
..\results-ALMSLASC_county_step-ordered\SC-county-step-ordered-40.json
..\results-ALMSLASC_county_step-ordered\SC-county-step-ordered-90.json
..\results-ALMSLA_county_step-expmax\AL-county-step-exp-10.json
..\results-ALMSLA_county_step-expmax\AL-county-step-exp-15.json
..\results-ALMSLA_co

KeyboardInterrupt: 

In [1]:
import os
import fnmatch
import json
import csv

def _write_ranked_csv(csv_path, records, sort_index):
    """Write ``records`` to ``csv_path`` in descending order of column ``sort_index``."""
    ranked = sorted(records, key=lambda record: record[sort_index], reverse=True)
    with open(csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['full_path', 'filename', 'BVAP_reps', 'Dem_reps', 'Rep_reps'])
        writer.writerows(ranked)


def extract_summaries(folder_path="..", output_dir="."):
    """Collect the ``'Totals'`` of every summary file into per-state ranked CSVs.

    Walks ``folder_path`` for files named ``*_summarized_data.json``, groups
    them by the first two characters of the file name (the state), and for
    each state writes three CSV files into ``output_dir``:
    ``{state}_bvap_sorted.csv``, ``{state}_dem_sorted.csv`` and
    ``{state}_rep_sorted.csv``. All three hold the same rows
    (``full_path, filename, BVAP_reps, Dem_reps, Rep_reps``), sorted in
    descending order of the respective value.

    Summary files without a ``'Totals'`` entry contribute no row; they were
    previously recorded with the values of whichever file was read before
    them (or raised ``NameError`` when they came first).

    Parameters
    ----------
    folder_path : str, optional
        Root of the directory tree to search. Defaults to the parent of the
        current directory.
    output_dir : str, optional
        Directory the CSV files are written to. Defaults to the current
        directory.
    """
    suffix = '_summarized_data.json'

    # One pass over the tree, grouping rows by state as they are found.
    records_by_state = {}
    for root, _dirnames, filenames in os.walk(folder_path):
        for filename in filenames:
            if not fnmatch.fnmatch(filename, '*' + suffix):
                continue

            # Register the state even if this file yields no row, so a state
            # with only incomplete summaries still gets (header-only) CSVs.
            state_records = records_by_state.setdefault(filename[:2], [])

            summary_path = os.path.join(root, filename)
            with open(summary_path, 'r') as f:
                data = json.load(f)

            if 'Totals' not in data:
                continue

            totals = data['Totals']
            state_records.append((
                summary_path.replace(suffix, '.json'),  # path of the solution file
                filename.replace(suffix, ''),
                totals['BVAP_reps'],
                totals['Dem_reps'],
                totals['Rep_reps'],
            ))

    # Tuple positions of the value each output file is ranked by.
    rankings = (('bvap', 2), ('dem', 3), ('rep', 4))
    for state, records in records_by_state.items():
        for label, sort_index in rankings:
            csv_path = os.path.join(output_dir, f'{state}_{label}_sorted.csv')
            _write_ranked_csv(csv_path, records, sort_index)


In [2]:
# Build the per-state sorted CSV files from the *_summarized_data.json outputs.
extract_summaries()