# Preparing County-Level Choropleth Map Data

To streamline the Streamlit app's computations, we prepare the static data involved in the choropleth map.

In [1]:
import os
import pandas as pd
import numpy as np

import sys
sys.path.append("..") # Adds higher directory to python modules path.
from data_loaders import *

# Relative path handed to the data loaders below as their `prefix` argument.
prefix = '../'
# State abbreviations used below to filter the county income table.
states_of_interest = ['GA', 'MA', 'WI']

## AP Performance Data from States of Interest

### Georgia

In [9]:
# Load the Georgia data; gimmeGA is presumably provided by the star import of data_loaders — TODO confirm.
GA_raw = gimmeGA(prefix = prefix)
# Preview the first rows only.
GA_raw.head()

Unnamed: 0,SCHOOL_DSTRCT_NM,TEST_CMPNT_TYP_NM,NUMBER_TESTS_TAKEN,NOTESTS_3ORHIGHER,Year,Pass Rate,County
0,Appling County,ALL Subjects,92.0,37.0,2019,0.402174,Appling
1,Appling County,Biology,12.0,7.0,2019,0.583333,Appling
2,Appling County,Eng. Literature & Comp,10.0,3.0,2019,0.3,Appling
3,Appling County,Geography: Human,30.0,7.0,2019,0.233333,Appling
4,Appling County,Psychology,23.0,13.0,2019,0.565217,Appling


In [10]:
# Keep only the rows whose test component is 'ALL Subjects'.
is_all_subjects = GA_raw['TEST_CMPNT_TYP_NM'] == 'ALL Subjects'
# Select rows and columns in one .loc call and attach the state tag with .assign, so that
# GA_map_data is an independent frame. Assigning a column onto a slice of GA_raw is the
# chained-indexing pattern that makes pandas emit SettingWithCopyWarning.
GA_map_data = (
    GA_raw
    .loc[is_all_subjects, ['County', 'Year', 'Pass Rate']]
    .assign(State_Abbreviation='GA')
)
# Display with the state next to the county; GA_map_data itself keeps its own column order.
GA_map_data[['County', 'State_Abbreviation', 'Year', 'Pass Rate']]

Unnamed: 0,County,State_Abbreviation,Year,Pass Rate
0,Appling,GA,2019,0.402174
5,Atkinson,GA,2019,0.216216
8,Baldwin,GA,2019,0.282609
11,Banks,GA,2019,0.432432
14,Barrow,GA,2019,0.468956
...,...,...,...,...
4574,Worth,GA,2022,0.351351
4576,Fulton,GA,2019,0.647980
4612,Fulton,GA,2020,0.508843
4644,Fulton,GA,2021,0.469993


### Massachusetts

In [11]:
# NOTE(review): MA_map_data is not assigned anywhere in the visible notebook, so this cell
# raises NameError on a fresh kernel (see the recorded output). A Massachusetts preparation
# step producing County / State_Abbreviation / Year / Pass Rate is presumably missing — TODO add.
MA_map_data

NameError: name 'MA_map_data' is not defined

### Wisconsin

In [12]:
# NOTE(review): WI_map_data is not assigned anywhere in the visible notebook, so this cell
# raises NameError on a fresh kernel (see the recorded output). A Wisconsin preparation
# step producing County / State_Abbreviation / Year / Pass Rate is presumably missing — TODO add.
WI_map_data

NameError: name 'WI_map_data' is not defined

## County-level Income Data

Reference: U.S. Bureau of Economic Analysis. Personal Income by County, Metro, and Other Areas. [CAINC1](https://apps.bea.gov/regional/zip/CAINC1.zip). 

In [13]:
# Load the county income table and, in the same expression, keep only the rows whose
# State_Abbreviation is one of states_of_interest.
incomes = gimmeCountyIncomes(prefix=prefix).loc[
    lambda frame: frame['State_Abbreviation'].isin(states_of_interest)
]
# Preview the first rows only.
incomes.head()

Unnamed: 0,County,State_Abbreviation,2018,2019,2020,2021,2022
1277,Appling,GA,33662,35609,39134,42693,43602
1280,Atkinson,GA,29212,30449,32289,35210,36622
1283,Bacon,GA,31302,32293,37448,39057,39972
1286,Baker,GA,37763,40687,45061,50236,49696
1289,Baldwin,GA,33125,34522,37644,42144,42270


## Combining

In [None]:
# Georgia Income
# County labels that are renamed before the lookup in `incomes`
# (presumably because `incomes` lists these counties under the plain county name).
GA_COUNTY_ALIASES = {
    'Savannah-Chatham': 'Chatham',
    'Griffin-Spalding': 'Spalding',
    'Thomaston-Upson': 'Upson',
}

def find_income_GA(county, state, year):
    """Look up one county's income value for one year in the global `incomes` table.

    Parameters
    ----------
    county : str
        County label; labels listed in GA_COUNTY_ALIASES are translated first.
    state : str
        Value matched against ``incomes['State_Abbreviation']``.
    year : int or str
        ``str(year)`` selects the column of `incomes` to read.

    Returns
    -------
    float
        The value found in the first matching row.

    Raises
    ------
    IndexError
        If no row of `incomes` matches the county/state pair; the message names both.
    KeyError
        If `incomes` has no column named ``str(year)``.
    """
    county = GA_COUNTY_ALIASES.get(county, county)
    row_matches = (incomes['County'] == county) & (incomes['State_Abbreviation'] == state)
    # One .loc call selects rows and column together instead of chaining two indexers.
    values = incomes.loc[row_matches, str(year)]
    if values.empty:
        # Same exception type that .iloc[0] would raise, but it says which lookup failed.
        raise IndexError(
            "no income row for county {!r} in state {!r}".format(county, state)
        )
    return float(values.iloc[0])
# Row-wise lookup: each row's County, State_Abbreviation and Year are passed to
# find_income_GA and the returned value is stored in a new 'Income' column.
GA_map_data['Income'] = GA_map_data.apply(lambda row: find_income_GA(row['County'], row['State_Abbreviation'], row['Year']), axis = 1)

# Massachusetts Income
def find_income_MA(county, state, year):
    """Return, as a float, the `incomes` value for the given county, state and year.

    The year is converted with ``str(year)`` to pick the column, and the first row
    matching both the county and the state abbreviation supplies the value.
    Raises IndexError when no row matches.
    """
    same_county = incomes['County'] == county
    same_state = incomes['State_Abbreviation'] == state
    yearly_values = incomes.loc[same_county & same_state, str(year)]
    return float(yearly_values.iloc[0])
# Row-wise lookup: each row's County, State_Abbreviation and Year are passed to
# find_income_MA and the returned value is stored in a new 'Income' column.
# NOTE(review): MA_map_data is not assigned in the visible notebook — confirm it exists before this runs.
MA_map_data['Income'] = MA_map_data.apply(lambda row: find_income_MA(row['County'], row['State_Abbreviation'], row['Year']), axis = 1)

# Wisconsin Income
def find_income_WI(county, state, year):
    """Fetch the income recorded in `incomes` for one county/state pair in one year.

    Rows are filtered on 'County' and 'State_Abbreviation'; the column named
    ``str(year)`` of the first surviving row is returned as a float.
    An IndexError is raised if the filter leaves no rows.
    """
    year_column = str(year)
    matching_rows = incomes[
        (incomes['County'] == county) & (incomes['State_Abbreviation'] == state)
    ]
    first_value = matching_rows[year_column].iloc[0]
    return float(first_value)
# Row-wise lookup: each row's County, State_Abbreviation and Year are passed to
# find_income_WI and the returned value is stored in a new 'Income' column.
# NOTE(review): WI_map_data is not assigned in the visible notebook — confirm it exists before this runs.
WI_map_data['Income'] = WI_map_data.apply(lambda row: find_income_WI(row['County'], row['State_Abbreviation'], row['Year']), axis = 1)

In [None]:
# Stack the per-state frames (Georgia, Massachusetts, Wisconsin) into a single table.
state_frames = [GA_map_data, MA_map_data, WI_map_data]
states_map_data = pd.concat(state_frames)
# Written relative to the current working directory. The frame's index is written too,
# because to_csv is called with its defaults.
states_map_data.to_csv('States_Counties_Map_Data.csv')