# County Election Data

- Contains a variaty of county-level variables from [here](https://github.com/evangambit/JsonOfCounties)

In [2]:
import numpy as np
import pandas as pd
import geopandas as gpd

cols = [ "fips", "noaa/prcp", "noaa/snow", "noaa/temp", "noaa/altitude", "noaa/temp-jan", "noaa/temp-apr", "noaa/temp-jul", "noaa/temp-oct", "race/non_hispanic_white_alone_male", "race/non_hispanic_white_alone_female", "race/black_alone_male", "race/black_alone_female", "race/asian_alone_male", "race/asian_alone_female", "race/hispanic_male", "race/hispanic_female", "age/0-4", "age/5-9", "age/10-14", "age/15-19", "age/20-24", "age/25-29", "age/30-34", "age/35-39", "age/40-44", "age/45-49", "age/50-54", "age/55-59", "age/60-64", "age/65-69", "age/70-74", "age/75-79", "age/80-84", "age/85+", "male", "female", "population/2019", "deaths/suicides", "deaths/firearm suicides", "deaths/homicides", "deaths/vehicle", "bls/2020/labor_force", "bls/2020/employed", "bls/2020/unemployed", "life-expectancy", "fatal_police_shootings/total-2020", "fatal_police_shootings/unarmed-2020", "fatal_police_shootings/firearmed-2020", "police_deaths", "avg_income", "elections/2020/total", "elections/2020/dem", "elections/2020/gop", "edu/less-than-high-school", "edu/high-school", "edu/some-college", "edu/bachelors+", "poverty-rate", "cost-of-living/living_wage", "cost-of-living/food_costs", "cost-of-living/medical_costs", "cost-of-living/housing_costs", "cost-of-living/tax_costs", "health/% Fair or Poor Health", "health/% Low birthweight", "health/% Smokers", "health/% Adults with Obesity", "health/% Physically Inactive", "health/% Children in Poverty", "health/80th Percentile Income", "health/20th Percentile Income"]

In [3]:
df = pd.read_csv("data/counties.csv", usecols=cols, dtype={"fips":str})

In [4]:
len(df.columns)

72

In [5]:
df['age-to-25'] = df[['age/0-4', 'age/5-9', 'age/10-14', 'age/15-19', 'age/20-24']].sum(axis=1)
df['age-25-65'] = df[['age/25-29', 'age/30-34', 'age/35-39', 'age/40-44', 'age/45-49', 'age/50-54', 'age/55-59', 'age/60-64']].sum(axis=1)
df['age-over-65'] = df[['age/65-69', 'age/70-74', 'age/75-79', 'age/80-84', 'age/85+']].sum(axis=1)

In [6]:
df['white'] = df[['race/non_hispanic_white_alone_male', 'race/non_hispanic_white_alone_female']].sum(axis=1)
df['black'] = df[['race/black_alone_male', 'race/black_alone_female']].sum(axis=1)
df['asian'] = df[['race/asian_alone_male', 'race/asian_alone_female']].sum(axis=1)
df['hispanic'] = df[['race/hispanic_male', 'race/hispanic_female']].sum(axis=1)

In [7]:
df['percentage_dem'] = (df['elections/2020/dem'] / df['elections/2020/total']) * 100

In [8]:
df = df.drop(columns=["race/non_hispanic_white_alone_male", "race/non_hispanic_white_alone_female", "race/black_alone_male", "race/black_alone_female", "race/asian_alone_male", "race/asian_alone_female", "race/hispanic_male", "race/hispanic_female", "age/0-4", "age/5-9", "age/10-14", "age/15-19", "age/20-24", "age/25-29", "age/30-34", "age/35-39", "age/40-44", "age/45-49", "age/50-54", "age/55-59", "age/60-64", "age/65-69", "age/70-74", "age/75-79", "age/80-84", "age/85+", 'health/% Low birthweight', 'deaths/firearm suicides', 'fatal_police_shootings/firearmed-2020', "elections/2020/total", "elections/2020/dem", "elections/2020/gop"])

In [9]:
column_mapping = {'edu/less-than-high-school':"cs_ed_below_highschool", 'edu/high-school':"cs_ed_highschool", 'edu/some-college':"cs_ed_some_college", 'edu/bachelors+':"cs_ed_above_college", 'cost-of-living/living_wage':'bls_living_wage', 'cost-of-living/food_costs':'bls_food_costs', 'cost-of-living/medical_costs':'bls_medical_costs', 'cost-of-living/housing_costs':'bls_housing_costs', 'cost-of-living/tax_costs':'bls_tax_costs', 'health/% Fair or Poor Health':'health_poor_health_pct', 'health/% Smokers':'health_smokers_pct', 'health/% Adults with Obesity':'health_obese_pct', 'health/% Physically Inactive':'health_phy_inactive_pct', 'health/% Children in Poverty':'health_children_poverty_pct', 'health/80th Percentile Income':'health_80th_perc_income_pct', 'health/20th Percentile Income':'health_20th_perc_income_pct', 'bls/2020/labor_force': 'bls_labor_force', 'bls/2020/employed': 'bls_employed', 'bls/2020/unemployed': 'bls_unemployed', 'noaa/prcp': 'noaa_prcp', 'noaa/snow': 'noaa_snow', 'noaa/temp': 'noaa_temp', 'noaa/altitude': 'noaa_altitude', 'noaa/temp-jan': 'noaa_temp_jan', 'noaa/temp-apr': 'noaa_temp_apr', 'noaa/temp-jul': 'noaa_temp_jul', 'noaa/temp-oct': 'noaa_temp_oct', 'male':'cs_male', 'female':'cs_female', 'population':'cs_population', 'white':'cs_white', 'black':'cs_black', 'asian':'cs_asian', 'hispanic':'cs_hispanic', 'age-to-25':'cs_age-to-25', 'age-25-65':'cs_age-25-65', 'age-over-65':'cs_age-over-65', 'percentage_dem':'election_dem_pct', 'deaths/suicides':"cdc_suicides", 'deaths/homicides':"cdc_homicides", 'deaths/vehicle':"cdc_vehicle_deaths", 'fatal_police_shootings/unarmed-2020':"cdc_police_deaths_unarmed", 'fatal_police_shootings/total-2020':"cdc_police_deaths_total"}

In [10]:
df = df.rename(columns=column_mapping)

In [11]:
len(df)

3142

In [12]:
df.columns

Index(['fips', 'noaa_prcp', 'noaa_snow', 'noaa_temp', 'noaa_altitude',
       'noaa_temp_jan', 'noaa_temp_apr', 'noaa_temp_jul', 'noaa_temp_oct',
       'cs_male', 'cs_female', 'population/2019', 'cdc_suicides',
       'cdc_homicides', 'cdc_vehicle_deaths', 'bls_labor_force',
       'bls_employed', 'bls_unemployed', 'life-expectancy',
       'cdc_police_deaths_total', 'cdc_police_deaths_unarmed', 'police_deaths',
       'avg_income', 'cs_ed_below_highschool', 'cs_ed_highschool',
       'cs_ed_some_college', 'cs_ed_above_college', 'poverty-rate',
       'bls_living_wage', 'bls_food_costs', 'bls_medical_costs',
       'bls_housing_costs', 'bls_tax_costs', 'health_poor_health_pct',
       'health_smokers_pct', 'health_obese_pct', 'health_phy_inactive_pct',
       'health_children_poverty_pct', 'health_80th_perc_income_pct',
       'health_20th_perc_income_pct', 'cs_age-to-25', 'cs_age-25-65',
       'cs_age-over-65', 'cs_white', 'cs_black', 'cs_asian', 'cs_hispanic',
       'election_dem_

In [13]:
df.to_csv("data/counties_election.csv", index=False)