In [7]:
from __future__ import division
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import matplotlib
# import seaborn as sbn
# import statsmodels.stats.proportion as sts
# import scipy
import sys

# Settings: notebook-wide display, plotting and import-path configuration.

# Render matplotlib figures inside the notebook output cells.
%matplotlib inline
# Use the 'ggplot' style sheet for every figure created below.
matplotlib.style.use('ggplot')
# Never truncate wide DataFrames when displaying them (None = no column limit).
pd.set_option('display.max_columns', None)

# Make modules in this local directory importable.
# NOTE(review): absolute, machine-specific path — nothing visible in this
# notebook imports from it; confirm whether it is still required.
sys.path.extend([r'C:\Users\michael\Documents\_python\tools_working'])

# Ask the inline backend for high-resolution ('retina') figure output.
%config InlineBackend.figure_format = 'retina'

# Display the value of every top-level expression in a cell,
# not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Suppress all warnings for the rest of the session.
# NOTE(review): this also hides pandas/numpy deprecation and
# chained-assignment warnings.
import warnings
warnings.filterwarnings('ignore')

Get the county data

In [8]:
# Absolute location of the county-characteristics CSV; the two adjacent raw
# string literals are concatenated into a single path.
county_data = (
    r'C:\Users\michael\Documents\_datasets\2016-us-election'
    r'\CountyCharacteristics.csv'
)

# Load the county table with pandas' default parsing options.
county_df = pd.read_csv(county_data)

Get the voting data

In [9]:
# Absolute location of the 2016 presidential results CSV; the two adjacent
# raw string literals are concatenated into a single path.
voting_data = (
    r'C:\Users\michael\Documents\_datasets\2016-us-election'
    r'\PresidentialElectionResults2016.csv'
)

# Load the results and discard their `State` column straight away.
voting_df = pd.read_csv(voting_data).drop('State', axis=1)

Join the two dataframes

In [10]:
# Combine the voting results with the county characteristics, matching rows
# on the `County` identifier instead of on row position.
#
# A plain `voting_df.join(county_df)` aligns on the default positional index,
# which is only correct if both CSVs list the counties in exactly the same
# order and with the same number of rows; any difference silently pairs a
# county's votes with another county's demographics.
#
# `drop=False` keeps `County` as a regular column on both sides, so the
# overlapping name still receives the `_vte` / `_cty` suffixes and the
# resulting columns are the same as before.  The result is a left join (every
# voting row is kept) indexed by `County`, with the voting-side copy exposed
# as the `County` column.
election_data = (
    voting_df.set_index('County', drop=False)
    .join(county_df.set_index('County', drop=False),
          lsuffix='_vte', rsuffix='_cty')
    .rename(columns={'County_vte': 'County'})
)

Aggregate and create some features

In [11]:
# Winner label per county: 'clinton' only where Clinton's count is strictly
# greater than Trump's; every other row (including ties) is labelled 'trump'.
clinton_leads = election_data['clinton'] > election_data['trump']
election_data['vote_president'] = np.where(clinton_leads, 'clinton', 'trump')

# Derived bucket -> source columns that are added together to form it.
# Kept as an ordered list so the new columns are appended in this order.
bucket_sources = [
    ('age_00_20',   ['Age0_4', 'Age5_9', 'Age10_14', 'Age15_19']),
    ('age_20_34',   ['Age20_24', 'Age25_34']),
    ('age_35_64',   ['Age35_44', 'Age45_54', 'Age55_59', 'Age60_64']),
    ('age_65_99',   ['Age65_74', 'Age75_84', 'Age85']),
    ('edu_no_hs',   ['EdK8', 'Ed9_12']),
    ('edu_hs',      ['EdHS', 'EdCollNoDegree']),
    ('edu_uni_2_4', ['EdAssocDegree', 'EdBachelorDegree']),
    ('edu_uni_4+',  ['EdGraduateDegree']),
    ('ethn_other',  ['OtherRace', 'AmericanIndianAlaskaNative',
                     'Asian', 'NativeHawaiianPacificIslander']),
]

for bucket, sources in bucket_sources:
    # Plain element-wise `+`, left to right, so a missing value in any
    # source column propagates into the bucket.
    running_total = election_data[sources[0]]
    for source in sources[1:]:
        running_total = running_total + election_data[source]
    election_data[bucket] = running_total

# Residents per square mile of land area.
election_data['pop_density_mi^2'] = \
    election_data['TotalPopulation'].div(election_data['LandAreaSqMiles'])

# Median household income minus twelve times the median housing cost.
yearly_housing_costs = election_data['MedianHousingCosts'].mul(12)
election_data['income_median_disposable'] = \
    election_data['MedianHouseholdIncome'].sub(yearly_housing_costs)

Rename some columns

In [12]:
# Old column name -> new column name.  The new names share a topical prefix
# (age_, emp_, ethn_, income_, mrg_, ntn_, pop_, sex_, vote_, ...); the two
# descriptive name columns get a leading underscore.
rename_col_dict = {
    # identifiers
    'CountyName': '_county_name',
    'StateName': '_state_name',
    # age
    'MedianAge': 'age_median',
    # employment
    'Disability': 'emp_disabled',
    'Employment': 'emp_employed',
    'LaborForce': 'emp_labor_force',
    'Unemployment': 'emp_unemployed',
    # ethnicity
    'Black': 'ethn_black',
    'Hispanic': 'ethn_hispanic',
    'White': 'ethn_white',
    # health
    'Uninsured': 'health_uninsured',
    # housing
    'MedianHousingCosts': 'housing_median_costs',
    # income
    'MedianHouseholdIncome': 'income_median_household',
    'TotalSSI': 'income_ssi',
    'SSIPayments': 'income_ssi_payments',
    # marital status
    'Divorced': 'mrg_divorced',
    'Married': 'mrg_married',
    'NeverMarried': 'mrg_never_married',
    'Separated': 'mrg_separated',
    'Widowed': 'mrg_widowed',
    # nationality
    'ForeignBorn': 'ntn_foreign_born',
    'NonCitizen': 'ntn_non_citizen',
    # population
    'TotalPopulation': 'pop_total',
    'NCHS_UrbanRural2013': 'pop_type',
    # sex
    'Female': 'sex_female',
    'Male': 'sex_male',
    # votes
    'clinton': 'vote_clinton',
    'totalvotes': 'vote_total',
    'trump': 'vote_trump',
}

# Apply the mapping to the column labels of the existing frame.
election_data.rename(columns=rename_col_dict, inplace=True)

Drop some columns

In [13]:
# Columns removed from the working frame, grouped by theme.  Each name is
# listed exactly once (the earlier list repeated 'TotalEmp1990' and
# 'TotalEmp2001').
drop_cols = [
    # state identifiers
    'State', 'StateAbbr',
    # miscellaneous indices / denominators
    'SimpsonDiversityIndex', 'Population25Plus', 'LandAreaSqMiles',
    # manufacturing and total employment by year
    'MfgEmp1970', 'MfgEmp1980', 'MfgEmp1990', 'MfgEmp2001', 'MfgEmp2015',
    'TotalEmp1970', 'TotalEmp1980', 'TotalEmp1990', 'TotalEmp2001',
    'TotalEmp2015',
    # older urban/rural classifications
    'NCHS_UrbanRural2006', 'NCHS_UrbanRural1990',
    # vote shares / leans and other candidates
    'dPct', 'rPct', 'leanD', 'leanR',
    'otherPct', 'dDRPct', 'rDRPct',
    'johnson', 'stein', 'other',
    # social-security breakdowns
    'AgedSSI', 'BlindDisabledSSI', 'OASDI',
    # raw education counts
    'EdK8', 'Ed9_12', 'EdHS', 'EdCollNoDegree',
    'EdAssocDegree', 'EdBachelorDegree', 'EdGraduateDegree',
    # raw age-band counts
    'Age0_4', 'Age5_9', 'Age10_14', 'Age15_19',
    'Age20_24', 'Age25_34', 'Age35_44', 'Age45_54',
    'Age55_59', 'Age60_64', 'Age65_74', 'Age75_84', 'Age85',
    # raw ethnicity counts
    'AmericanIndianAlaskaNative', 'Asian',
    'NativeHawaiianPacificIslander', 'OtherRace',
]

# Remove the listed columns from the existing frame.  This raises KeyError if
# a listed column is absent, e.g. when the cell is run a second time.
election_data.drop(drop_cols, axis=1, inplace=True)

In [14]:
# Order the columns alphabetically, then show the row(s) whose index label is
# 8101 (Pueblo, Colorado in the recorded output) as a spot check of the
# raw counts.
election_data.sort_index(axis=1, inplace=True)
is_spot_check_row = election_data.index == 8101
election_data.loc[is_spot_check_row]

Unnamed: 0_level_0,County,County_cty,_county_name,_state_name,age_00_20,age_20_34,age_35_64,age_65_99,age_median,edu_hs,edu_no_hs,edu_uni_2_4,edu_uni_4+,emp_disabled,emp_employed,emp_labor_force,emp_unemployed,ethn_black,ethn_hispanic,ethn_other,ethn_white,health_uninsured,housing_median_costs,income_median_disposable,income_median_household,income_ssi,income_ssi_payments,mrg_divorced,mrg_married,mrg_never_married,mrg_separated,mrg_widowed,ntn_foreign_born,ntn_non_citizen,pop_density_mi^2,pop_total,pop_type,sex_female,sex_male,vote_clinton,vote_president,vote_total,vote_trump
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1
8101,8101,8101,Pueblo,Colorado,42346,30958,61521,26694,38.8,59416,13035,27256,8540,28817,70742.0,73990.0,3248.0,5280,68312,30643,133366,17061,830,31326.0,41286.0,6254.0,3434000.0,18464,60724,39789,2341,8582,6119,3806,67.692403,161519,Small metro,81984,79535,35875,trump,78646,36265


Convert column counts to proportions

In [15]:
# Count columns that are divided by the total population.
population_proportions = [
    'age_00_20', 'age_20_34', 'age_35_64', 'age_65_99',
    'edu_hs', 'edu_no_hs', 'edu_uni_2_4', 'edu_uni_4+',
    'ethn_black', 'ethn_hispanic', 'ethn_other', 'ethn_white',
    'health_uninsured', 'mrg_divorced', 'mrg_married',
    'mrg_never_married', 'mrg_separated', 'mrg_widowed',
    'ntn_foreign_born', 'ntn_non_citizen', 'sex_female', 'sex_male',
    'emp_disabled']

# Count columns that are divided by the size of the labor force.
labor_force_proportions = ['emp_employed', 'emp_unemployed']

# Count columns that are divided by the total number of votes cast.
vote_proportions = ['vote_clinton', 'vote_trump']

# Overwrite every count column with its share of the matching denominator.
# The columns are replaced in place, so running this cell twice divides twice.
for denominator, count_columns in (
        ('pop_total', population_proportions),
        ('emp_labor_force', labor_force_proportions),
        ('vote_total', vote_proportions)):
    for col in count_columns:
        election_data[col] = election_data[col].div(election_data[denominator])

In [16]:
# Re-apply the alphabetical column order, then show the row(s) whose index
# label is 8101 again so the converted proportions can be checked by eye.
election_data.sort_index(axis=1, inplace=True)
is_spot_check_row = election_data.index == 8101
election_data.loc[is_spot_check_row]

Unnamed: 0_level_0,County,County_cty,_county_name,_state_name,age_00_20,age_20_34,age_35_64,age_65_99,age_median,edu_hs,edu_no_hs,edu_uni_2_4,edu_uni_4+,emp_disabled,emp_employed,emp_labor_force,emp_unemployed,ethn_black,ethn_hispanic,ethn_other,ethn_white,health_uninsured,housing_median_costs,income_median_disposable,income_median_household,income_ssi,income_ssi_payments,mrg_divorced,mrg_married,mrg_never_married,mrg_separated,mrg_widowed,ntn_foreign_born,ntn_non_citizen,pop_density_mi^2,pop_total,pop_type,sex_female,sex_male,vote_clinton,vote_president,vote_total,vote_trump
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1
8101,8101,8101,Pueblo,Colorado,0.262173,0.191668,0.38089,0.165268,38.8,0.367858,0.080703,0.168748,0.052873,0.178412,0.956102,73990.0,0.043898,0.03269,0.422935,0.189718,0.825699,0.105628,830,31326.0,41286.0,6254.0,3434000.0,0.114315,0.375956,0.246343,0.014494,0.053133,0.037884,0.023564,67.692403,161519,Small metro,0.507581,0.492419,0.456158,trump,78646,0.461117


TODO: add primary data; scale the dollar-valued columns.