In [2]:
import numpy as np
import pandas as pd
import altair as alt
from vega_datasets import data

In [47]:
# Load the election results table; later cells read its county_name, state_abbr
# and *_2016 vote-count columns.
election_pd = pd.read_csv(filepath_or_buffer="data/president_election.csv")

In [34]:
# Postal abbreviation -> numeric id used as the lookup key against the `id`
# of the us_10m "states" features (presumably state FIPS codes, which would
# explain the gaps at 3, 7, 14, 43 and 52 -- verify).
state2id = {
    'AL': 1,  'AK': 2,  'AZ': 4,  'AR': 5,  'CA': 6,  'CO': 8,
    'CT': 9,  'DE': 10, 'DC': 11, 'FL': 12, 'GA': 13, 'HI': 15,
    'ID': 16, 'IL': 17, 'IN': 18, 'IA': 19, 'KS': 20, 'KY': 21,
    'LA': 22, 'ME': 23, 'MD': 24, 'MA': 25, 'MI': 26, 'MN': 27,
    'MS': 28, 'MO': 29, 'MT': 30, 'NE': 31, 'NV': 32, 'NH': 33,
    'NJ': 34, 'NM': 35, 'NY': 36, 'NC': 37, 'ND': 38, 'OH': 39,
    'OK': 40, 'OR': 41, 'PA': 42, 'RI': 44, 'SC': 45, 'SD': 46,
    'TN': 47, 'TX': 48, 'UT': 49, 'VT': 50, 'VA': 51, 'WA': 53,
    'WV': 54, 'WI': 55, 'WY': 56,
}

In [48]:
# Show the county_name column of the election table.
election_pd.loc[:, 'county_name']

0          Autauga County
1          Baldwin County
2          Barbour County
3             Bibb County
4           Blount County
              ...        
3108    Sweetwater County
3109         Teton County
3110         Uinta County
3111      Washakie County
3112        Weston County
Name: county_name, Length: 3113, dtype: object

In [4]:
# Per-row margin: Democratic minus Republican votes as a percentage of all
# 2016 votes in that row (positive = Democratic lead).
dem_minus_gop = election_pd['votes_dem_2016'] - election_pd['votes_gop_2016']
election_pd['vote_diff_portion'] = dem_minus_gop / election_pd['total_votes_2016'] * 100

In [27]:
# Roll the vote counts up to one row per state abbreviation.
vote_columns = ['votes_dem_2016', 'votes_gop_2016', 'total_votes_2016']
state_vote = (
    election_pd
    .groupby(['state_abbr'], as_index=False)
    .agg(dict.fromkeys(vote_columns, 'sum'))
)

In [28]:
# State-level margin in percentage points of total votes (positive = Democratic lead).
margin_votes = state_vote['votes_dem_2016'].sub(state_vote['votes_gop_2016'])
state_vote['vote_diff_percent'] = margin_votes.div(state_vote['total_votes_2016']).mul(100)

In [49]:
# Attach the numeric id used as the lookup key for the map.
# Indexing through state2id.__getitem__ (instead of .map(state2id)) keeps the
# KeyError for an abbreviation that is missing from the dict.
state_vote['id'] = state_vote['state_abbr'].map(state2id.__getitem__)

In [41]:
# Reference the "states" feature collection of the us_10m TopoJSON by URL.
states = alt.topo_feature(url=data.us_10m.url, feature='states')

In [78]:
# Choropleth of vote_diff_percent per state, joined to the map shapes on `id`.
# The colour domain is fixed and symmetric around zero.
# NOTE(review): DC's value (~88.7 in the state_vote table) lies outside
# [-48, 48] -- confirm how out-of-domain values should be rendered.
margin_scale = alt.Scale(type='linear', domain=[-48, 48], scheme='redblue')
vote_lookup = alt.LookupData(state_vote, 'id', ['state_abbr', 'vote_diff_percent'])

(
    alt.Chart(states)
    .mark_geoshape()
    .encode(
        color=alt.Color('vote_diff_percent:Q', scale=margin_scale),
        tooltip=['state_abbr:N', 'vote_diff_percent:Q'],
    )
    .transform_lookup(lookup='id', from_=vote_lookup)
    .properties(width=500, height=300)
    .project(type='albersUsa')
)

In [53]:
# Display the per-state table: summed vote counts, vote_diff_percent and the numeric id.
state_vote

Unnamed: 0,state_abbr,votes_dem_2016,votes_gop_2016,total_votes_2016,vote_diff_percent,id
0,AK,116454.0,163387.0,318608.0,-14.730641,2
1,AL,718084.0,1306925.0,2078165.0,-28.334661,1
2,AR,378729.0,677904.0,1108615.0,-26.986375,5
3,AZ,936250.0,1021154.0,2062810.0,-4.115939,4
4,CA,7230699.0,3841134.0,11733523.0,28.887871,6
5,CO,1212209.0,1137455.0,2564185.0,2.915312,8
6,CT,884432.0,668266.0,1623542.0,13.314469,9
7,DC,260223.0,11553.0,280272.0,88.724525,11
8,DE,235581.0,185103.0,441535.0,11.432389,10
9,FL,4485745.0,4605515.0,9386750.0,-1.275947,12


In [17]:
# Load the population_engineers_hurricanes sample table from vega_datasets and show it.
# Its `id` column carries the same numeric ids as state2id (e.g. Alabama=1, Alaska=2,
# Arizona=4); presumably it was loaded as a reference for that mapping -- verify.
pop = data.population_engineers_hurricanes()
pop

Unnamed: 0,state,id,population,engineers,hurricanes
0,Alabama,1,4863300,0.003422,22
1,Alaska,2,741894,0.001591,0
2,Arizona,4,6931071,0.004774,0
3,Arkansas,5,2988248,0.00244,0
4,California,6,39250017,0.007126,0
5,Colorado,8,5540545,0.008088,0
6,Connecticut,9,3576452,0.005897,10
7,Delaware,10,952065,0.004989,2
8,District of Columbia,11,681170,0.011759,0
9,Florida,12,20612439,0.0033,110


In [3]:
# Load the county-level census table (one row per CountyId).
census_df = pd.read_csv(filepath_or_buffer="data/acs2017_county_data.csv")

In [5]:
# List the column names available in the census table.
census_df.columns

Index(['CountyId', 'State', 'County', 'TotalPop', 'Men', 'Women', 'Hispanic',
       'White', 'Black', 'Native', 'Asian', 'Pacific', 'VotingAgeCitizen',
       'Income', 'IncomeErr', 'IncomePerCap', 'IncomePerCapErr', 'Poverty',
       'ChildPoverty', 'Professional', 'Service', 'Office', 'Construction',
       'Production', 'Drive', 'Carpool', 'Transit', 'Walk', 'OtherTransp',
       'WorkAtHome', 'MeanCommute', 'Employed', 'PrivateWork', 'PublicWork',
       'SelfEmployed', 'FamilyWork', 'Unemployment'],
      dtype='object')

In [7]:
# Reference the "counties" feature collection of the us_10m TopoJSON by URL.
counties = alt.topo_feature(url=data.us_10m.url, feature='counties')

In [44]:
# County map coloured by TotalPop. Only New York rows are supplied to the
# lookup, so counties elsewhere get no matching County/TotalPop values.
ny_census = census_df[census_df['State'] == 'New York']
ny_lookup = alt.LookupData(ny_census, 'CountyId', ['County', 'TotalPop'])

(
    alt.Chart(counties)
    .mark_geoshape()
    .encode(
        color=alt.Color('TotalPop:Q', scale=alt.Scale(type='linear')),
        tooltip=['County:N', 'TotalPop:Q'],
    )
    .transform_lookup(lookup='id', from_=ny_lookup)
    .properties(width=500, height=300)
    .project(type='albersUsa')
)

In [12]:
# Narrow the census table to identifiers, population, race/ethnicity shares,
# income and poverty; every other column is discarded from here on.
kept_columns = [
    "CountyId", "State", "County", "TotalPop",
    "Hispanic", "White", "Black", "Native", "Asian", "Pacific",
    "Income", "Poverty",
]
census_df = census_df[kept_columns]

In [20]:
# Keep only rows whose State is not Puerto Rico. state2abbr (defined in a later
# cell) has no entry for it, so the state -> id lookup would raise KeyError on
# those rows -- presumably the reason for the filter.
# .copy() makes the filtered frame independent of the original, so the column
# assignment below does not trigger pandas' SettingWithCopyWarning.
not_puerto_rico = census_df["State"] != "Puerto Rico"
census_df = census_df.loc[not_puerto_rico].copy()

# Fold the three smaller race columns into one "Other Race" column, then drop them.
census_df["Other Race"] = census_df["Native"] + census_df["Asian"] + census_df["Pacific"]
census_df = census_df.drop(columns=["Native", "Asian", "Pacific"])

In [21]:
# Display the trimmed census table (Puerto Rico removed, "Other Race" added).
census_df

Unnamed: 0,CountyId,State,County,TotalPop,Hispanic,White,Black,Income,Poverty,Other Race
0,1001,Alabama,Autauga County,55036,2.7,75.4,18.9,55317,13.7,1.2
1,1003,Alabama,Baldwin County,203360,4.4,83.1,9.5,52562,11.8,1.5
2,1005,Alabama,Barbour County,26201,4.2,45.7,47.8,33368,27.2,0.8
3,1007,Alabama,Bibb County,22580,2.4,74.6,22.0,43404,15.2,0.4
4,1009,Alabama,Blount County,57667,9.0,87.4,1.5,47412,15.6,0.4
...,...,...,...,...,...,...,...,...,...,...
3137,56037,Wyoming,Sweetwater County,44527,16.0,79.6,0.8,71083,12.0,1.7
3138,56039,Wyoming,Teton County,22923,15.0,81.5,0.5,80049,6.8,2.5
3139,56041,Wyoming,Uinta County,20758,9.1,87.7,0.1,54672,14.9,1.0
3140,56043,Wyoming,Washakie County,8253,14.2,82.2,0.3,51362,12.8,0.5


In [22]:
# Weight each per-county measure by the county's population so it can be summed
# per state and divided by the state population afterwards (a population-weighted
# mean). The products are intermediate values only: the race and poverty columns
# look like percentages in the displayed table, so these are not head counts.
weighted_measures = ["Hispanic", "White", "Black", "Income", "Poverty", "Other Race"]
for measure in weighted_measures:
    census_df[f"Total {measure}"] = census_df["TotalPop"] * census_df[measure]

In [28]:
# Sum population and the population-weighted measures to one row per state.
summed_columns = [
    "TotalPop", "Total White", "Total Hispanic", "Total Black",
    "Total Income", "Total Poverty", "Total Other Race",
]
state_census = (
    census_df
    .groupby("State", as_index=False)
    .agg(dict.fromkeys(summed_columns, "sum"))
)

In [29]:
# Turn each summed "Total <measure>" back into a population-weighted state
# average by dividing by the state's population, then discard the helper sums.
averaged_measures = ["Hispanic", "White", "Black", "Income", "Poverty", "Other Race"]
for measure in averaged_measures:
    state_census[measure] = state_census[f"Total {measure}"] / state_census["TotalPop"]

state_census = state_census.drop(columns=[f"Total {measure}" for measure in averaged_measures])

In [36]:
# Full state name -> two-letter postal abbreviation (50 states plus DC).
state2abbr = {"Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR", "California": "CA",
    "Colorado": "CO", "Connecticut": "CT", "Delaware": "DE", "Florida": "FL", "Georgia": "GA", "Hawaii": "HI",
    "Idaho": "ID", "Illinois": "IL", "Indiana": "IN", "Iowa": "IA", "Kansas": "KS", "Kentucky": "KY",
    "Louisiana": "LA", "Maine": "ME", "Maryland": "MD", "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN",
    "Mississippi": "MS", "Missouri": "MO", "Montana": "MT", "Nebraska": "NE", "Nevada": "NV", "New Hampshire": "NH",
    "New Jersey": "NJ", "New Mexico": "NM", "New York": "NY", "North Carolina": "NC", "North Dakota": "ND", "Ohio": "OH",
    "Oklahoma": "OK", "Oregon": "OR", "Pennsylvania": "PA", "Rhode Island": "RI", "South Carolina": "SC", "South Dakota": "SD",
    "Tennessee": "TN", "Texas": "TX", "Utah": "UT", "Vermont": "VT", "Virginia": "VA", "Washington": "WA", "West Virginia": "WV",
    "Wisconsin": "WI", "Wyoming": "WY", "District of Columbia": "DC",
}

# Inverse mapping: postal abbreviation -> full state name.
# The original inverted `state_to_abbr`, a name that is never defined, so the
# cell raised NameError on a fresh kernel; the dict to invert is state2abbr.
abbr2state = {abbr: name for name, abbr in state2abbr.items()}

In [37]:
# Full state name -> postal abbreviation -> numeric map id.
# Plain dict indexing inside the callable keeps the KeyError for a state name
# that is missing from either mapping.
state_census['id'] = state_census['State'].map(lambda name: state2id[state2abbr[name]])

In [43]:
# Choropleth of total population per state, joined to the map shapes on `id`.
population_lookup = alt.LookupData(state_census, 'id', ['State', 'TotalPop'])

(
    alt.Chart(states)
    .mark_geoshape()
    .encode(
        color=alt.Color('TotalPop:Q', scale=alt.Scale(type='linear')),
        tooltip=['State:N', 'TotalPop:Q'],
    )
    .transform_lookup(lookup='id', from_=population_lookup)
    .properties(width=500, height=300)
    .project(type='albersUsa')
)

In [46]:
# Largest absolute value in the state population column.
state_population = state_census['TotalPop']
state_population.abs().max()

38982847