In [28]:
import pandas as pd
import geopandas as gpd
from IPython.display import display

import psycopg2
from netrc import netrc
from fiona.crs import from_epsg

user, acct, passwd = netrc().authenticators("harris")

seats, fips, epsg = 8, 27, 3594

%matplotlib inline
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 10000

In [42]:
mnf = "/media/jsaxon/brobdingnag/data/el_dv/mn_site/"


def _read_precinct_votes(shapefile, renames):
    """Read one shapefile from ``mnf`` and reduce it to two vote columns.

    Parameters
    ----------
    shapefile : str
        File name, relative to ``mnf``.
    renames : dict
        Maps the file's two vote column names to the short names used in the
        rest of the notebook (e.g. ``"D08"`` / ``"R08"``). The source columns
        are presumably the DFL and Republican presidential totals -- confirm
        against the shapefile's documentation.

    Returns
    -------
    GeoDataFrame
        Only the renamed columns plus ``geometry``, reprojected to ``epsg``.
    """
    precincts = gpd.read_file(mnf + shapefile).rename(columns = renames)
    wanted = list(renames.values()) + ["geometry"]
    return precincts[wanted].to_crs(epsg = epsg)


# Note: the 2008 file spells its vote columns differently from 2012 / 2016.
map_08 = _read_precinct_votes("elec2008.shp", {"USPRESDFL" : "D08", "USPRESR" : "R08"})
map_12 = _read_precinct_votes("elec2012.shp", {"USPRSDFL" : "D12", "USPRSR" : "R12"})
map_16 = _read_precinct_votes("elec2016.shp", {"USPRSDFL" : "D16", "USPRSR" : "R16"})

In [49]:
# Tract geometries for one state, each tagged with `rn`: a zero-based row
# number assigned within the state in (county, tract) order. The `{}` is
# filled with the state code below. `epsg` inside the SQL is NOT the Python
# variable -- it is resolved by the database (presumably a column of the
# joined `states` table; confirm against the schema).
query = """SELECT 
              rn, ST_Transform(tr.geom, epsg) geometry
           FROM census_tracts_2015 AS tr
           JOIN (SELECT state, county, tract,
                        row_number() over 
                          (PARTITION BY state ORDER BY county, tract NULLS LAST) - 1 as rn
                 FROM census_tracts_2015) rn ON
                   tr.state  = rn.state  AND
                   tr.county = rn.county AND
                   tr.tract  = rn.tract
           JOIN states AS st ON st.fips = tr.state
           WHERE tr.state = {} ORDER BY rn;
           """

# Connection is left open here; nothing in this cell closes it.
con = psycopg2.connect(
    host = "saxon.harris.uchicago.edu",
    port = 5432,
    database = "census",
    user = user,
    password = passwd,
)

# Rows arrive ordered by rn, and the frame is labelled with the same CRS
# code the notebook uses for the precinct maps.
tr_rn = gpd.GeoDataFrame.from_postgis(
    query.format(fips),
    con,
    geom_col = "geometry",
    crs = from_epsg(epsg),
)

In [97]:
# Attach a tract number to each precinct: replace the precinct geometry by its
# centroid and find the tract that centroid lies within. The left join keeps
# precincts whose centroid falls in no tract; their `rn` comes back null.
votes_08, votes_12, votes_16 = (
    gpd.sjoin(
        precincts.set_geometry(precincts.centroid),
        tr_rn,
        op = "within",
        how = "left",
    )
    for precincts in (map_08, map_12, map_16)
)

In [102]:
def merge_tract_number(gdf, trdf):
    """Give every row of ``gdf`` with a null ``rn`` the number of its nearest tract.

    For each unmatched row, the centroid of the row's geometry is compared
    against every geometry in ``trdf`` with ``.distance`` and the closest one
    wins (the first one on ties).

    Parameters
    ----------
    gdf : DataFrame-like
        Must have ``rn`` and ``geometry`` columns. Modified in place.
    trdf : DataFrame-like
        Candidate tracts; must have a ``geometry`` column. When it also has an
        ``rn`` column, that value is what gets written to ``gdf``; otherwise
        the tract's position in ``trdf`` is used.

    Returns
    -------
    None

    Notes
    -----
    Prints the row label as a progress marker whenever it is a multiple of 100.
    """
    # Work from the frame that was passed in, not from a notebook-level global,
    # so the function does what its signature says.
    tract_shapes = list(trdf.geometry)

    # Write the tract's own number rather than its position, so the result
    # stays consistent with `rn` values produced by a join on the same frame
    # even if the frame is not sorted 0..n-1.
    if "rn" in trdf.columns:
        tract_numbers = list(trdf["rn"])
    else:
        tract_numbers = list(range(len(tract_shapes)))

    for pri, row in gdf[gdf.rn.isnull()].iterrows():
        ctr = row.geometry.centroid
        nearest = min(range(len(tract_shapes)),
                      key = lambda xi: tract_shapes[xi].distance(ctr))
        gdf.loc[pri, "rn"] = tract_numbers[nearest]
        if (pri % 100) == 0: print(pri, end = " ")

# 1. Precincts the spatial join could not place get their nearest tract.
for precinct_votes in (votes_08, votes_12, votes_16):
    merge_tract_number(precinct_votes, tr_rn)

# 2. With no nulls left, the tract number can become an integer key.
for precinct_votes in (votes_08, votes_12, votes_16):
    precinct_votes["rn"] = precinct_votes["rn"].astype(int)


def _sum_by_tract(precinct_votes, column_pattern):
    """Sum the columns per ``rn`` and keep those whose name matches ``column_pattern``."""
    per_tract = precinct_votes.groupby("rn").sum()
    return per_tract.filter(regex = column_pattern)


# 3. Collapse precincts to one row per tract, keeping that year's D/R columns.
votes_08 = _sum_by_tract(votes_08, '[RD]08')
votes_12 = _sum_by_tract(votes_12, '[RD]12')
votes_16 = _sum_by_tract(votes_16, '[RD]16')

In [104]:
# Line the three per-tract tables up side by side on `rn`; a tract missing
# from one year's table gets 0 for that year's columns.
mn_votes = pd.concat((votes_08, votes_12, votes_16), axis = "columns")
mn_votes = mn_votes.fillna(0)

mn_votes.to_csv("mn_votes.csv")
mn_votes.head()

Unnamed: 0_level_0,D08,R08,D12,R12,D16,R16
rn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,692.0,769.0,651.0,789.0,474.0,913.0
1,624.0,539.0,614.0,515.0,402.0,746.0
2,706.0,793.0,668.0,777.0,512.0,908.0
3,937.0,839.0,924.0,796.0,644.0,975.0
4,625.0,592.0,534.0,580.0,334.0,686.0
