In [1]:
import census_mapper
from zipfile import ZipFile
import io
import shapefile
from shapely.geometry import shape
import osr
import geopandas as gpd
import matplotlib.pyplot as plt
import csv
import numpy as np
import folium
import random
import pysal

In [2]:
# Load the block-group shapefile from tl_2015_13_bg.zip into a GeoDataFrame
# (`blockgdf`) without unpacking the archive to disk.
#
# The archive is opened in a `with` block so the file handle is released as
# soon as the four components have been copied into memory, and the
# components are looked up by extension instead of relying on the sorted
# member order happening to be dbf, prj, shp, shx.
with ZipFile('tl_2015_13_bg.zip') as blocks:
    filenames = [name for name in sorted(blocks.namelist())
                 if name.endswith(('dbf', 'prj', 'shp', 'shx'))]
    members = {}
    for filename in filenames:
        ext = filename[-3:]
        if ext in members:
            raise ValueError('more than one %s member in tl_2015_13_bg.zip' % ext)
        members[ext] = io.BytesIO(blocks.read(filename))
dbf, prj, shp, shx = [members[ext] for ext in ('dbf', 'prj', 'shp', 'shx')]

r = shapefile.Reader(shp=shp, shx=shx, dbf=dbf)

# Build one attribute dict and one shapely geometry per record.
# r.fields[0] is skipped so the remaining names line up with row.record.
attributes, geometry = [], []
field_names = [field[0] for field in r.fields[1:]]
for row in r.shapeRecords():
    geometry.append(shape(row.shape.__geo_interface__))
    attributes.append(dict(zip(field_names, row.record)))

# Translate the .prj contents (WKT text) into a proj4 string for geopandas.
proj4_string = osr.SpatialReference(prj.read().decode('UTF-8')).ExportToProj4()
print(proj4_string)

blockgdf = gpd.GeoDataFrame(data = attributes, geometry = geometry, crs = proj4_string)
print(blockgdf.head())

+proj=longlat +ellps=GRS80 +no_defs 
     ALAND  AWATER BLKGRPCE COUNTYFP FUNCSTAT         GEOID     INTPTLAT  \
0  2213449   79182        2      089        S  130890212022  +33.9151116   
1   412353       0        1      089        S  130890212041  +33.8884823   
2  1124764       0        2      089        S  130890212082  +33.9104582   
3  1000826       0        3      021        S  130210131023  +32.7925035   
4   807552       0        4      021        S  130210131024  +32.7897447   

       INTPTLON  MTFCC       NAMELSAD STATEFP TRACTCE  \
0  -084.3275107  G5030  Block Group 2      13  021202   
1  -084.2911951  G5030  Block Group 1      13  021204   
2  -084.3053062  G5030  Block Group 2      13  021208   
3  -083.6973228  G5030  Block Group 3      13  013102   
4  -083.7020427  G5030  Block Group 4      13  013102   

                                            geometry  
0  POLYGON ((-84.338275 33.918159, -84.337988 33....  
1  POLYGON ((-84.294645 33.891111, -84.294631 33.... 

In [3]:
# Build `blocks`: a dict keyed by block-group id whose value is the list
#   [median income, population estimate, white, black, other,
#    male, female, young, mid, old]
# assembled from Income.csv, Race.csv and Age.csv. Only ids present in
# Income.csv are kept; rows for other ids in Race.csv / Age.csv are ignored.


def _to_int(value):
    """Convert one raw table cell to an int.

    Thousands separators and a "+" suffix are removed. A value that is only
    "-" (or empty) is treated as missing and becomes 0. A trailing "-" is
    dropped rather than turned into a digit, so "2,500-" is read as 2500.

    NOTE(review): assumes the median column uses "+" / "-" suffixes for
    top/bottom-coded values (e.g. "250,000+", "2,500-") -- confirm against
    Income.csv.
    """
    text = str(value).strip().replace(",", "").replace("+", "")
    if text.endswith("-"):
        text = text[:-1]
    return int(text) if text else 0


blocks = {}
with open("Income.csv", "r") as csvfile:
    for row in csv.DictReader(csvfile):
        blocks[row['Id2']] = [row['Median']]
print(len(blocks))

with open("Race.csv", "r") as csvfile:
    for row in csv.DictReader(csvfile):
        if row['Id2'] in blocks:
            blocks[row['Id2']] += [row['Estimate'], row['White'], row['Black'], row['Other']]

# Column names HD01_VD00 .. HD01_VD49 of the age table, indexed by number.
ageids = ['HD01_VD%02d' % n for n in range(50)]

# Each age band sums two slices of columns: one from the 02-25 range and the
# matching one from the 26-49 range (HD01_VD02 and HD01_VD26 are read as the
# male and female totals below).
young_cols = ageids[7:13] + ageids[31:37]
mid_cols = ageids[13:20] + ageids[37:44]
old_cols = ageids[20:26] + ageids[44:]

with open("Age.csv", "r") as csvfile:
    for row in csv.DictReader(csvfile):
        geoid = row['GEO.id2']
        if geoid in blocks:
            male = int(row['HD01_VD02'])
            female = int(row['HD01_VD26'])
            young = sum(int(row[col]) for col in young_cols)
            mid = sum(int(row[col]) for col in mid_cols)
            old = sum(int(row[col]) for col in old_cols)
            blocks[geoid] += [male, female, young, mid, old]

# Income and race values are still raw strings at this point; normalise every
# entry to int (the age values are already ints and pass through unchanged).
for block in blocks:
    blocks[block] = [_to_int(ent) for ent in blocks[block]]

909


In [19]:
# Attach the demographic features to `blockgdf`, project the poll cross-tabs
# onto every block group, and select the block groups to map (`blockgo`).
#
# NOTE: this cell reads `blocks` (built from the CSV files) and `atlgdf`
# (the city-limits GeoDataFrame). `atlgdf` is created in a cell that appears
# further down the notebook, so that cell must be run before this one.

feats = ['INCOME', 'POP', 'WHITE', 'BLACK', 'OTHER_RACE', 'MALE', 'FEMALE', 'YOUNG', 'MIDAGE', 'OLD']

# One row of feature values per block group, in blockgdf order. Block groups
# with no record -- or with an incomplete one, which would otherwise make the
# array ragged -- get all zeros.
arr = []
for geoid in blockgdf.GEOID:
    record = blocks.get(str(geoid))
    if record is None or len(record) != len(feats):
        record = [0] * len(feats)
    arr.append(record)
arr = np.asarray(arr)

pop = arr[:, 1]
populated = pop != 0
area = blockgdf.ALAND + blockgdf.AWATER

for i, feat in enumerate(feats):
    blockgdf[feat] = arr[:, i]
    # "<feat>A": value divided by total (land + water) area.
    blockgdf[feat+"A"] = arr[:, i]/area
    if i > 1:
        # "<feat>P": value as a fraction of POP; 0 where POP is 0.
        blockgdf[feat+"P"] = np.divide(arr[:, i], pop,
                                       out=np.zeros(len(arr)), where=populated)

# poll.csv: one row per candidate, each with an overall figure followed by
# the figures for the three age bands and the three race groups.
poll_columns = ['Total', 'Young', 'Mid', 'Old', 'White', 'Black', 'Other']
candidates = {}
with open("poll.csv", "r") as csvfile:
    for row in csv.DictReader(csvfile):
        candidates[row['Candidate']] = [float(row[col]) for col in poll_columns]

# The 'Weight' row is rescaled relative to its own first entry and printed;
# it is not treated as a candidate below.
candidates['Weight'] = [c/candidates['Weight'][0] for c in candidates['Weight']]
print(candidates['Weight'])

young, mid, old = [blockgdf[col].values for col in ('YOUNG', 'MIDAGE', 'OLD')]
white, black, other = [blockgdf[col].values for col in ('WHITE', 'BLACK', 'OTHER_RACE')]

for name, figures in candidates.items():
    if name == "Weight":
        continue
    print(name + str(figures))
    # Two independent estimates per block group: one weighting the age bands
    # by the candidate's age figures, one weighting the race groups by the
    # race figures. Each is divided by POP (left at 0 where POP is 0).
    age_total = figures[1]*young + figures[2]*mid + figures[3]*old
    race_total = figures[4]*white + figures[5]*black + figures[6]*other
    age_share = np.divide(age_total, pop, out=np.zeros(len(arr)), where=populated)
    race_share = np.divide(race_total, pop, out=np.zeros(len(arr)), where=populated)
    # /200 = mean of the two estimates, then /100 (the poll figures look like
    # percentages -- presumably this rescales to a 0-1 fraction).
    blockgdf[name] = (age_share + race_share)/200

# Keep populated block groups that touch the city-limits polygon.
blockgo = blockgdf[blockgdf.geometry.intersects(atlgdf.geometry[0])]
blockgo = blockgo[blockgo.POP > 0]
# Drop one hand-picked block group (reason not recorded here). Compare as
# text: if GEOID holds strings (as the zero-padded sibling columns of the
# same table suggest), comparing against an integer literal is always
# "not equal" and would exclude nothing.
blockgo = blockgo[blockgo.GEOID.astype(str) != '131210103042']



[1.0, 0.19416666666666665, 0.5041666666666667, 0.3016666666666667, 0.46166666666666667, 0.4841666666666667, 0.05416666666666667]
Mary Norwood[28.6, 21.5, 30.2, 30.4, 38.5, 20.5, 16.9]
Vincent Fort[9.3, 13.9, 8.3, 8.0, 5.1, 13.3, 7.7]
Ceasar Mitchell[8.0, 7.8, 9.3, 6.4, 4.9, 10.3, 13.8]
John Eaves[3.8, 5.2, 3.1, 3.9, 3.6, 4.1, 3.1]
Kwanza Hall[5.8, 4.8, 7.8, 3.0, 6.3, 4.8, 9.2]
Cathy Woolard[6.1, 7.8, 6.1, 4.7, 10.8, 1.6, 6.2]
Keisha Lance-Bottoms[8.6, 7.8, 7.4, 11.0, 3.3, 14.3, 4.6]
Peter Aman[1.8, 0.0, 3.0, 1.1, 3.8, 0.0, 0.0]
Undecided[28.0, 31.2, 24.8, 31.5, 23.7, 31.1, 38.5]
Not Mary[71.4, 78.5, 69.8, 69.6, 61.5, 79.5, 83.1]
Not Mary of Decided[61.82336182, 68.75, 62.32686981, 56.52818991, 52.13793103, 70.2467344, 72.5203252]
Fort of Not Mary[13.02521008, 17.70700637, 11.89111748, 11.49425287, 8.292682927, 16.72955975, 9.265944645]
Black of Decided[49.30555556, 57.4127907, 47.7393617, 47.15328467, 30.40629096, 67.9245283, 62.43902439]
Fort of Black Decided[18.86197183, 24.21063291,

In [6]:
# Load the shapefile in Atlanta_City_Limits.zip into a GeoDataFrame
# (`atlgdf`) without unpacking the archive to disk.
#
# The archive is opened in a `with` block so the file handle is released as
# soon as the four components have been copied into memory, and the
# components are looked up by extension instead of relying on the sorted
# member order happening to be dbf, prj, shp, shx.
with ZipFile('Atlanta_City_Limits.zip') as atl:
    filenames = [name for name in sorted(atl.namelist())
                 if name.endswith(('dbf', 'prj', 'shp', 'shx'))]
    members = {}
    for filename in filenames:
        ext = filename[-3:]
        if ext in members:
            raise ValueError('more than one %s member in Atlanta_City_Limits.zip' % ext)
        members[ext] = io.BytesIO(atl.read(filename))
dbf, prj, shp, shx = [members[ext] for ext in ('dbf', 'prj', 'shp', 'shx')]

r = shapefile.Reader(shp=shp, shx=shx, dbf=dbf)

# Build one attribute dict and one shapely geometry per record.
# r.fields[0] is skipped so the remaining names line up with row.record.
attributes, geometry = [], []
field_names = [field[0] for field in r.fields[1:]]
for row in r.shapeRecords():
    geometry.append(shape(row.shape.__geo_interface__))
    attributes.append(dict(zip(field_names, row.record)))

# Translate the .prj contents (WKT text) into a proj4 string for geopandas.
proj4_string = osr.SpatialReference(prj.read().decode('UTF-8')).ExportToProj4()
print(proj4_string)

atlgdf = gpd.GeoDataFrame(data = attributes, geometry = geometry, crs = proj4_string)
print(atlgdf.head())

+proj=longlat +ellps=WGS84 +no_defs 
     ACRES                                GLOBALID  ID    LABEL  OBJECTID  \
0  86643.1  {6999F328-8F07-4EE0-BF8C-4DB531A3898C}   0  Atlanta         1   

   OBJECTID_1    SHAPE_Area     SHAPE_Leng  SQ_MILES  \
0           1  5.087264e+08  195314.168371    135.38   

                                            geometry  
0  (POLYGON ((-84.42135081494511 33.6588738469827...  


In [24]:
# Interactive map: shade each selected block group (`blockgo`) by one of the
# per-candidate columns and draw the city-limits outline on top.
m = folium.Map(location=[33.5, -84.4], zoom_start=10,
               tiles='Stamen Toner')

# Column of blockgo to shade by. The legend is labelled with the same name so
# it always describes what is actually being plotted.
plot_column = 'Undecided'

geo_str = blockgo.to_json()

# GeoJSON features are joined to the data rows on GEOID.
m.choropleth(geo_str=geo_str,
             data=blockgo, columns=['GEOID', plot_column],
             key_on='feature.properties.GEOID',
             fill_color="BuGn",
             fill_opacity=0.8, line_opacity=0.8,
             legend_name=plot_column)

# Second layer: the city-limits geometry, outline only (no fill).
atlgeo = atlgdf.to_json()
m.choropleth(geo_str=atlgeo, fill_color='None', line_color='black', line_weight=2)

m

In [None]:
# Static matplotlib view of the selected block groups, coloured by the
# quantile class of their INCOME column, with the city-limits geometry
# overlaid as a dashed, semi-transparent outline.
plt.clf()

fig, ax = plt.subplots(figsize=(10, 10))

# Scratch column: one random integer in [0, 10] per row of blockgdf.
# The plot below does not read it.
n_rows = len(blockgdf)
blockgdf['rand'] = [random.randint(0, 10) for _ in range(n_rows)]

blockgo.plot(
    ax=ax,
    column='INCOME',
    edgecolor='grey',
    scheme='QUANTILES',
    legend=True,
)
atlgdf.plot(ax=ax, facecolor='none', alpha=0.4, ls='--')

# Fixed viewport (x range, then y range) applied to the axes drawn above.
ax.set_xlim((-84.58, -84.28))
ax.set_ylim((33.62, 33.92))

plt.show()