In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geoplot as gplt
import shapefile
import osr
import dbf
import requests
import io

from urllib.request import urlopen
from zipfile import ZipFile
from shapely.geometry import shape, Point, Polygon


%matplotlib inline

### Weighted Centroids of Several States

In [4]:
## Function definition: Read Blocks Shapefile within a State
def Blocks_Shapefile(doc_path):
    """Load a zipped block-level shapefile into a sorted GeoDataFrame.

    The ``.dbf``, ``.prj``, ``.shp`` and ``.shx`` members of the archive are
    read into memory, parsed with ``shapefile.Reader``, and the ``.prj``
    text is converted to a PROJ.4 string with ``osr`` to serve as the CRS.

    Parameters
    ----------
    doc_path
        ZIP archive to open; passed straight to ``ZipFile``.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per shape record with every attribute field as a column,
        ``INTPTLON10``/``INTPTLAT10`` converted to numeric, rows sorted by
        ``COUNTYFP10``, ``BLOCKCE10``, ``TRACTCE10`` and a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the archive does not hold exactly one member for each of the
        four required extensions.
    """
    required = ('dbf', 'prj', 'shp', 'shx')

    # The context manager guarantees the archive handle is released, even
    # when a member is missing or a read fails.
    with ZipFile(doc_path, 'r') as archive:
        member_names = archive.namelist()
        streams = {}
        for ext in required:
            # Look each component up by its own extension instead of relying
            # on the sorted member order lining up with the extension order.
            matches = [name for name in member_names
                       if name.lower().endswith('.' + ext)]
            if len(matches) != 1:
                raise ValueError(
                    "expected exactly one '.%s' member in %r, found %d"
                    % (ext, doc_path, len(matches)))
            streams[ext] = io.BytesIO(archive.read(matches[0]))

    reader = shapefile.Reader(shp=streams['shp'], shx=streams['shx'],
                              dbf=streams['dbf'])

    # The first entry of ``fields`` is skipped: in pyshp it is the
    # DeletionFlag marker, which has no counterpart in ``record``.
    field_names = [field[0] for field in reader.fields[1:]]
    records = reader.shapeRecords()
    geometry = [shape(rec.shape.__geo_interface__) for rec in records]
    attributes = [dict(zip(field_names, rec.record)) for rec in records]

    # Decode the projection definition and translate it to PROJ.4.
    prj_text = io.TextIOWrapper(streams['prj'], encoding='utf-8').read()
    proj4 = osr.SpatialReference(prj_text).ExportToProj4()

    gdf = gpd.GeoDataFrame(data=attributes, geometry=geometry, crs=proj4)
    coord_cols = ['INTPTLON10', 'INTPTLAT10']
    gdf[coord_cols] = gdf[coord_cols].apply(pd.to_numeric)
    gdf = gdf.sort_values(['COUNTYFP10', 'BLOCKCE10', 'TRACTCE10'])
    gdf = gdf.reset_index(drop=True)

    return gdf

## gdf01 = Blocks_Shapefile('/home/jinli/PycharmProjects/tl_2010_01_tabblock10.zip')

In [7]:
## Function definition: Read Population by Blocks within a State
def Blocks_Population(doc_path):
    """Read a per-block population CSV and derive a ``GEOID10`` column.

    The file is parsed with ``header=1``, i.e. the second line supplies the
    column names and the first line is skipped. ``GEOID10`` is each ``id``
    value with its first nine characters removed.

    Parameters
    ----------
    doc_path
        CSV source; passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``Geographic Area Name``, ``GEOID10``, ``Total``,
        in that order.
    """
    wanted = ['id', 'Geographic Area Name', 'GEOID10', 'Total']

    table = pd.read_csv(doc_path, header=1)
    # Strip the nine-character prefix from every id to obtain the block GEOID.
    geoid = table['id'].map(lambda ident: ident[9:])
    table = table.assign(GEOID10=geoid)

    return table[wanted]

## pop01 = Blocks_Population('/home/jinli/PycharmProjects/DECENNIALSF12010.P1_data_State_01.csv')

In [14]:
## Function definition: Read County Shapefile of USA
def USA_County_Shapefile(doc_path):
    """Load a zipped county shapefile and drop three state codes.

    The ``.dbf``, ``.prj``, ``.shp`` and ``.shx`` members of the archive are
    read into memory, parsed with ``shapefile.Reader``, and the ``.prj``
    text is converted to a PROJ.4 string with ``osr`` to serve as the CRS.

    Parameters
    ----------
    doc_path
        ZIP archive to open; passed straight to ``ZipFile``.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per shape record, sorted by ``STATEFP10`` then
        ``COUNTYFP10``, with ``INTPTLON10``/``INTPTLAT10`` converted to
        numeric and rows whose ``STATEFP10`` is ``'02'``, ``'15'`` or
        ``'72'`` removed. The index is reset *before* that filter, so the
        result keeps the pre-filter positions as labels (with gaps where
        rows were dropped).

    Raises
    ------
    ValueError
        If the archive does not hold exactly one member for each of the
        four required extensions.
    """
    required = ('dbf', 'prj', 'shp', 'shx')
    # Presumably Alaska, Hawaii and Puerto Rico per Census FIPS state
    # codes -- confirm against the intended study area.
    excluded_states = ['02', '15', '72']

    # The context manager guarantees the archive handle is released, even
    # when a member is missing or a read fails.
    with ZipFile(doc_path, 'r') as archive:
        member_names = archive.namelist()
        streams = {}
        for ext in required:
            # Look each component up by its own extension instead of relying
            # on the sorted member order lining up with the extension order.
            matches = [name for name in member_names
                       if name.lower().endswith('.' + ext)]
            if len(matches) != 1:
                raise ValueError(
                    "expected exactly one '.%s' member in %r, found %d"
                    % (ext, doc_path, len(matches)))
            streams[ext] = io.BytesIO(archive.read(matches[0]))

    reader = shapefile.Reader(shp=streams['shp'], shx=streams['shx'],
                              dbf=streams['dbf'])

    # The first entry of ``fields`` is skipped: in pyshp it is the
    # DeletionFlag marker, which has no counterpart in ``record``.
    field_names = [field[0] for field in reader.fields[1:]]
    records = reader.shapeRecords()
    geometry = [shape(rec.shape.__geo_interface__) for rec in records]
    attributes = [dict(zip(field_names, rec.record)) for rec in records]

    # Decode the projection definition and translate it to PROJ.4.
    prj_text = io.TextIOWrapper(streams['prj'], encoding='utf-8').read()
    proj4 = osr.SpatialReference(prj_text).ExportToProj4()

    gdf = gpd.GeoDataFrame(data=attributes, geometry=geometry, crs=proj4)
    gdf = gdf.sort_values(by=['STATEFP10', 'COUNTYFP10'])
    gdf = gdf.reset_index(drop=True)
    coord_cols = ['INTPTLON10', 'INTPTLAT10']
    gdf[coord_cols] = gdf[coord_cols].apply(pd.to_numeric)

    # Filter last, after the index reset, so index labels match the
    # original function's output.
    gdf = gdf[~gdf.STATEFP10.isin(excluded_states)]

    return gdf

## allcounties = USA_County_Shapefile('/home/jinli/PycharmProjects/tl_2010_us_county10(NEW).zip')