In [1]:
import json
import numpy as np
import pandas as pd
import psycopg2

## Import and format the data

First, connect to the database.

In [2]:
# Read the database connection settings from the local JSON config file.
# The resulting dict is indexed by "host", "database", "user" and "password"
# when the connection string is built.
with open("config.json") as config_file:
    conf = json.load(config_file)

In [3]:
def _quote_dsn_value(value):
    """Return ``value`` as a single-quoted libpq connection-string value.

    In a keyword/value connection string, an empty value or one containing
    spaces must be wrapped in single quotes, and any backslash or single
    quote inside it must be backslash-escaped. Quoting every value
    unconditionally is accepted by libpq and keeps passwords containing
    such characters from corrupting the string.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return "'{}'".format(escaped)


# Build the keyword/value connection string from the loaded config.
# Same keywords and order as before; only the values are now quoted.
conn_str = "host={} dbname={} user={} password={}".format(
    *(_quote_dsn_value(conf[key]) for key in ("host", "database", "user", "password"))
)

In [4]:
# Open the PostgreSQL connection that every query cell below reuses.
conn = psycopg2.connect(dsn=conn_str)

Query the database to join the tables containing the areas and populations of census blocks.

In [19]:
# Build the per-block population/area table, then preview it.
#
# The DDL is executed on its own cursor and committed instead of being sent
# through pd.read_sql together with the preview SELECT: read_sql never commits,
# so the CREATE TABLE would stay inside an open transaction and be rolled back
# when the connection closes.
#
# Notes on the query itself:
# - 0.000000386102 is the factor applied to aland10 (aliased sq_meters) to
#   obtain sq_miles.
# - NULLIF(..., 0) turns a zero land area into NULL so the density division
#   yields NULL instead of raising a division-by-zero error.
# - The RIGHT JOIN keeps every censusblocks row, even when no population row
#   matches on the state/county/tract/block codes.
create_block_pops_sql = """CREATE TABLE IF NOT EXISTS block_pops AS
SELECT population.state AS state,
       population.county AS county,
       population.name AS BLOCK,
       population.population,
       censusblocks.aland10 AS sq_meters,
       censusblocks.aland10 * 0.000000386102 AS sq_miles,
       (population.population / NULLIF((censusblocks.aland10 * 0.000000386102), 0)) AS pop_per_sq_mile,
       population.statea AS state_fips,
       population.countya AS county_fips,
       population.tracta AS tract_fips,
       population.blocka AS blocks_fips,
       censusblocks.geoid10 AS full_fips
FROM population
RIGHT JOIN censusblocks ON population.statea = censusblocks.statefp10
AND population.countya = censusblocks.countyfp10
AND population.tracta = censusblocks.tractce10
AND population.blocka = censusblocks.blockce10;"""

# `with conn` commits on success and rolls back if the statement raises;
# it does not close the connection.
with conn:
    with conn.cursor() as cur:
        cur.execute(create_block_pops_sql)

# Preview the first rows; as the last expression this is the cell's output.
pd.read_sql("""SELECT *
FROM block_pops
LIMIT 5;""", con=conn)

Unnamed: 0,state,county,block,population,sq_meters,sq_miles,pop_per_sq_mile,state_fips,county_fips,tract_fips,blocks_fips,full_fips
0,Alabama,Autauga County,Block 1000,61,482628.0,0.186344,327.352204,1,1,20100,1000,10010201001000
1,Alabama,Autauga County,Block 1001,0,482.0,0.000186,0.0,1,1,20100,1001,10010201001001
2,Alabama,Autauga County,Block 1002,0,3495.0,0.001349,0.0,1,1,20100,1002,10010201001002
3,Alabama,Autauga County,Block 1003,75,511820.0,0.197615,379.526373,1,1,20100,1003,10010201001003
4,Alabama,Autauga County,Block 1004,0,748.0,0.000289,0.0,1,1,20100,1004,10010201001004


Query the database to join the tables containing the areas and populations of census blocks with the 2012 broadband deployment data.

In [None]:
# Build the 2012 table of distinct provider counts (count(DISTINCT frn)) per
# census block, then preview it.
#
# The DDL is executed on its own cursor and committed instead of being sent
# through pd.read_sql together with the preview SELECT: read_sql never commits,
# so the CREATE TABLE would stay inside an open transaction and be rolled back
# when the connection closes.
#
# NOTE(review): the WHERE clause filters on deployment_2012 columns, which are
# NULL for block_pops rows without a deployment match. Those rows are therefore
# discarded despite the FULL OUTER JOIN, so blocks with no qualifying provider
# are absent from the table rather than present with num_providers = 0.
# Confirm this is intended before relying on zero-provider counts.
# NOTE(review): the end_user_cat / provider_type / maxaddown / maxadup /
# transtech values are presumably category codes from the 2012 deployment
# data dictionary -- confirm their meaning against that source.
create_deployment_pops_12_sql = """CREATE TABLE IF NOT EXISTS deployment_pops_12 AS
SELECT deployment_2012.fullfipsid,
       block_pops.full_fips,
       block_pops.state,
       block_pops.county,
       block_pops.population,
       block_pops.sq_miles,
       block_pops.pop_per_sq_mile,
       count(DISTINCT frn) AS num_providers
FROM block_pops
FULL OUTER JOIN deployment_2012 ON block_pops.full_fips = deployment_2012.fullfipsid
WHERE deployment_2012.end_user_cat = '1'
  AND deployment_2012.provider_type = '1'
  AND (deployment_2012.maxaddown = '8'
       OR deployment_2012.maxaddown = '9'
       OR deployment_2012.maxaddown = '10'
       OR deployment_2012.maxaddown = '11')
  AND (deployment_2012.maxadup = '5'
       OR deployment_2012.maxadup = '6'
       OR deployment_2012.maxadup = '7'
       OR deployment_2012.maxadup = '8'
       OR deployment_2012.maxadup = '9'
       OR deployment_2012.maxadup = '10'
       OR deployment_2012.maxadup = '11')
  AND (deployment_2012.transtech = '10'
       OR deployment_2012.transtech = '11'
       OR deployment_2012.transtech = '20'
       OR deployment_2012.transtech = '30'
       OR deployment_2012.transtech = '40'
       OR deployment_2012.transtech = '41'
       OR deployment_2012.transtech = '50')
GROUP BY deployment_2012.fullfipsid,
         block_pops.full_fips,
         block_pops.state,
         block_pops.county,
         block_pops.population,
         block_pops.sq_miles,
         block_pops.pop_per_sq_mile;"""

# `with conn` commits on success and rolls back if the statement raises;
# it does not close the connection.
with conn:
    with conn.cursor() as cur:
        cur.execute(create_deployment_pops_12_sql)

# Preview the first rows; as the last expression this is the cell's output.
pd.read_sql("""SELECT *
FROM deployment_pops_12
LIMIT 5;""", con=conn)

Unnamed: 0,fullfipsid,full_fips,state,county,population,sq_miles,pop_per_sq_mile,num_providers
0,20200003001096,20200003001096,Alaska,Anchorage Municipality,0,0.33324,0.0,1
1,20200003001139,20200003001139,Alaska,Anchorage Municipality,0,0.058716,0.0,1
2,20200003001143,20200003001143,Alaska,Anchorage Municipality,0,0.050253,0.0,1
3,20200003001144,20200003001144,Alaska,Anchorage Municipality,1592,0.448081,3552.925798,1
4,20200003001145,20200003001145,Alaska,Anchorage Municipality,26,0.027289,952.754263,1


Query the database to join the tables containing the areas and populations of census blocks with the 2014 broadband deployment data.

In [None]:
# Build the 2014 table of distinct provider counts (count(DISTINCT provider_id))
# per census block, then preview it.
#
# The DDL is executed on its own cursor and committed instead of being sent
# through pd.read_sql together with the preview SELECT: read_sql never commits,
# so the CREATE TABLE would stay inside an open transaction and be rolled back
# when the connection closes.
#
# NOTE(review): the WHERE clause filters on deployment_2014 columns, which are
# NULL for block_pops rows without a deployment match. Those rows are therefore
# discarded despite the FULL OUTER JOIN, so blocks with no qualifying provider
# are absent from the table rather than present with num_providers = 0.
# Confirm this is intended before relying on zero-provider counts.
# NOTE(review): maxaddown >= 25 and maxadup >= 3 are presumably download and
# upload speed thresholds in Mbps, and techcode values technology category
# codes -- confirm against the 2014 deployment data dictionary.
create_deployment_pops_14_sql = """CREATE TABLE IF NOT EXISTS deployment_pops_14 AS
SELECT deployment_2014.blockcode,
       block_pops.full_fips,
       block_pops.state,
       block_pops.county,
       block_pops.population,
       block_pops.sq_miles,
       block_pops.pop_per_sq_mile,
       count(DISTINCT provider_id) AS num_providers
FROM block_pops
FULL OUTER JOIN deployment_2014 ON block_pops.full_fips = deployment_2014.blockcode
WHERE deployment_2014.consumer = '1'
  AND deployment_2014.maxaddown >= 25
  AND deployment_2014.maxadup >= 3
  AND (deployment_2014.techcode = '10'
       OR deployment_2014.techcode = '11'
       OR deployment_2014.techcode = '12'
       OR deployment_2014.techcode = '20'
       OR deployment_2014.techcode = '30'
       OR deployment_2014.techcode = '40'
       OR deployment_2014.techcode = '41'
       OR deployment_2014.techcode = '42'
       OR deployment_2014.techcode = '43'
       OR deployment_2014.techcode = '50')
GROUP BY deployment_2014.blockcode,
         block_pops.full_fips,
         block_pops.state,
         block_pops.county,
         block_pops.population,
         block_pops.sq_miles,
         block_pops.pop_per_sq_mile;"""

# `with conn` commits on success and rolls back if the statement raises;
# it does not close the connection.
with conn:
    with conn.cursor() as cur:
        cur.execute(create_deployment_pops_14_sql)

# Preview the first rows; as the last expression this is the cell's output.
pd.read_sql("""SELECT *
FROM deployment_pops_14
LIMIT 5;""", con=conn)

Query the database to join the tables containing the areas and populations of census blocks with the 2016 broadband deployment data.

In [None]:
# Build the 2016 table of distinct provider counts (count(DISTINCT provider_id))
# per census block, then preview it.
#
# The DDL is executed on its own cursor and committed instead of being sent
# through pd.read_sql together with the preview SELECT: read_sql never commits,
# so the CREATE TABLE would stay inside an open transaction and be rolled back
# when the connection closes.
#
# NOTE(review): the WHERE clause filters on deployment_2016 columns, which are
# NULL for block_pops rows without a deployment match. Those rows are therefore
# discarded despite the FULL OUTER JOIN, so blocks with no qualifying provider
# are absent from the table rather than present with num_providers = 0.
# Confirm this is intended before relying on zero-provider counts.
# NOTE(review): maxaddown >= 25 and maxadup >= 3 are presumably download and
# upload speed thresholds in Mbps, and techcode values technology category
# codes -- confirm against the 2016 deployment data dictionary.
create_deployment_pops_16_sql = """CREATE TABLE IF NOT EXISTS deployment_pops_16 AS
SELECT deployment_2016.blockcode,
       block_pops.full_fips,
       block_pops.state,
       block_pops.county,
       block_pops.population,
       block_pops.sq_miles,
       block_pops.pop_per_sq_mile,
       count(DISTINCT provider_id) AS num_providers
FROM block_pops
FULL OUTER JOIN deployment_2016 ON block_pops.full_fips = deployment_2016.blockcode
WHERE deployment_2016.consumer = '1'
  AND deployment_2016.maxaddown >= 25
  AND deployment_2016.maxadup >= 3
  AND (deployment_2016.techcode = '10'
       OR deployment_2016.techcode = '11'
       OR deployment_2016.techcode = '12'
       OR deployment_2016.techcode = '20'
       OR deployment_2016.techcode = '30'
       OR deployment_2016.techcode = '40'
       OR deployment_2016.techcode = '41'
       OR deployment_2016.techcode = '42'
       OR deployment_2016.techcode = '43'
       OR deployment_2016.techcode = '50')
GROUP BY deployment_2016.blockcode,
         block_pops.full_fips,
         block_pops.state,
         block_pops.county,
         block_pops.population,
         block_pops.sq_miles,
         block_pops.pop_per_sq_mile;"""

# `with conn` commits on success and rolls back if the statement raises;
# it does not close the connection.
with conn:
    with conn.cursor() as cur:
        cur.execute(create_deployment_pops_16_sql)

# Preview the first rows; as the last expression this is the cell's output.
pd.read_sql("""SELECT *
FROM deployment_pops_16
LIMIT 5;""", con=conn)