In [1]:
import json
import numpy as np
import pandas as pd
import psycopg2

# Connect to the PostgreSQL database

First, connect to the database.

In [2]:
with open("config.json") as f:
    conf = json.load(f)

In [3]:
conn_str = "host={} dbname={} user={} password={}".format(conf["host"], conf["database"], conf["user"], conf["password"])

In [4]:
conn = psycopg2.connect(conn_str)
conn.autocommit = True # Allow the notebook to commit transactions (like creating a table) to the connected database.

# Join the tables

Query the database to join the tables containing the areas and populations of census blocks. We'll also convert the land area field from square meters to square miles and then calculate the population per square mile for each block.

In [5]:
pd.read_sql("""CREATE TABLE IF NOT EXISTS block_pops AS
SELECT population.state AS state,
       population.county AS county,
       population.name AS block,
       population.population,
       censusblocks.aland10 AS sq_meters,
       censusblocks.aland10 * 0.000000386102 AS sq_miles,
       (population.population / NULLIF((censusblocks.aland10 * 0.000000386102), 0)) AS pop_per_sq_mile,
       population.statea AS state_fips,
       population.countya AS county_fips,
       population.tracta AS tract_fips,
       population.blocka AS block_fips,
       censusblocks.geoid10 AS full_fips
FROM population
INNER JOIN censusblocks ON population.statea = censusblocks.statefp10
AND population.countya = censusblocks.countyfp10
AND population.tracta = censusblocks.tractce10
AND population.blocka = censusblocks.blockce10;


SELECT *
FROM block_pops
LIMIT 5;""", con=conn)

Unnamed: 0,state,county,block,population,sq_meters,sq_miles,pop_per_sq_mile,state_fips,county_fips,tract_fips,block_fips,full_fips
0,Alabama,Baldwin County,Block 0001,0,0.0,0.0,,1,3,990000,1,10039900000001
1,Alabama,Mobile County,Block 0001,0,0.0,0.0,,1,97,990000,1,10979900000001
2,California,Alameda County,Block 0001,0,0.0,0.0,,6,1,990000,1,60019900000001
3,California,Contra Costa County,Block 0001,0,0.0,0.0,,6,13,990000,1,60139900000001
4,California,Del Norte County,Block 0001,0,0.0,0.0,,6,15,990000,1,60159900000001


Query the database to join the tables containing the areas and populations of census blocks with the 2012 broadband deployment data.

In [6]:
pd.read_sql("""CREATE TABLE IF NOT EXISTS deployment_pops_12 AS
SELECT deployment_2012.fullfipsid,
       block_pops.full_fips,
       block_pops.state,
       block_pops.county,
       block_pops.population,
       block_pops.sq_miles,
       block_pops.pop_per_sq_mile,
       count(DISTINCT frn) AS num_providers
FROM block_pops
INNER JOIN deployment_2012 ON block_pops.full_fips = deployment_2012.fullfipsid
WHERE deployment_2012.end_user_cat = '1'
  AND deployment_2012.provider_type = '1'
  AND (deployment_2012.maxaddown = '8'
       OR deployment_2012.maxaddown = '9'
       OR deployment_2012.maxaddown = '10'
       OR deployment_2012.maxaddown = '11')
  AND (deployment_2012.maxadup = '5'
       OR deployment_2012.maxadup = '6'
       OR deployment_2012.maxadup = '7'
       OR deployment_2012.maxadup = '8'
       OR deployment_2012.maxadup = '9'
       OR deployment_2012.maxadup = '10'
       OR deployment_2012.maxadup = '11')
  AND (deployment_2012.transtech = '10'
       OR deployment_2012.transtech = '11'
       OR deployment_2012.transtech = '20'
       OR deployment_2012.transtech = '30'
       OR deployment_2012.transtech = '40'
       OR deployment_2012.transtech = '41'
       OR deployment_2012.transtech = '50')
GROUP BY deployment_2012.fullfipsid,
         block_pops.full_fips,
         block_pops.state,
         block_pops.county,
         block_pops.population,
         block_pops.sq_miles,
         block_pops.pop_per_sq_mile;


SELECT *
FROM deployment_pops_12
LIMIT 5;""", con=conn)

Unnamed: 0,fullfipsid,full_fips,state,county,population,sq_miles,pop_per_sq_mile,num_providers
0,20200003001096,20200003001096,Alaska,Anchorage Municipality,0,0.33324,0.0,1
1,20200003001139,20200003001139,Alaska,Anchorage Municipality,0,0.058716,0.0,1
2,20200003001143,20200003001143,Alaska,Anchorage Municipality,0,0.050253,0.0,1
3,20200003001144,20200003001144,Alaska,Anchorage Municipality,1592,0.448081,3552.925798,1
4,20200003001145,20200003001145,Alaska,Anchorage Municipality,26,0.027289,952.754263,1


Query the database to join the tables containing the areas and populations of census blocks with the 2014 broadband deployment data.

In [7]:
pd.read_sql("""CREATE TABLE IF NOT EXISTS deployment_pops_14 AS
SELECT deployment_2014.blockcode,
       block_pops.full_fips,
       block_pops.state,
       block_pops.county,
       block_pops.population,
       block_pops.sq_miles,
       block_pops.pop_per_sq_mile,
       count(DISTINCT provider_id) AS num_providers
FROM block_pops
INNER JOIN deployment_2014 ON block_pops.full_fips = deployment_2014.blockcode
WHERE deployment_2014.consumer = '1'
  AND deployment_2014.maxaddown >= 25
  AND deployment_2014.maxadup >= 3
  AND (deployment_2014.techcode = '10'
       OR deployment_2014.techcode = '11'
       OR deployment_2014.techcode = '12'
       OR deployment_2014.techcode = '20'
       OR deployment_2014.techcode = '30'
       OR deployment_2014.techcode = '40'
       OR deployment_2014.techcode = '41'
       OR deployment_2014.techcode = '42'
       OR deployment_2014.techcode = '43'
       OR deployment_2014.techcode = '50')
GROUP BY deployment_2014.blockcode,
         block_pops.full_fips,
         block_pops.state,
         block_pops.county,
         block_pops.population,
         block_pops.sq_miles,
         block_pops.pop_per_sq_mile;


SELECT *
FROM deployment_pops_14
LIMIT 5;""", con=conn)

Unnamed: 0,blockcode,full_fips,state,county,population,sq_miles,pop_per_sq_mile,num_providers
0,10010201001000,10010201001000,Alabama,Autauga County,61,0.186344,327.352204,2
1,10010201001001,10010201001001,Alabama,Autauga County,0,0.000186,0.0,1
2,10010201001002,10010201001002,Alabama,Autauga County,0,0.001349,0.0,1
3,10010201001003,10010201001003,Alabama,Autauga County,75,0.197615,379.526373,1
4,10010201001004,10010201001004,Alabama,Autauga County,0,0.000289,0.0,1


Query the database to join the tables containing the areas and populations of census blocks with the 2016 broadband deployment data.

In [8]:
pd.read_sql("""CREATE TABLE IF NOT EXISTS deployment_pops_16 AS
SELECT deployment_2016.blockcode,
       block_pops.full_fips,
       block_pops.state,
       block_pops.county,
       block_pops.population,
       block_pops.sq_miles,
       block_pops.pop_per_sq_mile,
       count(DISTINCT provider_id) AS num_providers
FROM block_pops
INNER JOIN deployment_2016 ON block_pops.full_fips = deployment_2016.blockcode
WHERE deployment_2016.consumer = '1'
  AND deployment_2016.maxaddown >= 25
  AND deployment_2016.maxadup >= 3
  AND (deployment_2016.techcode = '10'
       OR deployment_2016.techcode = '11'
       OR deployment_2016.techcode = '12'
       OR deployment_2016.techcode = '20'
       OR deployment_2016.techcode = '30'
       OR deployment_2016.techcode = '40'
       OR deployment_2016.techcode = '41'
       OR deployment_2016.techcode = '42'
       OR deployment_2016.techcode = '43'
       OR deployment_2016.techcode = '50')
GROUP BY deployment_2016.blockcode,
         block_pops.full_fips,
         block_pops.state,
         block_pops.county,
         block_pops.population,
         block_pops.sq_miles,
         block_pops.pop_per_sq_mile;


SELECT *
FROM deployment_pops_16
LIMIT 5;""", con=conn)

Unnamed: 0,blockcode,full_fips,state,county,population,sq_miles,pop_per_sq_mile,num_providers
0,10010201001000,10010201001000,Alabama,Autauga County,61,0.186344,327.352204,2
1,10010201001001,10010201001001,Alabama,Autauga County,0,0.000186,0.0,1
2,10010201001002,10010201001002,Alabama,Autauga County,0,0.001349,0.0,1
3,10010201001003,10010201001003,Alabama,Autauga County,75,0.197615,379.526373,1
4,10010201001004,10010201001004,Alabama,Autauga County,0,0.000289,0.0,1


Query the database to join the 2012 and 2014 deployment and population tables.

In [9]:
pd.read_sql("""CREATE TABLE IF NOT EXISTS deployment_pops_12_14 as
SELECT deployment_pops_14.blockcode,
       deployment_pops_14.full_fips,
       deployment_pops_14.state,
       deployment_pops_14.county,
       deployment_pops_14.population,
       deployment_pops_14.sq_miles,
       deployment_pops_14.pop_per_sq_mile,
       deployment_pops_14.num_providers AS num_providers_2014,
       deployment_pops_12.num_providers AS num_providers_2012
FROM deployment_pops_14
LEFT JOIN deployment_pops_12 ON deployment_pops_14.blockcode = deployment_pops_12.full_fips
LIMIT 5;


SELECT *
FROM deployment_pops_12_14
LIMIT 5;""", con=conn)

Unnamed: 0,blockcode,full_fips,state,county,population,sq_miles,pop_per_sq_mile,num_providers_2014,num_providers_2012
0,10010201001002,10010201001002,Alabama,Autauga County,0,0.001349,0.0,1,
1,10010201002003,10010201002003,Alabama,Autauga County,0,0.045992,0.0,1,
2,10010201002030,10010201002030,Alabama,Autauga County,100,0.016882,5923.630981,2,
3,10010202001012,10010202001012,Alabama,Autauga County,248,0.109535,2264.112216,2,
4,10010202002010,10010202002010,Alabama,Autauga County,9,0.003643,2470.316084,2,


Query the database to join the 2014 and 2016 deployment and population tables.

In [10]:
pd.read_sql("""CREATE TABLE IF NOT EXISTS deployment_pops_14_16 as
SELECT deployment_pops_16.blockcode,
       deployment_pops_16.full_fips,
       deployment_pops_16.state,
       deployment_pops_16.county,
       deployment_pops_16.population,
       deployment_pops_16.sq_miles,
       deployment_pops_16.pop_per_sq_mile,
       deployment_pops_16.num_providers AS num_providers_2014,
       deployment_pops_14.num_providers AS num_providers_2012
FROM deployment_pops_16
LEFT JOIN deployment_pops_14 ON deployment_pops_16.blockcode = deployment_pops_14.full_fips
LIMIT 5;


SELECT *
FROM deployment_pops_14_16
LIMIT 5;""", con=conn)

Unnamed: 0,blockcode,full_fips,state,county,population,sq_miles,pop_per_sq_mile,num_providers_2014,num_providers_2012
0,10010201002003,10010201002003,Alabama,Autauga County,0,0.045992,0.0,1,1
1,10010203001006,10010203001006,Alabama,Autauga County,69,0.013661,5050.994969,2,2
2,10010204002022,10010204002022,Alabama,Autauga County,46,0.013075,3518.071814,2,2
3,10010208011056,10010208011056,Alabama,Autauga County,4,0.072149,55.440565,2,2
4,10010209004010,10010209004010,Alabama,Autauga County,0,0.032962,0.0,1,1
