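To generate the CSV files for the decennial datasets needed for this notebook, run this SQL query once against the 2020 table and once against the 2010 table (changing the table name in the `from` clause), saving the results as `decennial_2020.csv` and `decennial_2010.csv`: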

```
-- Returns the listed decennial variables in long form (variable, value, geoid) together with each geoid's geotype
select d.variable, d.value, d.geoid, g.geotype from decennial."2020" d
-- support_geoids supplies the geotype that the final filter uses to keep NTA rows
join support_geoids g
on d.geoid = g.geoid
where d.variable in ('popu18_1','popu18_1p','pop1','popperacre','wnh','bnh','anh','hsp1','wnhp','bnhp','anhp','hsp1p')
-- NOTE(review): '%NTA%' matches any geotype containing "NTA", while the ACS query in this notebook uses 'NTA%' - confirm the looser pattern is intended
and g.geotype like '%NTA%'
```

In [39]:
import pandas as pd
import geopandas as gp

In [40]:
# Load the long-format decennial extracts (one CSV per census year) produced by the SQL query above
ten = pd.read_csv('decennial_2010.csv')
twenty = pd.read_csv('decennial_2020.csv')


In [41]:
# Reshape each year from long to wide: one row per geoid, one column per variable.
# pivot_table aggregates with its default (mean) if a geoid/variable pair occurs more than once.
ten_pivot = ten.pivot_table(index=['geoid'], columns='variable', values='value').reset_index()
twenty_pivot = twenty.pivot_table(index=['geoid'], columns='variable', values='value').reset_index()


In [42]:
# Decennial variables requested by the SQL query above; each one gets a year suffix before the 2010/2020 merge
base_variables = [
    'popu18_1', 'popu18_1p', 'pop1', 'popperacre',
    'wnh', 'bnh', 'anh', 'hsp1',
    'wnhp', 'bnhp', 'anhp', 'hsp1p',
]


In [43]:
# Lookup tables from each base variable name to its year-suffixed column name, one per census year
twenty_column_name_map = {name: name + '_2020' for name in base_variables}
ten_column_name_map = {name: name + '_2010' for name in base_variables}

# Show the 2020 mapping as the cell output
twenty_column_name_map

{'popu18_1': 'popu18_1_2020',
 'popu18_1p': 'popu18_1p_2020',
 'pop1': 'pop1_2020',
 'popperacre': 'popperacre_2020',
 'wnh': 'wnh_2020',
 'bnh': 'bnh_2020',
 'anh': 'anh_2020',
 'hsp1': 'hsp1_2020',
 'wnhp': 'wnhp_2020',
 'bnhp': 'bnhp_2020',
 'anhp': 'anhp_2020',
 'hsp1p': 'hsp1p_2020'}

In [44]:
# Suffix every variable column with its census year so the 2010 and 2020 frames can be merged without name clashes
twenty_pivot = twenty_pivot.rename(columns=twenty_column_name_map)
ten_pivot = ten_pivot.rename(columns=ten_column_name_map)

In [45]:
# Inner-join the 2010 and 2020 wide tables on geoid so each remaining row carries both years' columns
data = pd.merge(ten_pivot, twenty_pivot, on="geoid", how="inner")

In [46]:
# Subset of the base variables for which change over time is computed (the variables ending in `p` are left out)
count_variables = [
    'popu18_1',
    'pop1',
    'popperacre',
    'wnh',
    'bnh',
    'anh',
    'hsp1',
]

In [47]:
# Absolute change per variable: the 2020 value minus the 2010 value, stored in a new `<variable>_c` column
for name in count_variables:
    data[name + "_c"] = data[name + "_2020"].sub(data[name + "_2010"])

In [48]:
# Percent change per variable relative to its 2010 value, stored in a new `<variable>_pc` column.
# A 2010 value of 0 has no defined percent change, so the zero bases are masked to NaN first;
# a plain division would otherwise produce +/-inf for those rows.
for variable in count_variables:
    base_2010 = data[variable + "_2010"]
    data[variable + "_pc"] = data[variable + "_c"] / base_2010.where(base_2010 != 0) * 100

To generate the ACS dataset needed for this notebook, run this query and save the result as `acs.csv`:

```
-- Builds one wide result with a column per ACS value by LEFT JOINing one subquery per variable
-- onto the popu181 subquery, matching on geoid. Every subquery reads acs."2019" and keeps only
-- rows whose geotype starts with 'NTA'.
-- Columns aliased from `e` keep the variable name; columns aliased from `p` get a `_p` suffix
-- (presumably estimate and percent - confirm against the acs table definition).
SELECT 
 _popu181.geoid,
 support_geoids."label",
 _popu181.popu181,
 _mdgr.mdgr,
 _pbwpv.pbwpv,
 _pbwpv.pbwpv_p,
 _lgoenlep1.lgoenlep1,
 _fb1.fb1_p,
 _ea_bchdh.ea_bchdh,
 _ea_bchdh.ea_bchdh_p,
 _pop65pl1.pop65pl1
FROM (
 SELECT geoid, e as popu181
 FROM acs."2019"
 WHERE geotype LIKE 'NTA%'
 AND variable = 'popu181'
) _popu181
LEFT JOIN (
 SELECT geoid, e as mdgr
 FROM acs."2019"
 WHERE geotype LIKE 'NTA%'
 AND variable = 'mdgr'
) _mdgr ON _popu181.geoid = _mdgr.geoid
LEFT JOIN (
 SELECT geoid, e as pbwpv, p as pbwpv_p
 FROM acs."2019"
 WHERE geotype LIKE 'NTA%'
 AND variable = 'pbwpv'
) _pbwpv ON _popu181.geoid = _pbwpv.geoid
LEFT JOIN (
 SELECT geoid, e as lgoenlep1
 FROM acs."2019"
 WHERE geotype LIKE 'NTA%'
 AND variable = 'lgoenlep1'
) _lgoenlep1 ON _popu181.geoid = _lgoenlep1.geoid
LEFT JOIN (
 SELECT geoid, p as fb1_p
 FROM acs."2019"
 WHERE geotype LIKE 'NTA%'
 AND variable = 'fb1'
) _fb1 ON _popu181.geoid = _fb1.geoid
LEFT JOIN (
 SELECT geoid, e as ea_bchdh, p as ea_bchdh_p
 FROM acs."2019"
 WHERE geotype LIKE 'NTA%'
 AND variable = 'ea_bchdh'
) _ea_bchdh ON _popu181.geoid = _ea_bchdh.geoid
LEFT JOIN (
 SELECT geoid, e as pop65pl1
 FROM acs."2019"
 WHERE geotype LIKE 'NTA%'
 AND variable = 'pop65pl1'
) _pop65pl1 ON _popu181.geoid = _pop65pl1.geoid
-- support_geoids provides the "label" column selected above
LEFT JOIN support_geoids
ON _popu181.geoid = support_geoids.geoid
-- This filter removes rows that found no support_geoids match (their geotype is NULL),
-- so the LEFT JOIN directly above effectively behaves as an inner join
WHERE support_geoids.geotype LIKE 'NTA%';
```

In [49]:
# Load the ACS extract (the SQL above already shapes it for us) and inner-join it onto the decennial data by geoid
acs = pd.read_csv('acs.csv')
data = pd.merge(data, acs, on="geoid")

In [51]:
# Keep only the 2020 snapshot: discard the year-suffixed 2010 columns, then strip the `_2020`
# suffix by inverting the 2020 name map. The `_c` and `_pc` change columns are not affected.
data = (
    data
    .drop(columns=list(ten_column_name_map.values()))
    .rename(columns={suffixed: base for base, suffixed in twenty_column_name_map.items()})
)

In [52]:
from shapely import wkt

Run this query against the NTA geographies in PostGIS and save the result as `nta_boundaries.csv` to generate a geopandas-compatible dataset:
```
-- ST_AsText returns each geometry as WKT text, which the notebook later parses with shapely's wkt.loads
select ST_AsText(wkb_geometry) as geometry, nta2020, ntaname from dcp_ntaboundaries
```

In [53]:
# Load the NTA boundary export and turn it into a GeoDataFrame keyed by geoid
_boundaries = pd.read_csv('nta_boundaries.csv')
# The geometry column holds WKT text (from ST_AsText in the query above); parse it into shapely geometries
geometry = _boundaries['geometry'].map(wkt.loads)
# Rename nta2020 to geoid so the key matches `data`, and remove the raw WKT column
_boundaries = _boundaries.rename(columns={'nta2020': 'geoid'}).drop(columns='geometry')
# NOTE(review): EPSG:4326 assumes the exported geometries are lon/lat - confirm against the PostGIS table
ntas = gp.GeoDataFrame(_boundaries, geometry=geometry, crs="EPSG:4326")

In [54]:
# Attach the decennial/ACS attributes to each NTA geometry; the inner join keeps only geoids present in both frames
ntas = ntas.merge(right=data, how='inner', on='geoid')

In [56]:
# Write the fully formed GeoDataFrame out as GeoJSON. The contents of this file can be copied and
# pasted into the `data` property of the json found in `/data/sources`
ntas.to_file(filename='ntas.json', driver='GeoJSON')