In [15]:
import polars as pl
import pyarrow as pa
import pyarrow.parquet as pq
from census import Census
from us import states
import os
from dotenv import load_dotenv
# Load variables via python-dotenv, then read the Census API key from the
# process environment. `ckey` is None when CENSUS_KEY is not set.
load_dotenv()
ckey = os.getenv("CENSUS_KEY")

The [Census Python Package](https://pypi.org/project/census/), highlighted by [PyGIS](https://pygis.io/docs/d_access_census.html), is great for pulling data from the Census. It requires an [API key](http://api.census.gov/data/key_signup.html). You can read about the available [ACS variables](https://api.census.gov/data/2019/acs/acs5/variables.html) and look up the codes you need there; this notebook uses `B01003_001E` (total population).

In [16]:
# References:
#   PyGIS walkthrough of the census package: https://pygis.io/docs/d_access_census.html
#   ACS 5-year (2019) variable list:         https://api.census.gov/data/2019/acs/acs5/variables.html
# Census API client, authenticated with the key read from the environment.
c = Census(ckey)
# Show Idaho's state FIPS code (the value handed to the tract query as state_fips).
states.ID.fips


'16'

In [26]:
# Request NAME and B01003_001E (the population estimate) for every tract in
# every Idaho county from the 2019 ACS 5-year data.
idaho_pop = c.acs5.state_county_tract(
    fields=("NAME", "B01003_001E"),
    state_fips=states.ID.fips,
    county_fips="*",
    tract="*",
    year=2019,
)

# Concatenate the state, county and tract codes into one full tract identifier
# and keep only that identifier plus the population estimate.
id_df = pl.DataFrame(idaho_pop).select(
    (pl.col("state") + pl.col("county") + pl.col("tract")).alias("tract"),
    pl.col("B01003_001E").alias("population"),
)


In [34]:
# Preview the first five rows to check the tract IDs and population values.
id_df.head(5)

tract,population
str,f64
"""16019970700""",5589.0
"""16025970100""",1048.0
"""16027021700""",11701.0
"""16027020700""",3901.0
"""16027022100""",5059.0


In [28]:
# Summary statistics (count, nulls, mean, quartiles, min/max) for each column.
id_df.describe()

describe,tract,population
str,str,f64
"""count""","""298""",298.0
"""null_count""","""0""",0.0
"""mean""",,5764.261745
"""std""",,3837.701373
"""min""","""16001000100""",26.0
"""25%""",,3434.0
"""50%""",,4985.0
"""75%""",,6828.0
"""max""","""16087970300""",28516.0


In [31]:
# Full tract identifiers (state + county + tract code) used to subset id_df.
# Presumably the tracts covering Rexburg (all carry county code 065) -- verify.
rexburg_tracts = [
    "16065950100",
    "16065950200",
    "16065950400",
    "16065950301",
    "16065950500",
    "16065950302",
]
# Presumably the Coeur d'Alene area (all carry county code 055) -- TODO confirm.
courd_tracts = [
    "16055000402",
    "16055000401",
    "16055001200",
    "16055000900",
]

In [32]:
# Keep only the rows whose tract ID appears in rexburg_tracts.
id_df.filter(pl.col("tract").is_in(rexburg_tracts))

tract,population
str,f64
"""16065950200""",4602.0
"""16065950302""",13852.0
"""16065950301""",3208.0
"""16065950400""",6531.0
"""16065950100""",6035.0
"""16065950500""",4872.0


In [33]:
# Keep only the rows whose tract ID appears in courd_tracts.
id_df.filter(pl.col("tract").is_in(courd_tracts))

tract,population
str,f64
"""16055000900""",5978.0
"""16055001200""",6535.0
"""16055000401""",5808.0
"""16055000402""",8894.0


In [35]:
# Save the tract/population table as Parquet; the path is relative to the
# current working directory.
id_df.write_parquet("population.parquet")