# pycen
Lightweight Python package for exploring and acquiring U.S. Census data with intuitive spatial integration.

In [1]:
import pycen
from pycen import explore, acquire

## 0. Configurations
- optionally set an API key for higher rate limits
- manage metadata and cache settings
- customize display for `explore` variable view
- general helper functions

### 0.1 API key
Access is anonymous by default. If you hit the rate limits, request a key and set it: https://api.census.gov/data/key_signup.html

In [2]:
import pycen
pycen.set_api_key("YOUR_KEY") # replace the "YOUR_KEY" placeholder with your own Census API key
pycen.get_api_key()           # returns the stored key so you can verify it was set

'YOUR_KEY'

### 0.2 metadata cache controls
customize cache for efficient spatial data flow

#### 0.2.1 metadata
The `explore` metadata cache is disabled by default. When enabled without a path, it uses `~/.pycen/metadata.db`.

In [3]:
import pycen
print(pycen.get_metadata_cache_settings()) # check the current setting; prints (False, None) while the cache is disabled
# if enabled with no path, you'll see (True, '<home>/.pycen/metadata.db')

(False, None)


In [4]:
pycen.enable_metadata_cache() # enable with the default path (~/.pycen/metadata.db); expect faster metadata browsing on subsequent runs

In [5]:
pycen.enable_metadata_cache('/tmp/meta.db') # enable with a custom cache file path, e.g. under the /tmp directory

In [6]:
pycen.disable_metadata_cache() # turn the cache off; entries already stored are not cleared

#### 0.2.2 cache
By default, `get` requests (boundaries, CSVs) are cached and saved under the project root for efficient local data processing.

##### 0.2.2.1 `cache_info` - check existing cache status

In [7]:
print(pycen.cache_info()) # summarize the cache: directory, whether it exists, file count, total size in bytes

{'cache_dir': 'pycen_cache', 'exists': True, 'file_count': 15, 'total_size_bytes': 50168987}


##### 0.2.2.2 `clear_cache` - manage cache for disk space

In [8]:
pycen.clear_cache()           # clear all cached files

In [9]:
pycen.clear_cache(which='csv')  # clear cached CSV files only
pycen.clear_cache(which='geo')  # clear cached boundary files only
pycen.clear_cache(cache_dir='/tmp/pycen_cache', which='all')  # full form: cache directory and file type both specified

##### 0.2.2.3 other cache calls

In [10]:
from pycen.utils.cache import get_cache_key
params = {"dataset": "acs5", "year": 2023, "geography": "tract", "state": "06"}
# Returns a deterministic 16-character cache key for the given params.
# Per the package notes, the key is an MD5 hash of the params JSON-encoded
# with sorted keys -- TODO confirm against pycen.utils.cache.
# Useful for higher-level cache management.
get_cache_key(params)

'5863fbfb8bfcc89a'

In [11]:
from pycen.utils.cache import get_cache_path
cache_dir = "./pycen_cache" # default cache directory
params = {"dataset": "acs5", "year": 2023, "geography": "tract", "state": "06", "county": "001"}
# "api" selects the subfolder that holds cached CSV responses (see the returned path);
# boundary geometries presumably use a different type name -- TODO confirm
get_cache_path(cache_dir, "api", params)

PosixPath('pycen_cache/api/acs5_2023_tract_06_001_0592007a883bff78.csv')

### 0.3 display theme
add intuitive visual aids for variable exploration (see `1_explore.ipynb` for the `explore` module)

#### 0.3.1 set a built-in theme

In [12]:
print(pycen.list_themes()) # list the names of the built-in themes

['default', 'digital_divide', 'humob', 'none', 'sdoh', 'snap']


In [13]:
print(pycen.get_theme_settings()) # show the active theme; 'default' is used when none has been set

default


In [14]:
from pycen import explore
print(explore.get_theme('default')) # show the 'default' theme definition: name, description, highlight_vars

{'name': 'default', 'description': 'General useful variables across domains.', 'highlight_vars': ['B01003_001E', 'B02001_002E', 'B02001_003E', 'B02001_005E', 'B03003_003E', 'B03003_002E', 'B09019_001E', 'B19013_001E', 'B19083_001E', 'B25002_001E', 'B25002_003E', 'B25064_001E', 'B25003_001E', 'B25003_002E', 'B25003_003E', 'B08101_049E']}


In [15]:
# select a built-in theme.
# e.g., 'sdoh' curates variables for contextualizing social determinants of health

pycen.set_theme('sdoh') # built-ins: 'default' (useful variables across topics),
                        #            'snap' (relevant to the SNAP food-stamp program),
                        #            'humob' (daily mobility and commutes),
                        #            'digital_divide' (internet access and computer ownership gaps),
                        #            'none' (no highlights, minimal)

#### 0.3.2 customize your own theme

In [16]:
# define a custom theme as a dict and activate it for this session
custom = {
    "name": "user-select",                              # any name you choose
    "description": "for personal use",                  # optional description
    "highlight_vars": ["B19013_001E", "B01003_001E"],   # variable codes to highlight
}
pycen.set_theme(custom)
# then view results with explore.search(...).show() or explore.browse(...).show()

### 0.4 general helper functions

In [17]:
pycen.get_product() # list the available data products and the years each covers

product  label          years                 desc                               
-------  -------------  --------------------  -----------------------------------
acs1     ACS 1-year     2005-2019, 2021-2023  annual; large geos only            
acs5     ACS 5-year     2009-2023             most commonly used                 
dec_pl   Decennial PL   2010, 2020            block-level counts                 
dec_sf1  Decennial SF1  2000, 2010            population + housing (discontinued)


In [18]:
pycen.get_geography() # list, per product, the lowest and the available geography levels

product  lowest              available (select)
-------  ------------------  ------------------
acs1     county subdivision  place, county subdivision, county, PUMA
                             congressional district, urban area, state, CBSA, CSA
                             division, region, us
acs5     block group         block group, tract, place, county subdivision, county
                             ZCTA, PUMA, congressional district, urban area, state
                             CBSA, CSA, division, region, us
dec_pl   block               block, block group, tract, place, ZCTA, county, state
dec_sf1  block               block, block group, tract, place, ZCTA, county, state
Note: full lists are available at https://api.census.gov/data/<year>/<dataset>/geography.html
