In [1]:
import censusdis.data as ced
import pandas as pd
import string

In [3]:
pd.set_option('display.max_colwidth', None)

## Census categories

Here are some common census categories or classifications:

Demographic Characteristics:

* Age
* Sex
* Race
* Ethnicity (Hispanic or Latino origin)
* Marital status
* Household relationship

Economic Characteristics:

* Employment status
* Industry and occupation
* Income and earnings
* Poverty status

Social Characteristics:

* Educational attainment
* School enrollment
* Language spoken at home
* Ancestry
* Disability status
* Migration/Residence one year ago

Housing Characteristics:

* Housing occupancy (owned or rented)
* Housing value
* Rent
* Number of rooms and bedrooms
* Year structure built
* Plumbing and kitchen facilities

Geographical Characteristics:

* Urban and rural classification
* Geographic regions and divisions
* States, counties, and cities
* Census tracts and blocks

Family and Household Characteristics:

* Family composition
* Household size and type
* Presence of children
* Presence of elderly

In [4]:
def _value_for_year(value, year, name):
    """Return ``value[year]`` when ``value`` is a dict, ``value`` itself when it is a str."""
    if isinstance(value, dict):
        return value[year]
    if isinstance(value, str):
        return value
    # Previously an unsupported type left the per-year name unbound (or stale
    # from the previous iteration); fail loudly instead.
    raise TypeError(
        f'{name} must be a str or a dict keyed by year, got {type(value).__name__}'
    )


def validate_xyear(var_variable, var_group, var_dataset, var_years):
    """Collect the distinct normalized labels of one census variable across years.

    For every year in ``var_years`` the variable table of the group is fetched
    with ``ced.variables.all_variables(dataset, year, group)`` and the rows whose
    ``VARIABLE`` equals the requested code are kept. The ``LABEL`` values of the
    kept rows are normalized (punctuation replaced by spaces, runs of spaces
    collapsed, leading/trailing spaces removed, lower-cased) and de-duplicated.

    Parameters
    ----------
    var_variable : str or dict
        Variable code, or a dict mapping each year to its variable code.
    var_group : str or dict
        Group name, or a dict mapping each year to its group name.
    var_dataset : str or dict
        Dataset name, or a dict mapping each year to its dataset name.
    var_years : iterable
        Years to check.

    Returns
    -------
    list
        The distinct normalized labels in order of first appearance. A single
        element means the label is the same in every year. An empty list is
        returned when ``var_years`` is empty, or when the variable is missing
        in some year (a message naming that year is printed).

    Raises
    ------
    TypeError
        If ``var_variable``, ``var_group`` or ``var_dataset`` is neither a str
        nor a dict.
    KeyError
        If one of them is a dict without an entry for a requested year.
    """
    # Maps every punctuation character to a space in a single pass; this avoids
    # depending on the default of ``regex`` in ``Series.str.replace``.
    punctuation_to_space = str.maketrans(
        string.punctuation, ' ' * len(string.punctuation)
    )

    matches = []
    for year in var_years:
        group = _value_for_year(var_group, year, 'var_group')
        dataset = _value_for_year(var_dataset, year, 'var_dataset')
        variable = _value_for_year(var_variable, year, 'var_variable')
        # make request
        variables = ced.variables.all_variables(dataset, year, group)
        # make sure variable is in dataset
        match = variables[variables.VARIABLE == variable]
        if match.empty:
            print(f'{variable} not found in {dataset} for {year}')
            return []
        matches.append(match)

    # pd.concat raises on an empty list, so an empty year range yields no labels.
    if not matches:
        return []

    label = pd.concat(matches).LABEL
    label = label.str.translate(punctuation_to_space)
    # Remove multiple spaces
    label = label.str.replace(' +', ' ', regex=True)
    # Remove leading and trailing spaces, then convert to lowercase
    label = label.str.strip().str.lower()
    return label.unique().tolist()

## ACS5 Datasets (year 2009)

In [None]:
df_datasets = ced.variables.all_data_sets(year=2009)
df_datasets[df_datasets['DATASET'].str.contains('acs5')][['SYMBOL', 'DATASET', 'TITLE']]

## ACS1 Datasets (year 2009)

In [None]:
df_datasets[df_datasets['DATASET'].str.contains('acs1')][['SYMBOL', 'DATASET', 'TITLE']]

## Decennial Datasets (year 2000)

In [None]:
df_datasets = ced.variables.all_data_sets(year=2000)
df_datasets[df_datasets['DATASET'].str.contains('dec')][['SYMBOL', 'DATASET', 'TITLE']]

## Decennial Datasets (year 2010)

In [None]:
df_datasets = ced.variables.all_data_sets(year=2010)
df_datasets[df_datasets['DATASET'].str.contains('dec')][['SYMBOL', 'DATASET', 'TITLE']]

## Decennial Datasets (year 2020)

In [None]:
df_datasets = ced.variables.all_data_sets(year=2020)
df_datasets[df_datasets['DATASET'].str.contains('dec')][['SYMBOL', 'DATASET', 'TITLE']]

## ACS5 Groups (year 2009)

In [None]:
acs_acs5_groups_2009 = ced.variables.all_groups('acs/acs5', year=2009)[['GROUP', 'DESCRIPTION']]
acs_acs5_groups_2009.to_csv('acs_acs5_groups_2009.csv', index=False)
acs_acs5_groups_2009

In [None]:
ced.variables.all_groups('acs/acs5/pums', year=2009)[['GROUP', 'DESCRIPTION']]

In [None]:
ced.variables.all_groups('acs5', year=2009)[['GROUP', 'DESCRIPTION']]

## ACS1 Groups (year 2007)

In [None]:
acs_acs1_groups_2007 = ced.variables.all_groups('acs/acs1', year=2007)[['GROUP', 'DESCRIPTION']]
acs_acs1_groups_2007.to_csv('acs_acs1_groups_2007.csv', index=False)
acs_acs1_groups_2007

In [None]:
ced.variables.all_groups('acs/acs1/profile', year=2007)[['GROUP', 'DESCRIPTION']]

In [None]:
ced.variables.all_groups('acs/acs1/pums', year=2007)[['GROUP', 'DESCRIPTION']]

In [None]:
# acs/acs1/spp is not available for 2007
ced.variables.all_groups('acs/acs1/spp', year=2008)[['GROUP', 'DESCRIPTION']]

## Dec groups

In [None]:
# Decennial summary files to catalogue, keyed by census year.
datasets = {2000: ['sf1', 'sf2', 'sf3', 'sf4'], 2010: ['sf1', 'sf2'], 2020: ['dhc']}

# Write the GROUP/DESCRIPTION listing of every summary file to its own CSV.
for year, summary_files in datasets.items():
    for dataset in summary_files:
        print(f'{dataset} {year}')
        df = ced.variables.all_groups(f'dec/{dataset}', year=year).loc[:, ['GROUP', 'DESCRIPTION']]
        filename = f'dec_{dataset}_groups_{year}.csv'
        df.to_csv(filename, index=False)

In [19]:
dec_sf1_groups_2000 = ced.variables.all_groups('dec/sf1', year=2000)[['GROUP', 'DESCRIPTION']]

# Census variables

### `population`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B01003'])
ced.variables.all_variables('acs/acs5', 2009, 'B01003')

In [None]:
print(acs_acs1_groups_2007[acs_acs1_groups_2007['GROUP'] == 'B01003'])
ced.variables.all_variables('acs/acs1', 2007, 'B01003')

In [None]:
# Total-population group and dataset for each decennial census.
var_group = {2000: 'P001', 2010: 'P1', 2020: 'P1'}
var_dataset = {2000: 'dec/sf1', 2010: 'dec/sf1', 2020: 'dec/dhc'}

# Fetch the group's variable table for every census and stack them into one CSV.
yearly_tables = []
for year in (2000, 2010, 2020):
    dataset, group = var_dataset[year], var_group[year]
    print(f'{dataset} {year} {group}')
    var = ced.variables.all_variables(dataset, year, group)
    yearly_tables.append(var)
var_df = pd.concat(yearly_tables)
var_df.to_csv('dec_population.csv', index=False)

### `median_age`

In [None]:
# Show the B01002 entry of the ACS5 group list, then list the group's variables.
# NOTE(review): the group list is the 2009 one while the variables are requested
# for 2010 — confirm the year mismatch is intended.
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B01002'])
ced.variables.all_variables('acs/acs5', 2010, 'B01002')

In [None]:
# Show the B01002 entry of the ACS1 group list, then list the group's variables.
# NOTE(review): the group list is the 2007 one while the variables are requested
# for 2005; the other ACS1 cells start at 2007 — confirm 2005 is intended.
print(acs_acs1_groups_2007[acs_acs1_groups_2007['GROUP'] == 'B01002'])
ced.variables.all_variables('acs/acs1', 2005, 'B01002')

In [None]:
# Median-age group and dataset for each decennial census.
var_group = {2000: 'P013', 2010: 'P13', 2020: 'P13'}
var_dataset = {2000: 'dec/sf1', 2010: 'dec/sf1', 2020: 'dec/dhc'}

# Fetch the group's variable table for every census and stack them into one CSV.
yearly_tables = []
for year in (2000, 2010, 2020):
    dataset, group = var_dataset[year], var_group[year]
    print(f'{dataset} {year} {group}')
    var = ced.variables.all_variables(dataset, year, group)
    yearly_tables.append(var)
var_df = pd.concat(yearly_tables)
var_df.to_csv('dec_median_age.csv', index=False)

### `pct_under_20_years`, `pct_20_35_years`, `pct_35_50_years`, `pct_50_65_years`, `pct_more_65_years`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B01001'])
ced.variables.all_variables('acs/acs5', 2009, 'B01001')[['VARIABLE', 'LABEL']]

In [None]:
# One flag per B01001 code (001-049): True when validate_xyear returns exactly
# one distinct label for the code over the ACS5 years 2009-2022.
valid_codes = [
    len(validate_xyear(f'B01001_{i:03}E', 'B01001', 'acs/acs5', range(2009, 2022 + 1))) == 1
    for i in range(1, 50)
]
print(valid_codes)
print("total codes:", len(valid_codes))
print("total valid codes:", sum(valid_codes))

In [None]:
print(acs_acs1_groups_2007[acs_acs1_groups_2007['GROUP'] == 'B01001'])
df = ced.variables.all_variables('acs/acs1', 2007, 'B01001')
df = df[df.VARIABLE.str.contains('B01001_')]
df[['VARIABLE', 'LABEL']]

In [None]:
# ACS1 years checked throughout the notebook: 2007-2022 without 2020.
acs1_years = [year for year in range(2007, 2022 + 1) if year != 2020]
# One flag per B01001 code (001-049): True when validate_xyear returns exactly
# one distinct label for the code over those years.
valid_codes = [
    len(validate_xyear(f'B01001_{i:03}E', 'B01001', 'acs/acs1', acs1_years)) == 1
    for i in range(1, 50)
]
print(valid_codes)
print("total codes:", len(valid_codes))
print("total valid codes:", sum(valid_codes))

In [None]:
# Sex-by-age group and dataset for each decennial census.
var_group = {2000: 'P012', 2010: 'P12', 2020: 'P12'}
var_dataset = {2000: 'dec/sf1', 2010: 'dec/sf1', 2020: 'dec/dhc'}

# Fetch the group's variable table for every census and stack them into one CSV.
yearly_tables = []
for year in (2000, 2010, 2020):
    dataset, group = var_dataset[year], var_group[year]
    print(f'{dataset} {year} {group}')
    var = ced.variables.all_variables(dataset, year, group)
    yearly_tables.append(var)
var_df = pd.concat(yearly_tables)
var_df.to_csv('dec_sex_age.csv', index=False)


In [None]:
# The variable code is <prefix><3-digit index><suffix>; prefix and suffix
# differ between census years.
var_variable_prefix = {2000: 'P012', 2010: 'P012', 2020: 'P12_'}
var_variable_suffix = {2000: '', 2010: '', 2020: 'N'}

# One {year: code} dict per variable index 1..49.
var_variable_list = []
for i in range(1, 50):
    var_variable = {
        year: f'{var_variable_prefix[year]}{i:03d}{var_variable_suffix[year]}'
        for year in (2000, 2010, 2020)
    }
    var_variable_list.append(var_variable)
var_variable_list[-5:]

In [None]:
# One flag per decennial sex-by-age code: True when validate_xyear returns
# exactly one distinct label for the code over 2000, 2010 and 2020.
valid_codes = [
    len(validate_xyear(var_variable_list[i], var_group, var_dataset, [2000,2010,2020])) == 1
    for i in range(49)
]
print("total codes:", len(valid_codes))
print("total valid codes:", sum(valid_codes))

In [33]:
# i = 1
# var_df = list()
# for year in [2000, 2010, 2020]:
#     dataset = var_dataset[year]
#     group = var_group[year]
#     variable_prefix = var_variable_prefix[year]
#     variable_suffix = var_variable_suffix[year]
#     variable = f'{variable_prefix}{i:03d}{variable_suffix}'
#     var = ced.variables.all_variables(dataset, year, group)
#     var = var[var.VARIABLE == variable]
#     if var.shape[0] == 0:
#         print(f'{variable} not found in {dataset} for {year}')
#     var_df.append(var)
# var_df = pd.concat(var_df)
# var_df

### `pct_white`, `pct_black`, `pct_asian`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B02001'])
ced.variables.all_variables('acs/acs5', 2009, 'B02001')[['VARIABLE', 'LABEL']]

In [None]:
# One flag per B02001 code (001-010): True when validate_xyear returns exactly
# one distinct label for the code over the ACS5 years 2009-2022.
valid_codes = [
    len(validate_xyear(f'B02001_{i:03}E', 'B02001', 'acs/acs5', range(2009, 2022+1))) == 1
    for i in range(1, 11)
]
print(valid_codes)
print("total codes:", len(valid_codes))
print("total valid codes:", sum(valid_codes))

In [None]:
print(acs_acs1_groups_2007[acs_acs1_groups_2007['GROUP'] == 'B02001'])
ced.variables.all_variables('acs/acs1', 2007, 'B02001')[['VARIABLE', 'LABEL']]

In [None]:
# One flag per B02001 code (001-010): True when validate_xyear returns exactly
# one distinct label for the code over `acs1_years`.
valid_codes = [
    len(validate_xyear(f'B02001_{i:03}E', 'B02001', 'acs/acs1', acs1_years)) == 1
    for i in range(1, 11)
]
print(valid_codes)
print("total codes:", len(valid_codes))
print("total valid codes:", sum(valid_codes))

In [None]:
ced.variables.all_variables('dec/sf1', 2010, 'P6')

In [None]:
# Race group and dataset for each decennial census.
var_group = {2000: 'P007', 2010: 'P8', 2020: 'P8'}
var_dataset = {2000: 'dec/sf1', 2010: 'dec/sf1', 2020: 'dec/dhc'}

# Fetch the group's variable table for every census and stack them into one CSV.
yearly_tables = []
for year in (2000, 2010, 2020):
    dataset, group = var_dataset[year], var_group[year]
    print(f'{dataset} {year} {group}')
    var = ced.variables.all_variables(dataset, year, group)
    yearly_tables.append(var)
var_df = pd.concat(yearly_tables)
var_df.to_csv('dec_race.csv', index=False)

In [None]:
# The variable code is <prefix><3-digit index><suffix>; prefix and suffix
# differ between census years.
var_variable_prefix = {2000: 'P007', 2010: 'P008', 2020: 'P8_'}
var_variable_suffix = {2000: '', 2010: '', 2020: 'N'}

# One {year: code} dict per variable index 1..8.
var_variable_list = []
for i in range(1, 9):
    var_variable = {
        year: f'{var_variable_prefix[year]}{i:03d}{var_variable_suffix[year]}'
        for year in (2000, 2010, 2020)
    }
    var_variable_list.append(var_variable)
var_variable_list[-5:]

In [None]:
# Side-by-side comparison of the race variables across the three decennial
# censuses: one row per (variable index, year) with a normalized label.
decennial_years = [2000, 2010, 2020]
# Maps every punctuation character to a space in a single pass.
punctuation_to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))

# A year's variable table does not depend on the variable index, so fetch it
# once per year rather than once per (index, year) pair.
variables_by_year = {
    year: ced.variables.all_variables(var_dataset[year], year, var_group[year])
    for year in decennial_years
}

var_df = list()
for i in range(1, 9):
    for year in decennial_years:
        dataset = var_dataset[year]
        variable = f'{var_variable_prefix[year]}{i:03d}{var_variable_suffix[year]}'
        year_variables = variables_by_year[year]
        # Copy the filtered rows so the LABEL assignment below writes to an
        # independent frame rather than to a slice of the fetched table.
        var = year_variables[year_variables.VARIABLE == variable].copy()
        if var.shape[0] == 0:
            # Reported but not fatal: a missing code simply contributes no rows.
            print(f'{variable} not found in {dataset} for {year}')
        var['LABEL'] = (
            var.LABEL
            .str.translate(punctuation_to_space)   # punctuation -> space
            .str.replace(' +', ' ', regex=True)    # collapse runs of spaces
            .str.strip()                           # drop leading/trailing spaces
            .str.lower()                           # lowercase
        )
        var_df.append(var[['YEAR', 'DATASET', 'GROUP', 'VARIABLE', 'LABEL']])
var_df = pd.concat(var_df)
var_df

### `pct_hispanic`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B03003'])
ced.variables.all_variables('acs/acs5', 2010, 'B03003')[['VARIABLE', 'LABEL']]

In [None]:
# One flag per B03003 code (001-003): True when validate_xyear returns exactly
# one distinct label for the code over the ACS5 years 2010-2022.
valid_codes = [
    len(validate_xyear(f'B03003_{i:03}E', 'B03003', 'acs/acs5', range(2010, 2022+1))) == 1
    for i in range(1, 4)
]
print(valid_codes)
print("total codes:", len(valid_codes))
print("total valid codes:", sum(valid_codes))

In [None]:
# hispanic is not available until 2009
ced.variables.all_variables('acs/acs1', 2009, 'B03003')[['VARIABLE', 'LABEL']]

In [None]:
# ACS1 years for the B03003 check: 2009-2022 without 2020 (2020 is skipped,
# as in `acs1_years`).
hispanic_acs1_years = [year for year in range(2009, 2022 + 1) if year != 2020]
# One flag per B03003 code (001-003): True when validate_xyear returns exactly
# one distinct label for the code. The check must run against the ACS1 dataset
# (it previously queried 'acs/acs5', repeating the ACS5 check above).
valid_codes = [
    len(validate_xyear(f'B03003_{i:03}E', 'B03003', 'acs/acs1', hispanic_acs1_years)) == 1
    for i in range(1, 4)
]
print(valid_codes)
print("total codes:", len(valid_codes))
print("total valid codes:", sum(valid_codes))

In [None]:
# Hispanic-origin group and dataset for each decennial census.
var_group = {2000: 'P011', 2010: 'P4', 2020: 'P4'}
var_dataset = {2000: 'dec/sf1', 2010: 'dec/sf1', 2020: 'dec/dhc'}

# Fetch the group's variable table for every census and stack them into one CSV.
yearly_tables = []
for year in (2000, 2010, 2020):
    dataset, group = var_dataset[year], var_group[year]
    print(f'{dataset} {year} {group}')
    var = ced.variables.all_variables(dataset, year, group)
    yearly_tables.append(var)
var_df = pd.concat(yearly_tables)
var_df.to_csv('dec_hispanic.csv', index=False)

### `pct_non_us_citizen`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B05001'])
ced.variables.all_variables('acs/acs5', 2009, 'B05001')[['VARIABLE', 'LABEL']]

In [None]:
# One flag per B05001 code (001-006): True when validate_xyear returns exactly
# one distinct label for the code over the ACS5 years 2009-2022.
valid_codes = [
    len(validate_xyear(f'B05001_{i:03}E', 'B05001', 'acs/acs5', range(2009, 2022+1))) == 1
    for i in range(1, 7)
]
print(valid_codes)
print("total codes:", len(valid_codes))
print("total valid codes:", sum(valid_codes))

In [None]:
print(acs_acs1_groups_2007[acs_acs1_groups_2007['GROUP'] == 'B05001'])
ced.variables.all_variables('acs/acs1', 2007, 'B05001')[['VARIABLE', 'LABEL']]

In [None]:
# One flag per B05001 code (001-006): True when validate_xyear returns exactly
# one distinct label for the code over `acs1_years`.
valid_codes = [
    len(validate_xyear(f'B05001_{i:03}E', 'B05001', 'acs/acs1', acs1_years)) == 1
    for i in range(1, 7)
]
print(valid_codes)
print("total codes:", len(valid_codes))
print("total valid codes:", sum(valid_codes))

In [None]:
# In recent censuses, citizenship data is not collected through the Decennial
# Census short form, which is the primary form sent to all households.
# The 2000 dec/sf3 group P021 is listed here instead.

ced.variables.all_variables('dec/sf3', 2000, 'P021')

### `pct_higher_education`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B14001'])
ced.variables.all_variables('acs/acs5', 2010, 'B14001')

In [None]:
# One flag per B14001 code (001-010): True when validate_xyear returns exactly
# one distinct label for the code over the ACS5 years 2010-2022.
valid_codes = [
    len(validate_xyear(f'B14001_{i:03}E', 'B14001', 'acs/acs5', range(2010, 2022+1))) == 1
    for i in range(1, 11)
]
print(valid_codes)
print("total codes:", len(valid_codes))
print("total valid codes:", sum(valid_codes))

In [None]:
print(acs_acs1_groups_2007[acs_acs1_groups_2007['GROUP'] == 'B14001'])
ced.variables.all_variables('acs/acs1', 2007, 'B14001')[['VARIABLE', 'LABEL']]

In [None]:
# One flag per B14001 code (001-010): True when validate_xyear returns exactly
# one distinct label for the code over `acs1_years`.
valid_codes = [
    len(validate_xyear(f'B14001_{i:03}E', 'B14001', 'acs/acs1', acs1_years)) == 1
    for i in range(1, 11)
]
print(valid_codes)
print("total codes:", len(valid_codes))
print("total valid codes:", sum(valid_codes))

In [None]:
ced.variables.all_variables('dec/sf3', 2000, 'P036')

### `pop_higher_education`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B15001'])
df = ced.variables.all_variables('acs/acs5', 2009, 'B15001')
df.to_csv('acs_acs5_education.csv', index=False)

In [None]:
print(acs_acs1_groups_2007[acs_acs1_groups_2007['GROUP'] == 'B15001'])
#ced.variables.all_variables('acs/acs1', 2007, 'B15001')[['VARIABLE', 'LABEL']]

In [None]:
ced.variables.all_variables('dec/sf3', 2000, 'P037')

### `pct_poverty`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B17001'])
ced.variables.all_variables('acs/acs5', 2010, 'B17001')[['VARIABLE', 'LABEL']]

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B17025'])
ced.variables.all_variables('acs/acs5', 2010, 'B17025')[['VARIABLE', 'LABEL']]

In [None]:
# B17025_002E has a single label across the ACS5 years 2010-2022, so the list
# of distinct normalized labels shown below has one element.
validate_xyear('B17025_002E', 'B17025', 'acs/acs5', range(2010, 2022+1))

In [None]:
print(acs_acs1_groups_2007[acs_acs1_groups_2007['GROUP'] == 'B17025'])
ced.variables.all_variables('acs/acs1', 2007, 'B17025')[['VARIABLE', 'LABEL']]

In [None]:
# B17025_002E has a single label across `acs1_years`, so the list of distinct
# normalized labels shown below has one element.
validate_xyear('B17025_002E', 'B17025', 'acs/acs1', acs1_years)

In [None]:
#P087,POVERTY STATUS IN 1999 BY AGE [17]
ced.variables.all_variables('dec/sf3', 2000, 'P087')[['VARIABLE', 'LABEL']]

### `pct_high_income`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B19001'])
ced.variables.all_variables('acs/acs5', 2010, 'B19001')[['VARIABLE', 'LABEL']]

In [None]:
# One flag per B19001 code (001-017): True when validate_xyear returns exactly
# one distinct label for the code over the ACS5 years 2010-2022.
valid_codes = [
    len(validate_xyear(f'B19001_{i:03}E', 'B19001', 'acs/acs5', range(2010, 2022+1))) == 1
    for i in range(1, 18)
]
print(valid_codes)
print("total codes:", len(valid_codes))
print("total valid codes:", sum(valid_codes))

### `median_household_income`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B19013'])
ced.variables.all_variables('acs/acs5', 2010, 'B19013')[['VARIABLE', 'LABEL']]

In [None]:
# The variable code B19013_001E is fixed; what changes from year to year is
# its label, because income is not discounted for inflation.
# NOTE(review): presumably each label names its own inflation-adjustment
# year — confirm against the labels listed below.
validate_xyear('B19013_001E', 'B19013', 'acs/acs5', range(2010, 2022+1))

In [None]:
# In recent censuses, income data is not collected through the Decennial
# Census short form, which is the primary form sent to all households.
# The 2000 dec/sf3 group P053 is listed here instead.

ced.variables.all_variables('dec/sf3', 2000, 'P053')

### `pct_family_high_income`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B19101'])
ced.variables.all_variables('acs/acs5', 2009, 'B19101')

In [None]:
# In recent censuses, income data is not collected through the Decennial
# Census short form, which is the primary form sent to all households.
# The 2000 dec/sf3 group P052 is listed here instead.

ced.variables.all_variables('dec/sf3', 2000, 'P052')

### `median_family_income`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B19113'])
ced.variables.all_variables('acs/acs5', 2009, 'B19113')[['VARIABLE', 'LABEL']]

### `elder_footstamp`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B22001'])
ced.variables.all_variables('acs/acs5', 2009, 'B22001')[['VARIABLE', 'LABEL']]

In [None]:
# B22001_002E has a single label across the ACS5 years 2009-2022, so the list
# of distinct normalized labels shown below has one element.
validate_xyear('B22001_002E', 'B22001', 'acs/acs5', range(2009, 2022+1))

In [None]:
print(acs_acs1_groups_2007[acs_acs1_groups_2007['GROUP'] == 'B22001'])
ced.variables.all_variables('acs/acs1', 2007, 'B22001')[['VARIABLE', 'LABEL']]

In [None]:
# The label of B22001_002E changes across `acs1_years`, but the change in
# wording does not represent a different concept.
validate_xyear('B22001_002E', 'B22001', 'acs/acs1', acs1_years)

### `median_household_income` (ACS5 from 2009, ACS1, Decennial 2000)

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B19013'])
ced.variables.all_variables('acs/acs5', 2009, 'B19013')[['VARIABLE', 'LABEL']]

In [None]:
# List the distinct normalized labels of B19013_001E over the ACS5 years 2009-2022.
validate_xyear('B19013_001E', 'B19013', 'acs/acs5', range(2009, 2022+1))

In [None]:
print(acs_acs1_groups_2007[acs_acs1_groups_2007['GROUP'] == 'B19013'])
ced.variables.all_variables('acs/acs1', 2007, 'B19013')[['VARIABLE', 'LABEL']]


In [None]:
# List the distinct normalized labels of B19013_001E over `acs1_years`.
validate_xyear('B19013_001E', 'B19013', 'acs/acs1', acs1_years)

In [None]:
ced.variables.all_variables('dec/sf3', 2000, 'P053')
# Starting with the 2010 Census, the long-form survey was discontinued, 
# and the data previously collected under SF3 became part of the American Community Survey (ACS).

### `median_home_value`

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B25077'])
ced.variables.all_variables('acs/acs5', 2009, 'B25077')[['VARIABLE', 'LABEL']]

In [None]:
validate_xyear('B25077_001E', 'B25077', 'acs/acs5', range(2009, 2022+1))

In [None]:
print(acs_acs1_groups_2007[acs_acs1_groups_2007['GROUP'] == 'B25077'])
ced.variables.all_variables('acs/acs1', 2007, 'B25077')[['VARIABLE', 'LABEL']]

In [None]:
# List the distinct normalized labels of B25077_001E over `acs1_years`.
validate_xyear('B25077_001E', 'B25077', 'acs/acs1', acs1_years)

In [None]:
ced.variables.all_variables('dec/sf3', 2000, 'H085')

# Starting with the 2010 Census, the long-form survey was discontinued, 
# and the data previously collected under SF3 became part of the American Community Survey (ACS).

In [None]:
ced.variables.all_variables('dec/sf1', 2010, 'H1')

In [None]:
ced.variables.all_variables('dec/dhc', 2020, 'H1')

## Unweighted tables

The unweighted tables in the American Community Survey (ACS) report the raw sample counts behind the published estimates:

* They provide context for other ACS estimates by showing the base sample size. For instance, when presenting data on economic characteristics from the ACS, you can reference the unweighted sample count to give users a sense of the underlying sample from which the estimates are derived.
* Comparing the unweighted sample counts across different geographic areas helps in understanding the distribution and density of the survey samples.
* They are particularly useful in the following situations:

    - Understanding Sample Size: Use this table to determine the sample size of the population surveyed in a specific area. This can help assess the reliability and precision of the estimates in other ACS tables.
    - Evaluating Data Quality: When analyzing ACS data, it’s important to understand the sample size because areas with small sample sizes may have less reliable estimates. This table helps in identifying such areas.
    - Weighting and Estimation: Use this table to understand the raw, unweighted sample counts before weights are applied to produce population estimates. This is crucial for researchers performing advanced statistical analyses.



In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B00001'])
ced.variables.all_variables('acs/acs5', 2010, 'B00001')[['VARIABLE', 'LABEL']]

In [None]:
print(acs_acs5_groups_2009[acs_acs5_groups_2009['GROUP'] == 'B00002'])
ced.variables.all_variables('acs/acs5', 2010, 'B00002')