# Scrape ACS Census Data

In [1]:
import pandas as pd
from census import Census
import us
import os
from dotenv import load_dotenv

In [2]:
# Number of CSV rows read per chunk (one million).
chunk_size = 1_000_000
chunk_size

1000000

In [3]:
# Read the extract in pieces of chunk_size rows and keep each piece keyed by
# its 0-based position in the file.
# NOTE(review): every chunk stays referenced in df__chunks, so the whole file
# ends up resident in memory — confirm that this fits on the target machine.
df__chunks = {}
chunk_reader = pd.read_csv('usa_00006.csv', chunksize=chunk_size)
for chunk_index, chunk in enumerate(chunk_reader):
  df__chunks[chunk_index] = chunk
# Leave the counter equal to the number of chunks read.
chunk_index = len(df__chunks)
len(df__chunks)

176

### FIPS Info

- `37`: FIPS code for NC
- `5750`: Raleigh, NC

In [4]:
def stitch_chunks_by_state(chunk_length, all_chunks, state_fip):
  """Combine the rows for one state from a collection of chunked DataFrames.

  Args:
    chunk_length: Number of chunks to read; positions 0 .. chunk_length - 1
      are looked up in ``all_chunks``.
    all_chunks: Container indexable by integer position (for example a dict
      keyed 0..n-1 or a list) whose values are DataFrames that have a
      ``STATEFIP`` column.
    state_fip: Value compared for equality against the ``STATEFIP`` column.

  Returns:
    One DataFrame holding the matching rows of every chunk, in chunk order,
    with a fresh 0..n-1 index. When there are no chunks to read, an empty
    DataFrame is returned.
  """
  state_frames = [
    all_chunks[position].loc[all_chunks[position]['STATEFIP'] == state_fip]
    for position in range(chunk_length)
  ]

  # pd.concat raises ValueError on an empty list, so handle "no chunks" here.
  if not state_frames:
    return pd.DataFrame()

  return pd.concat(state_frames, ignore_index=True)

In [5]:
# Keep only the rows whose STATEFIP is 37 (the FIPS code noted above for NC)
# from every loaded chunk and stitch them into one frame.
df_merged_NC_chunks = stitch_chunks_by_state(len(df__chunks), df__chunks, state_fip=37)

In [6]:
# Print a summary of the stitched NC frame (entry count and per-column dtypes).
df_merged_NC_chunks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5510689 entries, 0 to 5510688
Data columns (total 48 columns):
 #   Column     Dtype  
---  ------     -----  
 0   YEAR       int64  
 1   MULTYEAR   int64  
 2   SAMPLE     int64  
 3   SERIAL     int64  
 4   CBSERIAL   int64  
 5   HHWT       float64
 6   CLUSTER    int64  
 7   REGION     int64  
 8   STATEFIP   int64  
 9   COUNTYFIP  int64  
 10  CITY       int64  
 11  HOMELAND   int64  
 12  STRATA     int64  
 13  GQ         int64  
 14  RENT       int64  
 15  RENTGRS    int64  
 16  PERNUM     int64  
 17  PERWT      float64
 18  SEX        int64  
 19  AGE        int64  
 20  MARST      int64  
 21  DIVINYR    int64  
 22  WIDINYR    int64  
 23  RACE       int64  
 24  RACED      int64  
 25  HISPAN     int64  
 26  HISPAND    int64  
 27  CITIZEN    int64  
 28  EDUC       int64  
 29  EDUCD      int64  
 30  EMPSTAT    int64  
 31  EMPSTATD   int64  
 32  OCC        int64  
 33  IND        int64  
 34  INCTOT     int64  

In [7]:
# Show every column label in the stitched NC frame.
df_merged_NC_chunks.columns

Index(['YEAR', 'MULTYEAR', 'SAMPLE', 'SERIAL', 'CBSERIAL', 'HHWT', 'CLUSTER',
       'REGION', 'STATEFIP', 'COUNTYFIP', 'CITY', 'HOMELAND', 'STRATA', 'GQ',
       'RENT', 'RENTGRS', 'PERNUM', 'PERWT', 'SEX', 'AGE', 'MARST', 'DIVINYR',
       'WIDINYR', 'RACE', 'RACED', 'HISPAN', 'HISPAND', 'CITIZEN', 'EDUC',
       'EDUCD', 'EMPSTAT', 'EMPSTATD', 'OCC', 'IND', 'INCTOT', 'INCSS',
       'INCWELFR', 'POVERTY', 'VETDISAB', 'DIFFREM', 'DIFFPHYS', 'DIFFMOB',
       'DIFFCARE', 'DIFFSENS', 'DIFFEYE', 'DIFFHEAR', 'VETSTAT', 'VETSTATD'],
      dtype='object')

In [8]:
# Survey years to export, 2012 through 2023 inclusive.
YEARS_LIST = list(range(2012, 2024))

In [9]:
# Write one CSV per year in YEARS_LIST containing that year's rows of the NC frame.
for year in YEARS_LIST:
  rows_for_year = df_merged_NC_chunks.loc[df_merged_NC_chunks['YEAR'] == year]
  rows_for_year.to_csv(f'NC_ACS_RENTAL_{year}.csv', index=False)

In [None]:
# Load settings via python-dotenv, then read the API key from the environment.
# CENSUS_API_KEY is None when the variable is not set.
load_dotenv()
CENSUS_API_KEY = os.environ.get('CENSUS_API_KEY')

In [None]:
# Create the Census API client using the key held in CENSUS_API_KEY.
c = Census(CENSUS_API_KEY)

## ACS - American Community Survey

src: [https://depts.washington.edu/csscr/acs/acs5yr/](https://depts.washington.edu/csscr/acs/acs5yr/)

### About the ACS 5-Year Estimates

Starting in 2010, the Census Bureau has released *5-year average estimates* for all geographic areas, regardless of population size. 

- Data are collected over a 5-year period and therefore describe the average characteristics for that 5-year period. 
- 5-year average estimates are available for every state, county, city, town, place, American Indian Area, Alaska Native Area, and Hawaiian Home Land, as well as for census tracts and block groups. 
- Beginning in 2010, the 5-year estimates replace the estimates from the decennial census long form.

### Data Notes

- 2009 ACS data use the 2000 boundaries.
- 2010 ACS data use the new 2010 boundaries.
- Beginning in 2011, ZIP Code Tabulation Areas were added as one of the geographic levels; they are available only in the US national-level file.

## SNAP ACS Codes

- **S2201**: Food Stamps/Supplemental Nutrition Assistance Program (SNAP)
- **B09010**: Receipt of Supplemental Security Income (SSI), Cash Public Assistance Income, or *Food Stamps/SNAP* in the Past 12 Months by Household Type for Children Under 18 Years in Households

ACS IPUMS

- 2020: You are viewing this message because you have selected the ACS 2020 PUMS 1-year file. Please note that the ACS 2020 PUMS 1-year file uses experimental weights to account for the effects of the COVID-19 pandemic on the ACS 2020 data products. Users should proceed with caution when using the ACS 2020 PUMS 1-year file and the Census Bureau advises against comparing it to other ACS PUMS sample years. For more information about this file and the corresponding experimental weights, please visit this page: ACS and COVID-19: Guidance for Using the PUMS with Experimental Weights

## FIPS Lookup Functions

In [None]:
# Lookup tables built by the `us` package:
# abbreviation -> name, and FIPS code -> abbreviation.
US_STATES_ABBR = us.states.mapping('abbr', 'name')
US_STATES_FIPS = us.states.mapping('fips', 'abbr')

# Calendar years 2010 through 2024 inclusive.
YEARS = list(range(2010, 2025))

print(US_STATES_FIPS)

In [None]:
# Look up the state record for 'NC' and display its FIPS code.
us.states.lookup('NC').fips

In [None]:
def get_shapes(mapping_dict):
  """Build a table of shapefile URLs with one row per state.

  Args:
    mapping_dict: Mapping of state abbreviation -> state name.

  Returns:
    A DataFrame whose rows hold whatever ``shapefile_urls()`` returns for the
    state, plus ``state_abbr``, ``state_name`` and ``state_fip`` columns.
  """
  rows = []
  for state_abbr, state_name in mapping_dict.items():
    state = us.states.lookup(state_abbr)
    row = state.shapefile_urls()
    # Tag the URL record with the identifiers of the state it belongs to.
    row.update(
      state_abbr=state_abbr,
      state_name=state_name,
      state_fip=state.fips,
    )
    rows.append(row)
  return pd.DataFrame(rows)

In [None]:
# Build the per-state shapefile table from the abbreviation -> name mapping,
# then preview its first rows.
df_US_shapes = get_shapes(US_STATES_ABBR)

df_US_shapes.head()

In [None]:
# Show the `tract` value(s) of the row(s) whose state_abbr is "NC".
df_US_shapes.loc[df_US_shapes['state_abbr'] == "NC", 'tract'].values