In [1]:
import pandas as pd
import numpy as np
from ipumspy import IpumsApiClient, MicrodataExtract
from Credentials import MyCredentials
from Functions import Paths as P
from pathlib import Path

# Resolve the three download directories from the project's Paths mapping:
# county-level data, state-level ACS data, and the pre-period Census data.
DataDir, AcsDir, AcsPreDir = (Path(P[key]) for key in ('data', 'acs', 'preperiod'))

# Connect to API (the key is read from the local Credentials module, not hardcoded here)
ipums = IpumsApiClient(MyCredentials['IpumsApiKey'])

# IPUMS Pull
See the [documentation](https://ipumspy.readthedocs.io/en/stable/getting_started.html) for how to use ipumspy. To keep this tractable, I make a separate IPUMS extract for every survey wave. I use the ACS one-year samples from 2005 to 2023 and the 1% decennial Census samples from 1950 and prior.

# For County Data

In [23]:
# Create sample list
Years1 = list(range(1950,1960,10))  # step of 10 with stop 1960 yields only 1950
Years2 = list(range(2005,2024))     # 2005 through 2023 (range excludes its stop value)

# Create variable list
Vars = ['STATEFIP','COUNTYFIP', 'CITIZEN', 'EDUC', 'OCC', 'OCC1950','OCC1990', 'OCC2010',
        'INDNAICS','UHRSWORK', 'PERWT', 'MET2013']
SampleList = ['us' + str(year) + 'a' for year in Years1 + Years2] # Census 1% (1950) and ACS samples (2005 through 2023)


def _fetch_county_extract(sample, variables):
    """Submit a single-sample 'usa' extract, wait for it, and download it to DataDir.

    Parameters
    ----------
    sample : str
        IPUMS sample ID, e.g. 'us2005a'.
    variables : list of str
        Variable names to request for that sample.

    Returns
    -------
    The MicrodataExtract object that was submitted.

    Any exception raised by the ipumspy calls propagates to the caller.
    """
    new_extract = MicrodataExtract('usa', [sample], variables)
    ipums.submit_extract(new_extract)
    ipums.wait_for_extract(new_extract)
    ipums.download_extract(new_extract,download_dir=DataDir)
    return new_extract


# Create and download extracts
for samp in SampleList:
    print('Creating and Downloading Extract: ' + samp)
    try:
        extract = _fetch_county_extract(samp, Vars)
    except Exception as e:
        # If some of the requested variables are not available in this sample,
        # drop them and retry once. Each line of the error message is split at
        # the first ":" and the leading piece is treated as a candidate
        # variable name (assumes the API reports one "<VAR>: <reason>" line per
        # unavailable variable -- TODO confirm against the ipumspy error format).
        RemoveThese = [line.split(':')[0].strip() for line in str(e).splitlines()]
        NewVars = [V for V in Vars if V not in RemoveThese]
        # Retry only when the error actually named requested variables. If
        # nothing was removed (e.g. an authentication, network, or timeout
        # failure), or everything was removed, resubmitting would either repeat
        # the same failing request or send an empty one -- surface the original
        # error instead of masking it.
        if not NewVars or len(NewVars) == len(Vars):
            raise
        extract = _fetch_county_extract(samp, NewVars)

Creating and Downloading Extract: us1950a
Creating and Downloading Extract: us2005a
Creating and Downloading Extract: us2006a
Creating and Downloading Extract: us2007a
Creating and Downloading Extract: us2008a
Creating and Downloading Extract: us2009a
Creating and Downloading Extract: us2010a
Creating and Downloading Extract: us2011a
Creating and Downloading Extract: us2012a
Creating and Downloading Extract: us2013a
Creating and Downloading Extract: us2014a
Creating and Downloading Extract: us2015a
Creating and Downloading Extract: us2016a
Creating and Downloading Extract: us2017a
Creating and Downloading Extract: us2018a
Creating and Downloading Extract: us2019a
Creating and Downloading Extract: us2020a
Creating and Downloading Extract: us2021a
Creating and Downloading Extract: us2022a
Creating and Downloading Extract: us2023a


# For State Level Analysis

I pull the 2000–2022 ACS samples and save them as Stata datasets. (2023 is excluded because it does not have all the variables we want.)

In [3]:
# Variables requested for every state-level extract, grouped by theme
Vars = [
    'STATEFIP',                                          # Geographic
    'CITIZEN', 'BPL',                                    # Citizenship/nativity
    'AGE',                                               # Demographics
    'OCC', 'IND1990', 'INDNAICS', 'OCC1990', 'OCC2010',  # Work
    'UHRSWORK',                                          # Work
    'EDUC',                                              # Education
]

# One sample ID per year, us2000a through us2022a (range() excludes its stop value).
# Only going to 2022 because 2023 doesn't have all the variables we want
SampleList = [f'us{year}a' for year in range(2000, 2023)]

# One extract per sample: create it, submit it, wait for it, then download it to AcsDir
for samp in SampleList:
    print(f'Creating and Downloading Extract: {samp}')
    extract = MicrodataExtract('usa', [samp], Vars)
    ipums.submit_extract(extract)
    ipums.wait_for_extract(extract)
    ipums.download_extract(extract, download_dir=AcsDir)

Creating and Downloading Extract: us2000a
Creating and Downloading Extract: us2001a
Creating and Downloading Extract: us2002a
Creating and Downloading Extract: us2003a
Creating and Downloading Extract: us2004a
Creating and Downloading Extract: us2005a
Creating and Downloading Extract: us2006a
Creating and Downloading Extract: us2007a
Creating and Downloading Extract: us2008a
Creating and Downloading Extract: us2009a
Creating and Downloading Extract: us2010a
Creating and Downloading Extract: us2011a
Creating and Downloading Extract: us2012a
Creating and Downloading Extract: us2013a
Creating and Downloading Extract: us2014a
Creating and Downloading Extract: us2015a
Creating and Downloading Extract: us2016a
Creating and Downloading Extract: us2017a
Creating and Downloading Extract: us2018a
Creating and Downloading Extract: us2019a
Creating and Downloading Extract: us2020a
Creating and Downloading Extract: us2021a
Creating and Downloading Extract: us2022a


### Read In the "Pre-Period" Data

In [6]:
# Variables requested for the pre-period extracts
Vars = [
    'STATEFIP',  # Geographic
    'BPL',       # Citizenship/nativity
    'AGE',       # Demographics
]

# Decennial sample IDs: us1920a, us1930a, ..., us1960a (stop value 1970 is excluded)
SampleList = ['us%da' % year for year in range(1920, 1970, 10)]

# One extract per sample: create it, submit it, wait for it, then download it to AcsPreDir
for samp in SampleList:
    print(f'Creating and Downloading Extract: {samp}')
    extract = MicrodataExtract('usa', [samp], Vars)
    ipums.submit_extract(extract)
    ipums.wait_for_extract(extract)
    ipums.download_extract(extract, download_dir=AcsPreDir)

Creating and Downloading Extract: us1920a
Creating and Downloading Extract: us1930a
Creating and Downloading Extract: us1940a
Creating and Downloading Extract: us1950a
Creating and Downloading Extract: us1960a
