# What This Notebook Does
This notebook creates the state-level analysis file for the period 1994 to 2023 by combining data from ACS and CPS IPUMS extracts for those years. State-level GDP over the corresponding period comes from the BEA's [United States Regional Economic Analysis Project](https://united-states.reaproject.org/), which I downloaded manually from their site.

In [36]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
from ipumspy import readers, ddi
from Credentials import MyCredentials
from Functions import *
from pathlib import Path

def ImmigrantGroup(col):
    '''
    Map a birthplace code to its immigrant group, following the
    classification in Peri (2012). Used for both the CPS and ACS data.

    Parameters
    ----------
    col : str
        Birthplace code as text. The callers in this notebook left-pad it
        with zeros to five characters before passing it in.

    Returns
    -------
    str
        The group label, or 'Other' when no rule matches.
    '''
    # Codes that are classified on their full value. None of them starts with
    # '0', and none is claimed by a prefix rule that should win instead, so
    # checking them first gives the same answer as an interleaved if/elif chain.
    whole_code_rules = (
        (('20000',), 'Mexico'),
        (('11000',), 'Latin America'),
        (('45300', '45000'), 'Western Europe'),
        (('15000', '70020', '70010'), 'Canada-Australia-New Zealand'),
        (('50000',), 'China'),
        (('52100',), 'India'),
        (('26030',), 'Other'),   # carved out of the '26' Latin America prefix
    )
    for codes, label in whole_code_rules:
        if col in codes:
            return label

    # Any code with a leading zero is treated as United States
    if col[0] == '0':
        return 'United States'

    # Everything else is decided by the first two characters
    lead = col[0:2]
    prefix_rules = (
        (('21', '25', '26', '30'), 'Latin America'),
        (('41', '42', '43'), 'Western Europe'),
        (('45', '46'), 'Russia and Eastern Europe'),
        (('50', '51', '52', '55', '53', '54'), 'Rest of Asia'),
        (('60',), 'Africa'),
    )
    for prefixes, label in prefix_rules:
        if lead in prefixes:
            return label

    return 'Other'


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# ACS

In [37]:
# Build StateDfAcs: each ACS extract is cleaned, restricted to full-time
# workers aged 16 and over, and collapsed to one row per
# immigrant group x foreign flag x state x year.

# Create a path object to the data
DataDir = Path(Paths['acs'])

# Construct file list; sorted so the extracts are processed in a stable order
Files = sorted(DataDir.glob("*.xml"))

# One collapsed table per extract. They are stacked once after the loop
# instead of re-copying StateDfAcs with pd.concat on every iteration.
AcsFrames = []

for f in Files:

    # Get the data dictionary. It is bound to `codebook` so that the `ddi`
    # module imported from ipumspy is not overwritten by the loop.
    codebook = readers.read_ipums_ddi(f)
    df = readers.read_microdata(codebook, DataDir / codebook.file_description.filename)
    print('\n**********************************************************************\n' +
    f'Working file {f} corresponding to sample ' + str(df['YEAR'].iloc[0]) +
    '\n**********************************************************************\n')

    # Read data in and do some cleaning
    acs_df = (df
            .rename(columns=lambda x: x.lower())
            .pipe(lambda x: x[~x['uhrswork'].isin([0])])          # Dropping all the observations with no hours, or unable to report hours
            .drop(columns=['bpl'])
            .rename(columns={'bpld':'bpl'})                       # Use the detailed birthplace code from here on
            .assign(bpl = lambda x: x['bpl'].astype(str).str.zfill(5))  # String of uniform length 5
            .pipe(lambda x: x[~x['bpl'].str[0].isin(['8','9'])])  # Dropping those we can't identify a country of origin for
            .pipe(lambda x: x[x['uhrswork'] >= 35])               # Keep the full time workers
            .pipe(lambda x: x[x['age'] != 999])                   # Missing age
            .pipe(lambda x: x[x['age'] >= 16])                    # Drop if below the age of 16
            .pipe(lambda x: x[x['citizen'] != 9])                 # Drop the "not in universe" code for the citizen variable
            # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
            .assign(incwage = lambda x: x['incwage'].replace([999999,999998],np.nan))
            .assign(year = lambda x: pd.to_datetime(x['year'],format='%Y'))
            .assign(ImmigrantGroup = lambda x: x['bpl'].apply(ImmigrantGroup)) # Assign immigrant groups following Peri 2012
            .assign(foreign = lambda x: (x['citizen'] != 0).astype(int))
            .pipe(lambda x: x.loc[:,['perwt','uhrswork','foreign','statefip','year','ImmigrantGroup','incwage']])
            )

    # Create State-ImmigrantGroup table.
    # Only the value columns are handed to apply(), so the lambda never
    # operates on the grouping columns (the behaviour pandas deprecated).
    # NOTE(review): np.dot propagates NaN, so a single missing incwage makes
    # the whole cell's Wage NaN - confirm that this is intended.
    acs_collapse = (acs_df
                    .groupby(['ImmigrantGroup','foreign','statefip','year'])[['uhrswork','perwt','incwage']]
                    .apply(lambda x: pd.Series({
                        'HoursSupplied': np.dot(x['uhrswork'] * 52,x['perwt'])/1e+6,  # Units, millions of hours
                        'BodiesSupplied': x['perwt'].sum(),                           # Weighted head count
                        'Wage': np.dot(x['incwage'],x['perwt'])/x['perwt'].sum()}))   # Weighted mean of incwage
                    .reset_index()
                    )

    AcsFrames.append(acs_collapse)

# Stack every extract; fall back to an empty frame when no extract was found
StateDfAcs = pd.concat(AcsFrames) if AcsFrames else pd.DataFrame()


See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00282.xml corresponding to sample 2000
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00283.xml corresponding to sample 2001
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00284.xml corresponding to sample 2002
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00285.xml corresponding to sample 2003
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00286.xml corresponding to sample 2004
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00287.xml corresponding to sample 2005
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00288.xml corresponding to sample 2006
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00289.xml corresponding to sample 2007
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00290.xml corresponding to sample 2008
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00291.xml corresponding to sample 2009
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00292.xml corresponding to sample 2010
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00293.xml corresponding to sample 2011
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00294.xml corresponding to sample 2012
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00295.xml corresponding to sample 2013
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00296.xml corresponding to sample 2014
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00297.xml corresponding to sample 2015
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00298.xml corresponding to sample 2016
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00299.xml corresponding to sample 2017
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00300.xml corresponding to sample 2018
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00301.xml corresponding to sample 2019
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00302.xml corresponding to sample 2020
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00303.xml corresponding to sample 2021
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/acs/usa_00304.xml corresponding to sample 2022
**********************************************************************



  .apply(lambda x: pd.Series({


# CPS

In [38]:
# Build StateDfCps: each CPS extract is cleaned, restricted to full-time
# workers aged 16 and over, and collapsed to one row per
# immigrant group x foreign flag x state x year.

# Create a path object to the data folder
DataDir = Path(Paths['cps'])

# Construct a list of all files in the CPS folder; sorted so the extracts are
# processed in a stable order
Files = sorted(DataDir.glob("*.xml"))

# One collapsed table per extract. They are stacked once after the loop
# instead of re-copying StateDfCps with pd.concat on every iteration.
CpsFrames = []

for f in Files:

    # Get the data dictionary. It is bound to `codebook` so that the `ddi`
    # module imported from ipumspy is not overwritten by the loop.
    codebook = readers.read_ipums_ddi(f)
    df = readers.read_microdata(codebook, DataDir / codebook.file_description.filename)
    print('\n**********************************************************************\n' +
    f'Working file {f} corresponding to sample ' + str(df['YEAR'].iloc[0]) +
    '\n**********************************************************************\n')

    # Read data in and do some cleaning
    cps_df = (df
            .rename(columns=lambda x: x.lower())
            .pipe(lambda x: x[~x['uhrsworkt'].isin([0,997,999])]) # Dropping all the observations with no hours, or unable to report hours
            .assign(bpl = lambda x: x['bpl'].astype(str).str.zfill(5))  # String of uniform length 5
            # Test the FIRST character of the padded code. Comparing the whole
            # five-character string with '8'/'9' can never match, so that
            # form of the filter silently kept every row.
            .pipe(lambda x: x[~x['bpl'].str[0].isin(['8','9'])])  # Dropping those we can't identify a country of origin for
            .pipe(lambda x: x[x['uhrsworkt'] >= 35])              # Keep the full time workers
            .pipe(lambda x: x[x['age'] >= 16])                    # Drop if below the age of 16
            .pipe(lambda x: x[x['citizen'] != 9])                 # Drop the "not in universe" code for the citizen variable
            # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
            .assign(incwage = lambda x: x['incwage'].replace([999999,999998],np.nan))
            .assign(year = lambda x: pd.to_datetime(x['year'],format='%Y'))
            .assign(ImmigrantGroup = lambda x: x['bpl'].apply(ImmigrantGroup)) # Assign immigrant groups following Peri 2012
            .assign(foreign = lambda x: x['citizen'].isin([3,4,5]).astype(int))
            .drop(columns=['cpsid','cpsidv','cpsidp','asecwth','asecflag', 'month','serial','pernum','age','bpl','citizen',
                            'occ','occ2010','occ1990','ind1990','educ'])
            )

    # Create State-ImmigrantGroup table.
    # Only the value columns are handed to apply(), so the lambda never
    # operates on the grouping columns (the behaviour pandas deprecated).
    # NOTE(review): np.dot propagates NaN, so a single missing incwage makes
    # the whole cell's Wage NaN - confirm that this is intended.
    cps_collapse = (cps_df
                    .groupby(['ImmigrantGroup','foreign','statefip','year'])[['uhrsworkt','asecwt','incwage']]
                    .apply(lambda x: pd.Series({
                        'HoursSupplied': np.dot(x['uhrsworkt'] * 52,x['asecwt'])/1e+6,  # Units, millions of hours
                        'BodiesSupplied': x['asecwt'].sum(),                            # Weighted head count
                        'Wage': np.dot(x['incwage'],x['asecwt'])/x['asecwt'].sum()})    # Weighted mean of incwage
                        )
                    .reset_index()
                    )

    CpsFrames.append(cps_collapse)

# Stack every extract; fall back to an empty frame when no extract was found
StateDfCps = pd.concat(CpsFrames) if CpsFrames else pd.DataFrame()

See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/cps/cps_00030.xml corresponding to sample 1994
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/cps/cps_00031.xml corresponding to sample 1995
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/cps/cps_00032.xml corresponding to sample 1996
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/cps/cps_00033.xml corresponding to sample 1997
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/cps/cps_00034.xml corresponding to sample 1998
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/cps/cps_00035.xml corresponding to sample 1999
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/cps/cps_00036.xml corresponding to sample 2023
**********************************************************************



  .apply(lambda x: pd.Series({
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../../data/cps/cps_00037.xml corresponding to sample 2024
**********************************************************************



  .apply(lambda x: pd.Series({


# Merging ACS, CPS

In [39]:
# Preview the first rows of the collapsed CPS table
StateDfCps.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied,Wage
0,Canada-Australia-New Zealand,1,1,1994-01-01,8.951427,4148.02,32500.0
1,Canada-Australia-New Zealand,1,2,1994-01-01,6.091535,2714.98,29623.315641
2,Canada-Australia-New Zealand,1,4,1994-01-01,20.506824,9182.54,14849.310878
3,Canada-Australia-New Zealand,1,6,1994-01-01,111.973079,46027.9,44158.675754
4,Canada-Australia-New Zealand,1,8,1994-01-01,33.771626,13001.83,40330.894232


In [40]:
# Preview the first rows of the collapsed ACS table
StateDfAcs.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied,Wage
0,Africa,1,1,2000-01-01,5.76342,2605.0,29287.677543
1,Africa,1,2,2000-01-01,1.1011,467.0,31113.490364
2,Africa,1,4,2000-01-01,12.217868,5258.0,34984.459871
3,Africa,1,5,2000-01-01,2.83764,1178.0,52325.008489
4,Africa,1,6,2000-01-01,150.11438,64596.0,47216.771627


In [41]:
# Stack the CPS and ACS tables (CPS rows first) into one long table
StateDf = pd.concat([StateDfCps,StateDfAcs])

In [42]:
# First rows of the combined table
StateDf.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied,Wage
0,Canada-Australia-New Zealand,1,1,1994-01-01,8.951427,4148.02,32500.0
1,Canada-Australia-New Zealand,1,2,1994-01-01,6.091535,2714.98,29623.315641
2,Canada-Australia-New Zealand,1,4,1994-01-01,20.506824,9182.54,14849.310878
3,Canada-Australia-New Zealand,1,6,1994-01-01,111.973079,46027.9,44158.675754
4,Canada-Australia-New Zealand,1,8,1994-01-01,33.771626,13001.83,40330.894232


In [43]:
# Last rows of the combined table
StateDf.tail()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied,Wage
653,Western Europe,1,51,2022-01-01,136.488976,59691.0,103015.937076
654,Western Europe,1,53,2022-01-01,117.363636,51821.0,117866.262712
655,Western Europe,1,54,2022-01-01,12.12406,5424.0,58808.738938
656,Western Europe,1,55,2022-01-01,42.134768,18423.0,80931.114368
657,Western Europe,1,56,2022-01-01,5.25252,2342.0,48510.461144


In [44]:
# Latest year present in the combined table
StateDf['year'].max()

Timestamp('2024-01-01 00:00:00')

In [45]:
# Earliest year present in the combined table
StateDf['year'].min()

Timestamp('1994-01-01 00:00:00')

In [46]:
# Save the combined ACS/CPS table; index=False keeps the row index out of the CSV
StateDf.to_csv(Paths['data'] + '/AcsCpsStateData.csv', index=False)

# State GDP Data

In [47]:
def _ReadBeaStateGdp(csv_path, drop_years=()):
    '''
    Read one state GDP csv file and return the all-industry total in long
    format, one row per state and year.

    Parameters
    ----------
    csv_path : str
        Path to the csv file. It must contain the columns GeoFIPS, GeoName,
        Region, TableName, LineCode, IndustryClassification, Description and
        Unit; every remaining column is treated as a year.
    drop_years : iterable of str, optional
        Year column labels to discard after reshaping, used when another
        file supplies the same year.

    Returns
    -------
    pandas.DataFrame
        Columns GeoName, statefip (first two characters of the cleaned
        GeoFIPS), year (datetime) and NGdp (float).
    '''
    # Regional aggregates that appear next to the states in GeoName
    bea_regions = ['Far West', 'Rocky Mountain', 'Southwest',
                   'Southeast','Plains', 'Great Lakes','Mideast',
                   'New England']
    return (pd.read_csv(csv_path)
            .assign(statefip = lambda x: x['GeoFIPS'].str.replace('"','').str.replace(' ','').str[0:2])
            .pipe(lambda x: x[x['statefip'] != '00'])
            # notna() replaces isin([np.NaN]); np.NaN was removed in NumPy 2.0
            .pipe(lambda x: x[x['GeoName'].notna()])
            .pipe(lambda x: x[~x['GeoName'].isin(bea_regions)])
            # One file pads this label with a trailing space and the other
            # does not, so compare after stripping surrounding whitespace
            .pipe(lambda x: x[x['Description'].str.strip() == 'All industry total'])
            .drop(columns=['GeoFIPS','Region','TableName','LineCode','IndustryClassification',
                           'Description', 'Unit'])
            .pipe(lambda x: pd.melt(x,id_vars=['GeoName','statefip']))
            .rename(columns={'variable':'year','value':'NGdp'})
            .pipe(lambda x: x[~x['year'].isin(list(drop_years))])
            .assign(year = lambda x: pd.to_datetime(x['year']))
            .assign(NGdp = lambda x: x['NGdp'].astype(float))
            )

# 1997 is present in both files; it is dropped here and taken from the second file
Gdp63to96 = _ReadBeaStateGdp(Paths['gdp'] + '/SAGDP_SIC/SAGDP2S__ALL_AREAS_1963_1997.csv',
                             drop_years=['1997'])

Gdp97to23 = _ReadBeaStateGdp(Paths['gdp'] + '/SAGDP/SAGDP2N__ALL_AREAS_1997_2023.csv')

GdpAllYears = pd.concat([Gdp63to96,Gdp97to23]).rename(columns={'GeoName':'StateName'})

In [48]:
# Inspect the column dtypes of the combined GDP frame
GdpAllYears.dtypes

StateName            object
statefip             object
year         datetime64[ns]
NGdp                float64
dtype: object

# Merging GDP, ACS/CPS and State Capital Stock Estimates

In [49]:
# Reload the saved ACS/CPS table. statefip is read as text and left-padded to
# two characters, matching the two-character statefip built for the GDP frame;
# year is parsed back into datetimes.
AcsCpsDf = (pd.read_csv(Paths['data'] + '/AcsCpsStateData.csv', dtype={'statefip':'object'})
            .assign(statefip = lambda x: x['statefip'].str.zfill(2))
            .assign(year = lambda x: pd.to_datetime(x['year']))
            )
AcsCpsDf.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied,Wage
0,Canada-Australia-New Zealand,1,1,1994-01-01,8.951427,4148.02,32500.0
1,Canada-Australia-New Zealand,1,2,1994-01-01,6.091535,2714.98,29623.315641
2,Canada-Australia-New Zealand,1,4,1994-01-01,20.506824,9182.54,14849.310878
3,Canada-Australia-New Zealand,1,6,1994-01-01,111.973079,46027.9,44158.675754
4,Canada-Australia-New Zealand,1,8,1994-01-01,33.771626,13001.83,40330.894232


In [50]:
# Load the state capital stock series and turn its year column into datetimes.
# year is cast to text before parsing; presumably it is stored as a number in
# the .dta file, which pd.to_datetime would otherwise read as an offset from
# the epoch rather than as a calendar year - TODO confirm.
KSeries = (pd.read_stata(Paths['data'] + '/CapitalStockByState.dta')
           .assign(year = lambda x: x['year'].astype(str))
           .assign(year = lambda x: pd.to_datetime(x['year']))
)
KSeries.head()

Unnamed: 0,StateName,year,statefip,K
0,Alabama,1994-01-01,1,159398.3
1,Alaska,1994-01-01,2,58737.85
2,Arizona,1994-01-01,4,226487.6
3,Arkansas,1994-01-01,5,96565.74
4,California,1994-01-01,6,2157399.0


In [51]:
# Rows for 2024 are the only ones left unmatched, because BEA has not yet released GDP estimates for that year.
# how='left' keeps those rows, with missing StateName and NGdp.
AcsCpsBeaMerged = pd.merge(AcsCpsDf,GdpAllYears, on =['year','statefip'], indicator=False, how='left') 
AcsCpsBeaMerged.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied,Wage,StateName,NGdp
0,Canada-Australia-New Zealand,1,1,1994-01-01,8.951427,4148.02,32500.0,Alabama,90098.5
1,Canada-Australia-New Zealand,1,2,1994-01-01,6.091535,2714.98,29623.315641,Alaska,23604.7
2,Canada-Australia-New Zealand,1,4,1994-01-01,20.506824,9182.54,14849.310878,Arizona,100374.2
3,Canada-Australia-New Zealand,1,6,1994-01-01,111.973079,46027.9,44158.675754,California,861360.0
4,Canada-Australia-New Zealand,1,8,1994-01-01,33.771626,13001.83,40330.894232,Colorado,104506.8


In [52]:
# Left-merge the capital stock series on state code, year and state name;
# rows without a match in KSeries are kept.
AcsCpsBeaCapital = pd.merge(AcsCpsBeaMerged,KSeries, on = ['statefip','year', 'StateName'], indicator=False, how = 'left')
AcsCpsBeaCapital.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied,Wage,StateName,NGdp,K
0,Canada-Australia-New Zealand,1,1,1994-01-01,8.951427,4148.02,32500.0,Alabama,90098.5,159398.3
1,Canada-Australia-New Zealand,1,2,1994-01-01,6.091535,2714.98,29623.315641,Alaska,23604.7,58737.85
2,Canada-Australia-New Zealand,1,4,1994-01-01,20.506824,9182.54,14849.310878,Arizona,100374.2,226487.6
3,Canada-Australia-New Zealand,1,6,1994-01-01,111.973079,46027.9,44158.675754,California,861360.0,2157399.0
4,Canada-Australia-New Zealand,1,8,1994-01-01,33.771626,13001.83,40330.894232,Colorado,104506.8,235920.6


# Add in the Price Deflators

In [53]:
# Load the GDP price deflator: the unnamed first column of the csv becomes
# `year` (parsed to datetime) and the column labelled '0' becomes `P`.
PriceDf = (pd.read_csv(Paths['data'] + '/GdpPriceDeflator.csv')
           .rename(columns={'Unnamed: 0':'year','0':'P'})
           .assign(year = lambda x: pd.to_datetime(x['year']))
)
# Attach the deflator by year, then drop every row whose year has no deflator.
# NOTE(review): this rebinds AcsCpsBeaMerged, the name that was an input to
# the capital stock merge; re-running that earlier cell after this one would
# start from the already-merged frame.
AcsCpsBeaMerged = (pd.merge(AcsCpsBeaCapital,PriceDf,how='left',on='year', indicator=True)
                   .pipe(lambda x: x[x['_merge'] != 'left_only']) # Dates in 2024 (data not avail yet)
                   .drop(columns = ['_merge'])
)
AcsCpsBeaMerged.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied,Wage,StateName,NGdp,K,PriceDeflator,InvestmentDeflator
0,Canada-Australia-New Zealand,1,1,1994-01-01,8.951427,4148.02,32500.0,Alabama,90098.5,159398.3,65.564,80.969
1,Canada-Australia-New Zealand,1,2,1994-01-01,6.091535,2714.98,29623.315641,Alaska,23604.7,58737.85,65.564,80.969
2,Canada-Australia-New Zealand,1,4,1994-01-01,20.506824,9182.54,14849.310878,Arizona,100374.2,226487.6,65.564,80.969
3,Canada-Australia-New Zealand,1,6,1994-01-01,111.973079,46027.9,44158.675754,California,861360.0,2157399.0,65.564,80.969
4,Canada-Australia-New Zealand,1,8,1994-01-01,33.771626,13001.83,40330.894232,Colorado,104506.8,235920.6,65.564,80.969


## Add in Pre-Period Shares

In [54]:
# Load the pre-period file and express statefip as two-character text so it
# can serve as the merge key.
PrePeriod = (
    pd.read_stata(Paths['data'] + '/PrePeriod.dta')
    .assign(statefip = lambda x: x['statefip'].astype(str).str.zfill(2))
    )
# Left-merge on state only, so each state's pre-period columns are attached
# to every one of that state's rows.
AcsCpsBeaPreMerged = pd.merge(AcsCpsBeaMerged, PrePeriod,how='left',on = ['statefip'])

In [56]:
# Display the merged analysis frame
AcsCpsBeaPreMerged

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied,Wage,StateName,NGdp,K,...,Africa1990,AsiaOther1990,CaAuNz1990,China1990,EastEu1990,India1990,LA1990,Mexico1990,US1990,WestEu1990
0,Canada-Australia-New Zealand,1,01,1994-01-01,8.951427,4148.02,32500.000000,Alabama,90098.5,1.593983e+05,...,1765.0,15613.0,3061.0,984.0,3798.0,2061.0,6222.0,1006.0,3044410.0,17149.0
1,Canada-Australia-New Zealand,1,02,1994-01-01,6.091535,2714.98,29623.315641,Alaska,23604.7,5.873785e+04,...,192.0,10336.0,3088.0,358.0,1536.0,242.0,2455.0,1221.0,365716.0,6458.0
2,Canada-Australia-New Zealand,1,04,1994-01-01,20.506824,9182.54,14849.310878,Arizona,100374.2,2.264876e+05,...,2846.0,31154.0,18051.0,3351.0,13637.0,3386.0,14084.0,135059.0,2497952.0,42469.0
3,Canada-Australia-New Zealand,1,06,1994-01-01,111.973079,46027.90,44158.675754,California,861360.0,2.157399e+06,...,65994.0,1571776.0,178127.0,208066.0,201873.0,80497.0,747069.0,2184856.0,16691394.0,503355.0
4,Canada-Australia-New Zealand,1,08,1994-01-01,33.771626,13001.83,40330.894232,Colorado,104506.8,2.359206e+05,...,3768.0,33048.0,10245.0,3141.0,17100.0,1770.0,11952.0,30348.0,2351310.0,39222.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18474,Western Europe,1,51,2022-01-01,136.488976,59691.00,103015.937076,Virginia,666681.8,1.572744e+06,...,13116.0,116442.0,11997.0,6633.0,18156.0,11604.0,69558.0,7629.0,4499145.0,68049.0
18475,Western Europe,1,53,2022-01-01,117.363636,51821.00,117866.262712,Washington,742909.5,1.878955e+06,...,4224.0,111513.0,52845.0,8919.0,20394.0,3762.0,13074.0,39789.0,3371412.0,71079.0
18476,Western Europe,1,54,2022-01-01,12.124060,5424.00,58808.738938,West Virginia,98290.0,2.664985e+05,...,586.0,4205.0,1195.0,631.0,1700.0,1257.0,887.0,143.0,1378805.0,7523.0
18477,Western Europe,1,55,2022-01-01,42.134768,18423.00,80931.114368,Wisconsin,400621.4,9.652393e+05,...,1852.0,25224.0,7098.0,2415.0,23006.0,2588.0,11052.0,9153.0,3604290.0,37336.0


# Save

In [57]:
# Write the analysis file in Stata format without the row index;
# convert_dates stores `year` using Stata's yearly ('ty') date format.
AcsCpsBeaPreMerged.to_stata(Paths['data'] + '/StateAnalysisFile.dta', write_index=False, convert_dates={'year':'ty'})