# What This Notebook Does
This notebook creates the state-level analysis file for the period 1994 to 2023 by combining data from ACS and CPS IPUMS extracts for those years. State-level GDP over the corresponding period comes from the BEA's [United States Regional Economic Analysis Project](https://united-states.reaproject.org/), which I downloaded manually from their site.

In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
from ipumspy import readers, ddi
from Credentials import MyCredentials
from Functions import *
from pathlib import Path

def ImmigrantGroup(col):
    '''
    Classify a birthplace code into one of the immigrant groups of
    Peri (2012), for CPS and ACS data.

    `col` is the birthplace code as a string (the callers zero-pad it to
    five characters). Full codes listed individually are resolved first,
    then codes whose first character is '0', then the two-character prefix.
    Anything that matches none of these is returned as 'Other'.
    '''
    # Codes that are classified individually, regardless of their prefix.
    single_codes = {
        '20000': 'Mexico',
        '11000': 'Latin America',
        '26030': 'Other',            # explicitly kept out of the '26' Latin America prefix
        '45300': 'Western Europe',
        '45000': 'Western Europe',
        '15000': 'Canada-Australia-New Zealand',
        '70020': 'Canada-Australia-New Zealand',
        '70010': 'Canada-Australia-New Zealand',
        '50000': 'China',
        '52100': 'India',
    }

    # Two-character prefixes that identify a whole region.
    region_by_prefix = {}
    for prefixes, region in (
        (('21', '25', '26', '30'), 'Latin America'),
        (('41', '42', '43'), 'Western Europe'),
        (('45', '46'), 'Russia and Eastern Europe'),
        (('50', '51', '52', '53', '54', '55'), 'Rest of Asia'),
        (('60',), 'Africa'),
    ):
        for prefix in prefixes:
            region_by_prefix[prefix] = region

    matched = single_codes.get(col)
    if matched is not None:
        return matched

    if col[0] == '0':
        return 'United States'

    return region_by_prefix.get(col[0:2], 'Other')


# ACS

In [22]:
# Collapse every ACS extract to a State x ImmigrantGroup x foreign x year table.
# The per-file tables are collected in a list and stacked once after the loop,
# instead of re-copying the growing frame on every iteration.
AcsTables = []

# Create a path object to the data
DataDir = Path(Paths['acs'])

# Construct file list (sorted so files are processed in a reproducible order)
Files = sorted(DataDir.glob("*.xml"))

for f in Files:

    # Read the DDI codebook and the microdata file it points to.
    # The codebook is named `codebook` so that the `ddi` module imported from
    # ipumspy at the top of the notebook is not overwritten.
    codebook = readers.read_ipums_ddi(f)
    df = readers.read_microdata(codebook, DataDir / codebook.file_description.filename)
    print('\n**********************************************************************\n' +
    f'Working file {f} corresponding to sample ' + str(df['YEAR'][0]) +
    '\n**********************************************************************\n')

    # Read data in and do some cleaning
    acs_df = (df
            .rename(columns=lambda x: x.lower())
            .drop(columns=['bpl'])                                # We will use the detailed variable
            .rename(columns={'bpld':'bpl'})
            .assign(bpl = lambda x: x['bpl'].astype(str).str.zfill(5))  # String of uniform length 5
            .pipe(lambda x: x[~x['bpl'].str[0].isin(['8','9'])])  # Dropping those we can't identify a country of origin for
            .pipe(lambda x: x[x['uhrswork'] >= 35])               # Keep the full time workers (this also removes the 0 "no hours" code)
            .pipe(lambda x: x[x['age'] != 999])                   # Missing age
            .pipe(lambda x: x[x['age'] >= 16])                    # Drop if below the age of 16
            .pipe(lambda x: x[x['citizen'] != 9])                 # Drop the "not in universe" code (was applied twice before)
            .assign(year = lambda x: pd.to_datetime(x['year'],format='%Y'))
            .assign(ImmigrantGroup = lambda x: x['bpl'].apply(ImmigrantGroup)) # Assign immigrant groups following Peri 2012
            # NOTE(review): presumably citizen == 0 is the N/A (US-born) code in IPUMS USA -- confirm against the codebook
            .assign(foreign = lambda x: (x['citizen'] != 0).astype(int))
            .pipe(lambda x: x.loc[:,['perwt','uhrswork','foreign','statefip','year','ImmigrantGroup']])
            )

    # Create State-ImmigrantGroup table (person-weighted totals)
    acs_collapse = (acs_df
                    .groupby(['ImmigrantGroup','foreign','statefip','year'])
                    .apply(lambda x: pd.Series({
                        'HoursSupplied': np.dot(x['uhrswork'] * 52,x['perwt'])/1e+6,  # Units, millions of hours
                        'BodiesSupplied': np.dot(np.ones(len(x['perwt'])),x['perwt'])}))
                    .reset_index()
                    )

    AcsTables.append(acs_collapse)

# Stack once; row labels of the per-file tables are kept, as before.
# With no extracts in the folder the result is an empty frame, as before.
StateDfAcs = pd.concat(AcsTables) if AcsTables else pd.DataFrame()

See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00100.xml corresponding to sample 2000
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00101.xml corresponding to sample 2001
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00102.xml corresponding to sample 2002
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00103.xml corresponding to sample 2003
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00104.xml corresponding to sample 2004
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00105.xml corresponding to sample 2005
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00106.xml corresponding to sample 2006
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00107.xml corresponding to sample 2007
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00108.xml corresponding to sample 2008
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00109.xml corresponding to sample 2009
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00110.xml corresponding to sample 2010
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00111.xml corresponding to sample 2011
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00112.xml corresponding to sample 2012
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00113.xml corresponding to sample 2013
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00114.xml corresponding to sample 2014
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00115.xml corresponding to sample 2015
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00116.xml corresponding to sample 2016
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00117.xml corresponding to sample 2017
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00118.xml corresponding to sample 2018
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00119.xml corresponding to sample 2019
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00120.xml corresponding to sample 2020
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00121.xml corresponding to sample 2021
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/acs/usa_00122.xml corresponding to sample 2022
**********************************************************************



# CPS

In [16]:
# Collapse every CPS extract to a State x ImmigrantGroup x foreign x year table.
# The per-file tables are collected in a list and stacked once after the loop,
# instead of re-copying the growing frame on every iteration.
CpsTables = []

# Create a path object to the data folder
DataDir = Path(Paths['cps'])

# Construct a list of all files in the CPS folder (sorted so the order is reproducible)
Files = sorted(DataDir.glob("*.xml"))

for f in Files:

    # Read the DDI codebook and the microdata file it points to.
    # The codebook is named `codebook` so that the `ddi` module imported from
    # ipumspy at the top of the notebook is not overwritten.
    codebook = readers.read_ipums_ddi(f)
    df = readers.read_microdata(codebook, DataDir / codebook.file_description.filename)
    print('\n**********************************************************************\n' +
    f'Working file {f} corresponding to sample ' + str(df['YEAR'][0]) +
    '\n**********************************************************************\n')

    # Read data in and do some cleaning
    cps_df = (df
            .rename(columns=lambda x: x.lower())
            .pipe(lambda x: x[~x['uhrsworkt'].isin([0,997,999])]) # Dropping all the observations with no hours, or unable to report hours
            .assign(bpl = lambda x: x['bpl'].astype(str).str.zfill(5))  # String of uniform length 5
            # Compare the FIRST character, as the ACS cell does. The previous test,
            # x['bpl'].isin(['8','9']), could never match a 5-character code, so no
            # row was ever dropped and unknown birthplaces ended up in 'Other'.
            .pipe(lambda x: x[~x['bpl'].str[0].isin(['8','9'])])  # Dropping those we can't identify a country of origin for
            .pipe(lambda x: x[x['uhrsworkt'] >= 35])              # Keep the full time workers
            .pipe(lambda x: x[x['age'] >= 16])                    # Drop if below the age of 16
            .pipe(lambda x: x[x['citizen'] != 9])                 # Drop the "not in universe" code (was applied twice before)
            .assign(year = lambda x: pd.to_datetime(x['year'],format='%Y'))
            .assign(ImmigrantGroup = lambda x: x['bpl'].apply(ImmigrantGroup)) # Assign immigrant groups following Peri 2012
            # NOTE(review): presumably codes 3, 4, 5 are the foreign-born categories of the CPS citizen variable -- confirm against the codebook
            .assign(foreign = lambda x: x['citizen'].isin([3,4,5]).astype(int))
            # Keep only what the collapse needs (same approach as the ACS cell) rather than
            # dropping a fixed list of columns that must all be present in the extract.
            .pipe(lambda x: x.loc[:,['asecwt','uhrsworkt','foreign','statefip','year','ImmigrantGroup']])
            )

    # Create State-ImmigrantGroup table (person-weighted totals)
    cps_collapse = (cps_df
                    .groupby(['ImmigrantGroup','foreign','statefip','year'])
                    .apply(lambda x: pd.Series({
                        'HoursSupplied': np.dot(x['uhrsworkt'] * 52,x['asecwt'])/1e+6,  # Units, millions of hours
                        'BodiesSupplied': np.dot(np.ones(len(x['asecwt'])),x['asecwt'])}))
                    .reset_index()
                    )

    CpsTables.append(cps_collapse)

# Stack once; row labels of the per-file tables are kept, as before.
# With no extracts in the folder the result is an empty frame, as before.
StateDfCps = pd.concat(CpsTables) if CpsTables else pd.DataFrame()

See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/cps/cps_00022.xml corresponding to sample 1994
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/cps/cps_00023.xml corresponding to sample 1995
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/cps/cps_00024.xml corresponding to sample 1996
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/cps/cps_00025.xml corresponding to sample 1997
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/cps/cps_00026.xml corresponding to sample 1998
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/cps/cps_00027.xml corresponding to sample 1999
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/cps/cps_00028.xml corresponding to sample 2023
**********************************************************************



See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.



**********************************************************************
Working file ../data/cps/cps_00029.xml corresponding to sample 2024
**********************************************************************



# Merging ACS, CPS

In [17]:
StateDfCps.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied
0,Canada-Australia-New Zealand,1,1,1994-01-01,8.951427,4148.02
1,Canada-Australia-New Zealand,1,2,1994-01-01,6.091535,2714.98
2,Canada-Australia-New Zealand,1,4,1994-01-01,20.506824,9182.54
3,Canada-Australia-New Zealand,1,6,1994-01-01,111.973079,46027.9
4,Canada-Australia-New Zealand,1,8,1994-01-01,33.771626,13001.83


In [14]:
StateDfAcs.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied
0,United States,0,1,2000-01-01,4039.549644,1765526.0
1,United States,0,2,2000-01-01,643.007872,265878.0
2,United States,0,4,2000-01-01,4042.443964,1750984.0
3,United States,0,5,2000-01-01,2453.606272,1066271.0
4,United States,0,6,2000-01-01,20963.590128,9043264.0


In [24]:
# Stack the CPS and ACS state tables (CPS rows first). The row labels of the
# inputs are kept, so index values repeat in the combined frame.
StateDf = pd.concat([StateDfCps,StateDfAcs])

In [25]:
StateDf.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied
0,Canada-Australia-New Zealand,1,1,1994-01-01,8.951427,4148.02
1,Canada-Australia-New Zealand,1,2,1994-01-01,6.091535,2714.98
2,Canada-Australia-New Zealand,1,4,1994-01-01,20.506824,9182.54
3,Canada-Australia-New Zealand,1,6,1994-01-01,111.973079,46027.9
4,Canada-Australia-New Zealand,1,8,1994-01-01,33.771626,13001.83


In [26]:
StateDf.tail()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied
653,Western Europe,1,51,2022-01-01,136.488976,59691.0
654,Western Europe,1,53,2022-01-01,117.363636,51821.0
655,Western Europe,1,54,2022-01-01,12.12406,5424.0
656,Western Europe,1,55,2022-01-01,42.134768,18423.0
657,Western Europe,1,56,2022-01-01,5.25252,2342.0


In [27]:
StateDf['year'].max()

Timestamp('2024-01-01 00:00:00')

In [28]:
StateDf['year'].min()

Timestamp('1994-01-01 00:00:00')

In [30]:
# Save the stacked table; index=False leaves the (repeating) row labels out of the file.
# This CSV is read back in the "Merging GDP, ACS/CPS" section.
StateDf.to_csv(Paths['data'] + '/AcsCpsStateData.csv', index=False)

# State GDP Data

In [47]:
# Build one long table (year, NGdp, StateAbb) from the per-state GDP CSV files.
# pathlib (imported at the top of the notebook) replaces the mid-notebook
# `import glob`; dot-files are skipped, as glob's "*" pattern did, and the list
# is sorted so the files are read in a reproducible order.
Files = sorted(p for p in Path(Paths['gdp']).glob("*.csv") if not p.name.startswith('.'))

GdpTables = []
for f in Files:
    # The file name (without ".csv") is expected to end with
    #   <2-letter state><1 char><4-digit first year><1 char><4-digit last year>
    # These are the same character positions the original full-path slices used.
    stem = f.stem
    state = stem[-12:-10]
    startyear = int(stem[-9:-5])
    endyear = int(stem[-4:])

    GdpDf = (pd.read_csv(f, header=0)
        .pipe(lambda x: x[x['Unnamed: 0'] == 'All industry total']) # Keep just the all industry total
        .drop(columns=['Unnamed: 0'])
        # Wide (one column per year) to long (one row per year)
        .melt(value_vars=[str(year) for year in range(startyear, endyear + 1)], id_vars=['Unnamed: 1'],
              var_name='year', value_name='NGdp')
        .drop(columns=['Unnamed: 1'])
        .assign(StateAbb = state)
    )
    GdpTables.append(GdpDf)

# Stack once instead of re-copying the growing frame on every iteration.
# With no CSV files in the folder the result is an empty frame, as before.
GdpAllYears = pd.concat(GdpTables) if GdpTables else pd.DataFrame()

In [48]:
# Merge with fipscode
# Crosswalk from state abbreviation to a two-character, zero-padded FIPS code.
FipsCross = (pd.read_csv(Paths['data'] + '/state_abbrev_fips.csv', header = 0, 
                         dtype={'statefip':'object', 'StateAbb':'object'})
             .assign(statefip = lambda x: x['statefip'].str.zfill(2)))

# This cell overwrites GdpAllYears, so it has to tolerate being run more than once:
#   * a `statefip` column left by an earlier run is dropped before merging
#     (otherwise the merge produces statefip_x / statefip_y);
#   * NGdp is cast to str before the commas are stripped, so values that are
#     already numeric survive the conversion instead of raising or becoming NaN
#     under the .str accessor.
GdpAllYears = (GdpAllYears
              .drop(columns=['statefip'], errors='ignore')
              .merge(FipsCross, on='StateAbb', how='left', indicator=False)
              .assign(year = lambda x: pd.to_datetime(x['year'], format = '%Y'))
              .assign(NGdp = lambda x: x['NGdp'].astype(str).str.replace(',','', regex=False).astype('float'))
              )

In [49]:
GdpAllYears.head()

Unnamed: 0,year,NGdp,StateAbb,statefip
0,1963-01-01,7921.2,IA,19
1,1964-01-01,8349.6,IA,19
2,1965-01-01,9196.3,IA,19
3,1966-01-01,10202.2,IA,19
4,1967-01-01,10617.1,IA,19


# Merging GDP, ACS/CPS

In [50]:
# Reload the stacked ACS/CPS table saved earlier. statefip is read as text and
# zero-padded to two characters, and year is parsed back into a datetime.
AcsCpsDf = pd.read_csv(Paths['data'] + '/AcsCpsStateData.csv', dtype={'statefip':'object'}).assign(
    statefip = lambda frame: frame['statefip'].str.zfill(2),
    year = lambda frame: pd.to_datetime(frame['year']),
)
AcsCpsDf.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied
0,Canada-Australia-New Zealand,1,1,1994-01-01,8.951427,4148.02
1,Canada-Australia-New Zealand,1,2,1994-01-01,6.091535,2714.98
2,Canada-Australia-New Zealand,1,4,1994-01-01,20.506824,9182.54
3,Canada-Australia-New Zealand,1,6,1994-01-01,111.973079,46027.9
4,Canada-Australia-New Zealand,1,8,1994-01-01,33.771626,13001.83


In [51]:
# Attach state GDP to every ACS/CPS row (left merge on year and state FIPS code).
# Rows for 2024 are the only ones expected to stay unmatched, because BEA has not released those estimates yet.
AcsCpsBeaMerged = pd.merge(AcsCpsDf,GdpAllYears, on =['year','statefip'], indicator=False, how='left') 
AcsCpsBeaMerged.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied,NGdp,StateAbb
0,Canada-Australia-New Zealand,1,1,1994-01-01,8.951427,4148.02,90098.5,AL
1,Canada-Australia-New Zealand,1,2,1994-01-01,6.091535,2714.98,23604.7,AK
2,Canada-Australia-New Zealand,1,4,1994-01-01,20.506824,9182.54,100374.2,AZ
3,Canada-Australia-New Zealand,1,6,1994-01-01,111.973079,46027.9,861360.0,CA
4,Canada-Australia-New Zealand,1,8,1994-01-01,33.771626,13001.83,104506.8,CO


# Add in the Price Deflator

In [52]:
# GDP price deflator: one row per year, deflator stored in column P.
PriceDf = (pd.read_csv(Paths['data'] + '/GdpPriceDeflator.csv')
           .rename(columns={'Unnamed: 0':'year','0':'P'})
           .assign(year = lambda x: pd.to_datetime(x['year']))
)

# This cell overwrites AcsCpsBeaMerged. A `P` column left by an earlier run is
# dropped first, so running the cell again does not silently produce P_x / P_y.
AcsCpsBeaMerged = (AcsCpsBeaMerged
                   .drop(columns=['P'], errors='ignore')
                   .merge(PriceDf, how='left', on='year', indicator=True)
                   .pipe(lambda x: x[x['_merge'] != 'left_only']) # Dates in 2024 (data not avail yet)
                   .drop(columns = ['_merge'])
)
AcsCpsBeaMerged.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied,NGdp,StateAbb,P
0,Canada-Australia-New Zealand,1,1,1994-01-01,8.951427,4148.02,90098.5,AL,65.564
1,Canada-Australia-New Zealand,1,2,1994-01-01,6.091535,2714.98,23604.7,AK,65.564
2,Canada-Australia-New Zealand,1,4,1994-01-01,20.506824,9182.54,100374.2,AZ,65.564
3,Canada-Australia-New Zealand,1,6,1994-01-01,111.973079,46027.9,861360.0,CA,65.564
4,Canada-Australia-New Zealand,1,8,1994-01-01,33.771626,13001.83,104506.8,CO,65.564


# Clean the Pre-Period-Data

In [53]:
# Collapse every pre-period extract to weighted counts by
# ImmigrantGroup x state x year. The per-file tables are collected in a list
# and stacked once after the loop.
PrePeriodTables = []

# Create a path object to the data
DataDir = Path(Paths['preperiod'])

# Construct file list (sorted so files are processed in a reproducible order)
Files = sorted(DataDir.glob("*.xml"))

for f in Files:

    # Read the DDI codebook and the microdata file it points to.
    # The codebook is named `codebook` so that the `ddi` module imported from
    # ipumspy at the top of the notebook is not overwritten.
    codebook = readers.read_ipums_ddi(f)
    df = readers.read_microdata(codebook, DataDir / codebook.file_description.filename)

    # Read data in and do some cleaning
    preperiod_df = (df
            .rename(columns=lambda x: x.lower())
            .drop(columns=['bpl'])                                # We will use the detailed variable
            .rename(columns={'bpld':'bpl'})
            .assign(bpl = lambda x: x['bpl'].astype(str).str.zfill(5))  # String of uniform length 5
            .pipe(lambda x: x[~x['bpl'].str[0].isin(['8','9'])])  # Dropping those we can't identify a country of origin for
            .pipe(lambda x: x[x['age'] != 999])                   # Missing age
            .pipe(lambda x: x[x['age'] >= 16])                    # Drop if below the age of 16
            .assign(year = lambda x: pd.to_datetime(x['year'],format='%Y'))
            .assign(ImmigrantGroup = lambda x: x['bpl'].apply(ImmigrantGroup)) # Assign immigrant groups following Peri 2012
            .pipe(lambda x: x.loc[:,['perwt','statefip','year','ImmigrantGroup']])
            )

    # Create State-ImmigrantGroup table: sum of person weights per cell,
    # with statefip stored as a two-character, zero-padded string
    preperiod_collapse = (preperiod_df
                    .groupby(['ImmigrantGroup','statefip','year'])
                    .apply(lambda x: pd.Series({
                        'Count': np.dot(np.ones(len(x['perwt'])),x['perwt'])}))
                    .reset_index()
                    .assign(statefip = lambda x: x['statefip'].astype(str).str.zfill(2))
                    )

    PrePeriodTables.append(preperiod_collapse)

# Stack once; with no extracts in the folder the result is an empty frame, as before.
Df = pd.concat(PrePeriodTables) if PrePeriodTables else pd.DataFrame()

See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.


In [54]:
# Reshape the pre-period counts to one row per state, with one column per
# (immigrant group, year) pair named "<group>_<year>", e.g. "US_1920".
# Long group names are shortened first; groups not listed keep their name.
# State/group/year cells with no observations are filled with 0.
Df_Wide = (
    Df
    .assign(groupyear = lambda frame: (
        frame['ImmigrantGroup'].replace({
            'Canada-Australia-New Zealand':'CaAuNz',
            'United States':'US',
            'Western Europe':'WestEu',
            'Latin America':'LA',
            'Russia and Eastern Europe':'EastEu',
            'Rest of Asia':'AsiaOther',
        })
        + '_'
        + frame['year'].astype(str).str[0:4]      # first four characters of the date = the year
    ))
    .drop(columns=['ImmigrantGroup', 'year'])
    .pivot(index=['statefip'], columns=['groupyear'])
    .fillna(0)
    .droplevel(0, axis=1)                         # remove the outer column level created by the pivot
    .reset_index()
)

Df_Wide.head()


groupyear,statefip,Africa_1920,Africa_1930,Africa_1940,Africa_1950,Africa_1960,AsiaOther_1920,AsiaOther_1930,AsiaOther_1940,AsiaOther_1950,...,US_1920,US_1930,US_1940,US_1950,US_1960,WestEu_1920,WestEu_1930,WestEu_1940,WestEu_1950,WestEu_1960
0,1,99.63,0.0,0.0,0.0,0.0,398.52,1615.2,1554.0,820.0,...,1382884.76,1622670.3,1829254.0,1995881.0,2069707.0,10215.14,9691.2,9676.0,7065.0,5779.0
1,2,0.0,0.0,0.0,0.0,0.0,816.81,200.0,0.0,0.0,...,26554.32,30100.0,0.0,0.0,134994.0,3277.67,2700.0,0.0,0.0,1891.0
2,4,97.2,0.0,0.0,55.0,99.0,921.55,302.85,900.0,313.0,...,149890.63,225724.2,279896.0,459518.0,761075.0,10230.14,8984.55,5857.0,8264.0,12844.0
3,5,0.0,0.0,100.0,0.0,0.0,225.26,201.9,500.0,121.0,...,1018332.64,1167991.5,1269238.0,1262126.0,1172489.0,12325.25,6763.65,10029.0,5795.0,4287.0
4,6,314.1,1009.5,2000.0,2634.0,3788.0,61009.82,82577.1,70256.0,77795.0,...,1818134.64,3259372.65,4235989.0,6851779.0,9396205.0,357512.09,469215.6,417933.0,436017.0,453757.0


# Merge Acs/Cps with Pre Period

In [55]:
# Attach the wide pre-period counts to every ACS/CPS/GDP row. Df_Wide has one row per
# statefip, so its values repeat across all groups and years of a state.
# The left merge keeps every row of AcsCpsBeaMerged.
StateAnalysis = pd.merge(AcsCpsBeaMerged,Df_Wide, on=['statefip'], how='left', indicator=False)
StateAnalysis.head()

Unnamed: 0,ImmigrantGroup,foreign,statefip,year,HoursSupplied,BodiesSupplied,NGdp,StateAbb,P,Africa_1920,...,US_1920,US_1930,US_1940,US_1950,US_1960,WestEu_1920,WestEu_1930,WestEu_1940,WestEu_1950,WestEu_1960
0,Canada-Australia-New Zealand,1,1,1994-01-01,8.951427,4148.02,90098.5,AL,65.564,99.63,...,1382884.76,1622670.3,1829254.0,1995881.0,2069707.0,10215.14,9691.2,9676.0,7065.0,5779.0
1,Canada-Australia-New Zealand,1,2,1994-01-01,6.091535,2714.98,23604.7,AK,65.564,0.0,...,26554.32,30100.0,0.0,0.0,134994.0,3277.67,2700.0,0.0,0.0,1891.0
2,Canada-Australia-New Zealand,1,4,1994-01-01,20.506824,9182.54,100374.2,AZ,65.564,97.2,...,149890.63,225724.2,279896.0,459518.0,761075.0,10230.14,8984.55,5857.0,8264.0,12844.0
3,Canada-Australia-New Zealand,1,6,1994-01-01,111.973079,46027.9,861360.0,CA,65.564,314.1,...,1818134.64,3259372.65,4235989.0,6851779.0,9396205.0,357512.09,469215.6,417933.0,436017.0,453757.0
4,Canada-Australia-New Zealand,1,8,1994-01-01,33.771626,13001.83,104506.8,CO,65.564,94.78,...,521164.89,630129.9,700539.0,885157.0,1082450.0,51065.17,40077.15,31132.0,24838.0,24110.0


In [56]:
# Write the final analysis file for Stata, without the row index.
# convert_dates={'year':'ty'} asks pandas to write the datetime `year` column in Stata's 'ty' date format.
StateAnalysis.to_stata(Paths['data'] + '/StateAnalysisFile.dta', write_index=False, convert_dates={'year':'ty'})