In [174]:
import pandas as pd

### 2013-2017 ACS 5-year PUMS

In [175]:
# List of Bay Area PUMAs; the CSV was copied from M:\Data\Census\corrlib.
# Its PUMARC column is what the statewide PUMS file gets filtered on.
# (The file name suggests 2010 PUMA definitions -- confirm against the source folder.)
ba_puma_csv = './PUMS Relocation Rates/Bay_puma_2010.csv'
ba_puma = pd.read_csv(ba_puma_csv)

In [176]:
# 2013-2017 ACS 5-year PUMS records.
# (Presumably the California housing-unit file, going by the Census
#  "psam_h<state FIPS>" file naming -- confirm.)
# download page: https://factfinder.census.gov/faces/tableservices/jsf/pages/productview.xhtml?pid=ACS_pums_csv_2013_2017&prodType=document
# data dictionary: https://www2.census.gov/programs-surveys/acs/tech_docs/pums/data_dict/PUMS_Data_Dictionary_2013-2017.pdf?#
pums_csv = './PUMS Relocation Rates/psam_h06.csv'
pums = pd.read_csv(pums_csv)

In [177]:
# Restrict the PUMS records to the Bay Area: keep rows whose PUMA code
# appears in the PUMARC column of the Bay Area PUMA list.
in_bay_area = pums['PUMA'].isin(ba_puma['PUMARC'])
pums_ba = pums[in_bay_area]

In [178]:
# Keep only the columns used by the relocation-rate calculation:
# identifiers/geography (SERIALNO, DIVISION, PUMA, REGION, ST), tenure (TEN),
# income and its adjustment factor (HINCP, ADJINC), move-in period (MV)
# and the household weight (WGTP).
#
# .copy() makes pums_ba an independent DataFrame. Without it pums_ba is a
# selection of a selection of `pums`, and the later
# `pums_ba.loc[mask, new_col] = ...` assignments raise pandas'
# SettingWithCopyWarning.
relevant_cols = ['SERIALNO', 'DIVISION', 'PUMA', 'REGION', 'ST', 'TEN', 'ADJINC', 'HINCP', 'MV', 'WGTP']
pums_ba = pums_ba[relevant_cols].copy()

In [179]:
# 0) Put household income (HINCP) into constant dollars -> new column 'hh_inc'.
#    Step 1: ADJINC (6 implied decimal places) adjusts each survey year to 2017.
#    Step 2: one inflation factor (.96239484) then adjusts 2017 to 2015.
# ADJINC codes listed in the data dictionary:
#   1061971 .2013 factor (1.007549 * 1.05401460)
#   1045195 .2014 factor (1.008425 * 1.03646282)
#   1035988 .2015 factor (1.001264 * 1.03468042)
#   1029257 .2016 factor (1.007588 * 1.02150538)
#   1011189 .2017 factor (1.011189 * 1.00000000)
# Rows whose ADJINC is none of these codes get no hh_inc value.
# NOTE(review): the source of the .96239484 factor is not recorded in this
# notebook -- confirm which price index it was taken from.
for adjinc_code in (1061971, 1045195, 1035988, 1029257, 1011189):
    survey_year_rows = pums_ba.ADJINC == adjinc_code
    pums_ba.loc[survey_year_rows, 'hh_inc'] = pums_ba.HINCP * (adjinc_code / 1000000.0) * .96239484

In [180]:
# 1) Assign each household to an income group -> new column 'hh_inc_quartile'.
# The groups are fixed dollar ranges applied to hh_inc (not quartiles computed
# from the data); each range excludes its lower bound and includes its upper.
# HINCP coding from the data dictionary
# (household income, past 12 months; use ADJINC to adjust to constant dollars):
#   bbbbbbbb            .N/A (GQ/vacant)
#   00000000            .No household income
#   -0059999            .Loss of $59,999 or more
#   -0059998..-0000001  .Loss of $1 to $59,998
#   00000001            .$1 or Break even
#   00000002..99999999  .Total household income in dollars
# Rows with a missing hh_inc match no range and are left without a group.
income_ranges = [
    # (exclusive lower bound, inclusive upper bound, group label)
    (-999999999, 30000, 1),
    (30000, 60000, 2),
    (60000, 100000, 3),
    (100000, 999999999, 4),
]
for bin_low, bin_high, bin_label in income_ranges:
    in_range = (pums_ba.hh_inc > bin_low) & (pums_ba.hh_inc <= bin_high)
    pums_ba.loc[in_range, 'hh_inc_quartile'] = bin_label

In [181]:
# 2) Collapse the TEN code into an own/rent label -> new column 'tenure'.
# TEN coding from the data dictionary:
#   b .N/A (GQ/vacant)
#   1 .Owned with mortgage or loan (include home equity loans)
#   2 .Owned free and clear
#   3 .Rented
#   4 .Occupied without payment of rent
# Codes 1 and 2 become 'own', code 3 becomes 'rent'; code 4 and N/A rows get
# no label and therefore drop out of the tenure subsets.
owner_codes = [1.0, 2.0]
pums_ba.loc[pums_ba.TEN.isin(owner_codes), 'tenure'] = 'own'
pums_ba.loc[pums_ba.TEN == 3.0, 'tenure'] = 'rent'

In [182]:
# 3) Flag recent movers -> new column 'moved_last_4yrs' (1 = moved, 0 = did not).
# The target is "moved in the last 5 years"; this cell treats MV codes 1-3 as
# covering the last 4 years.
# MV coding from the data dictionary (when moved into this house or apartment):
#   b .N/A (GQ/vacant)
#   1 .12 months or less
#   2 .13 to 23 months
#   3 .2 to 4 years
#   4 .5 to 9 years
#   5 .10 to 19 years
#   6 .20 to 29 years
#   7 .30 years or more
# NOTE(review): code 3 ("2 to 4 years") is immediately followed by code 4
# ("5 to 9 years"), so codes 1-3 may already span close to five years --
# confirm against the ACS documentation.
# Rows with a missing MV get no flag.
recent_mover_codes = [1.0, 2.0, 3.0]
long_term_codes = [4.0, 5.0, 6.0, 7.0]
pums_ba.loc[pums_ba.MV.isin(recent_mover_codes), 'moved_last_4yrs'] = 1
pums_ba.loc[pums_ba.MV.isin(long_term_codes), 'moved_last_4yrs'] = 0

In [183]:
# Split the Bay Area households into the 2 x 4 tenure-by-income groups
# (own/rent x income group 1-4), and for each group pull out the households
# flagged as recent movers (the *_mv frames).
is_owner = pums_ba.tenure == 'own'
is_renter = pums_ba.tenure == 'rent'
income_group_codes = (1.0, 2.0, 3.0, 4.0)

own_q1, own_q2, own_q3, own_q4 = [
    pums_ba[is_owner & (pums_ba.hh_inc_quartile == group_code)]
    for group_code in income_group_codes
]
rent_q1, rent_q2, rent_q3, rent_q4 = [
    pums_ba[is_renter & (pums_ba.hh_inc_quartile == group_code)]
    for group_code in income_group_codes
]

own_q1_mv, own_q2_mv, own_q3_mv, own_q4_mv = [
    households[households.moved_last_4yrs == 1]
    for households in (own_q1, own_q2, own_q3, own_q4)
]
rent_q1_mv, rent_q2_mv, rent_q3_mv, rent_q4_mv = [
    households[households.moved_last_4yrs == 1]
    for households in (rent_q1, rent_q2, rent_q3, rent_q4)
]

In [186]:
# For every tenure-by-income group: weighted share of households that moved
# (sum of WGTP over movers / sum of WGTP over the whole group), then scaled by
# 5/4 to turn the "last 4 years" share into a 5-year probability.
# NOTE(review): the 2006-2008 section of this notebook computes the same share
# from the same MV codes WITHOUT the 5/4 factor, so the two sets of printed
# rates are not directly comparable. Also, MV code 3 ("2 to 4 years") is
# followed directly by "5 to 9 years", so codes 1-3 may already span about
# five years -- confirm whether this scaling is wanted.


def _five_year_move_prob(movers, group):
    """Return the WGTP-weighted share of `group` found in `movers`, times 5/4.

    Both arguments are household frames carrying a WGTP weight column;
    `movers` is the recent-mover subset of `group`.
    """
    weighted_share = float(movers.WGTP.sum()) / float(group.WGTP.sum())
    return weighted_share * (5.0 / 4.0)


own_q1_move_prop = _five_year_move_prob(own_q1_mv, own_q1)
print(own_q1_move_prop)
own_q2_move_prop = _five_year_move_prob(own_q2_mv, own_q2)
print(own_q2_move_prop)
own_q3_move_prop = _five_year_move_prob(own_q3_mv, own_q3)
print(own_q3_move_prop)
own_q4_move_prop = _five_year_move_prob(own_q4_mv, own_q4)
print(own_q4_move_prop)
rent_q1_move_prop = _five_year_move_prob(rent_q1_mv, rent_q1)
print(rent_q1_move_prop)
rent_q2_move_prop = _five_year_move_prob(rent_q2_mv, rent_q2)
print(rent_q2_move_prop)
rent_q3_move_prop = _five_year_move_prob(rent_q3_mv, rent_q3)
print(rent_q3_move_prop)
rent_q4_move_prop = _five_year_move_prob(rent_q4_mv, rent_q4)
print(rent_q4_move_prop)

0.197694819916
0.217176743899
0.244698247936
0.324910234405
0.668657041651
0.737930045983
0.806149839344
0.911928230745


### Now compare against older PUMS: 2006-2008 ACS PUMS

In [27]:
# List of Bay Area PUMAs used to filter the older PUMS files; the CSV was
# copied from M:\Data\Census\corrlib.
# (The file name suggests Census 2000 5% PUMA definitions -- confirm.)
ba_puma_00_csv = './PUMS Relocation Rates/BayArea_puma5_cens2000.csv'
ba_puma_00 = pd.read_csv(ba_puma_00_csv)

In [25]:
# 2006-2008 ACS PUMS records; the CSV was copied from
# M:\Data\Census\PUMS\PUMS 2006-08.
pums_07_csv = './PUMS Relocation Rates/ss06_08hca.csv'
pums_07 = pd.read_csv(pums_07_csv)

In [28]:
# Restrict the 2006-2008 PUMS records to the Bay Area: keep rows whose PUMA
# code appears in the PUMA column of the Bay Area PUMA list.
in_bay_area_07 = pums_07['PUMA'].isin(ba_puma_00['PUMA'])
pums_07_ba = pums_07[in_bay_area_07]

In [29]:
# Keep only the columns used by the relocation-rate calculation:
# identifiers/geography (SERIALNO, DIVISION, PUMA, REGION, ST), tenure (TEN),
# income and its adjustment factor (HINCP, ADJINC), move-in period (MV)
# and the household weight (WGTP).
#
# .copy() makes pums_07_ba an independent DataFrame. Without it the frame is
# a selection of a selection of `pums_07`, and the later
# `pums_07_ba.loc[mask, new_col] = ...` assignments raise pandas'
# SettingWithCopyWarning.
relevant_cols_07 = ['SERIALNO', 'DIVISION', 'PUMA', 'REGION', 'ST', 'TEN', 'ADJINC', 'HINCP', 'MV', 'WGTP']
pums_07_ba = pums_07_ba[relevant_cols_07].copy()

In [31]:
# 0) Put household income (HINCP) into 2008 dollars -> new column 'hh_inc'.
# ADJINC (6 implied decimal places) codes listed in the data dictionary:
#   1084622 .2006 factor (1.015675 * 1.06788247)
#   1055856 .2007 factor (1.016787 * 1.03842365)
#   1018389 .2008 factor (1.018389 * 1.00000000)
# Rows whose ADJINC is none of these codes get no hh_inc value.
# NOTE(review): unlike the 2013-2017 section, no further inflation factor is
# applied here, so hh_inc stays in 2008 dollars rather than 2015 dollars.
for adjinc_code_07 in (1084622, 1055856, 1018389):
    survey_year_rows_07 = pums_07_ba.ADJINC == adjinc_code_07
    pums_07_ba.loc[survey_year_rows_07, 'hh_inc'] = pums_07_ba.HINCP * (adjinc_code_07 / 1000000.0)

In [32]:
# 1) Assign each household to an income group -> new column 'hh_inc_quartile'.
# The groups are fixed dollar ranges applied to hh_inc (not quartiles computed
# from the data); each range excludes its lower bound and includes its upper.
# HINCP coding from the data dictionary
# (household income, past 12 months; use ADJINC to adjust to constant dollars):
#   bbbbbbbb            .N/A (GQ/vacant)
#   00000000            .No household income
#   -0059999            .Loss of $59,999 or more
#   -0059998..-0000001  .Loss of $1 to $59,998
#   00000001            .$1 or Break even
#   00000002..99999999  .Total household income in dollars
# NOTE(review): hh_inc is in 2008 dollars in this section, while the same
# 30k/60k/100k cut points are applied to 2015-dollar income in the 2013-2017
# section -- the groups are not identical in real terms.
income_ranges_07 = [
    # (exclusive lower bound, inclusive upper bound, group label)
    (-999999999, 30000, 1),
    (30000, 60000, 2),
    (60000, 100000, 3),
    (100000, 999999999, 4),
]
for bin_low, bin_high, bin_label in income_ranges_07:
    in_range_07 = (pums_07_ba.hh_inc > bin_low) & (pums_07_ba.hh_inc <= bin_high)
    pums_07_ba.loc[in_range_07, 'hh_inc_quartile'] = bin_label

In [33]:
# 2) Collapse the TEN code into an own/rent label -> new column 'tenure'.
# TEN coding from the data dictionary:
#   b .N/A (GQ/vacant)
#   1 .Owned with mortgage or loan (include home equity loans)
#   2 .Owned free and clear
#   3 .Rented
#   4 .Occupied without payment of rent
# Codes 1 and 2 become 'own', code 3 becomes 'rent'; code 4 and N/A rows get
# no label and therefore drop out of the tenure subsets.
owner_codes_07 = [1.0, 2.0]
pums_07_ba.loc[pums_07_ba.TEN.isin(owner_codes_07), 'tenure'] = 'own'
pums_07_ba.loc[pums_07_ba.TEN == 3.0, 'tenure'] = 'rent'

In [34]:
# 3) Flag recent movers -> new column 'moved_last_4yrs' (1 = moved, 0 = did not).
# The target is "moved in the last 5 years"; this cell treats MV codes 1-3 as
# covering the last 4 years.
# MV coding from the data dictionary (when moved into this house or apartment):
#   b .N/A (GQ/vacant)
#   1 .12 months or less
#   2 .13 to 23 months
#   3 .2 to 4 years
#   4 .5 to 9 years
#   5 .10 to 19 years
#   6 .20 to 29 years
#   7 .30 years or more
# Rows with a missing MV get no flag.
recent_mover_codes_07 = [1.0, 2.0, 3.0]
long_term_codes_07 = [4.0, 5.0, 6.0, 7.0]
pums_07_ba.loc[pums_07_ba.MV.isin(recent_mover_codes_07), 'moved_last_4yrs'] = 1
pums_07_ba.loc[pums_07_ba.MV.isin(long_term_codes_07), 'moved_last_4yrs'] = 0

In [36]:
# Split the 2006-2008 Bay Area households into the 2 x 4 tenure-by-income
# groups (own/rent x income group 1-4), and for each group pull out the
# households flagged as recent movers (the *_mv_07 frames).
is_owner_07 = pums_07_ba.tenure == 'own'
is_renter_07 = pums_07_ba.tenure == 'rent'
income_group_codes_07 = (1.0, 2.0, 3.0, 4.0)

own_q1_07, own_q2_07, own_q3_07, own_q4_07 = [
    pums_07_ba[is_owner_07 & (pums_07_ba.hh_inc_quartile == group_code)]
    for group_code in income_group_codes_07
]
rent_q1_07, rent_q2_07, rent_q3_07, rent_q4_07 = [
    pums_07_ba[is_renter_07 & (pums_07_ba.hh_inc_quartile == group_code)]
    for group_code in income_group_codes_07
]

own_q1_mv_07, own_q2_mv_07, own_q3_mv_07, own_q4_mv_07 = [
    households[households.moved_last_4yrs == 1]
    for households in (own_q1_07, own_q2_07, own_q3_07, own_q4_07)
]
rent_q1_mv_07, rent_q2_mv_07, rent_q3_mv_07, rent_q4_mv_07 = [
    households[households.moved_last_4yrs == 1]
    for households in (rent_q1_07, rent_q2_07, rent_q3_07, rent_q4_07)
]

In [37]:
# For every tenure-by-income group: weighted share of households that moved
# (sum of WGTP over movers / sum of WGTP over the whole group).
# NOTE(review): unlike the 2013-2017 section, the share is NOT multiplied by
# 5/4 here, although the mover flag is built from the same MV codes -- the
# printed rates of the two sections are therefore not directly comparable.


def _weighted_move_share_07(movers, group):
    """Return the WGTP-weighted share of `group` found in `movers`.

    Both arguments are household frames carrying a WGTP weight column;
    `movers` is the recent-mover subset of `group`.
    """
    return float(movers.WGTP.sum()) / float(group.WGTP.sum())


own_q1_move_prop_07 = _weighted_move_share_07(own_q1_mv_07, own_q1_07)
print(own_q1_move_prop_07)
own_q2_move_prop_07 = _weighted_move_share_07(own_q2_mv_07, own_q2_07)
print(own_q2_move_prop_07)
own_q3_move_prop_07 = _weighted_move_share_07(own_q3_mv_07, own_q3_07)
print(own_q3_move_prop_07)
own_q4_move_prop_07 = _weighted_move_share_07(own_q4_mv_07, own_q4_07)
print(own_q4_move_prop_07)
rent_q1_move_prop_07 = _weighted_move_share_07(rent_q1_mv_07, rent_q1_07)
print(rent_q1_move_prop_07)
rent_q2_move_prop_07 = _weighted_move_share_07(rent_q2_mv_07, rent_q2_07)
print(rent_q2_move_prop_07)
rent_q3_move_prop_07 = _weighted_move_share_07(rent_q3_mv_07, rent_q3_07)
print(rent_q3_move_prop_07)
rent_q4_move_prop_07 = _weighted_move_share_07(rent_q4_mv_07, rent_q4_07)
print(rent_q4_move_prop_07)

0.168620608289
0.228811256868
0.281970008437
0.319342557341
0.634495126447
0.701124876492
0.723014064375
0.765592017364


### Now compare against another older PUMS: 2000 PUMS

In [38]:
# 2000 PUMS household records; the CSV was copied from
# M:\Data\Census\PUMS\PUMS 2000.
pums_00_csv = './PUMS Relocation Rates/hbayarea5_2000.csv'
pums_00 = pd.read_csv(pums_00_csv)

In [39]:
# Restrict the 2000 PUMS records to the Bay Area: keep rows whose puma5 code
# appears in the PUMA column of the Bay Area PUMA list.
in_bay_area_00 = pums_00['puma5'].isin(ba_puma_00['PUMA'])
pums_00_ba = pums_00[in_bay_area_00]

In [40]:
# Keep only the columns used by the relocation-rate calculation: PUMA code
# (puma5), raw tenure code (tenure), household income (hinc), move-in period
# (yrmoved) and household weight (hweight).
# The raw 'tenure' code column is renamed to 'ten' so that the name 'tenure'
# is free for the derived 'own'/'rent' label added further down.
#
# The rename is chained (not inplace=True): it returns a new, independent
# DataFrame instead of mutating a selection of `pums_00`, which keeps the
# later `pums_00_ba.loc[mask, new_col] = ...` assignments from raising
# pandas' SettingWithCopyWarning.
# NOTE: run this cell only on the freshly filtered frame -- re-running it
# after the derived 'tenure' column exists would select that column instead.
relevant_cols_00 = ['puma5', 'tenure', 'hinc', 'yrmoved', 'hweight']
pums_00_ba = pums_00_ba[relevant_cols_00].rename(columns={'tenure': 'ten'})

In [41]:
# 1) Assign each household to an income group -> new column 'hh_inc_quartile'.
# The groups are fixed dollar ranges applied to hinc (not quartiles computed
# from the data); each range excludes its lower bound and includes its upper.
# hinc coding from the data dictionary (T Household Total Income in 1999):
#   V -0059999            . Loss of $59,999 or more
#   R -0000001..-0059998  . Loss of $1 to $59,998
#   V 00000000            . Not in universe (vacant, GQ, no income)
#   V 00000001            . $1 or break even
#   R 00000002..99999998  . $2 to $99,999,998
#   V 99999999            . $99,999,999 or more
# NOTE(review): hinc is used as-is (1999 income, no inflation adjustment in
# this section), while the same 30k/60k/100k cut points are applied to
# adjusted income in the ACS sections -- the groups are not identical in
# real terms.
income_ranges_00 = [
    # (exclusive lower bound, inclusive upper bound, group label)
    (-999999999, 30000, 1),
    (30000, 60000, 2),
    (60000, 100000, 3),
    (100000, 999999999, 4),
]
for bin_low, bin_high, bin_label in income_ranges_00:
    in_range_00 = (pums_00_ba.hinc > bin_low) & (pums_00_ba.hinc <= bin_high)
    pums_00_ba.loc[in_range_00, 'hh_inc_quartile'] = bin_label

In [42]:
# 2) Collapse the raw tenure code ('ten') into an own/rent label
#    -> new column 'tenure'.
# Coding from the data dictionary (T Home Ownership):
#   V 0 . Not in universe (vacant or GQ)
#   V 1 . Owned by you or someone in this household with a mortgage or loan
#   V 2 . Owned by you or someone in this household free and clear (without a mortgage or loan)
#   V 3 . Rented for cash rent
#   V 4 . Occupied without payment of cash rent
# Codes 1 and 2 become 'own', code 3 becomes 'rent'; codes 0 and 4 get no
# label and therefore drop out of the tenure subsets.
owner_codes_00 = [1.0, 2.0]
pums_00_ba.loc[pums_00_ba.ten.isin(owner_codes_00), 'tenure'] = 'own'
pums_00_ba.loc[pums_00_ba.ten == 3.0, 'tenure'] = 'rent'

In [43]:
# 3) Flag recent movers -> new column 'moved_last_5yrs' (1 = moved, 0 = did not).
# The 2000 PUMS move-in categories cover the last 5 years directly.
# yrmoved coding from the data dictionary (T Year Moved In):
#   V blank . Not in universe (vacant or GQ)
#   V 1 . 1999 or 2000
#   V 2 . 1995 to 1998
#   V 3 . 1990 to 1994
#   V 4 . 1980 to 1989
#   V 5 . 1970 to 1979
#   V 6 . 1969 or earlier
# Rows with a missing yrmoved get no flag.
recent_mover_codes_00 = [1.0, 2.0]
long_term_codes_00 = [3.0, 4.0, 5.0, 6.0]
pums_00_ba.loc[pums_00_ba.yrmoved.isin(recent_mover_codes_00), 'moved_last_5yrs'] = 1
pums_00_ba.loc[pums_00_ba.yrmoved.isin(long_term_codes_00), 'moved_last_5yrs'] = 0

In [44]:
# Split the 2000 Bay Area households into the 2 x 4 tenure-by-income groups
# (own/rent x income group 1-4), and for each group pull out the households
# flagged as recent movers (the *_mv_00 frames).
is_owner_00 = pums_00_ba.tenure == 'own'
is_renter_00 = pums_00_ba.tenure == 'rent'
income_group_codes_00 = (1.0, 2.0, 3.0, 4.0)

own_q1_00, own_q2_00, own_q3_00, own_q4_00 = [
    pums_00_ba[is_owner_00 & (pums_00_ba.hh_inc_quartile == group_code)]
    for group_code in income_group_codes_00
]
rent_q1_00, rent_q2_00, rent_q3_00, rent_q4_00 = [
    pums_00_ba[is_renter_00 & (pums_00_ba.hh_inc_quartile == group_code)]
    for group_code in income_group_codes_00
]

own_q1_mv_00, own_q2_mv_00, own_q3_mv_00, own_q4_mv_00 = [
    households[households.moved_last_5yrs == 1]
    for households in (own_q1_00, own_q2_00, own_q3_00, own_q4_00)
]
rent_q1_mv_00, rent_q2_mv_00, rent_q3_mv_00, rent_q4_mv_00 = [
    households[households.moved_last_5yrs == 1]
    for households in (rent_q1_00, rent_q2_00, rent_q3_00, rent_q4_00)
]

In [45]:
# For every tenure-by-income group: weighted share of households that moved
# (sum of hweight over movers / sum of hweight over the whole group).
# No scaling is applied; the mover flag already covers the last 5 years.


def _weighted_move_share_00(movers, group):
    """Return the hweight-weighted share of `group` found in `movers`.

    Both arguments are household frames carrying an hweight weight column;
    `movers` is the recent-mover subset of `group`.
    """
    return float(movers.hweight.sum()) / float(group.hweight.sum())


own_q1_move_prop_00 = _weighted_move_share_00(own_q1_mv_00, own_q1_00)
print(own_q1_move_prop_00)
own_q2_move_prop_00 = _weighted_move_share_00(own_q2_mv_00, own_q2_00)
print(own_q2_move_prop_00)
own_q3_move_prop_00 = _weighted_move_share_00(own_q3_mv_00, own_q3_00)
print(own_q3_move_prop_00)
own_q4_move_prop_00 = _weighted_move_share_00(own_q4_mv_00, own_q4_00)
print(own_q4_move_prop_00)
rent_q1_move_prop_00 = _weighted_move_share_00(rent_q1_mv_00, rent_q1_00)
print(rent_q1_move_prop_00)
rent_q2_move_prop_00 = _weighted_move_share_00(rent_q2_mv_00, rent_q2_00)
print(rent_q2_move_prop_00)
rent_q3_move_prop_00 = _weighted_move_share_00(rent_q3_mv_00, rent_q3_00)
print(rent_q3_move_prop_00)
rent_q4_move_prop_00 = _weighted_move_share_00(rent_q4_mv_00, rent_q4_00)
print(rent_q4_move_prop_00)

0.236740098834
0.311557629432
0.361354534382
0.378042500601
0.674114296145
0.726544850183
0.750852017101
0.785236851867


#### 1990? -- not analyzed yet: no 1990 PUMS file in a usable format has been found, and IPUMS doesn't have the needed variables

In [48]:
# https://www.census.gov/data/tables/2017/demo/geographic-mobility/cps-2017.html
# https://www.census.gov/data/tables/2018/demo/geographic-mobility/cps-2018.html
# 2016-2017: counted by "persons", lower-income people do seem to move more, but counted by "householders", higher-income households move more

In [47]:
# https://www.theatlantic.com/business/archive/2017/10/geographic-mobility-and-housing/542439/
# Highly educated people still relocate for work, but exorbitant housing costs in the best-paying cities 
# make it difficult for anyone else to do so.

In [49]:
# https://www.npr.org/templates/story/story.php?storyId=235384213
# Staying Put: Why Income Inequality Is Up And Geographic Mobility Is Down
# Median income is now a little below what it was in the late 1990s. 
# And you combine that with rising housing prices, then it becomes difficult for people 
# to move to jobs because they can't afford to live where the new jobs are. 

In [46]:
# Interpretation notes (q1 = lowest income group, q4 = highest):
# - q1 has less ability to manage rent burden
# - q1 could potentially be helped by rent control or deed-restricted units
# - q4 has more means to move
# - q4 could also be helped by rent control (though it matters less for them...)