In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import binscatter
import numpy as np
import os
import pickle
import pyproj
from shapely.geometry import Point
import censusdata as cd
import geopandas as gpd
from pandas._libs.lib import is_integer
import regex as re
import statsmodels.api as sm
from sklearn.metrics import mean_absolute_error
from statsmodels.graphics.regressionplots import abline_plot, plot_fit

### Figure 1 - Rent and Price Gradients across top 30 MSAs
See STATA Code

### Figure 2 - Price and Rent Growth, NYC and SF

In [2]:
# Reference point for each MSA, keyed by the full CBSA title.
# Each value is a shapely Point built as Point(longitude, latitude); the dict is
# later wrapped in a GeoDataFrame declared as EPSG:4326 and reprojected.
# According to the notebook's own comments these are City Hall locations
# (Grand Central Station for NYC).
# NOTE(review): the entries have not been cross-checked here; e.g. the San Diego
# point looks offset to the south-east of downtown San Diego — confirm.
msa_city_halls = {
    "New York-Newark-Jersey City, NY-NJ-PA": Point(-73.97735818122587, 40.75360363043727),
    "Los Angeles-Long Beach-Anaheim, CA": Point(-118.24252459246136, 34.05460218452133),
    "Chicago-Naperville-Elgin, IL-IN-WI": Point(-87.63203491924644, 41.884717076254915),
    "Dallas-Fort Worth-Arlington, TX": Point(-96.80279248769227, 32.800965872868694),
    "Houston-The Woodlands-Sugar Land, TX": Point(-95.36937482498811, 29.761089990716613),
    "Washington-Arlington-Alexandria, DC-VA-MD-WV": Point(-77.03652980226985, 38.89785162759491),
    "Miami-Fort Lauderdale-Pompano Beach, FL": Point(-80.23615723041286, 25.73577257017272),
    "Philadelphia-Camden-Wilmington, PA-NJ-DE-MD": Point(-75.16351548874385, 39.953038487867026),
    "Atlanta-Sandy Springs-Alpharetta, GA": Point(-84.390397844751, 33.7488895763707),
    "Phoenix-Mesa-Chandler, AZ": Point(-112.07730967359518, 33.44900148985069),
    "Boston-Cambridge-Newton, MA-NH": Point(-71.05800373099267, 42.3605486787696),
    "San Francisco-Oakland-Berkeley, CA": Point(-122.41924170230465, 37.77946153559538),
    "Riverside-San Bernardino-Ontario, CA": Point(-117.37553547358051, 33.980732883142146),
    "Detroit-Warren-Dearborn, MI": Point(-83.04382800215781, 42.3297768018276),
    "Seattle-Tacoma-Bellevue, WA": Point(-122.32993003265794, 47.60409149522296),
    "Minneapolis-St. Paul-Bloomington, MN-WI": Point(-93.26543833090201, 44.977478004204215),
    "San Diego-Chula Vista-Carlsbad, CA": Point(-117.10765111313297, 32.675450648815755),
    "Tampa-St. Petersburg-Clearwater, FL": Point(-82.45727419909451, 27.952274038205715),
    "Denver-Aurora-Lakewood, CO": Point(-104.99079680224305, 39.73932033012998),
    "St. Louis, MO-IL": Point(-90.19954729857595, 38.62786882898026),
    "Baltimore-Columbia-Towson, MD": Point(-76.6101482594712, 39.29152536367409),
    "Charlotte-Concord-Gastonia, NC-SC": Point(-80.83797995283503, 35.22253748266689),
    "Orlando-Kissimmee-Sanford, FL": Point(-81.37953479478388, 28.538635949167848),
    "San Antonio-New Braunfels, TX": Point(-98.4951415720483, 29.42473466610585),
    "Portland-Vancouver-Hillsboro, OR-WA": Point(-122.67919403198559, 45.515883523179866),
    "Sacramento-Roseville-Folsom, CA": Point(-121.49333111762331, 38.58271161629985),
    "Pittsburgh, PA": Point(-79.99659636546517, 40.44023678221106),
    "Las Vegas-Henderson-Paradise, NV": Point(-115.14855607536857, 36.16747408209834),
    "Austin-Round Rock-Georgetown, TX": Point(-97.74713291785895, 30.265186279240133),
    "Cincinnati, OH-KY-IN": Point(-84.51909611760684, 39.10446774409015),
    "Cleveland-Elyria, OH": Point(-81.6930476140442, 41.5053277050308),
    "Kansas City, MO-KS": Point(-94.578285730115, 39.1010737646118),
    "Columbus, OH": Point(-83.0024547595931, 39.9637007958694),
    "Indianapolis-Carmel-Anderson, IN": Point(-86.1527214971812, 39.7721604823431),
    "San Jose-Sunnyvale-Santa Clara, CA": Point(-121.884315873927, 37.3379980099373),
    "Virginia Beach-Norfolk-Newport News, VA-NC": Point(-76.0558554165705, 36.7529078409642),
    "Nashville-Davidson--Murfreesboro--Franklin, TN": Point(-86.7662065918226, 36.1719947399064),
    "Providence-Warwick, RI-MA": Point(-71.412318525675, 41.8242118708439),
    "Milwaukee-Waukesha, WI": Point(-87.9095751962868, 43.0419137098374),
    "Jacksonville, FL": Point(-81.6609781752069, 30.3756071265681),
    "Memphis, TN-MS-AR": Point(-90.0525006455091, 35.1571305205873),
    "Oklahoma City, OK": Point(-97.5201902178872, 35.4691695406864),
    "Hartford-East Hartford-Middletown, CT": Point(-72.6709678916762, 41.7641327640195),
    "Louisville/Jefferson County, KY-IN": Point(-85.7605710596327, 38.2548592792082),
    "New Orleans-Metairie, LA": Point(-90.0766333463257, 29.9537839928333),
    "Richmond, VA": Point(-77.4317698778892, 37.5414041880906),
    "Buffalo-Cheektowaga, NY": Point(-78.8789065772405, 42.8867608775758),
    "Raleigh-Cary, NC": Point(-78.6428638281766, 35.7788111341718),
    "Salt Lake City, UT": Point(-111.887360818175, 40.7661353706654),
    "Rochester, NY": Point(-77.6141558833138, 43.1571105088485),
    "Birmingham-Hoover, AL": Point(-86.8100188707269, 33.529068145948),
    "Grand Rapids-Kentwood, MI": Point(-85.6710991751044, 42.9757472987403),
    "Tucson, AZ": Point(-110.97295530836, 32.2231612404127),
    "Urban Honolulu, HI": Point(-157.857410100852, 21.3095473693441),
    "Tulsa, OK": Point(-95.9901218815961, 36.1576899024592),
    "Fresno, CA": Point(-119.783643958335, 36.7399589236356),
    "Worcester, MA-CT": Point(-71.8012264542394, 42.2628940064412),
    "Bridgeport-Stamford-Norwalk, CT": Point(-73.1923457029622, 41.1806689168382),
    "Albuquerque, NM": Point(-106.65184220209, 35.0917980651784),
    "Albany-Schenectady-Troy, NY": Point(-73.7543872000024, 42.6519510059115),
    "Omaha-Council Bluffs, NE-IA": Point(-95.9373610092953, 41.2597924597602),
    "New Haven-Milford, CT": Point(-72.9249732759824, 41.3100898957712),
    "Bakersfield, CA": Point(-119.019787756511, 35.3732062493274),
    "Baton Rouge, LA": Point(-91.1890544687199, 30.4554774662898),
    "Greenville-Anderson, SC": Point(-82.3951555110306, 34.8566630853142),
    "Oxnard-Thousand Oaks-Ventura, CA": Point(-119.181614727174, 34.203202717367),
    "Allentown-Bethlehem-Easton, PA-NJ": Point(-75.4667009116915, 40.6040190197423),
    "Knoxville, TN": Point(-83.9158075433303, 35.968663057639),
    "El Paso, TX": Point(-106.484682158435, 31.7619967235255),
    "Dayton-Kettering, OH": Point(-84.1933035680526, 39.7598257315708),
    "McAllen-Edinburg-Mission, TX": Point(-98.2390646772491, 26.301782579367),
    "Columbia, SC": Point(-81.0371440198162, 34.0107137654275),
    "North Port-Sarasota-Bradenton, FL": Point(-82.2069803886827, 27.0754528684216),
    "Charleston-North Charleston, SC": Point(-80.012286644401, 32.8757024464949),
    "San Juan-Bayamón-Caguas, PR": Point(-66.116313035346, 18.4657257515783)
}

In [3]:
# Figure 2 prep: December 2019 -> December 2020 log changes in rents (ZORI_) and
# prices (ZHVI_) for ZIP codes in the New York and San Francisco MSAs, joined to
# ZIP polygons, plus the projected MSA centre points. Both results are pickled.
# Paths use forward slashes so the cell runs on Windows and POSIX alike.
df = pd.read_stata("../Data/Intermediate/grad.dta")

# .copy(): the derived columns below must be written to an independent frame,
# not to a slice of the full panel (avoids SettingWithCopyWarning).
df = df.loc[
    (df['cbsa'].isin(["New York-Newark-Jersey City, NY-NJ-PA", "San Francisco-Oakland-Berkeley, CA"])) &
    (df['month'].isin(["2019-12-01", "2020-12-01"])),
    ['zip', 'month', 'ZORI_', 'ZHVI_']
].copy()

df['ZORI_log'] = np.log(df['ZORI_'])
df['ZHVI_log'] = np.log(df['ZHVI_'])

# Sort once by (zip, month); within a ZIP, diff() is the later month's log value
# minus the earlier one. Assignment aligns on the index, so df's own row order
# does not matter here.
by_zip = df.sort_values(by = ['zip', 'month']).groupby('zip')
df['log_change_zori'] = by_zip['ZORI_log'].diff()
df['log_change_zhvi'] = by_zip['ZHVI_log'].diff()

df = df \
    .sort_values(by = 'zip') \
    .reset_index(drop = True)

# Only the December 2020 rows carry a non-missing 12-month change.
zillow = df \
    .loc[df['month'] == '2020-12-01',
         ['zip', 'log_change_zori', 'log_change_zhvi']]

# Load the geographic data
zips = gpd.read_file("../Data/Source/USA_ZIPS.geojson") \
    .rename(columns = {'zip': 'ZIP'})

# Load the ZIP-County crosswalk file
cross = pd.read_excel("../Data/Source/ZIP_COUNTY_032020.xlsx")
cross['ZIP'] = cross.ZIP.astype(str).str.zfill(5)
cross['COUNTY'] = cross.COUNTY.astype(str).str.zfill(5)

# Keep, for every ZIP, the county row(s) with the largest TOT_RATIO.
# Exact ties keep every tied county, so a ZIP can still appear more than once.
max_tot_ratio = cross.groupby('ZIP')['TOT_RATIO'].transform('max')
cross = cross[cross['TOT_RATIO'] == max_tot_ratio] \
    .drop(columns = ['RES_RATIO', 'BUS_RATIO', 'OTH_RATIO', 'TOT_RATIO']) \
    .rename(columns = {'COUNTY': 'FIPS'})

# Read in the CBSA definitions from the Census Bureau
df = pd.read_excel("../Data/Source/CBSAs.xls",
                   skiprows = 2) \
    .rename(columns = {'CBSA Title': 'CBSA', 
                       'County/County Equivalent': 'county', 
                       'State Name': 'state',
                       'Central/Outlying County': 'peripheral',
                       'Metropolitan/Micropolitan Statistical Area': 'type'})
# Build the 5-digit county FIPS from the 2-digit state and 3-digit county codes.
df['FIPS State Code'] = df['FIPS State Code'].fillna(0).astype(int).astype(str).str.zfill(2)
df['FIPS County Code'] = df['FIPS County Code'].fillna(0).astype(int).astype(str).str.zfill(3)
df['FIPS'] = df['FIPS State Code'] + df['FIPS County Code']
df['peripheral'] = (df['peripheral'] != "Central")
df['type'] = np.where(df['type']=='Metropolitan Statistical Area', 'metro', 'micro')
# dropna() removes rows with a missing CBSA/county/state before the joins.
df = df[['CBSA', 'type', 'county', 'state', 'peripheral', 'FIPS']] \
    .dropna() \
    .merge(cross,
           how = 'left',
           on = 'FIPS') \
    .merge(zips,
           how = 'left',
           on = 'ZIP')
gdf = gpd.GeoDataFrame(df, geometry = df.geometry, crs = "epsg:4326")
gdf = gdf[~gdf.geometry.isna()]
# Reproject to EPSG:3857 before taking centroids.
gdf = gdf.to_crs(epsg = 3857)
gdf['center'] = gdf.geometry.centroid

# Attach the Zillow changes and keep ZIPs that have at least one of them.
gdf = gdf \
    .merge(zillow, left_on = 'ZIP', right_on = 'zip', how = 'left') \
    .dropna(subset = ['log_change_zori', 'log_change_zhvi'], how = 'all')

with open('../Data/Intermediate/fig2_data.pickle', 'wb') as handle:
    pickle.dump(gdf, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Display names for the two MSAs of interest (not referenced again in this cell).
MSA_OI = {
    "New York-Newark-Jersey City, NY-NJ-PA": "New York City",
    "San Francisco-Oakland-Berkeley, CA": "San Francisco"
}

# Define the MSA centers using the location of their City Halls (Grand Central Station for NYC):
# wrap the lon/lat points as EPSG:4326, reproject to EPSG:3857, and store a
# {CBSA title: projected point} dict.
msa_centers = pd.DataFrame(msa_city_halls.items(), columns=['CBSA', 'city_center'])
msa_centers = gpd.GeoDataFrame(msa_centers, geometry = msa_centers.city_center, crs = "epsg:4326")
msa_centers = msa_centers \
    .to_crs(epsg = 3857) \
    .drop(columns = ['city_center']) \
    .rename(columns = {"geometry": 'city_center'})
msa_centers = dict(zip(msa_centers.CBSA,msa_centers.city_center))

with open('../Data/Intermediate/msa_centers.pickle', 'wb') as handle:
    pickle.dump(msa_centers, handle, protocol=pickle.HIGHEST_PROTOCOL)

### Figure 3 - Pandemic Induced Changes in Prices and Rents
No prep needed

### Figure 4 - Robustness in Bid-Rent Curve Estimation Across Price
See STATA Code

### Figure 5 - MSA level Changes in Price and Rent Gradients

In [4]:
# Figure 5 prep: for the 30 largest MSAs (n <= 30), the change between December
# 2019 and December 2020 in the estimated distance gradient (coef_dist_log) of
# prices (zhvi) and rents (zori), joined to MSA population and centre point.
# Paths use forward slashes so the cell runs on Windows and POSIX alike.
df = pd.read_stata("../Data/Intermediate/grad.dta")

# One row per MSA: rank `n`, CBSA title and 2019 population.
df = df[['n', 'cbsa', 'pop_2019']] \
    .drop_duplicates(subset = ['n', 'cbsa']) \
    .dropna()

df3 = pd.read_stata("../Data/Intermediate/coef.dta")

# .copy() so the flag columns are added to an independent frame, not a slice.
df3 = df3[df3['yvar'].isin(['zhvi_zoriav', 'zori_zoriav'])].copy()
df3['pre'] = (df3['month'] == '2019-12-01')
df3['post'] = (df3['month'] == '2020-12-01')
df3 = df3[(df3['pre'] | df3['post']) & (df3['n'] <= 30)].copy()

# Label every row explicitly and derive the wide column names from those labels.
# Renaming the pivoted columns by position would silently swap series whenever
# the pivot's column order differs from the hard-coded list.
df3['period'] = np.where(df3['pre'], 'pre', 'post')
df3['series'] = df3['yvar'].astype(str).map({'zhvi_zoriav': 'zhvi', 'zori_zoriav': 'zori'})

df3 = df3.pivot(index = 'n', columns = ['period', 'series'], values = 'coef_dist_log')
df3.columns = [f'{period}_{series}' for period, series in df3.columns]

# Fixed column order; raises KeyError if any period/series combination is missing.
df3 = df3[['pre_zhvi', 'post_zhvi', 'pre_zori', 'post_zori']].reset_index()

df3['delta_grad_zhvi'] = df3['post_zhvi'] - df3['pre_zhvi']
df3['delta_grad_zori'] = df3['post_zori'] - df3['pre_zori']

# msa_centers.pickle is written by the Figure 2 cell of this notebook.
# pickle.load can execute arbitrary code: only load files you generated yourself.
with open('../Data/Intermediate/msa_centers.pickle', 'rb') as handle:
    msa_centers = pickle.load(handle)

# {CBSA title: point} dict -> two-column GeoDataFrame (cbsa, geometry) in EPSG:3857.
msa_centers = pd.Series(msa_centers).to_frame().reset_index().rename(columns = {'index': 'cbsa', 0: 'geometry'})
msa_centers = gpd.GeoDataFrame(msa_centers, crs = "EPSG:3857")

cbsa_grad = df3 \
    .merge(df, on = 'n', how = 'left') \
    .merge(msa_centers, on = 'cbsa', how = 'left')

# Marker size: 50 units per million residents.
cbsa_grad['marker_size'] = cbsa_grad['pop_2019']/1000000*50

cbsa_grad = gpd.GeoDataFrame(cbsa_grad, geometry = 'geometry')

with open('../Data/Intermediate/fig5_data.pickle', 'wb') as handle:
    pickle.dump(cbsa_grad, handle, protocol=pickle.HIGHEST_PROTOCOL)

### Figure 6 - Changes in Listing Prices and Market Inventory
No prep needed

### Figure 7 - Price Change against Changes in Inventory
No prep needed

### Figure 8 - Associations of Intracity Migration

### Figure 9 - Out-Migration Rates Using VenPath Data
No prep needed

### Figure 10 - Out-Migration Rates Using Infutor Data
No prep needed

### Figure 11 - Price-Rent Ratio against Distance for New York
No prep needed

### Figure 12 - Changes in Rent and Price Growth Rates
No prep needed

### Figure A1 - Bid-rent Functions for San Francisco and New York

### Figure A2 - Changes in Bid-rent function

### Figure A3 - Changes in Rents and Prices Against Pre-Pandemic Levels

### Figure A4 - Pandemic Induced Changes in Prices and Rents without Sample Restrictions

### Figure A5 - Changes in Listing Prices and Market Inventory without Sample Restrictions

### Figure A6 - Changes in Listing Prices and Market Inventory

### Figure A7 - Price Change against Changes in Inventory without Sample Restrictions

### Figure A8 - Population Changes by Distance From Center: Nighttime Definition

### Figure A9 - Associations of Intracity Migration without Sample Restrictions

### Figure A10 - Migration Against Distance, Rents, and Prices

### Figure A12 - Price-Rent Ratio against Distance for New York

### Figure A13 - Migration Rates Pre- and Post-Pandemic

### Figure A14 - Cumulative Rent Growth under Transitory Case for Top 30 MSAs, New York, San Francisco and Los Angeles

### Figure B1 - Comparing Rental Series Across Data Sets

#### Panels A & B

In [10]:
# Figure B1, panels A & B prep: compare HUD Fair Market Rents (FMR) with Zillow's
# ZORI at the ZIP-year level. FMRs by bedroom count are combined into one number
# using ACS bedroom shares, and the result is written to FMR_plotdata.csv.
# Paths use forward slashes so the cell runs on Windows and POSIX alike.
zillow = pd.read_stata("../Data/Intermediate/zillow.dta")

# Keep the monthly ZORI columns (ZORI_ followed by six digits) plus the ZIP id.
zillow = zillow[[col for col in zillow.columns.to_list() if bool(re.match(r"ZORI_\d{6}", col))] + ['zip']]

zillow = zillow.melt(id_vars=["zip"], var_name="month", value_name="ZORI")

# The last four characters of the column name are taken as the year
# (presumably the suffix is MMYYYY — confirm against zillow.dta).
zillow['year'] = zillow['month'].str[-4:]

zillow = zillow.drop(columns = ['month'])

# Annual average of the non-missing monthly ZORI values per ZIP.
zillow = zillow.dropna(subset = ['ZORI']).groupby(['zip', 'year'])['ZORI'].mean().reset_index()
zillow['year'] = zillow['year'].astype(int)
zillow = zillow.loc[zillow['year'] > 2017]

distance = pd.read_csv("../Data/Intermediate/MSA_distance.csv")

distance = distance[['ZIP', 'distance']] \
    .rename(columns = {'ZIP': 'zip'})

distance['zip'] = distance['zip'].astype(str).str.zfill(5)

fmr = pd.read_stata("../Data/Source/FMRs.dta")
fmr = fmr.drop(columns = 'index').rename(columns = {'ZIP': 'zip'})
fmr['zip'] = fmr['zip'].astype(str).str.zfill(5)

acs = pd.read_csv("../Data/Source/ACS5_2019_DP04_03192021.csv", skiprows = 1)

vrs_list = [
    'id',
    'Percent!!BEDROOMS!!Total housing units!!No bedroom',
    'Percent!!BEDROOMS!!Total housing units!!1 bedroom',
    'Percent!!BEDROOMS!!Total housing units!!2 bedrooms',
    'Percent!!BEDROOMS!!Total housing units!!3 bedrooms',
    'Percent!!BEDROOMS!!Total housing units!!4 bedrooms',
    'Percent!!BEDROOMS!!Total housing units!!5 or more bedrooms'
]
acs = acs[vrs_list].copy()

# County FIPS = the five digits after the '0500000US' prefix of the ACS id.
# Raw string: '\d' in a normal string literal is an invalid escape sequence.
acs['fips'] = acs['id'].str.extract(r'0500000US(\d{5})', expand = False)

acs = acs.drop(columns = 'id')
# Rename the six bedroom-share columns to 0_BR ... 5_BR (5_BR = five or more).
acs.columns = [f'{i}_BR' for i, col in enumerate(acs.columns.to_list()) if 'Percent' in col] + ['fips']

df = pd.read_excel("../Data/Source/ZIP_COUNTY_032020.xlsx")

df['zip'] = df['ZIP'].astype(str).str.zfill(5)

df['fips'] = df['COUNTY'].astype(str).str.zfill(5)

df = df[['TOT_RATIO', 'zip', 'fips']]

# County bedroom shares attached to every ZIP-county pair of the crosswalk.
zip_county = df.merge(acs, on = 'fips', how = 'inner')
br_cols = [col for col in zip_county.columns if col.endswith('_BR')]


def wm(x):
    """TOT_RATIO-weighted mean of one bedroom-share column within one ZIP.

    The weights are read from the merged frame that `x` belongs to, so they stay
    aligned with `x` even when the inner merge dropped crosswalk rows. Returns
    NaN when the weights sum to zero, where np.average would raise.
    """
    weights = zip_county.loc[x.index, "TOT_RATIO"]
    if weights.sum() == 0:
        return np.nan
    return np.average(x, weights = weights)


# Aggregate only the numeric share columns; the string `fips` column cannot be averaged.
acs = zip_county \
    .groupby('zip')[br_cols].agg(wm).reset_index()

fig_data = fmr \
    .merge(zillow, how = 'inner', on = ['zip', 'year']) \
    .merge(distance, how = 'inner', on = 'zip')
fig_data['log_distance'] = np.log(1 + fig_data['distance'])
fig_data = fig_data.drop_duplicates()

fig_data = fig_data.merge(acs, on = 'zip', how = 'inner')
# Bedroom-share-weighted FMR; the 4-bedroom FMR is also applied to the 5+ share.
fig_data['hud_fmr_comb'] = \
    fig_data['hud_fmr_0_br'] * fig_data['0_BR'] / 100 + \
    fig_data['hud_fmr_1_br'] * fig_data['1_BR'] / 100 + \
    fig_data['hud_fmr_2_br'] * fig_data['2_BR'] / 100 + \
    fig_data['hud_fmr_3_br'] * fig_data['3_BR'] / 100 + \
    fig_data['hud_fmr_4_br'] * (fig_data['4_BR'] + fig_data['5_BR']) / 100

# 'std_err' is the absolute gap in levels; 'pc_err' is the log difference.
fig_data['std_err'] = np.abs(fig_data['hud_fmr_comb'] - fig_data['ZORI'])
fig_data['pc_err'] = np.log(fig_data['hud_fmr_comb']) - np.log(fig_data['ZORI'])

fig_data.to_csv('../Data/Intermediate/FMR_plotdata.csv', index = False)

  results[key] = self.aggregate(func)


#### Panels C & D

In [11]:
# Figure B1, panels C & D prep: build a ZIP-month panel of ZORI/ZHVI merged with
# ACS 2019 5-year ZCTA characteristics, ZIP land area, distance to the MSA centre
# and the top-75 MSA ranking. The result is `table_data`.
# Paths use forward slashes so the cell runs on Windows and POSIX alike.
df = pd.read_stata("../Data/Intermediate/zillow.dta")

df = df[['zip'] + 
        [col for col in df.columns if re.match(r"^ZORI_\d{6}$", col)] +
        [col for col in df.columns if re.match(r"^ZHVI_\d{6}$", col)]]
df = pd.wide_to_long(df, ["ZORI_", "ZHVI_"], i="zip", j="month").reset_index()
# wide_to_long may return the numeric suffix as an integer, which loses the
# leading zero of months 01-09; pad back to six characters so the '%m%Y' parse
# is unambiguous.
df['month'] = pd.to_datetime(df['month'].astype(str).str.zfill(6), format = '%m%Y').dt.to_period('M')
df = df.rename(columns = {"ZORI_": "ZORI", "ZHVI_": "ZHVI"})
zillow = df.copy(deep = True)

# Concatenates the first three geography components of a censusgeo index entry.
# Not used in the cells visible here.
get_fips = lambda row: ''.join([row['index'].params()[i][1] for i in range(3)])

# ACS variable id -> working column name.
# NOTE(review): B19001_017E is labelled "250-*_income" here; in table B19001 the
# top bracket is presumably $200,000 or more — confirm the label.
# NOTE(review): B25041 is presumably the bedrooms table, so "geq_5rooms" would be
# units with five or more bedrooms — confirm.
req_vars = {
    'B02009_001E': "cnt_black", 
    'B01003_001E': "pop", 
    'B19001_016E': "150-200_income", 
    'B19001_017E': "250-*_income", 
    'B01002_001E': "med_age",
    'B25010_001E': 'ave_hh_size',
    'B25010_002E': 'ave_owner_hh_size',
    'B25010_003E': 'ave_renter_hh_size',
    'B25032_001E': 'tot_units_1',
    'B25032_002E': 'tot_owner_units',
    'B25032_013E': 'tot_renter_units',
    'B25008_001E': 'tot_pop_occup',
    'B25008_002E': "owner_pop_occup",
    'B25008_003E': "renter_pop_occup",
    'B06011_001E': "med_income",
    'B19013_001E': "med_hh_income",
    'B25031_001E': "med_gross_rent",
    'B25041_001E': "tot_units_2",
    'B25041_007E': "geq_5rooms"
}

# Download the desired variables for every ZIP code tabulation area (ACS 2019 5-year).
# This call needs network access.
cdata = cd.download('acs5', 2019,
                    cd.censusgeo([('zip code tabulation area', '*')]),
                    list(req_vars.keys())) \
    .rename(columns = req_vars) \
    .reset_index()

# -666666666 is treated as a missing-value sentinel.
cdata = cdata.replace(-666666666.0, np.nan)

cdata = cdata[cdata['pop'] > 0].reset_index(drop = True)
# The ZIP is the last five characters of the geography name.
cdata['zip'] = cdata['index'].apply(lambda x: x.name).astype(str).str[-5:]

cdata['cns_median_age'] = cdata['med_age']
cdata['cns_median_income'] = cdata['med_income']
cdata['cns_median_hh_inc'] = cdata['med_hh_income']
cdata['cns_median_rent'] = cdata['med_gross_rent']
cdata['cns_pop'] = cdata['pop']
# Column name kept as-is because it is written to the output file; the value is
# the share geq_5rooms / tot_units_2.
cdata['cns_leq_5_units'] = cdata['geq_5rooms']/cdata['tot_units_2']
cdata.loc[cdata['cns_leq_5_units'] == np.inf, 'cns_leq_5_units'] = np.nan
cdata['cns_black_ratio'] = cdata['cnt_black']/cdata['pop']
# NOTE(review): the numerator looks like a household count while the denominator
# is population — confirm this ratio is intended.
cdata['cns_rich_ratio'] = (cdata['150-200_income'] + cdata['250-*_income'])/cdata['pop']
cdata['cns_renter_ratio'] = cdata['tot_renter_units']/cdata['tot_units_1']

# Keep only the derived cns_* columns and the ZIP key.
cdata = cdata[[col for col in cdata.columns if 'cns' in col] + ['zip']]

gdf = gpd.read_file("../Data/Source/USA_ZIPS.geojson")

# Project to the North America Albers Equal Area Conic Projection
crs = pyproj.CRS("""PROJCS["North_America_Albers_Equal_Area_Conic",
    GEOGCS["GCS_North_American_1983",
        DATUM["North_American_Datum_1983",
            SPHEROID["GRS_1980",6378137,298.257222101]],
        PRIMEM["Greenwich",0],
        UNIT["Degree",0.017453292519943295]],
    PROJECTION["Albers_Conic_Equal_Area"],
    PARAMETER["False_Easting",0],
    PARAMETER["False_Northing",0],
    PARAMETER["longitude_of_center",-96],
    PARAMETER["Standard_Parallel_1",20],
    PARAMETER["Standard_Parallel_2",60],
    PARAMETER["latitude_of_center",40],
    UNIT["Meter",1],
    AUTHORITY["EPSG","102008"]]""")

# Area in km^2: the projection is in metres, so divide m^2 by 10**6.
# The vectorised .area also yields NaN (instead of raising) for missing geometries.
gdf['area'] = gdf['geometry'] \
    .to_crs(crs = crs) \
    .area / 10**6

gdf = gdf[['zip', 'area']]

dist = pd.read_csv("../Data/Intermediate/MSA_distance.csv")

dist = dist[['CBSA', 'ZIP', 'distance']] \
    .rename(columns = {'ZIP': 'zip', 'CBSA': 'cbsa'})

dist['zip'] = dist['zip'].astype(str).str.zfill(5)

top_MSAs = pd.read_stata("../Data/Source/top75msas.dta")[['n', 'cbsa']]

# Inner joins: only ZIPs present in every source and in a ranked MSA survive.
table_data = cdata \
    .merge(zillow, on = 'zip', how = 'inner') \
    .merge(dist, on = 'zip', how = 'inner') \
    .merge(gdf, on = 'zip', how = 'inner') \
    .merge(top_MSAs, on = 'cbsa', how = 'inner')

table_data['log_distance'] = np.log(1 + table_data['distance'])

In [12]:
# Final derived columns for the ZORI missingness analysis, then export to Stata.

# Population density and a flag for rows where ZORI is observed.
table_data['cns_pop_density'] = table_data['cns_pop'].div(table_data['area'])
table_data['ZORI_avail'] = ~table_data['ZORI'].isna()

# Distance bins are built only from rows with a ZHVI value; all other rows are
# left missing because assignment aligns on the index.
has_zhvi = table_data['ZHVI'].notna()
log_dist_with_zhvi = table_data.loc[has_zhvi, 'log_distance']


def _interval_mid(interval):
    """Return the midpoint of a pandas Interval."""
    return interval.mid


# 100 equal-count (quantile) bins, each represented by its midpoint.
quantile_bins = pd.qcut(log_dist_with_zhvi, q = 100)
table_data['log_dist_qs'] = quantile_bins.apply(_interval_mid).astype(float)

# 100 equal-width bins, each represented by its midpoint.
width_bins = pd.cut(log_dist_with_zhvi, bins = 100)
table_data['log_dist_bins'] = width_bins.apply(_interval_mid).astype(float)

# Convert the month column to a timestamp via its 'YYYY-MM' string form.
month_text = table_data['month'].astype(str)
table_data['month'] = pd.to_datetime(month_text, format = '%Y-%m')

table_data.to_stata(r"..\Data\Intermediate\zori_mi_analysis_data.dta")

#### Panels E & F

### Figure B2 - Correlation Between CPI Rent of Primary Residence and ZORI at MSA level
See STATA Code

### Figure B3 - Relationship between Distance and Rentership
No prep needed

### Figure C1 - Change in Urban Minus Suburban Rent Growth Relative to Pre-Pandemic for Combination of Transitory and Permanent Regime
See STATA Code

### Figure C2 - Evolution of Price-Rent Ratio when Pandemic is Transitory and Permanent along with a Combination of Two Regimes
See STATA Code