In [None]:
# required packages: geopandas & plotly
# For plotly, simply use 
# conda 
# installing geopands on windows can be tricky, see below for a solution

In [1]:
import re 
import pandas as pd 
import numpy as np
pd.options.display.max_rows = 100
pd.options.display.max_columns = 50

# plotting
import plotly.express as px
import plotly.figure_factory as ff

## Installation guide: geopandas

There are two steps:

1. download packages from: https://www.lfd.uci.edu/~gohlke/pythonlibs/

We need 1) GDAL; 2) pyproj; 3)Fiona; 4) Shapely; and 5) geopandas

Get the correct version. For example, with python 3.8.x on a x64 OS, we need GDAL‑3.1.4‑cp38‑cp38‑win_amd64.whl

2. install the packages

open cmd (anaconda), and cd to the folder with these packages
`cd/d F:\DataMining\geopandas`

install packages (you only need to type: pip install xx and use tab to autofill the file names)
`pip install GDAL-3.1.4-cp38-cp38-win_amd64.whl
pip install pyproj-3.0.0.post1-cp38-cp38-win_amd64.whl
pip install Fiona-1.8.18-cp38-cp38-win_amd64.whl
pip install Shapely-1.7.1-cp38-cp38-win_amd64.whl
pip install geopandas-0.8.1-py3-none-any.whl`

# main data for empirical application

Input: `MSAs_SA.xls`, `macro.csv` and `FRM30.csv`

Output: `MSA_list.csv`, `empirical_main.csv`

In [2]:
## read house price index from Freddie Mac
hpi_1 = pd.read_excel('./data/raw/MSAs_SA.xls',sheet_name='MSA Indices SA A-L',skiprows=5)
hpi_2 = pd.read_excel('./data/raw/MSAs_SA.xls',sheet_name='MSA Indices SA M-Z',skiprows=5)
hpi = pd.concat([hpi_1.iloc[:,:-1],hpi_2.iloc[:,1:]],axis=1)
hpi = hpi.dropna()

# store the list of MSA names
MSA_list = pd.DataFrame(hpi.columns[1:])
MSA_list.columns = ['MSA']
MSA_list.to_csv('./data/MSA_list.csv',index=False)

# transform wide to lon
hpi = hpi.melt(id_vars='Month')
hpi.columns = ['date','MSA','HPI']
hpi['HPI'] = hpi.HPI.astype('float')

In [3]:
# unrate 
unrate = pd.read_csv('./data/raw/la.data.60.Metro.txt',sep='\t')

#  select unrate
unrate['measure_code'] = unrate.iloc[:,0].apply(lambda x: x.lstrip().rstrip()[-2:])
unrate = unrate.loc[unrate.measure_code=='03']

# extract cbsa code
unrate['cbsa'] = unrate.iloc[:,0].apply(lambda x: x.lstrip().rstrip()[7:12])

# keep unadjusted series
unrate = unrate.loc[unrate.iloc[:,0].str.contains('LAU')]

# drop annual average 
unrate = unrate.loc[~unrate.iloc[:,2].str.contains('M13'),:]

# clean up
unrate['date'] = unrate.iloc[:,1].astype(str)+unrate.iloc[:,2]
unrate = unrate.iloc[:,[-1,-2,3]]
unrate.columns =['date','cbsa','UR']

# merge with names
area = pd.read_csv('./data/raw/la.area.txt',sep='\t')
area = area.loc[area.area_code.str.contains('MT'),:]
area['cbsa'] = area.area_code.apply(lambda x: x[4:9])
area['MSA'] = area.area_text.str.replace('Metropolitan Statistical Area','').str.lstrip().str.rstrip()
area['MSA'] = area.MSA.str.replace('Metropolitan NECTA','').str.lstrip().str.rstrip()
area['MSA'] = area.MSA.str.replace('Boston-Cambridge-Nashua, MA-NH','Boston-Cambridge-Newton, MA-NH')
area['MSA'] = area.MSA.str.replace('Macon-Bibb County, GA','Macon, GA')
area['MSA'] = area.MSA.str.replace('Manchester, NH','Manchester-Nashua, NH')
area['MSA'] = area.MSA.str.replace('New Haven, CT','New Haven-Milford, CT')
area['MSA'] = area.MSA.str.replace('Norwich-New London-Westerly, CT-RI','Norwich-New London, CT')
area['MSA'] = area.MSA.str.replace('Springfield, MA-CT','Springfield, MA')
area = area.iloc[:,[-1,-2]]

unrate = unrate.merge(area,on='cbsa')

hpi = hpi.merge(unrate,on=['date','MSA'],how='left')

  unrate = pd.read_csv('./data/raw/la.data.60.Metro.txt',sep='\t')


In [4]:
# read macro.csv
macro = pd.read_csv('./data/macro.csv',skiprows=[1])

In [5]:
# read FRM30.csv, convert it to monthly data
frm = pd.read_csv('./data/FRM30.csv')
frm.date = frm.date.apply(lambda x: x.split('/')[0]+'M'+x.split('/')[1].zfill(2))
frm = frm.groupby('date').mean().reset_index()

In [6]:
# merge
df = pd.merge(hpi,macro,how='outer',on='date')
df = df.merge(frm,how='left',on='date')
df['date'] = pd.to_datetime(df.date,format='%YM%m')
df = df.sort_values(['MSA','date']).reset_index(drop=True)

In [7]:
df['INFL_H'] = pd.concat([df.loc[:,['date','MSA']],np.log(df.HPI)],axis=1).groupby('MSA').HPI.diff()*100
df['GRPI'] = pd.concat([df.loc[:,['date','MSA']],np.log(df.RPI)],axis=1).groupby('MSA').RPI.diff()*100
df['GIP'] = pd.concat([df.loc[:,['date','MSA']],np.log(df.INDPRO)],axis=1).groupby('MSA').INDPRO.diff()*100
df['GHOUST'] = pd.concat([df.loc[:,['date','MSA']],np.log(df.HOUST)],axis=1).groupby('MSA').HOUST.diff()*100
df['GPERMIT'] = pd.concat([df.loc[:,['date','MSA']],np.log(df.PERMIT)],axis=1).groupby('MSA').PERMIT.diff()*100
df['GREALLN'] = pd.concat([df.loc[:,['date','MSA']],np.log(df.REALLN)],axis=1).groupby('MSA').REALLN.diff()*100
df['INFL_CPI'] = pd.concat([df.loc[:,['date','MSA']],np.log(df.CPIAUCSL)],axis=1).groupby('MSA').CPIAUCSL.diff()*100
df['INFL_PCE'] = pd.concat([df.loc[:,['date','MSA']],np.log(df.PCEPI)],axis=1).groupby('MSA').PCEPI.diff()*100
df['RINFL_H'] = df['INFL_H'] - df['INFL_PCE']

In [8]:
df.to_csv('./data/empirical_main.csv',index=False)

# MSA features

Input: `MSA_list.csv`, `EconProfile_MSA.csv`, `GDP_MSA`, `household-debt-by-msa.csv`, `HOUSING_SUPPLY.dta`

Output: `MSA_features.csv`, which can be easily merged (using the column `MSA_old` with the grouping outcomes given by the GLP

## Economic Profiles & Debt-to-Income Ratios

In [8]:
MSA_rule = {'Albany-Lebanon, OR' : 'Albany, OR',
            'Anniston-Oxford-Jacksonville, AL' : 'Anniston-Oxford, AL',
            'Atlanta-Sandy Springs-Roswell, GA' : 'Atlanta-Sandy Springs-Alpharetta, GA',
            'Austin-Round Rock, TX' : 'Austin-Round Rock-Georgetown, TX',
            'Bend-Redmond, OR' : 'Bend, OR',
            'Blacksburg-Christiansburg-Radford, VA' :  'Blacksburg-Christiansburg, VA',
            'Bremerton-Silverdale, WA' : 'Bremerton-Silverdale-Port Orchard, WA',
            'Buffalo-Cheektowaga-Niagara Falls, NY' : 'Buffalo-Cheektowaga, NY',
            'Dayton-Kettering, OH' :  'Dayton, OH',
            'Eugene, OR' : 'Eugene-Springfield, OR',
            'Fayetteville-Springdale-Rogers, AR-MO' : 'Fayetteville-Springdale-Rogers, AR',
            'Grand Rapids-Wyoming, MI' :  'Grand Rapids-Kentwood, MI',
            'Greenville-Anderson-Mauldin, SC' :  'Greenville-Anderson, SC',
            'Gulfport-Biloxi-Pascagoula, MS' :  'Gulfport-Biloxi, MS',
            'Eugene, OR' : 'Eugene-Springfield, OR',
            'Grand Rapids-Wyoming, MI' :  'Grand Rapids-Kentwood, MI',
            'Greenville-Anderson-Mauldin, SC' :  'Greenville-Anderson, SC',
            'Gulfport-Biloxi-Pascagoula, MS' :  'Gulfport-Biloxi, MS',
            'Hilton Head Island-Bluffton-Beaufort, SC' : 'Hilton Head Island-Bluffton, SC',
            'Hartford-West Hartford-East Hartford, CT':'Hartford-East Hartford-Middletown, CT',
            'Kingsport-Bristol-Bristol, TN-VA' : 'Kingsport-Bristol, TN-VA',
            'Mankato-North Mankato, MN' :  'Mankato, MN',
            'Macon, GA':'Macon-Bibb County, GA',
            'Miami-Fort Lauderdale-West Palm Beach, FL' : 'Miami-Fort Lauderdale-Pompano Beach, FL',
            'Milwaukee-Waukesha-West Allis, WI' : 'Milwaukee-Waukesha, WI',
            'Naples-Immokalee-Marco Island, FL' : 'Naples-Marco Island, FL',
            'Niles-Benton Harbor, MI' : 'Niles, MI',
            'Olympia-Tumwater, WA' :  'Olympia-Lacey-Tumwater, WA',
            'Prescott, AZ' :  'Prescott Valley-Prescott, AZ',
            'Phoenix-Mesa-Scottsdale, AZ':'Phoenix-Mesa-Chandler, AZ',
            'Raleigh-Cary, NC' :  'Raleigh, NC',
            'Sacramento--Roseville--Arden-Arcade, CA' :  'Sacramento-Roseville-Folsom, CA',
            'San Diego-Carlsbad, CA' :  'San Diego-Chula Vista-Carlsbad, CA',
            'San Francisco-Oakland-Hayward, CA' :  'San Francisco-Oakland-Berkeley, CA',
            'San Luis Obispo-Paso Robles-Arroyo Grande, CA' :  'San Luis Obispo-Paso Robles, CA',
            'Santa Rosa, CA' :   'Santa Rosa-Petaluma, CA',
            'Scranton--Wilkes-Barre, PA' : 'Scranton--Wilkes-Barre--Hazleton, PA',
            'Sebring, FL' :  'Sebring-Avon Park, FL',
            'Staunton-Waynesboro, VA' : 'Staunton, VA',
            'Stockton-Lodi, CA' : 'Stockton, CA',
            'Trenton, NJ' : 'Trenton-Princeton, NJ',
            'Vallejo-Fairfield, CA' :  'Vallejo, CA',
            'Visalia-Porterville, CA' : 'Visalia, CA',
            'Wausau, WI' : 'Wausau-Weston, WI'}

In [9]:
# read profile
prof = pd.read_csv('./data/raw/EconProfile_MSA.csv',skiprows=4)
prof = prof.dropna()
prof = prof.loc[prof.GeoName.str.contains(','),['GeoFips','GeoName','Description','2017']]
prof = prof.pivot(index='GeoName', columns='Description',values='2017').reset_index()
# clean MSA names
prof['GeoName'] = prof.GeoName.str.replace(r'\s\(.*\).*','',regex=True)

In [10]:
# read profile
gdp = pd.read_csv('./data/raw/GDP_MSA.csv',skiprows=4)
gdp = gdp.dropna()
gdp = gdp.loc[gdp.GeoName.str.contains(','),['GeoFips','GeoName','2017']]
gdp['GeoName'] = gdp.GeoName.str.replace(r'\s\(.*\).*','',regex=True)

In [11]:
# merge EconProfile and GDP 
prof = pd.merge(prof,gdp,how='outer',on='GeoName',indicator=True)
prof[prof._merge!='both']
prof = prof.drop(columns='_merge') # drop merge indicators
# rename columns
prof.columns = ['MSA','PINCPC','POP','PINC','EMP','cbsa','RGDP'] 
# create real gdp per capita
prof['RGDPPC'] = prof['RGDP']/prof['POP']*1000
# measure population by '000 
prof['POP'] = prof['POP']/1000                           
prof['EMP'] = prof['EMP']/1000 

In [12]:
# debt to income ratio
debt = pd.read_csv('./data/raw/household-debt-by-msa.csv')
debt = debt.groupby(['Year','Metropolitan Statistical Areas']).mean().reset_index().iloc[:,[1,3,4]]
debt.columns = ['MSA', 'D2I_Low','D2I_H']
debt['MSA'] = debt.MSA.str.replace(r'\s\(.*\).*','',regex=True)

In [13]:
MSA_list = pd.read_csv('./data/MSA_list.csv')
# change MSA names to match Sep 2018 record
MSA_list['MSA_old'] = MSA_list['MSA']
for key in MSA_rule:
    prof.loc[prof.MSA==key,'MSA'] = MSA_rule[key]
    debt.loc[debt.MSA==key,'MSA'] = MSA_rule[key]
    MSA_list.loc[MSA_list.MSA==key,'MSA'] = MSA_rule[key]

In [14]:
MSA_list = pd.merge(MSA_list,prof,how='outer',on='MSA',indicator=True)
MSA_list[MSA_list._merge!='both']

Unnamed: 0,MSA,MSA_old,PINCPC,POP,PINC,EMP,cbsa,RGDP,RGDPPC,_merge
382,"Poughkeepsie-Newburgh-Middletown, NY",,51658.0,673.253,34779113.0,348.386,39100,27492574.0,40835.427395,right_only
383,"Twin Falls, ID",,37897.0,109.037,4132141.0,64.497,46300,4493510.0,41210.873373,right_only


In [15]:
MSA_list = MSA_list[MSA_list._merge!='right_only']
MSA_list = MSA_list.drop(columns='_merge')
MSA_list = pd.merge(MSA_list,debt,how='outer',on='MSA',indicator=True)
MSA_list[MSA_list._merge!='both']

Unnamed: 0,MSA,MSA_old,PINCPC,POP,PINC,EMP,cbsa,RGDP,RGDPPC,D2I_Low,D2I_H,_merge
110,"Enid, OK","Enid, OK",42493.0,61.492,2612964.0,37.566,21420,3345503.0,54405.499902,,,left_only


In [16]:
MSA_list = MSA_list[MSA_list._merge!='right_only']
MSA_list = MSA_list.drop(columns='_merge')

## Supply Elasticity & Regulation Index

In [17]:
MSA_rule2 = { 'Allentown-Bethlehem-Easton, PA': 'Allentown-Bethlehem-Easton, PA-NJ',
              'Appleton-Oshkosh-Neenah, WI': 'Appleton, WI',
              'Atlanta, GA': 'Atlanta-Sandy Springs-Alpharetta, GA',
              'Atlantic-Cape May, NJ': 'Atlantic City-Hammonton, NJ',
              'Augusta-Aiken, GA-SC' :'Augusta-Richmond County, GA-SC',
              'Austin-San Marcos, TX': 'Austin-Round Rock-Georgetown, TX',
              'Baltimore, MD': 'Baltimore-Columbia-Towson, MD',
              'Barnstable-Yarmouth, MA': 'Barnstable Town, MA',
              'Biloxi-Gulfport-Pascagoula, MS': 'Gulfport-Biloxi, MS',
              'Birmingham, AL': 'Birmingham-Hoover, AL',
              'Bloomington-Normal, IL': 'Bloomington, IL',
              'Boston-Worcester-Lawrence-Lowell-Brocktn, MA-NH': 'Boston-Cambridge-Newton, MA-NH',
              'Boulder-Longmont, CO': 'Boulder, CO',
              'Bremerton, WA': 'Bremerton-Silverdale-Port Orchard, WA',
              'Brownsville-Harlingen-San Benito, TX': 'Brownsville-Harlingen, TX',
              'Bryan-College Station, TX': 'College Station-Bryan, TX',
              'Buffalo-Niagara Falls, NY':'Buffalo-Cheektowaga, NY',
              'Burlington, VT': 'Burlington-South Burlington, VT',
              'Charlotte-Gastonia-Rock Hill, NC-SC': 'Charlotte-Concord-Gastonia, NC-SC',
              'Chicago, IL': 'Chicago-Naperville-Elgin, IL-IN-WI',
              'Chico-Paradise, CA': 'Chico, CA',
              'Cleveland-Lorain-Elyria, OH': 'Cleveland-Elyria, OH',
              'Dallas, TX': 'Dallas-Fort Worth-Arlington, TX',
              'Fort Worth-Arlington, TX':'Dallas-Fort Worth-Arlington, TX',
              'Dayton-Springfield, OH': 'Dayton, OH',
              'Daytona Beach, FL': 'Deltona-Daytona Beach-Ormond Beach, FL',
              'Denver, CO': 'Denver-Aurora-Lakewood, CO',
              'Des Moines, IA': 'Des Moines-West Des Moines, IA',
              'Detroit, MI': 'Detroit-Warren-Dearborn, MI',
              'Duluth-Superior, MN-WI': 'Duluth, MN-WI',
              'Evansville-Henderson, IN-KY': 'Evansville, IN-KY',
              'Fargo-Moorhead, ND-MN': 'Fargo, ND-MN',
              'Flagstaff, AZ-UT': 'Flagstaff, AZ',
              'Fort Collins-Loveland, CO': 'Fort Collins, CO',
              'Grand Rapids-Muskegon-Holland, MI': 'Grand Rapids-Kentwood, MI',
              'Greensboro-Winston-Salem-High Point, NC': 'Greensboro-High Point, NC',
              'Greenville-Spartanburg-Anderson, SC': 'Greenville-Anderson, SC',
              'Hagerstown, MD': 'Hagerstown-Martinsburg, MD-WV',
              'Harrisburg-Lebanon-Carlisle, PA': 'Harrisburg-Carlisle, PA',
              'Hartford, CT': 'Hartford-East Hartford-Middletown, CT',
              'Hickory-Morganton-Lenoir, NC': 'Hickory-Lenoir-Morganton, NC',
              'Houston, TX': 'Houston-The Woodlands-Sugar Land, TX',
              'Galveston-Texas City, TX': 'Houston-The Woodlands-Sugar Land, TX',
              'Indianapolis, IN': 'Indianapolis-Carmel-Anderson, IN',
              'Johnson City-Kingsport-Bristol, TN-VA': 'Johnson City, TN',
              'Kalamazoo-Battle Creek, MI': 'Kalamazoo-Portage, MI',
              'La Crosse, WI-MN': 'La Crosse-Onalaska, WI-MN',
              'Lafayette, IN': 'Lafayette-West Lafayette, IN',
              'Las Vegas, NV-AZ': 'Las Vegas-Henderson-Paradise, NV',
              'Lexington, KY': 'Lexington-Fayette, KY',
              'Little Rock-North Little Rock, AR': 'Little Rock-North Little Rock-Conway, AR',
              'Longview-Marshall, TX': 'Longview, TX',
              'Los Angeles-Long Beach, CA': 'Los Angeles-Long Beach-Anaheim, CA',
              'Louisville, KY-IN': 'Louisville/Jefferson County, KY-IN',
              'Medford-Ashland, OR': 'Medford, OR',
              'Memphis, TN-AR-MS': 'Memphis, TN-MS-AR',
              'Minneapolis-St. Paul, MN-WI': 'Minneapolis-St. Paul-Bloomington, MN-WI',
              'Myrtle Beach, SC': 'Myrtle Beach-Conway-North Myrtle Beach, SC-NC',
              'Naples, FL': 'Naples-Marco Island, FL',
              'Nashville, TN': 'Nashville-Davidson--Murfreesboro--Franklin, TN',
              'New Haven-Bridgprt-Stamfrd-Danbry-Wtrbry, CT': 'New Haven-Milford, CT',
              'New London-Norwich, CT': 'Norwich-New London, CT',
              'New Orleans, LA': 'New Orleans-Metairie, LA',
              'Olympia, WA': 'Olympia-Lacey-Tumwater, WA',
              'Omaha, NE-IA': 'Omaha-Council Bluffs, NE-IA',
              'Orlando, FL': 'Orlando-Kissimmee-Sanford, FL',
              'Parkersburg-Marietta, WV-OH': 'Parkersburg-Vienna, WV',
              'Pensacola, FL': 'Pensacola-Ferry Pass-Brent, FL',
              'Peoria-Pekin, IL': 'Peoria, IL',
              'Philadelphia, PA-NJ': 'Philadelphia-Camden-Wilmington, PA-NJ-DE-MD',
              'Phoenix-Mesa, AZ': 'Phoenix-Mesa-Chandler, AZ',
              'Portland, ME': 'Portland-South Portland, ME',
              'Portland-Vancouver, OR-WA': 'Portland-Vancouver-Hillsboro, OR-WA',
              'Providence-Warwick-Pawtucket, RI': 'Providence-Warwick, RI-MA',
              'Raleigh-Durham-Chapel Hill, NC': 'Raleigh, NC',
              'Richmond-Petersburg, VA': 'Richmond, VA',
              'Riverside-San Bernardino, CA': 'Riverside-San Bernardino-Ontario, CA',
              'Saginaw-Bay City-Midland, MI': 'Saginaw, MI',
              'St. Joseph, MO': 'St. Joseph, MO-KS',
              'Salt Lake City-Ogden, UT': 'Salt Lake City, UT',
              'San Antonio, TX': 'San Antonio-New Braunfels, TX',
              'San Diego, CA': 'San Diego-Chula Vista-Carlsbad, CA',
              'San Francisco, CA': 'San Francisco-Oakland-Berkeley, CA',
              'Oakland, CA': 'San Francisco-Oakland-Berkeley, CA',
              'San Jose, CA': 'San Jose-Sunnyvale-Santa Clara, CA',
              'San Luis Obispo-Atascadero-Paso Robles, CA': 'San Luis Obispo-Paso Robles, CA',
              'Santa Barbara-Santa Maria-Lompoc, CA': 'Santa Maria-Santa Barbara, CA',
              'Seattle-Bellevue-Everett, WA': 'Seattle-Tacoma-Bellevue, WA',
              'Sioux City, IA-NE': 'Sioux City, IA-NE-SD',
              'South Bend, IN': 'South Bend-Mishawaka, IN-MI',
              'Spokane, WA': 'Spokane-Spokane Valley, WA',
              'Texarkana, TX-Texarkana AR': 'Texarkana, TX-AR',
              'Vallejo-Fairfield-Napa, CA': 'Vallejo, CA',
              'Vineland-Millville-Bridgeton, NJ': 'Vineland-Bridgeton, NJ',
              'Washington, DC-MD-VA-WV': 'Washington-Arlington-Alexandria, DC-VA-MD-WV',
              'York, PA':'York-Hanover, PA',
              'Youngstown-Warren, OH': 'Youngstown-Warren-Boardman, OH-PA',
              'Fort Lauderdale, FL': 'Miami-Fort Lauderdale-Pompano Beach, FL',
              'Miami, FL':'Miami-Fort Lauderdale-Pompano Beach, FL',
              'Melbourne-Titusville-Palm Bay, FL':'Palm Bay-Melbourne-Titusville, FL',
              'Fort Myers-Cape Coral, FL':'Cape Coral-Fort Myers, FL',
              'Fort Pierce-Port St. Lucie, FL':'Port St. Lucie, FL',
              'Fort Walton Beach, FL': 'Crestview-Fort Walton Beach-Destin, FL',
              'New York, NY': 'New York-Newark-Jersey City, NY-NJ-PA',
              'Newark, NJ': 'New York-Newark-Jersey City, NY-NJ-PA',
              'Jersey City, NJ':'New York-Newark-Jersey City, NY-NJ-PA',
              'Norfolk-Virginia Beach-Newport News, VA-NC':'Virginia Beach-Norfolk-Newport News, VA-NC',
              'Richland-Kennewick-Pasco, WA': 'Kennewick-Richland, WA',
              'Sarasota-Bradenton, FL': 'North Port-Sarasota-Bradenton, FL',
              'Scranton-Wilkes-Barre-Hazleton, PA': 'Scranton--Wilkes-Barre--Hazleton, PA',
              'Steubenville-Weirton, OH-WV': 'Weirton-Steubenville, WV-OH',
              'Tacoma, WA': 'Seattle-Tacoma-Bellevue, WA',
              'Ventura, CA': 'Oxnard-Thousand Oaks-Ventura, CA',
              'Visalia-Tulare-Porterville, CA':'Visalia, CA',
              'West Palm Beach-Boca Raton, FL': 'Palm Bay-Melbourne-Titusville, FL',
              'Hamilton-Middletown, OH':'Cincinnati, OH-KY-IN',
              'Kenosha, WI':'Chicago-Naperville-Elgin, IL-IN-WI',
              'Sharon, PA':'Youngstown-Warren-Boardman, OH-PA',
              'Benton Harbor, MI':'Niles, MI'
}

In [18]:
supp = pd.read_stata('./data/raw/HOUSING_SUPPLY.dta')
supp = supp.rename(columns={'msaname':'MSA'})
supp['MSA'] = supp.MSA.str.replace(r'\s+\([A-Z]+\)','',regex=True)
supp = supp[['MSA','WRLURI','elasticity']]
for key in MSA_rule:
    supp.loc[supp.MSA==key,'MSA'] = MSA_rule[key]
for key in MSA_rule2:
    supp.loc[supp.MSA==key,'MSA'] = MSA_rule2[key]

In [19]:
supp = supp.groupby('MSA').mean().reset_index()

In [20]:
MSA_list = pd.merge(MSA_list,supp,how='outer',on='MSA',indicator=True)
MSA_list[MSA_list._merge=='right_only']

Unnamed: 0,MSA,MSA_old,PINCPC,POP,PINC,EMP,cbsa,RGDP,RGDPPC,D2I_Low,D2I_H,WRLURI,elasticity,_merge
382,"Gary, IN",,,,,,,,,,,-0.69447,1.736834,right_only
383,"Jamestown, NY",,,,,,,,,,,-0.648981,2.868494,right_only
384,"Newburgh, NY-PA",,,,,,,,,,,-0.046225,1.787929,right_only
385,"Wilmington-Newark, DE-MD",,,,,,,,,,,0.467964,1.991168,right_only


In [21]:
MSA_list = MSA_list[MSA_list._merge!='right_only']
MSA_list = MSA_list.drop(columns='_merge')

## Save MSA_features

In [22]:
MSA_list.to_csv('./data/MSA_features.csv',index=False)

# Combine MSA features with group estimates

## Baseline: FE with lagged dependent variables

Read `MSA_features.csv` from Section 2, and combine it with group estimates `EMP_FE_Ylag_Gr_re.csv`.

In [2]:
# read MSA features
df1 = pd.read_csv('./data/MSA_features.csv')

# read group estimates
df2 = pd.read_csv('./output/EMP/EMP_FE_Ylag_Gr_re.csv',header=None)
# clean up
df2 = df2.iloc[:,[1,2,3]]
df2.columns = ['FE_Y_G2','FE_Y_G3','FE_Y_G4']

# merge the two
Gr = pd.concat([df1,df2],axis=1) 

In [3]:
# mean
base = Gr[['FE_Y_G2','RGDPPC','PINCPC','POP','EMP','WRLURI','elasticity','D2I_Low','D2I_H']].groupby('FE_Y_G2').mean().reset_index()
for g in ['FE_Y_G3','FE_Y_G4']:
    base = pd.concat([base,Gr[[g,'RGDPPC','PINCPC','POP','EMP','WRLURI','elasticity','D2I_Low','D2I_H']].groupby(g).mean().reset_index()])
    
    
# standard deviation
base1 = Gr[['FE_Y_G2','RGDPPC','PINCPC','POP','EMP','WRLURI','elasticity','D2I_Low','D2I_H']].groupby('FE_Y_G2').std().reset_index()
for g in ['FE_Y_G3','FE_Y_G4']:
    base1 = pd.concat([base1,Gr[[g,'RGDPPC','PINCPC','POP','EMP','WRLURI','elasticity','D2I_Low','D2I_H']].groupby(g).std().reset_index()])

count = Gr.groupby('FE_Y_G2')['MSA'].count().reset_index()
for g in ['FE_Y_G3','FE_Y_G4']:
    count = pd.concat([count,Gr[[g,'MSA']].groupby(g)['MSA'].count().reset_index()])

tab6 = pd.concat([base,base1,count['MSA']],axis=1)
#tab6.iloc[:,[22,0,1,12,2,13,3,14,4,15,5,16,6,17,7,18,8,19,9,10]]

In [4]:
tab6['Gr'] = [1,2,1,2,3,1,2,3,4]

In [5]:
tab6.iloc[:,[-1,-2,1,12,2,13,3,14,4,15,5,16,6,17,7,18,8,19]]

Unnamed: 0,Gr,MSA,RGDPPC,RGDPPC.1,PINCPC,PINCPC.1,POP,POP.1,EMP,EMP.1,WRLURI,WRLURI.1,elasticity,elasticity.1,D2I_Low,D2I_Low.1,D2I_H,D2I_H.1
0,1,59,52435.720082,18144.909932,48492.881356,12687.78345,1080.942898,2060.545424,668.502559,1342.756952,0.293063,0.847953,1.869773,0.922585,1.585508,0.514316,1.739722,0.484784
1,2,323,46052.013267,13190.213875,45446.321981,9399.370229,666.181207,1537.968664,413.168755,1001.106909,-0.195701,0.801485,2.720897,1.485238,1.399565,0.473981,1.560718,0.445479
0,1,24,53686.497708,22897.612094,49063.416667,14082.064797,1485.602667,2768.059624,885.454667,1781.912,0.512736,0.396054,1.529502,0.774894,1.840625,0.449401,1.954375,0.434643
1,2,200,48452.222896,14443.884514,46352.175,9531.57584,774.12389,1861.153916,490.26834,1219.815421,-0.171589,0.870971,2.685243,1.465185,1.358882,0.440207,1.529085,0.417952
2,3,158,44237.893962,11531.121199,44887.879747,9828.082465,559.955184,917.89788,339.18088,589.691925,-0.152653,0.780285,2.631473,1.44304,1.453244,0.511211,1.613583,0.481131
0,1,24,53686.497708,22897.612094,49063.416667,14082.064797,1485.602667,2768.059624,885.454667,1781.912,0.512736,0.396054,1.529502,0.774894,1.840625,0.449401,1.954375,0.434643
1,2,87,49303.418564,12894.464499,47058.471264,9574.577846,842.368218,2223.666621,535.018874,1461.835623,0.134841,0.98487,2.131202,0.869963,1.412816,0.465239,1.579286,0.445824
2,3,168,46886.79083,14375.036469,46086.214286,10455.754375,642.016423,1310.595573,403.76069,857.378685,-0.335621,0.694772,2.986564,1.615613,1.363877,0.441637,1.525046,0.414137
3,4,103,43821.879133,11592.460643,43943.194175,8158.464212,603.426243,1040.404187,361.803971,661.488351,-0.154302,0.814787,2.616223,1.465959,1.449976,0.530465,1.622066,0.498503


### Ad hoc grouping

In [6]:
print('PINCPC Percentile:')
pct_all = Gr.PINCPC.describe(percentiles=[0.9])
print(pct_all)
print('\n')
print('PINCPC Percentile in Group 1:')
pct_g1 = Gr.loc[Gr['FE_Y_G3']==1,'PINCPC'].describe(percentiles=[0.1])
print(pct_g1)

PINCPC Percentile:
count       382.000000
mean      45916.863874
std       10019.399870
min       25451.000000
50%       43932.500000
90%       56249.600000
max      112769.000000
Name: PINCPC, dtype: float64


PINCPC Percentile in Group 1:
count       24.000000
mean     49063.416667
std      14082.064797
min      34323.000000
10%      37177.400000
50%      44385.000000
max      98690.000000
Name: PINCPC, dtype: float64


In [7]:
Gr['PINCPC_Top10'] = 0
Gr.loc[Gr.PINCPC>=56249.6,'PINCPC_Top10']=1

Gr['FE_Y_PoorG3_1'] = 0
Gr.loc[(Gr['FE_Y_G3']==1)&(Gr.PINCPC<=pct_g1[4]),'FE_Y_PoorG3_1'] = 1

In [8]:
Gr.to_csv('./data/MSA_features_Adhoc_FE_Ylag.csv',index=False)

### Map

__First convert MSA to County__

According to [Freddie Mac](http://www.freddiemac.com/research/indices/fmhpi-faq.page), the definition of MSAs follow the most recent ones in Office of Management and Budget (OMB).

I take the Sep. 2018 one on their [website](https://www.census.gov/geographies/reference-files/time-series/demo/metro-micro/delineation-files.html)



__Details of codes and requirements are [here](https://plot.ly/python/county-choropleth/)__

In [9]:
# load crosswalk
cross = pd.read_csv('./data/cbsa_county.csv')
cross['FIPS'] = cross.apply(lambda x: str(x['FIPS State Code']).zfill(2)+str(x['FIPS County Code']).zfill(3), axis = 1) 
cross = cross.iloc[:,[0,1,6]]
cross.columns = ['cbsa','MSA','FIPS']

# load MSA feature
Gr.cbsa = Gr.cbsa.astype(int)

# merge data
cross = pd.merge(cross,Gr.loc[:,['cbsa','FE_Y_G3','FE_Y_G4']],how='outer',on='cbsa',indicator=True)

# keep only 382 MSAs used
cross = cross[cross._merge=='both']
cross = cross.drop(columns='_merge')

In [None]:
# plot mappings for G3 G4 using FIPS
# colorscale = ["#f7fbff","#ebf3fb","#deebf7","#d2e3f3","#c6dbef","#b3d2e9","#9ecae1",
#               "#85bcdb","#6baed6","#57a0ce","#4292c6","#3082be","#2171b5","#1361a9",
#               "#08519c","#0b4083","#08306b"]

colorscale = ["#08306b","#3082be","#c6dbef"]

fips = cross['FIPS'].tolist()
values = cross['FE_Y_G3'].astype(int).tolist()

fig = ff.create_choropleth(
    fips=fips, values=values,
    colorscale=colorscale,
    show_hover=True, 
    centroid_marker={'opacity': 0},
    asp=2.9,
    state_outline={'color': 'rgb(0,0,0)', 'width': 0.5},
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
)

fig.layout.template = None
fig.show()


In [None]:
# plot mappings for G3 G4 using FIPS
# colorscale = ["#f7fbff","#ebf3fb","#deebf7","#d2e3f3","#c6dbef","#b3d2e9","#9ecae1",
#               "#85bcdb","#6baed6","#57a0ce","#4292c6","#3082be","#2171b5","#1361a9",
#               "#08519c","#0b4083","#08306b"]

colorscale = ["#08306b","#3082be","#85bcdb","#c6dbef"]

fips = cross['FIPS'].tolist()
values = cross['FE_Y_G4'].astype(int).tolist()

fig = ff.create_choropleth(
    fips=fips, values=values,
    colorscale=colorscale,
    show_hover=True, 
    centroid_marker={'opacity': 0},
    asp=2.9,
    state_outline={'color': 'rgb(0,0,0)', 'width': 0.5},
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
)

fig.layout.template = None
fig.show()


## Robustness: FE without lagged dependent variables

In [9]:
# read MSA features
df1 = pd.read_csv('./data/MSA_features.csv')

# read group estimates
df2 = pd.read_csv('./output/EMP/EMP_FE_NoYlag_Gr_re.csv',header=None)
# clean up
df2 = df2.iloc[:,[1,2,3]]
df2.columns = ['FE_NoY_G2','FE_NoY_G3','FE_NoY_G4']

# merge the two
Gr = pd.concat([df1,df2],axis=1) 

In [10]:
# mean
base = Gr[['FE_NoY_G2','RGDPPC','PINCPC','POP','EMP','WRLURI','elasticity','D2I_Low','D2I_H']].groupby('FE_NoY_G2').mean().reset_index()
for g in ['FE_NoY_G3','FE_NoY_G4']:
    base = pd.concat([base,Gr[[g,'RGDPPC','PINCPC','POP','EMP','WRLURI','elasticity','D2I_Low','D2I_H']].groupby(g).mean().reset_index()])
    
    
# standard deviation
base1 = Gr[['FE_NoY_G2','RGDPPC','PINCPC','POP','EMP','WRLURI','elasticity','D2I_Low','D2I_H']].groupby('FE_NoY_G2').std().reset_index()
for g in ['FE_NoY_G3','FE_NoY_G4']:
    base1 = pd.concat([base1,Gr[[g,'RGDPPC','PINCPC','POP','EMP','WRLURI','elasticity','D2I_Low','D2I_H']].groupby(g).std().reset_index()])

count = Gr.groupby('FE_NoY_G2')['MSA'].count().reset_index()
for g in ['FE_NoY_G3','FE_NoY_G4']:
    count = pd.concat([count,Gr[[g,'MSA']].groupby(g)['MSA'].count().reset_index()])

tab6 = pd.concat([base,base1,count['MSA']],axis=1)
#tab6.iloc[:,[22,0,1,12,2,13,3,14,4,15,5,16,6,17,7,18,8,19,9,10]]

In [11]:
tab6['Gr'] = [1,2,1,2,3,1,2,3,4]

In [12]:
tab6.iloc[:,[23,22,1,12,2,13,3,14,4,15,5,16,6,17,7,18,8,19]]

Unnamed: 0,Gr,MSA,RGDPPC,RGDPPC.1,PINCPC,PINCPC.1,POP,POP.1,EMP,EMP.1,WRLURI,WRLURI.1,elasticity,elasticity.1,D2I_Low,D2I_Low.1,D2I_H,D2I_H.1
0,1,81,52090.94082,21252.063606,48205.506173,13568.631738,932.447827,1864.314296,580.632605,1211.782082,0.229714,0.996099,2.091972,1.010341,1.523241,0.491167,1.681513,0.478479
1,2,301,45678.211175,11312.857996,45300.983389,8752.315601,675.826867,1564.423783,418.152551,1018.792152,-0.215254,0.74911,2.724737,1.515749,1.402742,0.480231,1.562405,0.44654
0,1,31,54013.539189,20465.20062,48826.967742,13319.425187,1327.491806,2479.19828,799.685323,1592.017831,0.428975,0.929194,1.747071,0.984927,1.720323,0.458419,1.826019,0.44786
1,2,181,48591.710421,15244.346106,46557.629834,9693.61092,688.169818,1815.623568,433.961834,1185.789129,-0.176023,0.864611,2.75024,1.634293,1.336889,0.497566,1.502818,0.43514
2,3,170,44111.696877,10701.675003,44703.970588,9553.625471,666.124576,1158.056834,409.163659,758.730036,-0.164509,0.735345,2.584304,1.262761,1.471971,0.449127,1.637058,0.458652
0,1,12,56274.551371,10565.712124,51268.083333,8832.893755,2029.007417,3717.287565,1291.545417,2427.724559,0.465637,0.524941,1.232472,0.892922,1.89875,0.43065,2.06375,0.47022
1,2,157,49273.317269,17550.223213,47504.968153,12012.057431,774.285879,1943.255186,480.394975,1260.205959,0.022429,0.918448,2.373862,1.168969,1.443822,0.501381,1.58527,0.448979
2,3,110,46299.802202,10642.436579,45029.736364,7989.813664,648.140682,1091.991377,409.428782,742.096851,-0.252433,0.761629,2.812972,1.704544,1.397661,0.433252,1.581659,0.453095
3,4,103,43342.952429,10960.419401,43820.135922,8070.377166,599.472951,1095.282345,358.615893,687.788214,-0.236094,0.723642,2.779513,1.452085,1.382476,0.491507,1.548207,0.445405


### Ad hoc grouping

In [13]:
print('PINCPC Percentile:')
pct_all = Gr.PINCPC.describe(percentiles=[0.9])
print(pct_all)
print('\n')
print('PINCPC Percentile in Group 1:')
pct_g1 = Gr.loc[Gr['FE_NoY_G3']==1,'PINCPC'].describe(percentiles=[0.1])
print(pct_g1)

PINCPC Percentile:
count       382.000000
mean      45916.863874
std       10019.399870
min       25451.000000
50%       43932.500000
90%       56249.600000
max      112769.000000
Name: PINCPC, dtype: float64


PINCPC Percentile in Group 1:
count       31.000000
mean     48826.967742
std      13319.425187
min      30153.000000
10%      36809.000000
50%      45087.000000
max      98690.000000
Name: PINCPC, dtype: float64


In [14]:
Gr['PINCPC_Top10'] = 0
Gr.loc[Gr.PINCPC>=56249.6,'PINCPC_Top10']=1

Gr['FE_NoY_PoorG3_1'] = 0
Gr.loc[(Gr['FE_NoY_G3']==1)&(Gr.PINCPC<=pct_g1[4]),'FE_NoY_PoorG3_1'] = 1

In [15]:
Gr.to_csv('./data/MSA_features_Adhoc_FE_NoYlag.csv',index=False)

### Map

__First convert MSA to County__

According to [Freddie Mac](http://www.freddiemac.com/research/indices/fmhpi-faq.page), the definition of MSAs follow the most recent ones in Office of Management and Budget (OMB).

I take the Sep. 2018 one on their [website](https://www.census.gov/geographies/reference-files/time-series/demo/metro-micro/delineation-files.html)



__Details of codes and requirements are [here](https://plot.ly/python/county-choropleth/)__

In [17]:
# load crosswalk
cross = pd.read_csv('./data/cbsa_county.csv')
cross['FIPS'] = cross.apply(lambda x: str(x['FIPS State Code']).zfill(2)+str(x['FIPS County Code']).zfill(3), axis = 1) 
cross = cross.iloc[:,[0,1,6]]
cross.columns = ['cbsa','MSA','FIPS']

# load MSA feature
Gr.cbsa = Gr.cbsa.astype(int)

# merge data
cross = pd.merge(cross,Gr.loc[:,['cbsa','FE_NoY_G3','FE_NoY_G4']],how='outer',on='cbsa',indicator=True)

# keep only 382 MSAs used
cross = cross[cross._merge=='both']
cross = cross.drop(columns='_merge')

In [None]:
# plot mappings for G3 G4 using FIPS
# colorscale = ["#f7fbff","#ebf3fb","#deebf7","#d2e3f3","#c6dbef","#b3d2e9","#9ecae1",
#               "#85bcdb","#6baed6","#57a0ce","#4292c6","#3082be","#2171b5","#1361a9",
#               "#08519c","#0b4083","#08306b"]

colorscale = ["#08306b","#3082be","#c6dbef"]

fips = cross['FIPS'].tolist()
values = cross['FE_NoY_G3'].astype(int).tolist()

fig = ff.create_choropleth(
    fips=fips, values=values,
    colorscale=colorscale,
    show_hover=True, 
    centroid_marker={'opacity': 0},
    asp=2.9,
    state_outline={'color': 'rgb(0,0,0)', 'width': 0.5},
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
)

fig.layout.template = None
fig.show()


In [None]:
# plot mappings for G3 G4 using FIPS
# colorscale = ["#f7fbff","#ebf3fb","#deebf7","#d2e3f3","#c6dbef","#b3d2e9","#9ecae1",
#               "#85bcdb","#6baed6","#57a0ce","#4292c6","#3082be","#2171b5","#1361a9",
#               "#08519c","#0b4083","#08306b"]

colorscale = ["#08306b","#3082be","#85bcdb","#c6dbef"]

fips = cross['FIPS'].tolist()
values = cross['FE_NoY_G4'].astype(int).tolist()

fig = ff.create_choropleth(
    fips=fips, values=values,
    colorscale=colorscale,
    show_hover=True, 
    centroid_marker={'opacity': 0},
    asp=2.9,
    state_outline={'color': 'rgb(0,0,0)', 'width': 0.5},
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
)

fig.layout.template = None
fig.show()


## Horizon-by-horizon grouping

In [16]:
Gr = pd.read_csv('./output/EMP/EMP_HBH_Gr_re.csv',header=None)
col_name = []
for h in range(1,25):
    col_name.append('H='+str(h))
Gr.columns = col_name

In [17]:
MSA_list = pd.read_csv('./data/MSA_features.csv')
MSA_list = pd.concat([MSA_list,Gr],axis=1)

In [18]:
# mean
base = MSA_list[['H=1','RGDPPC','PINCPC','POP','EMP','WRLURI','elasticity','D2I_Low','D2I_H']].groupby('H=1').mean().reset_index()
for g in ['H=6','H=12','H=18','H=24']:
    base = pd.concat([base,MSA_list[[g,'RGDPPC','PINCPC','POP','EMP','WRLURI','elasticity','D2I_Low','D2I_H']].groupby(g).mean().reset_index()])
    
# standard deviation
base1 = MSA_list[['H=1','RGDPPC','PINCPC','POP','EMP','WRLURI','elasticity','D2I_Low','D2I_H']].groupby('H=1').std().reset_index()
for g in ['H=6','H=12','H=18','H=24']:
    base1 = pd.concat([base1,MSA_list[[g,'RGDPPC','PINCPC','POP','EMP','WRLURI','elasticity','D2I_Low','D2I_H']].groupby(g).std().reset_index()])

count = MSA_list.groupby('H=1')['MSA'].count().reset_index()
for g in ['H=6','H=12','H=18','H=24']:
    count = pd.concat([count,MSA_list[[g,'MSA']].groupby(g)['MSA'].count().reset_index()])

tab6 = pd.concat([base,base1,count['MSA']],axis=1)
#tab6.iloc[:,[22,0,1,12,2,13,3,14,4,15,5,16,6,17,7,18,8,19,9,10]]

In [19]:
tab6['Gr'] = [1,2,3]*5

In [20]:
tab6.iloc[:,[27,26,1,14,2,15,3,16,4,17,5,18,6,19,7,20,8,21]]

Unnamed: 0,Gr,MSA,RGDPPC,RGDPPC.1,PINCPC,PINCPC.1,POP,POP.1,EMP,EMP.1,WRLURI,WRLURI.1,elasticity,elasticity.1,D2I_Low,D2I_Low.1,D2I_H,D2I_H.1
0,1,88,48935.751096,15842.882291,47240.909091,10238.172841,1039.528466,2612.536198,644.257398,1702.742843,0.271589,0.760227,1.970821,0.813924,1.561648,0.500916,1.723598,0.487022
1,2,168,48398.590215,15055.734289,46585.380952,10848.316756,610.860714,1176.33593,380.534274,751.075102,-0.29094,0.830919,2.88136,1.684563,1.325506,0.437612,1.497912,0.413262
2,3,126,43898.400933,11167.634074,44100.777778,8404.823848,673.405206,1201.232063,414.847222,799.028955,-0.176583,0.784304,2.651478,1.343641,1.47276,0.50628,1.616737,0.46469
0,1,30,52835.67146,21678.992257,48809.533333,13914.508035,1449.0845,2590.599992,888.015667,1692.425419,0.46016,0.579185,1.618602,0.884371,1.784167,0.447986,1.915577,0.457305
1,2,244,47652.712703,13691.009316,45883.831967,9203.178069,768.264881,1742.905485,482.631881,1136.124016,-0.165787,0.829408,2.666156,1.412928,1.347305,0.437703,1.508528,0.405826
2,3,108,44038.664138,12190.289794,45187.972222,10475.951449,444.657361,742.747304,263.819537,474.184614,-0.180862,0.823911,2.689891,1.558232,1.511898,0.536952,1.685784,0.507993
0,1,42,53631.14945,18969.807961,49157.785714,12791.473007,1202.20119,2250.903731,742.842714,1469.903119,0.442819,0.838262,1.741417,0.88573,1.682619,0.493855,1.849934,0.491231
1,2,240,47948.042304,13952.90476,46126.716667,9991.960878,733.079458,1703.922223,460.900846,1111.80257,-0.215855,0.807262,2.687325,1.430214,1.349456,0.428293,1.519167,0.406832
2,3,100,42084.693402,10725.93909,44052.03,8335.399189,525.20641,1011.50819,310.79562,643.221437,-0.121787,0.779379,2.717142,1.575701,1.51015,0.556435,1.652011,0.509962
0,1,20,53006.961942,24374.100288,48564.9,14018.352493,1592.32255,2968.167271,947.63355,1912.198229,0.259883,0.631199,1.835936,1.043152,1.777375,0.48457,1.882353,0.48206


### Maps

In [21]:
# load crosswalk
cross = pd.read_csv('./data/cbsa_county.csv')
cross['FIPS'] = cross.apply(lambda x: str(x['FIPS State Code']).zfill(2)+str(x['FIPS County Code']).zfill(3), axis = 1) 
cross = cross.iloc[:,[0,1,6]]
cross.columns = ['cbsa','MSA','FIPS']

# load MSA feature
MSA_list.cbsa = MSA_list.cbsa.astype(int)

# merge data
cross = pd.merge(cross,MSA_list.loc[:,['cbsa','H=1','H=6','H=12','H=18','H=24']],how='outer',on='cbsa',indicator=True)

# keep only 382 MSAs used
cross = cross[cross._merge=='both']
cross = cross.drop(columns='_merge')

In [None]:
# plot mappings for G3 G4 using FIPS
# colorscale = ["#f7fbff","#ebf3fb","#deebf7","#d2e3f3","#c6dbef","#b3d2e9","#9ecae1",
#               "#85bcdb","#6baed6","#57a0ce","#4292c6","#3082be","#2171b5","#1361a9",
#               "#08519c","#0b4083","#08306b"]

colorscale = ["#08306b","#3082be","#c6dbef"]

fips = cross['FIPS'].tolist()
values = cross['H=1'].astype(int).tolist()

fig = ff.create_choropleth(
    fips=fips, values=values,
    colorscale=colorscale,
    show_hover=True, 
    centroid_marker={'opacity': 0},
    asp=2.9,
    state_outline={'color': 'rgb(0,0,0)', 'width': 0.5},
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
)

fig.layout.template = None
fig.show()