###### Imports and Settings

In [1]:
import pandas as pd
import numpy as np
import requests
from functools import reduce
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 150)

# This notebook outlines the download and formatting process for the Zillow Home Value and Rental indices for counties and places in the GNRC operating region.  

Go to this page: https://www.zillow.com/research/data/  

+ Under "HOME VALUES", select Data Type "ZHVI All Homes (SFR, Condo/Co-op) Time Series, Smoothed, Seasonally Adjusted" and download this for Metro & US, State, County, and City. 
+ Under "RENTALS", select Data Type "ZORI (Smoothed, Seasonally Adjusted): All Homes Plus Multifamily Time Series" and download this for Metro & US, County, and City (State is not currently available).   

### Save these csvs as they come in the Data Downloads folder of Parent Data Gathering

In [2]:
hv_metrous = pd.read_csv('../../Data Downloads/Zillow_MetroUS_ZHVI.csv')
hv_state = pd.read_csv('../../Data Downloads/Zillow_State_ZHVI.csv')
hv_county = pd.read_csv('../../Data Downloads/Zillow_County_ZHVI.csv')
hv_place = pd.read_csv('../../Data Downloads/Zillow_City_ZHVI.csv')
rental_metrous = pd.read_csv('../../Data Downloads/Zillow_MetroUS_ZORI.csv')
rental_county = pd.read_csv('../../Data Downloads/Zillow_County_ZORI.csv')
rental_place = pd.read_csv('../../Data Downloads/Zillow_City_ZORI.csv')

The county codes are not FIPS codes, unsure what they are but our region contains the following:
Cheatham:2185, Davidson:2243, Dickson:1668 , Houston:1784, Humphreys:2728, Macon:623, Maury:632, Montgomery:2982, Robertson:2834, Rutherford:3016, Sumner:1407, Stewart:2044, Trousdale:2856, Williamson:3080, Wilson:1496, (KY) Allen:369 , (KY) Simpson:2028

In [3]:
#filter down the metro and us files to include only the US and the Nashville and Clarksville MSAs
metrous = [102001, 394902, 394471]
hv_metrous = hv_metrous.loc[hv_metrous['RegionID'].isin(metrous)].reset_index(drop = True)
rental_metrous = rental_metrous.loc[rental_metrous['RegionID'].isin(metrous)].reset_index(drop = True)
#filter the one state file
hv_state = hv_state.loc[hv_state['RegionID'] == 53].reset_index(drop = True)
#filter the county files, Simpson Co KY is RegionID 2028, but doesn't have associated data until recent years so not including for now
counties = [2185, 2243, 1668, 1784, 2728, 623, 632, 2982, 2834, 3016, 1407, 2044, 2856, 3080, 1496, 369]
hv_county = hv_county.loc[hv_county['RegionID'].isin(counties)].reset_index(drop = True)
rental_county = rental_county.loc[rental_county['RegionID'].isin(counties)].reset_index(drop = True)
#filter the place files
places = [41932, 30583, 10843, 30993, 49233, 45339, 11564, 32006, 46091, 25534, 42878, 39894, 19523, 6118, 26161, 54450, 7208, 27227, 29482, 41690]
hv_place = hv_place.loc[hv_place['RegionID'].isin(places)].reset_index(drop = True)
rental_place = rental_place.loc[rental_place['RegionID'].isin(places)].reset_index(drop = True)

## Fix up home value DFs

In [4]:
hv_place = hv_place.drop(columns = ['RegionID', 'SizeRank', 'RegionType', 'StateName', 'State', 'Metro', 'CountyName']).set_index(('RegionName'))
hv_county = hv_county.drop(columns = ['RegionID', 'SizeRank', 'RegionType', 'StateName', 'State', 'Metro', 'StateCodeFIPS', 'MunicipalCodeFIPS']).set_index(('RegionName'))
hv_state = hv_state.drop(columns = ['RegionID', 'SizeRank', 'RegionType', 'StateName']).set_index(('RegionName'))
hv_metrous = hv_metrous.drop(columns = ['RegionID', 'SizeRank', 'RegionType', 'StateName']).set_index(('RegionName'))

In [5]:
frame = [hv_place, hv_county, hv_state, hv_metrous]
emptyframe = pd.DataFrame()

for df in frame:
    emptyframe = emptyframe.append(df)
    
hvs = emptyframe

  emptyframe = emptyframe.append(df)
  emptyframe = emptyframe.append(df)
  emptyframe = emptyframe.append(df)
  emptyframe = emptyframe.append(df)


In [6]:
data = hvs.transpose().reset_index().rename(columns = {'index':'NAME'})

In [7]:
year = data['NAME'].str.split(pat = "-", expand = True)
data['Year'] = year[0]

In [8]:

GNRCCounties = [data['Stewart County'], data['Montgomery County'], data['Houston County'], data['Humphreys County'],
                data['Dickson County'], data['Cheatham County'], data['Robertson County'],
                data['Sumner County'], data['Davidson County'],data['Wilson County'],
                data['Trousdale County'],data['Williamson County'], data['Rutherford County']]
data['GNRC'] = sum(GNRCCounties)
GNRCCountiesAll = [data['Stewart County'], data['Montgomery County'], data['Houston County'], data['Humphreys County'],
                   data['Dickson County'], data['Cheatham County'], data['Robertson County'],
                   data['Sumner County'], data['Davidson County'],data['Wilson County'],
                   data['Trousdale County'],data['Williamson County'], data['Rutherford County'], data['Maury County']]
data['GNRC Region'] = sum(GNRCCountiesAll)
MPOCounties = [data['Robertson County'], data['Sumner County'], data['Davidson County'], data['Wilson County'], data['Williamson County'], 
              data['Rutherford County'], data['Maury County']]
data['MPO'] = sum(MPOCounties)

In [9]:
data = data.rename(columns = {'Nashville': 'Nashville-Davidson metropolitan government (balance), Tennessee', 'Murfreesboro': 'Murfreesboro city, Tennessee',
                              'Clarksville': 'Clarksville city, Tennessee', 'Franklin': 'Franklin city, Tennessee', 
                              'Hendersonville': 'Hendersonville city, Tennessee', 'Lebanon': 'Lebanon city, Tennessee', 
                              'Columbia': 'Columbia city, Tennessee', 'Smyrna': 'Smyrna town, Tennessee', 'Mount Juliet': 'Mount Juliet city, Tennessee', 
                              'Gallatin': 'Gallatin city, Tennessee', 'Brentwood': 'Brentwood city, Tennessee', 
                              'Spring Hill': 'Spring Hill city, Tennessee', 'La Vergne': 'La Vergne city, Tennessee', 
                              'Nolensville': 'Nolensville town, Tennessee', 'Ashland City': 'Ashland City town, Tennessee', 
                              'White House': 'White House city, Tennessee', 'Fairview': 'Fairview city, Tennessee', 
                              'Thompsons Station': "Thompson's Station town, Tennessee", 'Millersville': 'Millersville city, Tennessee', 
                              'Davidson County': 'Davidson County, Tennessee', 'Rutherford County': 'Rutherford County, Tennessee',
                              'Williamson County': 'Williamson County, Tennessee', 'Montgomery County': 'Montgomery County, Tennessee',
                              'Sumner County': 'Sumner County, Tennessee', 'Wilson County': 'Wilson County, Tennessee',
                              'Maury County': 'Maury County, Tennessee', 'Robertson County': 'Robertson County, Tennessee',
                              'Dickson County': 'Dickson County, Tennessee', 'Cheatham County': 'Cheatham County, Tennessee',
                              'Macon County': 'Macon County, Tennessee', 'Allen County': 'Allen County, Tennessee',
                              'Humphreys County': 'Humphreys County, Tennessee', 'Stewart County': 'Stewart County, Tennessee',
                              'Trousdale County': 'Trousdale County, Tennessee', 'Houston County': 'Houston County, Tennessee',
                              'Nashville, TN': 'Nashville MSA', 'Clarksville, TN': 'Clarksville MSA'})

In [10]:
data = data.groupby(['Year']).mean()
data = data.transpose().reset_index()
data = data.rename(columns = {'RegionName':'NAME'})
data = data.set_index('NAME')
data = data.melt(value_name = 'Home Value', ignore_index = False)
hv = data.reset_index()
hv.head()

Unnamed: 0,NAME,Year,Home Value
0,Nashville-Davidson metropolitan government (ba...,2000,143116.25
1,"Murfreesboro city, Tennessee",2000,133682.583333
2,"Clarksville city, Tennessee",2000,117695.333333
3,"Franklin city, Tennessee",2000,242765.416667
4,"Hendersonville city, Tennessee",2000,174455.166667


## Fix Up Rental DFs

In [11]:
rental_place = rental_place.drop(columns = ['RegionID', 'SizeRank', 'RegionType', 'StateName', 'State', 'Metro', 'CountyName']).set_index(('RegionName'))
rental_county= rental_county.drop(columns = ['RegionID', 'SizeRank', 'RegionType', 'StateName', 'State', 'Metro', 'StateCodeFIPS', 'MunicipalCodeFIPS']).set_index(('RegionName'))
rental_metrous = rental_metrous.drop(columns = ['RegionID', 'SizeRank', 'RegionType', 'StateName']).set_index(('RegionName'))

In [12]:
frame = [rental_place, rental_county, rental_metrous]
emptyframe = pd.DataFrame()

for df in frame:
    emptyframe = emptyframe.append(df)
    
rental = emptyframe

  emptyframe = emptyframe.append(df)
  emptyframe = emptyframe.append(df)
  emptyframe = emptyframe.append(df)


In [13]:
data = rental.transpose().reset_index().rename(columns = {'index':'NAME'})

In [14]:
year = data['NAME'].str.split(pat = "-", expand = True)
data['Year'] = year[0]

In [15]:
#No Stewart County, Houston County, Humphreys County, Dickson County , or Trousdale County data
GNRCCounties = [data['Montgomery County'], 
                data['Cheatham County'], data['Robertson County'],
                data['Sumner County'], data['Davidson County'],data['Wilson County'],
                data['Williamson County'], data['Rutherford County']]
data['GNRC'] = sum(GNRCCounties)
GNRCCountiesAll = [data['Montgomery County'], 
                   data['Cheatham County'], data['Robertson County'],
                   data['Sumner County'], data['Davidson County'],data['Wilson County'],
                   data['Williamson County'], data['Rutherford County'], data['Maury County']]
data['GNRC Region'] = sum(GNRCCountiesAll)
MPOCounties = [data['Robertson County'], data['Sumner County'], data['Davidson County'], data['Wilson County'], data['Williamson County'], 
              data['Rutherford County'], data['Maury County']]
data['MPO'] = sum(MPOCounties)

In [16]:
data = data.rename(columns = {'Nashville': 'Nashville-Davidson metropolitan government (balance), Tennessee', 'Murfreesboro': 'Murfreesboro city, Tennessee',
                              'Clarksville': 'Clarksville city, Tennessee', 'Franklin': 'Franklin city, Tennessee', 
                              'Hendersonville': 'Hendersonville city, Tennessee', 'Lebanon': 'Lebanon city, Tennessee', 
                              'Columbia': 'Columbia city, Tennessee', 'Smyrna': 'Smyrna town, Tennessee', 'Mount Juliet': 'Mount Juliet city, Tennessee', 
                              'Gallatin': 'Gallatin city, Tennessee', 'Brentwood': 'Brentwood city, Tennessee', 
                              'Spring Hill': 'Spring Hill city, Tennessee', 'La Vergne': 'La Vergne city, Tennessee', 
                              'Nolensville': 'Nolensville town, Tennessee', 'Ashland City': 'Ashland City town, Tennessee', 
                              'White House': 'White House city, Tennessee', 'Fairview': 'Fairview city, Tennessee', 
                              'Thompsons Station': "Thompson's Station town, Tennessee", 'Millersville': 'Millersville city, Tennessee', 
                              'Davidson County': 'Davidson County, Tennessee', 'Rutherford County': 'Rutherford County, Tennessee',
                              'Williamson County': 'Williamson County, Tennessee', 'Montgomery County': 'Montgomery County, Tennessee',
                              'Sumner County': 'Sumner County, Tennessee', 'Wilson County': 'Wilson County, Tennessee',
                              'Maury County': 'Maury County, Tennessee', 'Robertson County': 'Robertson County, Tennessee',
                              'Dickson County': 'Dickson County, Tennessee', 'Cheatham County': 'Cheatham County, Tennessee',
                              'Macon County': 'Macon County, Tennessee', 'Allen County': 'Allen County, Tennessee',
                              'Humphreys County': 'Humphreys County, Tennessee', 'Stewart County': 'Stewart County, Tennessee',
                              'Trousdale County': 'Trousdale County, Tennessee', 'Houston County': 'Houston County, Tennessee',
                              'Nashville, TN': 'Nashville MSA', 'Clarksville, TN': 'Clarksville MSA'})

In [17]:
data = data.groupby(['Year']).mean()
data = data.transpose().reset_index()
data = data.rename(columns = {'RegionName':'NAME'})
data = data.set_index('NAME')
data = data.melt(value_name = 'Average Rent', ignore_index = False)
rental = data.reset_index()
rental.head()

Unnamed: 0,NAME,Year,Average Rent
0,Nashville-Davidson metropolitan government (ba...,2015,1237.522741
1,"Murfreesboro city, Tennessee",2015,1235.929405
2,"Clarksville city, Tennessee",2015,965.920467
3,"Franklin city, Tennessee",2015,1416.132261
4,"Hendersonville city, Tennessee",2015,1047.5428


In [18]:
df = rental.merge(hv, on = ['NAME', 'Year'], how = 'outer')

In [19]:
geos = pd.read_csv('../../Data Downloads/geofips.csv')
geos['GEO_ID'] = geos['GEO_ID'].fillna('None')

In [20]:
df = df.merge(geos, how = 'inner')

In [21]:
df = df.set_index('NAME').transpose()
#rename geographies dict
df = df.rename(columns = {'Allen County, Kentucky': 'Allen County, KY', 'Cheatham County, Tennessee': 'Cheatham County', 
                              'Davidson County, Tennessee': 'Davidson County', 'Dickson County, Tennessee': 'Dickson County', 
                              'Houston County, Tennessee': 'Houston County', 'Humphreys County, Tennessee': 'Humphreys County', 
                              'Maury County, Tennessee': 'Maury County', 'Montgomery County, Tennessee': 'Montgomery County', 
                              'Robertson County, Tennessee': 'Robertson County', 'Rutherford County, Tennessee': 'Rutherford County', 
                              'Simpson County, Kentucky': 'Simpson County, KY', 'Stewart County, Tennessee': 'Stewart County', 
                              'Sumner County, Tennessee': 'Sumner County', 'Trousdale County, Tennessee': 'Trousdale County', 
                              'Williamson County, Tennessee': 'Williamson County', 'Wilson County, Tennessee': 'Wilson County', 
                              'Adams city, Tennessee': 'Adams', 'Ashland City town, Tennessee': 'Ashland City', 'Belle Meade city, Tennessee': 'Belle Meade', 
                              'Berry Hill city, Tennessee': 'Berry Hill', 'Brentwood city, Tennessee': 'Brentwood', 'Burns town, Tennessee': 'Burns', 
                              'Cedar Hill city, Tennessee': 'Cedar Hill', 'Charlotte town, Tennessee': 'Charlotte', 'Clarksville city, Tennessee': 'Clarksville', 
                              'Columbia city, Tennessee': 'Columbia', 'Coopertown town, Tennessee': 'Coopertown', 'Cross Plains city, Tennessee': 'Cross Plains', 
                              'Cumberland City town, Tennessee': 'Cumberland City', 'Dickson city, Tennessee': 'Dickson', 'Dover city, Tennessee': 'Dover', 
                              'Eagleville city, Tennessee': 'Eagleville', 'Erin city, Tennessee': 'Erin', 'Fairview city, Tennessee': 'Fairview', 
                              'Forest Hills city, Tennessee': 'Forest Hills', 'Franklin city, Tennessee': 'Franklin', 'Gallatin city, Tennessee': 'Gallatin', 
                              'Goodlettsville city, Tennessee': 'Goodlettsville', 'Greenbrier town, Tennessee': 'Greenbrier', 
                              'Hendersonville city, Tennessee': 'Hendersonville', 'Kingston Springs town, Tennessee': 'Kingston Springs', 
                              'La Vergne city, Tennessee': 'La Vergne', 'Lafayette city, Tennessee': 'Lafayette', 'Lebanon city, Tennessee': 'Lebanon', 
                              'McEwen city, Tennessee': 'McEwen', 'Millersville city, Tennessee': 'Millersville', 'Mitchellville city, Tennessee': 'Mitchellville', 
                              'Mount Juliet city, Tennessee': 'Mount Juliet', 'Mount Pleasant city, Tennessee': 'Mount Pleasant', 
                              'Murfreesboro city, Tennessee': 'Murfreesboro', 'Nashville-Davidson metropolitan government (balance)': 'Nashville', 
                              'New Johnsonville city, Tennessee': 'New Johnsonville', 'Nolensville town, Tennessee': 'Nolensville', 
                              'Oak Hill city, Tennessee': 'Oak Hill', 'Pegram town, Tennessee': 'Pegram', 'Pleasant View city, Tennessee': 'Pleasant View', 
                              'Portland city, Tennessee': 'Portland', 'Ridgetop city, Tennessee': 'Ridgetop', 'Slayden town, Tennessee': 'Slayden', 
                              'Smyrna town, Tennessee': 'Smyrna', 'Spring Hill city, Tennessee': 'Spring Hill', 'Springfield city, Tennessee': 'Springfield', 
                              'Tennessee Ridge town, Tennessee': 'Tennessee Ridge', "Thompson's Station town, Tennessee": "Thompson's Station", 
                              'Vanleer town, Tennessee': 'Vanleer', 'Watertown city, Tennessee': 'Watertown', 'Waverly city, Tennessee': 'Waverly', 
                              'Westmoreland town, Tennessee': 'Westmoreland', 'White Bluff town, Tennessee': 'White Bluff', 
                              'White House city, Tennessee': 'White House', 'Franklin city, Kentucky': 'Franklin, KY', 
                              'Scottsville city, Kentucky': 'Scottsville, KY', 'United States': 'US'})
df = df.transpose().reset_index(drop = False)

In [23]:
df.head()

Unnamed: 0,NAME,Year,Average Rent,Home Value,GEO_ID
0,Murfreesboro,2015,1235.929405,184979.583333,1600000US4751560
1,Murfreesboro,2016,1302.142637,206002.416667,1600000US4751560
2,Murfreesboro,2017,1346.910339,228989.416667,1600000US4751560
3,Murfreesboro,2018,1403.296663,249646.833333,1600000US4751560
4,Murfreesboro,2019,1465.377527,261084.416667,1600000US4751560


In [22]:
df.to_feather('../../Outputs/Zillow')