In [251]:
import pdb
import json
import folium
import warnings
import numpy as np
import pandas as pd
import geopandas as gpd

warnings.filterwarnings("ignore")

Zillow Home Value Index (ZHVI): A smoothed, seasonally adjusted measure of the median estimated home value across a given region and housing type.

https://www.zillow.com/research/data/

In [265]:
geo_df = gpd.read_file("data/dc_zip_geo.json")
geo_df.rename(columns={"ZCTA5CE10": "zipcode"}, inplace=True)
geo_df.zipcode.astype(str)
geo_df = geo_df[["zipcode", 'geometry']]
geo_df.head()

Unnamed: 0,zipcode,geometry
0,20024,"(POLYGON ((-77.064251 38.888956, -77.064311 38..."
1,20004,"(POLYGON ((-77.031963 38.897349, -77.031955 38..."
2,20566,"POLYGON ((-77.05487100000001 38.897347, -77.05..."
3,20535,"POLYGON ((-77.02397499999999 38.893877, -77.02..."
4,20551,"POLYGON ((-77.044938 38.892101, -77.0466760000..."


In [235]:
with open("data/dc_zip_geo.json") as jsonFile:
    geojson = json.load(jsonFile)

In [236]:
dc_zips = []
for feat in geojson['features']:
    dc_zips.append(feat["properties"]["ZCTA5CE10"])

In [263]:
zip_df = pd.read_csv("data/Zip_Zhvi_AllHomes.csv", encoding='latin')
zip_df.rename(columns={"RegionName": "zipcode"}, inplace=True)
zip_df.zipcode = zip_df.zipcode.astype(str)
zip_df.head()

Unnamed: 0,RegionID,zipcode,City,State,Metro,CountyName,SizeRank,1996-04,1996-05,1996-06,...,2018-10,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07
0,61639,10025,New York,NY,New York-Newark-Jersey City,New York County,1,168300.0,168400.0,168400.0,...,1072100,1066500,1054600,1032700,1017300,1011200,991800,974900,970400,969800
1,84654,60657,Chicago,IL,Chicago-Naperville-Elgin,Cook County,2,160000.0,161000.0,161700.0,...,350700,352700,353400,353500,353800,353400,351600,348700,346500,346100
2,61637,10023,New York,NY,New York-Newark-Jersey City,New York County,3,353100.0,353700.0,354200.0,...,1391500,1383600,1378200,1373300,1373500,1366200,1359900,1370300,1397800,1412700
3,91982,77494,Katy,TX,Houston-The Woodlands-Sugar Land,Harris County,4,210400.0,212200.0,212200.0,...,336700,335900,336000,335600,334200,333400,333500,333500,331800,330100
4,84616,60614,Chicago,IL,Chicago-Naperville-Elgin,Cook County,5,194400.0,196100.0,197300.0,...,429900,431500,432700,433200,433000,431700,429700,427300,424300,422100


In [436]:
merged_geo = geo_df.merge(zip_df, on='zipcode', how='inner')
merged_geo

Unnamed: 0,zipcode,geometry,RegionID,City,State,Metro,CountyName,SizeRank,1996-04,1996-05,...,2018-10,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07
0,20024,"(POLYGON ((-77.064251 38.888956, -77.064311 38...",66146,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,5019,85300.0,85500.0,...,405300,407200,409200,411000,413300,414700,414100,411900,411200,411200
1,20004,"(POLYGON ((-77.031963 38.897349, -77.031955 38...",66128,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,12514,,,...,463600,465300,466200,465600,463900,464000,466000,466500,466200,466700
2,20011,"POLYGON ((-77.013606 38.968233, -77.0117730000...",66135,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,189,113700.0,114200.0,...,617800,624100,628200,629700,628300,621500,607200,593100,587200,586100
3,20018,"POLYGON ((-76.963447 38.935202, -76.963382 38....",66141,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,5350,113500.0,113400.0,...,535200,536000,535500,531800,528800,526000,518400,509100,502900,499300
4,20037,"POLYGON ((-77.064643 38.891715, -77.064696 38....",66155,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,5768,142800.0,142400.0,...,529400,529900,531400,532500,525000,512700,504700,503600,504500,506200
5,20010,"(POLYGON ((-77.055403 38.936371, -77.05646 38....",66134,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,2599,102400.0,103000.0,...,714700,716500,715000,710800,707700,706900,703000,697400,696800,699500
6,20016,"POLYGON ((-77.074592 38.957021, -77.0745030000...",66139,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,2077,267600.0,268800.0,...,974000,979800,982500,984400,986300,985900,980400,972900,969600,969300
7,20015,"POLYGON ((-77.082977 38.96294, -77.082168 38.9...",66138,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,6471,296000.0,296200.0,...,1019900,1028700,1036200,1042700,1049000,1058300,1068900,1067000,1050700,1036500
8,20005,"POLYGON ((-77.036542 38.902525, -77.036541 38....",66129,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,5274,,,...,503000,503000,503100,502700,502200,502100,501200,498900,498300,499000
9,20017,"POLYGON ((-76.98264 38.931967, -76.982563 38.9...",66140,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,5474,113900.0,114200.0,...,543900,545300,547800,548300,545100,535800,522200,511700,509000,508300


In [437]:
def format_dates(df):
    """
    takes the merged geo_df and return a new df with average median price per year in each zipcode
    """
    
    zipcodes = df.zipcode
    columns = df.columns
    collection = []
    for z in zipcodes:
        current_df = df[df.zipcode == z]
        for year in range(1996, 2020):
            year = str(year)
            date_cols = [x for x in columns if year in x]
            
            year_average = round(np.mean(current_df[date_cols].T.iloc[:,0]), 2)
            collection.append({"zipcode": z, year: year_average})
    return df.merge(gpd.GeoDataFrame(collection).groupby('zipcode').mean(), on='zipcode', how='left')

final_geo_df = format_dates(merged_geo)

In [438]:
final_geo_df

Unnamed: 0,zipcode,geometry,RegionID,City,State,Metro,CountyName,SizeRank,1996-04,1996-05,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,20024,"(POLYGON ((-77.064251 38.888956, -77.064311 38...",66146,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,5019,85300.0,85500.0,...,273041.67,278566.67,287216.67,309175.0,325925.0,351275.0,364650.0,392191.67,402366.67,412485.71
1,20004,"(POLYGON ((-77.031963 38.897349, -77.031955 38...",66128,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,12514,,,...,378366.67,391625.0,416708.33,436658.33,447866.67,458375.0,456666.67,463733.33,460566.67,465557.14
2,20011,"POLYGON ((-77.013606 38.968233, -77.0117730000...",66135,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,189,113700.0,114200.0,...,301933.33,301775.0,321608.33,370591.67,426933.33,457216.67,502725.0,560908.33,606083.33,607585.71
3,20018,"POLYGON ((-76.963447 38.935202, -76.963382 38....",66141,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,5350,113500.0,113400.0,...,280216.67,278950.0,290091.67,330066.67,366633.33,407050.0,448591.67,508533.33,532941.67,516614.29
4,20037,"POLYGON ((-77.064643 38.891715, -77.064696 38....",66155,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,5768,142800.0,142400.0,...,407358.33,407925.0,429341.67,451391.67,493850.0,494591.67,486025.0,504725.0,524300.0,512742.86
5,20010,"(POLYGON ((-77.055403 38.936371, -77.05646 38....",66134,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,2599,102400.0,103000.0,...,397975.0,402583.33,424166.67,482733.33,540641.67,602458.33,638766.67,666833.33,701075.0,703157.14
6,20016,"POLYGON ((-77.074592 38.957021, -77.0745030000...",66139,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,2077,267600.0,268800.0,...,759766.67,754316.67,771108.33,821050.0,847658.33,884025.0,915916.67,945216.67,964350.0,978400.0
7,20015,"POLYGON ((-77.082977 38.96294, -77.082168 38.9...",66138,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,6471,296000.0,296200.0,...,811150.0,807341.67,827716.67,859075.0,880375.0,927875.0,962691.67,990991.67,997341.67,1053300.0
8,20005,"POLYGON ((-77.036542 38.902525, -77.036541 38....",66129,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,5274,,,...,394150.0,405400.0,411041.67,437525.0,457091.67,471975.0,482108.33,499658.33,507308.33,500628.57
9,20017,"POLYGON ((-76.98264 38.931967, -76.982563 38.9...",66140,Washington,DC,Washington-Arlington-Alexandria,District of Columbia,5474,113900.0,114200.0,...,282600.0,288525.0,298441.67,330566.67,365133.33,414183.33,457775.0,506275.0,534016.67,525771.43


In [442]:
def make_maps(date_range=(1996, 2019), save=True)
    
    for date in range(date_range[0], date_range[1] + 1):
        date = str(date)
        
        colormap = folium.LinearColormap(colors=['#FFFFCC', '#D9F0A3', '#ADDD8E',
                                '#78C679', '#41AB5D', '#238443', '#005A32'], vmin=0.0, vmax=1_000_000, )

        def style_function(x):
            return {
                'weight': 0.5,
                'color': 'black',
                'fillColor': colormap(x['properties'][date]),
                'fillOpacity': 0.3
            }

        def highlight_function(x):
            return {
                'weight': 2,
                'color': 'red'
            }

        tooltip = folium.features.GeoJsonTooltip(
            fields=['zipcode', date],
            aliases=['Zipcode', f'{date} Median Home Price'],
            labels=True,
            sticky=True,
            localize=True
        )

        m = folium.Map(location=[ 38.9072, -77.0369], zoom_start=11, tiles="cartodbpositron")

        folium.GeoJson(
            data=final_geo_df[["zipcode", 'geometry', date]].dropna(),
            name="DC Zipcodes",
            style_function=style_function,
            highlight_function= highlight_function,
            smooth_factor=1.0,
            tooltip=tooltip
        ).add_to(m)

        colormap.add_to(m)
        folium.LayerControl().add_to(m)
        
        if save:
            m.save(f"map-{date}.html")
        else:
            return m
        

In [443]:

make_maps((1996, 2019))


In [347]:
dc_data = zip_df[zip_df.RegionName.isin(dc_zips)]
dc_data.rename(columns={"RegionName": "zipcode"}, inplace=True)
dc_data.iloc[:, 1] = dc_data['zipcode'].astype(str)

dc_data.drop(["RegionID", "City", "State", "Metro", "CountyName", "SizeRank"], axis=1, inplace=True)

# dc_data.set_index("zipcode", inplace=True)

AttributeError: 'DataFrame' object has no attribute 'RegionName'

In [239]:
formatted_data = []

for col in dc_data.columns[1:]:
    for z in dc_data.zipcode:        
        at_zip = dc_data[col][dc_data.zipcode == z]
        value = at_zip[at_zip.index[0]]
        
        formatted_data.append({'date': col, 'zipcode': z, "price": value})
        
dc_reformat_data = pd.DataFrame(formatted_data)

In [240]:
dc_reformat_data.date = pd.to_datetime(dc_reformat_data.date)
dc_reformat_data.set_index("date", inplace=True)


In [250]:
dc_no_date = dc_reformat_data.groupby('zipcode').mean()
dc_data = dc_no_date.reset_index()
dc_data.head()

Unnamed: 0,zipcode,price
0,20001,481578.756477
1,20002,331710.357143
2,20003,447822.142857
3,20004,416552.849741
4,20005,428826.943005


In [248]:
m = folium.Map(location=[ 38.9072, -77.0369], zoom_start=11, tiles="cartodbpositron")

dc_json = "https://raw.githubusercontent.com/OpenDataDE/State-zip-code-GeoJSON/master/dc_district_of_columbia_zip_codes_geo.min.json"

# folium.Choropleth(
#     geo_data=dc_json,
#     name="DC",
#     data=dc_data,
#     columns=["zipcode", "price"],
#     key_on="feature.properties.ZCTA5CE10",
#     fill_color="YlGn",
#     nan_fill_color="black",
#     nan_fill_opacity=0.2,
#     fill_opacity=0.5,
#     line_opacity=0.4,
#     legend_name='Average Home Price',
# ).add_to(m)

folium.features.GeoJson(
    
)

folium.LayerControl().add_to(m)

m

# m.save('index.html')