In [1]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

import matplotlib
import seaborn as sns
%matplotlib inline
from matplotlib.pylab import rcParams
plt.style.use('fivethirtyeight')

from fbprophet import Prophet as proph

In [14]:
new_df = pd.read_csv('Zip_Zhvi_SingleFamilyResidence.csv', encoding = 'unicode_escape')
new_df.head()

Unnamed: 0,RegionID,RegionName,City,State,Metro,CountyName,SizeRank,1996-04,1996-05,1996-06,...,2018-10,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07
0,84654,60657,Chicago,IL,Chicago-Naperville-Elgin,Cook County,1,337200.0,338200.0,339000.0,...,1050700,1049700,1050800,1055800,1061200,1070000,1083000,1086000,1072000,1056500
1,91982,77494,Katy,TX,Houston-The Woodlands-Sugar Land,Harris County,2,210400.0,212200.0,212200.0,...,336700,335900,336000,335600,334200,333400,333500,333500,331800,330100
2,84616,60614,Chicago,IL,Chicago-Naperville-Elgin,Cook County,3,502900.0,504900.0,506300.0,...,1319300,1320800,1325400,1331900,1338800,1345000,1351100,1349700,1337100,1323500
3,91940,77449,Katy,TX,Houston-The Woodlands-Sugar Land,Harris County,4,95400.0,95600.0,95800.0,...,179300,180200,181000,182100,183400,183100,182000,181800,183300,184300
4,93144,79936,El Paso,TX,El Paso,El Paso County,5,77300.0,77300.0,77300.0,...,126400,126900,127600,128200,128600,128900,128700,128500,129000,129800


In [15]:
def get_datetimes(df):
    return pd.to_datetime(df.columns.values[1:], format='%Y-%m')

In [16]:
def melt_data(df):
    mdf = pd.melt(df, id_vars=['RegionName', 'City', 'Metro', 'State', 'CountyName', 'SizeRank'], 
                 value_vars = df.loc[:, '2012-01':'2019-07'], value_name = 'Value', var_name = 'Date')
    mdf_cleaned = mdf.dropna(subset=['Value', 'Metro'])
    return mdf_cleaned

In [17]:
df = melt_data(new_df)
df.head()

Unnamed: 0,RegionName,City,Metro,State,CountyName,SizeRank,Date,Value
0,60657,Chicago,Chicago-Naperville-Elgin,IL,Cook County,1,2012-01,739600.0
1,77494,Katy,Houston-The Woodlands-Sugar Land,TX,Harris County,2,2012-01,248800.0
2,60614,Chicago,Chicago-Naperville-Elgin,IL,Cook County,3,2012-01,931300.0
3,77449,Katy,Houston-The Woodlands-Sugar Land,TX,Harris County,4,2012-01,116000.0
4,79936,El Paso,El Paso,TX,El Paso County,5,2012-01,112400.0


In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1290042 entries, 0 to 1433431
Data columns (total 8 columns):
RegionName    1290042 non-null int64
City          1290042 non-null object
Metro         1290042 non-null object
State         1290042 non-null object
CountyName    1290042 non-null object
SizeRank      1290042 non-null int64
Date          1290042 non-null object
Value         1290042 non-null float64
dtypes: float64(1), int64(2), object(5)
memory usage: 88.6+ MB


In [19]:
df.isna().sum()

RegionName    0
City          0
Metro         0
State         0
CountyName    0
SizeRank      0
Date          0
Value         0
dtype: int64

In [20]:
df.Date = pd.to_datetime(df.Date)

In [29]:
df_sr = df.SizeRank.quantile(q=0.01)
df_topone = df.loc[df['SizeRank']< df_sr]

In [30]:
df_topone.shape

(12880, 8)

## Prophet

In [22]:
def subset_zipcode(zipcode, df):
    df_12 = df.copy()
    df_12 = df_12[df_12['RegionName'] == zipcode]
    df_12.drop(columns=['RegionName'], inplace=True)
    df_12.rename(columns={'Date': 'ds', 'Value': 'y'}, inplace=True)
    return df_12

In [37]:
Model = proph(interval_width=0.95) 
date_value_df = subset_zipcode(zipcode, df_topone)
Model.fit(date_value_df)
future_dates = Model.make_future_dataframe(periods=12, freq='MS')
forecast = Model.predict(future_dates).tail(12)

INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [38]:
forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
67,2019-08-01,455839.410215,440667.769138,465228.940558,455702.695527,455973.266932,-2898.960468,-2898.960468,-2898.960468,-2898.960468,-2898.960468,-2898.960468,0.0,0.0,0.0,452940.449747
68,2019-09-01,459919.898783,442695.076714,468421.496565,459493.072451,460314.069984,-4359.935533,-4359.935533,-4359.935533,-4359.935533,-4359.935533,-4359.935533,0.0,0.0,0.0,455559.96325
69,2019-10-01,463868.758688,446929.529963,472906.884068,462994.137288,464610.737961,-3581.522776,-3581.522776,-3581.522776,-3581.522776,-3581.522776,-3581.522776,0.0,0.0,0.0,460287.235913
70,2019-11-01,467949.247257,454197.010118,480568.370018,466583.039149,469056.586355,-863.121303,-863.121303,-863.121303,-863.121303,-863.121303,-863.121303,0.0,0.0,0.0,467086.125954
71,2019-12-01,471898.107162,461799.67599,487303.679672,470119.05859,473418.487067,2320.911025,2320.911025,2320.911025,2320.911025,2320.911025,2320.911025,0.0,0.0,0.0,474219.018187
72,2020-01-01,475978.59573,467797.776672,492629.177529,473642.453056,477929.195822,4209.176419,4209.176419,4209.176419,4209.176419,4209.176419,4209.176419,0.0,0.0,0.0,480187.772149
73,2020-02-01,480059.084299,469883.156699,496920.94136,477140.039612,482489.548236,3638.515154,3638.515154,3638.515154,3638.515154,3638.515154,3638.515154,0.0,0.0,0.0,483697.599453
74,2020-03-01,483876.315541,474094.634545,500885.34113,480446.747976,486756.727975,3753.038699,3753.038699,3753.038699,3753.038699,3753.038699,3753.038699,0.0,0.0,0.0,487629.35424
75,2020-04-01,487956.804109,478744.532462,504574.616196,483964.746917,491328.297669,3960.269918,3960.269918,3960.269918,3960.269918,3960.269918,3960.269918,0.0,0.0,0.0,491917.074027
76,2020-05-01,491905.664014,480638.501526,507374.239134,487360.508842,495892.841589,2142.640663,2142.640663,2142.640663,2142.640663,2142.640663,2142.640663,0.0,0.0,0.0,494048.304677


In [40]:
def prophet_m(zipcode, df):   
    Model = proph(interval_width=0.95) 
    date_value_df = subset_zipcode(zipcode, df)
    Model.fit(date_value_df)
    future_dates = Model.make_future_dataframe(periods=12, freq='MS')
    forecast = Model.predict(future_dates).tail(12)
    
    
    ds = forecast['ds'].iloc[0]
    y_hat = forecast['yhat'].values[0]
    y_hat_lower = forecast['yhat_lower'].values[0]
    y_hat_upper = forecast['yhat_upper'].values[0]
    current_price = date_value_df['y'].iloc[-1]

    return (ds, y_hat, y_hat_lower, y_hat_upper, current_price)

In [41]:
zipcodes = df_topone['RegionName'].unique()
# zipcodes = [60657, 75070]
predictions = []
for zipcode in zipcodes:
    ds, y_hat, y_hat_lower, y_hat_upper, current_price = prophet_m(zipcode, df_topone) #(current $, predicted $, upper, lower)
    predictions.append({'zip': zipcode,
                        'ds': ds,
                        'y_hat': y_hat,
                        'y_hat_lower': y_hat_lower,
                        'y_hat_upper': y_hat_upper,
                        'current_price': current_price})


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seaso

INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seaso

INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seaso

INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seaso

In [42]:
pred_df = pd.DataFrame(predictions)

In [43]:
pred_df['diff'] = pred_df['y_hat'] - pred_df['current_price'] 

In [44]:
pred_df.sort_values(by='diff', ascending=False)

Unnamed: 0,current_price,ds,y_hat,y_hat_lower,y_hat_upper,zip,diff
83,3031000.0,2019-08-01,3.379698e+06,3.262882e+06,3.497639e+06,33139,348697.971025
9,3760500.0,2019-08-01,4.014548e+06,3.912970e+06,4.108201e+06,94109,254048.299445
24,6906400.0,2019-08-01,7.100881e+06,6.253292e+06,8.060487e+06,10128,194481.481213
79,1523800.0,2019-08-01,1.664609e+06,1.593588e+06,1.731272e+06,94110,140809.376222
99,1435100.0,2019-08-01,1.550155e+06,1.468095e+06,1.635725e+06,11211,115054.571453
49,1224100.0,2019-08-01,1.330729e+06,1.281545e+06,1.379357e+06,11230,106629.429118
23,9409000.0,2019-08-01,9.510136e+06,9.195908e+06,9.819696e+06,10011,101135.558215
57,1032400.0,2019-08-01,1.121218e+06,1.083944e+06,1.156396e+06,11221,88818.003397
82,1361600.0,2019-08-01,1.433928e+06,1.403754e+06,1.461410e+06,90034,72328.187557
32,1841900.0,2019-08-01,1.881785e+06,1.859023e+06,1.902485e+06,90046,39885.476333


In [45]:
pred_df.to_csv('prediction_home.csv')

In [46]:
!ls

LICENSE
README.md
Untitled.ipynb
Zip_Zhvi_SingleFamilyResidence.csv
mod_4_starter_notebook-Copy1.ipynb
mod_4_starter_notebook-JungmoKim_EricaHo.ipynb
prediction_home.csv
prediction_home_2020_7.csv
prophet_model.ipynb
zillow_data.csv


In [47]:
!pwd

/Users/Erica/flatiron/mod_4/home_value_prediction/home_value_prediction
