In [10]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

import matplotlib
import seaborn as sns
%matplotlib inline
from matplotlib.pylab import rcParams
plt.style.use('fivethirtyeight')

from fbprophet import Prophet as proph

In [2]:
# Load the raw table from 'zillow_data.csv' (path is relative to the working directory).
zipcodes_df = pd.read_csv('zillow_data.csv')

In [3]:
# Label-based column slice from '1996-04' through '2018-04' (both endpoints included).
# NOTE(review): yearly_df is not referenced again in the visible cells -- confirm it is still needed.
yearly_df = zipcodes_df.loc[:, '1996-04':'2018-04']

In [4]:
def get_datetimes(df):
    """Parse every column label of ``df`` except the first as a ``%Y-%m`` date.

    Args:
        df: DataFrame whose column labels, from the second one onward,
            are strings in ``YYYY-MM`` form.

    Returns:
        The parsed labels as returned by ``pd.to_datetime``, in column order.
    """
    month_labels = df.columns.values[1:]  # the first label is skipped, not parsed
    parsed_months = pd.to_datetime(month_labels, format='%Y-%m')
    return parsed_months

In [6]:
def melt_data(df, start='1996-04', end='2018-04'):
    """Reshape the wide table into long format, one row per region and date column.

    Args:
        df: Wide DataFrame containing the identifier columns 'RegionName',
            'City', 'Metro', 'State', 'CountyName' and 'SizeRank', plus one
            column per date label.
        start: Label of the first date column to melt (inclusive).
        end: Label of the last date column to melt (inclusive).

    Returns:
        A new DataFrame with the identifier columns plus 'Date' (the original
        column label, still a string) and 'Value'. Rows with a missing
        'Value' or 'Metro' are dropped. ``df`` itself is not modified.
    """
    id_columns = ['RegionName', 'City', 'Metro', 'State', 'CountyName', 'SizeRank']
    # Pass explicit column labels to melt rather than a DataFrame slice, so the
    # call does not depend on how a DataFrame happens to iterate.
    date_columns = list(df.loc[:, start:end].columns)
    long_df = pd.melt(
        df,
        id_vars=id_columns,
        value_vars=date_columns,
        var_name='Date',
        value_name='Value',
    )
    return long_df.dropna(subset=['Value', 'Metro'])

In [7]:
# Convert the wide table to long format: one row per region and date column.
df = melt_data(zipcodes_df)
# df.head()

In [8]:
# Inspect column dtypes, row count and memory usage of the melted frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3508681 entries, 0 to 3901594
Data columns (total 8 columns):
RegionName    int64
City          object
Metro         object
State         object
CountyName    object
SizeRank      int64
Date          object
Value         float64
dtypes: float64(1), int64(2), object(5)
memory usage: 240.9+ MB


In [9]:
# Count the missing values remaining in each column.
df.isna().sum()

RegionName    0
City          0
Metro         0
State         0
CountyName    0
SizeRank      0
Date          0
Value         0
dtype: int64

In [12]:
# Parse the 'YYYY-MM' labels produced by melt_data into Timestamps. Bracket
# assignment is used because attribute-style assignment only works when the
# column already exists; the explicit format matches get_datetimes.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m')
# Date-indexed copy of the melted data (set_index returns a new frame).
df_since1996 = df.set_index('Date')

## Dataframe

In [15]:
# Keep only the columns the forecasting helpers use. 'Date' intentionally stays
# a regular column (subset_zipcode renames it to 'ds'), so the former
# `df_1.set_index('Date')` call -- whose result was discarded -- is removed.
df_1 = df[['RegionName', 'Date', 'Value']]
df_1.head()

Unnamed: 0,RegionName,Date,Value
0,60657,1996-04-01,334200.0
1,75070,1996-04-01,235700.0
2,77494,1996-04-01,210400.0
3,60614,1996-04-01,498100.0
4,79936,1996-04-01,77300.0


In [72]:
# Last 'Value' entry of df_1 by row position (not selected by date).
df_1['Value'].iloc[-1]

357200.0

## Prophet

In [90]:
def subset_zipcode(zipcode, df):
    """Return the rows of ``df`` for one zip code with Prophet-style column names.

    Args:
        zipcode: Value compared for equality against ``df['RegionName']``.
        df: DataFrame with 'RegionName', 'Date' and 'Value' columns. It is
            not modified.

    Returns:
        A new DataFrame without 'RegionName', in which 'Date' is renamed to
        'ds' and 'Value' to 'y'. Row order and index labels of the matching
        rows are preserved; the result is empty when nothing matches.
    """
    # Filter first so only the matching rows are duplicated, instead of
    # copying the whole input frame and then discarding most of it.
    matching_rows = df.loc[df['RegionName'] == zipcode]
    return (
        matching_rows
        .drop(columns=['RegionName'])
        .rename(columns={'Date': 'ds', 'Value': 'y'})
    )


In [91]:
def prophet_m(zipcode, df, periods=12, interval_width=0.95):
    """Fit a Prophet model to one zip code and report the end-of-horizon forecast.

    Args:
        zipcode: Zip code to model; forwarded to ``subset_zipcode``.
        df: Long-format frame forwarded to ``subset_zipcode``.
        periods: Number of month-start ('MS') steps to append after the
            observed history before predicting.
        interval_width: Width of the uncertainty interval passed to Prophet.

    Returns:
        Tuple ``(ds, y_hat, y_hat_lower, y_hat_upper, current_price)`` where
        the first four values come from the last row of the forecast and
        ``current_price`` is the observed 'y' at the latest 'ds'.

    Raises:
        ValueError: If ``df`` contains no rows for ``zipcode``.
    """
    date_value_df = subset_zipcode(zipcode, df)
    if date_value_df.empty:
        # Fail with the offending zip code rather than an error from inside the fit.
        raise ValueError('No rows found for zipcode {!r}'.format(zipcode))

    model = proph(interval_width=interval_width)
    model.fit(date_value_df)
    future_dates = model.make_future_dataframe(periods=periods, freq='MS')
    # Only the final forecast row (the end of the horizon) is reported.
    forecast = model.predict(future_dates).tail(1)

    ds = forecast['ds'].iloc[0]
    y_hat = forecast['yhat'].values[0]
    y_hat_lower = forecast['yhat_lower'].values[0]
    y_hat_upper = forecast['yhat_upper'].values[0]
    # Select the most recent observation by date, not by row position, so the
    # result does not depend on the caller's row ordering. The stable sort
    # keeps the positionally-last row when dates tie.
    current_price = date_value_df.sort_values('ds', kind='mergesort')['y'].iloc[-1]

    return (ds, y_hat, y_hat_lower, y_hat_upper, current_price)

# Full run over every distinct zip code, left disabled in favour of the two-code sample below:
#zipcodes = df_1['RegionName'].unique()
zipcodes = [60657, 75070]
predictions = []
for zipcode in zipcodes:
    # prophet_m returns (forecast date, predicted $, lower bound, upper bound, current $).
    ds, y_hat, y_hat_lower, y_hat_upper, current_price = prophet_m(zipcode, df_1)
    # One record per zip code; the list is turned into a DataFrame in a later cell.
    predictions.append({'zip': zipcode,
                        'ds': ds,
                        'y_hat': y_hat,
                        'y_hat_lower': y_hat_lower,
                        'y_hat_upper': y_hat_upper,
                        'current_price': current_price})


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [87]:
# Build a table from the forecast records, one row per zip code.
# NOTE(review): this rebinds `df`, which earlier cells use for the melted price
# data; re-running those cells after this one would operate on the predictions
# table instead. Consider a distinct name.
df = pd.DataFrame(predictions)

In [88]:
# Forecast change per zip code: predicted value minus the current price.
df['diff'] = df['y_hat'] - df['current_price'] 

In [89]:
# Rank zip codes by forecast change, largest first (returns a sorted copy for display).
df.sort_values(by='diff', ascending=False)



Unnamed: 0,current_price,ds,y_hat,y_hat_lower,y_hat_upper,zip,diff
0,1030600.0,2019-04-01,1078485.0,1042142.0,1114831.0,60657,47884.655975
1,321800.0,2019-04-01,349842.3,337253.9,361733.3,75070,28042.334751


In [None]:
# Scratch note (was an unexecuted cell with invalid dict syntax). Record layout
# appended to `predictions` for each zip code:
#   {'zip': zipcode, 'ds': ds, 'y_hat': y_hat, 'y_hat_lower': y_hat_lower,
#    'y_hat_upper': y_hat_upper, 'current_price': current_price}

In [56]:
# prophet_m stores 'ds' as a single Timestamp, so it is read directly; calling
# .iloc on it would raise AttributeError.
predictions[0]['ds']

Timestamp('2019-04-01 00:00:00')