In [78]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, r2_score
import json

In [79]:
def prepare_data_for_prophet(df, zipcode):
    """Extract one zip code's price history in Prophet's ``ds``/``y`` layout.

    Rows whose ``zipcode`` equals the given value are kept, ordered by
    ``YearMonth``, and reduced to two columns: ``YearMonth`` renamed to
    ``ds`` and ``AveragePrice`` renamed to ``y``. The original row index
    labels are preserved.
    """
    belongs_to_zip = df['zipcode'] == zipcode
    history = df.loc[belongs_to_zip, ['YearMonth', 'AveragePrice']]
    history = history.sort_values(by='YearMonth')
    return history.rename(columns={'YearMonth': 'ds', 'AveragePrice': 'y'})

In [80]:
# Fit Prophet model 
def fit_and_predict_prophet(data, steps=60, freq='M'):
    """Fit a Prophet model on ``data`` and forecast ``steps`` periods ahead.

    Parameters
    ----------
    data : pandas.DataFrame
        History in Prophet's input layout: a ``ds`` date column and a ``y``
        value column.
    steps : int, default 60
        Number of future periods to append after the last historical date.
    freq : str, default 'M'
        Pandas frequency alias used to space the future periods. The
        default keeps the previous hard-coded month-end spacing. Pass
        ``'MS'`` when the history is stamped at the start of each month so
        the future dates stay on the same grid as the history.

    Returns
    -------
    tuple
        ``(forecast, model)``. ``forecast`` holds the ``ds``, ``yhat``,
        ``yhat_lower`` and ``yhat_upper`` columns of the prediction made on
        the frame returned by ``make_future_dataframe`` (by default the
        historical dates followed by the ``steps`` future ones); ``model``
        is the fitted ``Prophet`` instance.
    """
    model = Prophet(yearly_seasonality=True, daily_seasonality=False)
    model.fit(data)
    future = model.make_future_dataframe(periods=steps, freq=freq)
    forecast = model.predict(future)
    return forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']], model

In [81]:
# Calculate ROI
def calculate_roi(initial_price, final_price):
    """Return the fractional return of moving from ``initial_price`` to ``final_price``.

    The result is the price change divided by the initial price, so 0.5
    means a 50% gain and -0.5 a 50% loss.
    """
    price_change = final_price - initial_price
    return price_change / initial_price

In [82]:
# Function to get scores for all zip codes 
def get_all_zipcode_scores(df, years=(1, 3, 5)):
    """Score every zip code with a Prophet forecast and save the results as JSON.

    For each distinct ``zipcode`` in ``df`` with at least 24 history rows, a
    Prophet model is fitted and, for every horizon in ``years``, the return
    on investment relative to the last observed price is computed from the
    point forecast and from its lower/upper bounds. In-sample MAE and R² of
    the fit are recorded alongside.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the ``zipcode``, ``YearMonth``, ``AveragePrice`` and
        ``Borough`` columns.
    years : iterable of int, default (1, 3, 5)
        Forecast horizons in years. Each horizon reads the forecast
        ``12 * year`` periods after the last observation.

    Returns
    -------
    None
        The list of per-zip-code result dicts is written to
        ``zipcode_scores_prophet.json`` in the working directory.
    """
    horizons = list(years)
    # Forecast far enough ahead to cover the longest requested horizon, and
    # never less than the 60 periods that were always forecast before.
    steps = max(60, 12 * max(horizons, default=0))

    results = []
    for zipcode in df['zipcode'].unique():
        zipcode_data = prepare_data_for_prophet(df, zipcode)
        if len(zipcode_data) < 24:
            # Fewer than 24 history rows: skip this zip code.
            continue

        current_price = zipcode_data['y'].iloc[-1]
        forecast, _ = fit_and_predict_prophet(zipcode_data, steps=steps)

        # The forecast frame holds the fitted history FOLLOWED BY the future
        # periods. Horizons must be indexed within the future part only;
        # indexing the whole frame would read back fitted historical values.
        observed = zipcode_data.assign(ds=pd.to_datetime(zipcode_data['ds']))
        is_future = forecast['ds'] > observed['ds'].max()
        future_forecast = forecast[is_future].reset_index(drop=True)

        roi_results = {}
        for year in horizons:
            horizon_idx = 12 * year - 1  # 0-based position of the year-th anniversary
            for suffix, column in (('', 'yhat'),
                                   ('_Lower', 'yhat_lower'),
                                   ('_Upper', 'yhat_upper')):
                roi_results[f'{year}Yr_ROI{suffix}'] = calculate_roi(
                    current_price, future_forecast[column].iloc[horizon_idx]
                )

        # Pair actuals with fitted values by date rather than by position so
        # the in-sample metrics stay aligned even if dates repeat.
        fitted = observed.merge(forecast[['ds', 'yhat']], on='ds', how='inner')
        mae = mean_absolute_error(fitted['y'], fitted['yhat'])
        r2 = r2_score(fitted['y'], fitted['yhat'])

        results.append({
            'zipcode': int(zipcode),
            'borough': df.loc[df['zipcode'] == zipcode, 'Borough'].iloc[0],
            'current_price': float(current_price),
            'MAE': float(mae),
            'R²': float(r2),
            # Cast to plain floats so json can serialise the values.
            **{key: float(value) for key, value in roi_results.items()}
        })

    # Save results to JSON
    with open('zipcode_scores_prophet.json', 'w') as f:
        json.dump(results, f, indent=4)

In [83]:
# Load the training data and turn the YearMonth column into real timestamps.
df = pd.read_csv('training_data.csv').assign(
    YearMonth=lambda frame: pd.to_datetime(frame['YearMonth'])
)

# Fit one model per zip code; the scores are written to disk by the function.
get_all_zipcode_scores(df)

19:42:26 - cmdstanpy - INFO - Chain [1] start processing
19:42:27 - cmdstanpy - INFO - Chain [1] done processing
19:42:27 - cmdstanpy - INFO - Chain [1] start processing
19:42:27 - cmdstanpy - INFO - Chain [1] done processing
19:42:28 - cmdstanpy - INFO - Chain [1] start processing
19:42:28 - cmdstanpy - INFO - Chain [1] done processing
19:42:29 - cmdstanpy - INFO - Chain [1] start processing
19:42:29 - cmdstanpy - INFO - Chain [1] done processing
19:42:29 - cmdstanpy - INFO - Chain [1] start processing
19:42:30 - cmdstanpy - INFO - Chain [1] done processing
19:42:30 - cmdstanpy - INFO - Chain [1] start processing
19:42:30 - cmdstanpy - INFO - Chain [1] done processing
19:42:31 - cmdstanpy - INFO - Chain [1] start processing
19:42:31 - cmdstanpy - INFO - Chain [1] done processing
19:42:32 - cmdstanpy - INFO - Chain [1] start processing
19:42:32 - cmdstanpy - INFO - Chain [1] done processing
19:42:32 - cmdstanpy - INFO - Chain [1] start processing
19:42:32 - cmdstanpy - INFO - Chain [1]