### Time series model to run Prophet
- Objectives: Run a forecast on the uploaded dataset and append the predictions to the DataFrame. The logic should be consolidated into a single Python function (in a .py file).

In [None]:
import pandas as pd
from prophet import Prophet
import numpy as np
from sklearn.metrics import mean_squared_error

# Load the mock dataset and convert its 'Date' column to datetimes in one
# chained expression.
df = pd.read_csv('final_mock_data.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)
# Notebook preview of the first three rows.
df.head(3)

### [IMPT!] Combined function

In [None]:
def prophet(from_date=None, to_date=None, time_period=4):
    """Forecast monthly revenue, ad spend and new accounts with Prophet.

    Reads 'final_mock_data.csv', optionally restricts it to a date range,
    sums each metric per month, fits one Prophet model per metric and
    returns only the forecast rows.

    Args:
        from_date: Optional inclusive lower bound on the 'Date' column
            (anything ``pd.Timestamp`` accepts). ``None`` applies no lower
            bound.
        to_date: Optional inclusive upper bound on the 'Date' column.
            ``None`` applies no upper bound.
        time_period: Number of future months to forecast. Defaults to 4.

    Returns:
        A DataFrame with columns 'ds', 'revenue', 'ad_spend' and
        'new_accounts', holding one row per forecast month.

    Raises:
        ValueError: If ``time_period`` is smaller than 1.
    """
    if time_period < 1:
        raise ValueError("time_period must be at least 1")

    df = pd.read_csv('final_mock_data.csv')
    df['Date'] = pd.to_datetime(df['Date'])

    # The date bounds are optional so that a bare prophet() call keeps
    # forecasting from the full dataset.
    if from_date is not None:
        df = df[df['Date'] >= pd.Timestamp(from_date)]
    if to_date is not None:
        df = df[df['Date'] <= pd.Timestamp(to_date)]

    # Monthly totals for every metric that gets forecast.
    df_grouped = df.resample("M", on='Date').agg({
        'ad_spend': 'sum',
        "new_accounts": "sum",
        "revenue": "sum",
        }).reset_index()

    def prophet_forecast(df_grouped, predict_col, time_period):
        """Fit Prophet on one metric and return its last `time_period` rows."""
        # Prophet expects the timestamp in 'ds' and the target in 'y'.
        df_prophet = df_grouped.rename(columns={'Date': 'ds', predict_col: 'y'})

        model = Prophet()
        model.fit(df_prophet)

        # The horizon follows time_period, so the slice below returns exactly
        # the future rows rather than a mix of fitted history and forecast.
        future = model.make_future_dataframe(periods=time_period, freq='M')

        forecast = model.predict(future)
        return forecast[['ds', 'yhat']].tail(time_period)

    # One independent model per metric.
    future_rev = prophet_forecast(df_grouped, 'revenue', time_period)
    future_ad = prophet_forecast(df_grouped, 'ad_spend', time_period)
    future_accounts = prophet_forecast(df_grouped, 'new_accounts', time_period)

    # All three snippets share the same index (same history, same horizon),
    # so the columns line up row by row.
    predictions = pd.DataFrame({
        'ds': future_rev['ds'],
        'revenue': future_rev['yhat'],
        'ad_spend': future_ad['yhat'],
        'new_accounts': future_accounts['yhat'],
    })

    return predictions

# Call prophet with two date strings, print the returned frame and write it
# to 'prophet_predictions.csv' without the index column.
# NOTE(review): this call passes two positional arguments, so it only works
# when the prophet definition in scope accepts a (from_date, to_date) pair —
# confirm which definition is active when this cell runs.
df2 = prophet('2022-01-01', '2024-09-30')
print(df2)
df2.to_csv('prophet_predictions.csv', index=False)


### Walk forward validation to test Prophet

In [None]:
# Walk-forward (expanding-window) validation of a Prophet revenue model that
# uses 'ad_spend' as an extra regressor.
initial_train_size = 12  # Use the first 12 months for initial training
test_window = 1          # Predict 1 month ahead in each iteration

mape_list = []  # MAPE for each step
rmse_list = []  # RMSE for each step
actuals = []
predictions = []

# Build the monthly totals here: the df_grouped created inside prophet() is
# local to that function and is not visible at module level.
df_grouped = df.resample("M", on='Date').agg({
    'ad_spend': 'sum',
    "new_accounts": "sum",
    "revenue": "sum",
}).reset_index()

# Prophet expects the timestamp in 'ds' and the target in 'y'.
df_prophet = df_grouped.rename(columns={'Date': 'ds', 'revenue': 'y'})

# Walk-forward loop: train on months [0, i), score months [i, i + test_window).
for i in range(initial_train_size, len(df_prophet) - test_window + 1):
    train = df_prophet.iloc[:i]
    test = df_prophet.iloc[i:i + test_window]

    # A fresh model per step so no state leaks between folds.
    model = Prophet()
    model.add_regressor('ad_spend')
    model.fit(train)

    # The regressor column must be supplied for the rows being predicted.
    future = test[['ds', 'ad_spend']]

    forecast = model.predict(future)
    # Score every row of the test window, not only the first one, so that
    # test_window > 1 is honoured. Results are unchanged for test_window = 1.
    y_pred = forecast['yhat'].to_numpy()
    y_true = test['y'].to_numpy()

    actuals.extend(y_true)
    predictions.extend(y_pred)

    # With a one-row window this per-step RMSE equals the absolute error.
    rmse_list.append(np.sqrt(mean_squared_error(y_true, y_pred)))

    # MAPE is undefined where the actual value is zero; skip those rows.
    nonzero = y_true != 0
    if nonzero.any():
        mape = np.mean(
            np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])
        ) * 100
    else:
        mape = np.nan

    mape_list.append(mape)

# Average the per-step metrics; nanmean ignores steps without a defined MAPE.
avg_rmse = np.mean(rmse_list)
avg_mape = np.nanmean(mape_list)

# Print results
print(f"Average RMSE: {avg_rmse:.2f}")
print(f"Average MAPE: {avg_mape:.2f}%") #14.32% with ad_spend, 22.22% without ad_spend

### Archive: Old function to combine into single DF

In [None]:
def prophet(from_date, to_date):
    """Return monthly history plus a 4-month Prophet forecast in one frame.

    Reads 'final_mock_data.csv', keeps rows whose 'Date' falls in the given
    range, sums each metric per month, fits one Prophet model per metric and
    appends the forecast months below the historical rows.

    Args:
        from_date: Start of the date range (passed to ``pd.date_range``).
        to_date: End of the date range (passed to ``pd.date_range``).

    Returns:
        A DataFrame with columns 'ds', 'revenue', 'ad_spend' and
        'new_accounts': the monthly totals followed by the forecast rows,
        on a fresh 0..n-1 index.
    """
    time_period = 4  # number of future months to forecast
    metrics = ['revenue', 'ad_spend', 'new_accounts']

    df = pd.read_csv('final_mock_data.csv')
    df['Date'] = pd.to_datetime(df['Date'])
    # date_range yields one midnight timestamp per day, so this keeps rows
    # whose 'Date' equals one of those days.
    df = df[df['Date'].isin(pd.date_range(start=from_date, end=to_date))]

    df_grouped = df.resample("M", on='Date').agg({
        'ad_spend': 'sum',
        "new_accounts": "sum",
        "revenue": "sum",
        }).reset_index()

    # rename() returns a new frame, so df_grouped keeps its 'Date' column and
    # no in-place edit is made on a column subset.
    history = df_grouped.rename(columns={'Date': 'ds'})[['ds'] + metrics]

    def prophet_forecast(df_grouped, predict_col, time_period):
        """Fit Prophet on one metric and return its last `time_period` rows."""
        # Prophet expects the timestamp in 'ds' and the target in 'y'.
        df_prophet = df_grouped.rename(columns={'Date': 'ds', predict_col: 'y'})

        model = Prophet()
        model.fit(df_prophet)

        future = model.make_future_dataframe(periods=time_period, freq='M')
        forecast = model.predict(future)
        return forecast[['ds', 'yhat']].tail(time_period)

    # One independent model per metric, all trained on the same months.
    forecasts = {
        col: prophet_forecast(df_grouped, col, time_period) for col in metrics
    }

    # Every forecast covers the same future months, so the dates come from
    # one of them and the values are aligned by position.
    future_rows = pd.DataFrame({'ds': forecasts['revenue']['ds'].to_numpy()})
    for col in metrics:
        future_rows[col] = forecasts[col]['yhat'].to_numpy()

    df_plotting = pd.concat([history, future_rows], ignore_index=True)

    return df_plotting

# Run the date-ranged forecast and display the combined table.
df2 = prophet(from_date='2022-01-01', to_date='2024-09-30')
print(df2)