Prophet Model

## Install Libraries

In [1]:
!pip -q install prophet pandas matplotlib plotly


In [2]:
# Import the forecasting/plotting stack and report the versions of the two
# libraries whose behavior matters most for reproducing the results.
import prophet
import pandas as pd
import matplotlib
import plotly

for label, module in (("prophet:", prophet), ("pandas:", pd)):
    print(label, module.__version__)


prophet: 1.1.7
pandas: 2.2.2


## Load Dataset

In [None]:
# Prompt for a file upload into the notebook runtime.
# NOTE(review): `google.colab` is only importable inside Google Colab; when
# running elsewhere, skip this cell and place the CSV next to the notebook.
from google.colab import files
uploaded = files.upload()


In [None]:
import pandas as pd

# Read the cleaned weather data from the current working directory.
# Later cells use the 'date' and 'T' columns of this frame.
df = pd.read_csv("cleaned_weather.csv")

# Display the first 5 rows as a quick sanity check of the parsed columns
df.head()


## Prophet Forecast

In [None]:
# Prophet expects exactly two columns: 'ds' (timestamp) and 'y' (target).
# Select the date and temperature columns, rename them, and parse the
# timestamps into real datetimes in a single chained expression.
df_prophet = (
    df.loc[:, ['date', 'T']]
    .rename(columns={'date': 'ds', 'T': 'y'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

# Preview the prepared frame
df_prophet.head()


In [None]:
from prophet import Prophet

# Fit a default-configured Prophet model on the full history.
# `fit` returns the fitted model, so construction and fitting can be chained.
model = Prophet().fit(df_prophet)

# Extend the history by 30 daily steps beyond the last observed timestamp.
# NOTE(review): freq='D' assumes daily observations — confirm against the data.
future = model.make_future_dataframe(freq='D', periods=30)

# Predict over the history plus the 30 future steps
forecast = model.predict(future)

# Preview the point forecast together with its uncertainty bounds
forecast.loc[:, ['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head()


In [None]:
import matplotlib.pyplot as plt

# Draw Prophet's built-in forecast figure for the `forecast` frame and keep a
# handle to it in `fig1`.
fig1 = model.plot(forecast)
plt.show()


In [None]:
# Plot the components (trend and seasonality) that Prophet fitted, one panel
# per component, and keep a handle to the figure in `fig2`.
fig2 = model.plot_components(forecast)
plt.show()

In [None]:
# Hold-out evaluation of Prophet: fit on everything except the final 30 rows,
# forecast those 30 rows, and score the forecast against the actual values.
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Split data into train and test sets (last 30 rows for testing).
# The split is positional, so df_prophet is assumed to be in chronological order.
train = df_prophet[:-30]
test = df_prophet[-30:]

# Fit Prophet on training data only
model_eval = Prophet()
model_eval.fit(train)

# Forecast at exactly the held-out timestamps. Generating the dates with
# make_future_dataframe(periods=30, freq='D') only lines up with the test rows
# when the series is strictly daily with no gaps; with any other spacing the
# metrics below would compare predictions and actuals for different times.
future_eval = test[['ds']].reset_index(drop=True)
forecast_eval = model_eval.predict(future_eval)

# One forecast row per test row (Prophet returns rows ordered by 'ds')
predictions = forecast_eval

# Calculate metrics
mae = mean_absolute_error(test['y'], predictions['yhat'])
mse = mean_squared_error(test['y'], predictions['yhat'])
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

# Plot actual vs predicted, with Prophet's uncertainty band shaded
plt.figure(figsize=(12, 6))
plt.plot(test['ds'], test['y'], 'bo-', label='Actual')
plt.plot(predictions['ds'], predictions['yhat'], 'ro--', label='Predicted')
plt.fill_between(predictions['ds'], predictions['yhat_lower'], predictions['yhat_upper'], color='red', alpha=0.1)
plt.title('Actual vs Predicted Temperature (Test Set)')
plt.xlabel('Date')
plt.ylabel('Temperature')
plt.legend()
plt.grid(True)
plt.show()

## ARIMA Forecast

In [None]:
# Import required libraries for ARIMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt

# Check stationarity with ADF test
def check_stationarity(timeseries):
    """Run the Augmented Dickey-Fuller test on *timeseries* and print the result.

    Prints the test statistic, the p-value, and each critical value, followed
    by a verdict: the series is reported as stationary when the p-value is at
    most 0.05 and as not stationary otherwise. Nothing is returned.
    """
    # With autolag set, adfuller returns
    # (statistic, p-value, used lag, n obs, critical values, best IC).
    adf_stat, p_value, _used_lag, _n_obs, critical_values, *_ = adfuller(
        timeseries, autolag='AIC'
    )

    print('ADF Statistic:', adf_stat)
    print('p-value:', p_value)
    print('Critical Values:')
    for level, threshold in critical_values.items():
        print(f'\t{level}: {threshold}')

    print(
        "Conclusion: The series is stationary"
        if p_value <= 0.05
        else "Conclusion: The series is not stationary"
    )

# Check stationarity of the raw (undifferenced) temperature column 'y';
# the verdict is printed by check_stationarity.
check_stationarity(df_prophet['y'])

In [None]:
# Stack the ACF (top) and PACF (bottom) of the temperature series in one
# figure, each over the first 40 lags.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))

# Autocorrelation on the upper axes
plot_acf(df_prophet['y'], ax=ax1, lags=40)
ax1.set_title('Autocorrelation Function (ACF)')

# Partial autocorrelation on the lower axes
plot_pacf(df_prophet['y'], ax=ax2, lags=40, method='ywm')
ax2.set_title('Partial Autocorrelation Function (PACF)')

plt.tight_layout()
plt.show()

## Model Comparison

In [None]:
# ARIMA baseline, evaluated on the same positional hold-out as Prophet.
from statsmodels.tsa.arima.model import ARIMA

# Final 30 rows form the test set; everything before is training data
train_arima, test_arima = df_prophet[:-30], df_prophet[-30:]

# Specify an ARIMA(1, 1, 1) on the training temperatures, then estimate it
model_arima = ARIMA(train_arima['y'], order=(1, 1, 1))
model_arima_fit = model_arima.fit()

# Print the fitted model's summary table
print(model_arima_fit.summary())

# 30-step out-of-sample forecast, one step per held-out row
predictions_arima = model_arima_fit.forecast(steps=30)

# Error metrics against the held-out actuals
actual_arima = test_arima['y']
mae_arima = mean_absolute_error(actual_arima, predictions_arima)
mse_arima = mean_squared_error(actual_arima, predictions_arima)
rmse_arima = np.sqrt(mse_arima)

print(f"\nARIMA Model Performance:")
print(f"Mean Absolute Error (MAE): {mae_arima:.2f}")
print(f"Mean Squared Error (MSE): {mse_arima:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse_arima:.2f}")

# Actual vs ARIMA forecast over the test timestamps
plt.figure(figsize=(12, 6))
plt.plot(test_arima['ds'], actual_arima, 'bo-', label='Actual')
plt.plot(test_arima['ds'], predictions_arima, 'ro--', label='ARIMA Predicted')
plt.xlabel('Date')
plt.ylabel('Temperature')
plt.title('Actual vs ARIMA Predicted Temperature (Test Set)')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Side-by-side comparison of the Prophet and ARIMA hold-out results.
# Uses mae/mse/rmse, test and predictions from the Prophet evaluation cell and
# mae_arima/mse_arima/rmse_arima and predictions_arima from the ARIMA cell.

# Create a comparison table of error metrics
comparison_data = {
    'Model': ['Prophet', 'ARIMA'],
    'MAE': [mae, mae_arima],
    'MSE': [mse, mse_arima],
    'RMSE': [rmse, rmse_arima]
}

comparison_df = pd.DataFrame(comparison_data)
print("Model Performance Comparison:")
print(comparison_df.to_string(index=False))

# Plot both models' predictions against actual data
plt.figure(figsize=(14, 7))
plt.plot(test['ds'], test['y'], 'ko-', label='Actual', linewidth=2)
plt.plot(predictions['ds'], predictions['yhat'], 'b--', label='Prophet', linewidth=2)
# The ARIMA model was fitted on a column with a positional integer index, so
# its forecast is indexed by row number rather than by date. Draw it against
# the test timestamps so all three lines share the same date axis.
plt.plot(test['ds'], predictions_arima, 'r--', label='ARIMA', linewidth=2)
plt.title('Temperature Forecast Comparison: Prophet vs ARIMA')
plt.xlabel('Date')
plt.ylabel('Temperature')
plt.legend()
plt.grid(True)
plt.show()

# Relative error reduction of Prophet versus ARIMA, in percent of ARIMA's
# error. Positive means Prophet's error is lower; negative means it is higher.
improvement_mae = ((mae_arima - mae) / mae_arima) * 100
improvement_rmse = ((rmse_arima - rmse) / rmse_arima) * 100

print(f"\nPerformance Comparison:")
# Word each line according to the sign so a negative value is never reported
# as Prophet being "better".
for metric_name, gain in (("MAE", improvement_mae), ("RMSE", improvement_rmse)):
    if gain >= 0:
        print(f"Prophet is {gain:.2f}% better than ARIMA in terms of {metric_name}")
    else:
        print(f"Prophet is {abs(gain):.2f}% worse than ARIMA in terms of {metric_name}")

# Determine which model performed better (lower MAE wins)
if mae < mae_arima:
    print("\nConclusion: Prophet performed better than ARIMA for temperature forecasting")
else:
    print("\nConclusion: ARIMA performed better than Prophet for temperature forecasting")

## Conclusion

In this project, we implemented and compared **Facebook Prophet** and **ARIMA** for time series forecasting using a weather dataset (temperature data).  

### Key Observations
- **Prophet**
  - Automatically captured **trend** and **seasonality** in the data.  
  - Handled variations smoothly and provided **confidence intervals** that are useful in decision-making.  
  - Easy to implement with minimal manual tuning.  

- **ARIMA**
  - Provided a strong **statistical baseline** for forecasting.  
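  - Required the data to be **stationary** (handled here by first-order differencing, `d=1`) and needed careful tuning of the `(p, d, q)` order.  
  - Worked reasonably well but struggled to capture seasonal effects compared to Prophet, since the non-seasonal ARIMA(1,1,1) used here has no seasonal terms.  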

### Model Performance
- Based on error metrics (**MAE, MSE, RMSE**), **Prophet outperformed ARIMA** for this dataset.  
- Prophet’s ability to model seasonality and trends made its predictions more reliable for long-term forecasting.  

### Final Conclusion
- For this dataset, **Prophet performed better than ARIMA**.  
- In practical business scenarios (such as spaza shops or small businesses):  
  - **Prophet** would be ideal for forecasting sales and demand where **seasonality and holidays** play a major role.  
  - **ARIMA** could still be useful for **short-term forecasts** when the data is clean and stationary.  

Overall, Prophet is the preferred choice for this dataset, but ARIMA remains an important baseline model.
