# WEEK 6 — Time Series Forecasting using Prophet

### Model by Sharif Ullah

## STEP 1 — Import Required Libraries

In [10]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
import matplotlib.pyplot as plt


## STEP 2 — Load the Dataset

#### Explanation:
#### We load the dataset and convert the date column into datetime format, which is required for time series analysis.

In [11]:
# Read the raw sales file and convert the `date` column to datetime values
# in a single expression, so `df` never holds string dates.
df = pd.read_csv("train.csv").assign(
    date=lambda raw: pd.to_datetime(raw['date'])
)
df.head()


Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0
1,1,2013-01-01,1,BABY CARE,0.0,0
2,2,2013-01-01,1,BEAUTY,0.0,0
3,3,2013-01-01,1,BEVERAGES,0.0,0
4,4,2013-01-01,1,BOOKS,0.0,0


## STEP 3 — Select One Store and One Product Family

#### Explanation:
#### Prophet works on a single time series. Therefore, we select one store and one product family with sufficient data.

family
AUTOMOTIVE    1684
BABY CARE     1684
BEAUTY        1684
BEVERAGES     1684
BOOKS         1684
Name: count, dtype: int64

In [13]:
# Keep only the rows that belong to store 1 and the GROCERY I family, so the
# rest of the notebook works on a single time series.
df_filtered = df[(df['store_nbr'] == 1) & (df['family'] == 'GROCERY I')]

# Stop here with a clear message if the store/family pair matches nothing;
# otherwise an empty selection would flow silently into the aggregation and
# modelling cells below.
if df_filtered.empty:
    raise ValueError("No rows found for store_nbr=1 and family='GROCERY I'")

df_filtered.head()


Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
12,12,2013-01-01,1,GROCERY I,0.0,0
1794,1794,2013-01-02,1,GROCERY I,2652.0,0
3576,3576,2013-01-03,1,GROCERY I,2121.0,0
5358,5358,2013-01-04,1,GROCERY I,2056.0,0
7140,7140,2013-01-05,1,GROCERY I,2216.0,0


#### Note: store 1 / GROCERY I is used because this family actually has sales data for the store, as the preview above shows.

## STEP 4 — Aggregate Daily Sales

#### Explanation:
#### We aggregate sales at a daily level so that each date has a single sales value.
 

In [14]:
# Total the sales for each calendar date. as_index=False keeps `date` as an
# ordinary column, so the result has one row per date and a default index.
daily_sales = df_filtered.groupby('date', as_index=False)['sales'].sum()

daily_sales.head()


Unnamed: 0,date,sales
0,2013-01-01,0.0
1,2013-01-02,2652.0
2,2013-01-03,2121.0
3,2013-01-04,2056.0
4,2013-01-05,2216.0


## STEP 5 — Prepare Data in Prophet Format

#### Explanation:
#### Prophet requires two columns:
#### ds → date
#### y → target value (sales)
#### Rows with missing values are removed, and the remaining row count is printed to confirm that enough data is left to train the model.

In [17]:
# Build the frame passed to Prophet: rename date -> ds and sales -> y,
# then drop every row that contains a missing value, all in one chain.
prophet_df = (
    daily_sales
    .rename(columns={'date': 'ds', 'sales': 'y'})
    .dropna()
)

# Report how many observations remain after cleaning.
print("Total rows after cleaning:", len(prophet_df))

prophet_df.head()


Total rows after cleaning: 1684


Unnamed: 0,ds,y
0,2013-01-01,0.0
1,2013-01-02,2652.0
2,2013-01-03,2121.0
3,2013-01-04,2056.0
4,2013-01-05,2216.0


## STEP 6 — Stationarity Check 

#### Explanation:
#### Stationarity tests such as the Augmented Dickey–Fuller (ADF) test are needed for ARIMA-type models.
#### Prophet does not require a stationary series because it models trend and seasonality explicitly, so no stationarity test is run here.

## STEP 7 — Train the Prophet Model

#### Explanation:
#### The Prophet model is trained using historical sales data while considering yearly and weekly seasonal patterns.

In [18]:
# Configure the seasonal components of the model.
model = Prophet(
    daily_seasonality=False,   # input has one aggregated row per date
    weekly_seasonality=True,   # day-of-week pattern
    yearly_seasonality=True,   # time-of-year pattern
)

# Fit on the (ds, y) history; the return value of fit() is the cell output.
model.fit(prophet_df)


09:05:23 - cmdstanpy - INFO - Chain [1] start processing
09:05:31 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x1bd6c9e79d0>

## STEP 8 — Create Future Dates (Forecast Horizon)

#### Explanation:
#### We extend the historical dates by 30 days to generate forecasts. The resulting dataframe contains the historical dates as well as the 30 future dates.

In [19]:
# Extend the training dates by 30 daily periods. `freq` and `include_history`
# are written out at their default values, so the resulting frame holds the
# historical dates followed by the 30 new ones.
future = model.make_future_dataframe(
    periods=30,
    freq='D',
    include_history=True,
)


## STEP 9 — Generate Forecast

#### Explanation:
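#### The model predicts sales for every date in `future` (the historical dates plus the next 30 days) along with uncertainty intervals (`yhat_lower`, `yhat_upper`; 80% wide by default).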

In [20]:
# Predict for every date in `future`, then preview the last five rows of the
# point forecast together with its lower and upper bounds.
forecast = model.predict(future)
forecast.tail()[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
1709,2017-09-10,1410.96084,773.966791,1997.358373
1710,2017-09-11,2758.438709,2108.227884,3357.755516
1711,2017-09-12,2784.99068,2157.982935,3436.80003
1712,2017-09-13,3148.380392,2521.962376,3742.790624
1713,2017-09-14,2609.609936,2007.080119,3230.25642


## STEP 10 — Plot Forecast with Confidence Intervals