In [1]:
!pip install prophet



In [2]:
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from tqdm import tqdm
import holidays
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np


# **File Name**

In [3]:
df = pd.read_csv('April 29 to June 30 New Users.csv')
df['Year'] = df['Date'].apply(lambda x: str(x)[:4])
df['Month'] = df['Date'].apply(lambda x: str(x)[4:6])
df['Day'] = df['Date'].apply(lambda x: str(x)[-2:])
df['ds'] = pd.to_datetime(df['Date'], format='%Y%m%d')
df['y'] = df['New users']

# Keep only necessary columns
prophet_df = df[['ds', 'y']]
df.head()

Unnamed: 0,Date,New users,Year,Month,Day,ds,y
0,20250429,18240,2025,4,29,2025-04-29,18240
1,20250430,17295,2025,4,30,2025-04-30,17295
2,20250501,17644,2025,5,1,2025-05-01,17644
3,20250502,21591,2025,5,2,2025-05-02,21591
4,20250503,18990,2025,5,3,2025-05-03,18990


In [4]:
df_daily = df.groupby('ds')['y'].sum().reset_index()

# 2. Train-Validation Split (Last 30 days as validation)
# Find the split date based on the daily data
split_date = df_daily['ds'].max() - pd.Timedelta(days=30)

train = df_daily[df_daily['ds'] < split_date]
valid = df_daily[df_daily['ds'] >= split_date]


In [5]:
def add_pakistan_holidays(df):
    pk_holidays = holidays.CountryHoliday('PK', years=df['ds'].dt.year.unique())
    df['is_holiday'] = df['ds'].apply(lambda date: date in pk_holidays)
    return df       ### filhaal useless function

In [6]:
# 3. Optimized Prophet Model
model = Prophet(weekly_seasonality=True)
# Get official Pakistan holidays (requires `holidays` package)
pk_holidays = holidays.PK(years=[2024,2025])

# Add to Prophet as custom holidays
holiday_df = pd.DataFrame([
    {"holiday": name, "ds": pd.to_datetime(date)}
    for date, name in pk_holidays.items()
])
#model.add_country_holidays(country_name='PK')  # Keep this
model.holidays = holiday_df
model.fit(train[['ds', 'y']])

# 4. Create Future DataFrame including the validation period and the next 31 days
future = model.make_future_dataframe(periods=len(valid) + 31, freq='D', include_history=True)
forecast = model.predict(future)

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:n_changepoints greater than number of observations. Using 24.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpevad953w/sd52kn23.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpevad953w/x0yqjmcx.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.12/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=63298', 'data', 'file=/tmp/tmpevad953w/sd52kn23.json', 'init=/tmp/tmpevad953w/x0yqjmcx.json', 'output', 'file=/tmp/tmpevad953w/prophet_model6wcn5xle/prophet_model-20250910070941.csv', 'method=optimize', 'algorithm=newton', 'iter=10000']
07:09:41 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
07:09:41 - cmdstanpy - INFO - Chain [1] done processing
INFO:

In [7]:
pk_holidays = holidays.PK(years=[2024,2025])
# 1. Extract holiday names and dates
holiday_dates = [date for date, name in pk_holidays.items()]
holiday_names = [name for date, name in pk_holidays.items()]

# 2. Identify holidays in forecast period
forecast_holidays = forecast[
    forecast['ds'].isin(holiday_dates) &
    (forecast['ds'] > train['ds'].max())
][['ds']] # Only keep 'ds' column


# 3. Merge with holiday names - Ensure 'ds' is datetime in holiday_df
holiday_df = pd.DataFrame({'ds': pd.to_datetime(holiday_dates), 'holiday': holiday_names})
forecast_holidays = forecast_holidays.merge(holiday_df, on='ds')

print("Upcoming holiday impacts:")
print(forecast_holidays[['ds', 'holiday']]) # only print relevant columns

  forecast['ds'].isin(holiday_dates) &


Upcoming holiday impacts:
          ds      holiday
0 2025-06-07  Eid al-Adha
1 2025-06-08  Eid al-Adha
2 2025-06-09  Eid al-Adha
3 2025-07-05       Ashura
4 2025-07-06       Ashura


In [8]:
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
89,2025-07-27,24402.082977,22339.254387,26483.203619
90,2025-07-28,23736.291074,21768.588193,25767.458496
91,2025-07-29,25025.430304,23095.523973,27111.960752
92,2025-07-30,24373.783653,22341.887816,26411.447259
93,2025-07-31,24370.76927,22363.417972,26521.263397


In [9]:
# Filter forecast to just the next 31 days
final_forecast = forecast[forecast['ds'] >= df_daily['ds'].max()].tail(31)
# Export only baseline forecast + confidence interval
baseline_export = final_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
baseline_export.columns = ['ds', 'baseline', 'ci_lower', 'ci_upper']
# Merge with historical data
combined = pd.merge(
    df_daily[['ds', 'y']],
    baseline_export,
    on='ds',
    how='outer'
)
# Add data type flag
combined['data_type'] = np.where(combined['y'].notnull(), 'Actual', 'Forecast')
combined.to_csv('New_Users_Forecast.csv', index=False)
df.head()

Unnamed: 0,Date,New users,Year,Month,Day,ds,y
0,20250429,18240,2025,4,29,2025-04-29,18240
1,20250430,17295,2025,4,30,2025-04-30,17295
2,20250501,17644,2025,5,1,2025-05-01,17644
3,20250502,21591,2025,5,2,2025-05-02,21591
4,20250503,18990,2025,5,3,2025-05-03,18990


In [10]:
# 6. Final Forecast & Visualization
# Filter forecast to just the next 31 days
final_forecast = forecast[forecast['ds'] >= df_daily['ds'].max()].tail(31)


# 7. Interactive Plotly Visualization
fig = go.Figure()

# Plot actual values
fig.add_trace(go.Scatter(
    x=df_daily['ds'],
    y=df_daily['y'],
    mode='lines+markers',
    name='Actual New Users',
    line=dict(color='blue'),
    marker=dict(size=4),
    hovertemplate='Date: %{x}<br>Actual: %{y}<extra></extra>'

))

# Plot forecasted values
fig.add_trace(go.Scatter(
    x=final_forecast['ds'],
    y=final_forecast['yhat'],
    mode='lines+markers',
    name='Forecasted New Users',
    line=dict(color='red', dash='dash'),
    marker=dict(size=4),
    hovertemplate='Date: %{x}<br>Forecast: %{y:.0f}<extra></extra>'
    )
)

# Plot confidence interval
fig.add_trace(go.Scatter(
    x=pd.concat([final_forecast['ds'], final_forecast['ds'][::-1]]),
    y=pd.concat([final_forecast['yhat_upper'], final_forecast['yhat_lower'][::-1]]),
    fill='toself',
    fillcolor='rgba(255, 0, 0, 0.2)',
    line=dict(color='rgba(255,255,255,0)'),
    hoverinfo='skip',
    showlegend=True,
    name='Confidence Interval'
))

# Layout
fig.update_layout(
    title='New Users Forecast (Next 31 Days)',
    xaxis_title='Date',
    yaxis_title='New Users',
    hovermode='x unified',
    template='plotly_white',
    legend=dict(x=0.01, y=0.99)
)


fig.show()

# **Validation**

In [11]:
# 5. Validation
# Filter forecast to match the validation dates
val_forecast = forecast[forecast['ds'].isin(valid['ds'])]

# Ensure valid and val_forecast have the same dates and are sorted
valid = valid.sort_values(by='ds').reset_index(drop=True)
val_forecast = val_forecast.sort_values(by='ds').reset_index(drop=True)


mae = mean_absolute_error(valid['y'], val_forecast['yhat'])
rmse = np.sqrt(mean_squared_error(valid['y'], val_forecast['yhat']))

print(f"Validation MAE: {mae:.0f}")
print(f"Validation RMSE: {rmse:.0f}")
print()
print(df_daily['y'].describe())
# Avoid division by zero
mape = np.mean(np.abs((valid['y'] - val_forecast['yhat']) / valid['y'])) * 100
accuracy = 100 - mape

print(f"MAPE: {mape:.2f}%")
print(f"Percentage Accuracy: {accuracy:.2f}%")



Validation MAE: 2299
Validation RMSE: 3026

count       63.000000
mean     20856.047619
std       3479.088268
min      16210.000000
25%      17811.000000
50%      20845.000000
75%      22667.000000
max      30605.000000
Name: y, dtype: float64
MAPE: 9.57%
Percentage Accuracy: 90.43%


In [12]:
# 6. Final Forecast & Visualization
# Filter forecast to just the next 31 days
final_forecast = forecast[forecast['ds'] >= df_daily['ds'].max()].tail(31)


# 7. Interactive Plotly Visualization
fig = go.Figure()

# Plot actual values
fig.add_trace(go.Scatter(
    x=df_daily['ds'], y=df_daily['y'],
    mode='lines+markers', name='Actual New Users',
    line=dict(color='blue'),
    marker=dict(size=4),
    hovertemplate='Date: %{x}<br>Actual: %{y}<extra></extra>'
))

# 2. Forecasted yhat
fig.add_trace(go.Scatter(
    x=forecast['ds'], y=forecast['yhat'],
    mode='lines', name='Forecasted New Users',
    line=dict(color='red', width=2, dash='dash'),
    hovertemplate='Date: %{x}<br>Forecast: %{y:.0f}<extra></extra>'
))

# 3. Add invisible trace for tooltips of bounds
fig.add_trace(go.Scatter(
    x=forecast['ds'], y=forecast['yhat_upper'],
    mode='lines', name='Upper Bound',
    line=dict(width=0),
    hovertemplate='Upper Bound: %{y:.0f}<extra></extra>',
    showlegend=False
))

fig.add_trace(go.Scatter(
    x=forecast['ds'], y=forecast['yhat_lower'],
    mode='lines', name='Lower Bound',
    line=dict(width=0),
    fill='tonexty',
    fillcolor='rgba(255, 0, 0, 0.2)',
    hovertemplate='Lower Bound: %{y:.0f}<extra></extra>',
    showlegend=True  # Shows in legend as "Confidence Interval"
))

# Layout
fig.update_layout(
    title='New Users Forecast (Next 31 Days)',
    xaxis_title='Date',
    yaxis_title='New Users',
    template='plotly_white',
    hovermode='x unified',
    legend=dict(x=0.01, y=0.99)
)

fig.show()


In [13]:
from prophet.plot import plot_components_plotly
fig = plot_components_plotly(model, forecast)
fig.show()