In [None]:
# Import required libraries
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from prophet import Prophet
from prophet.utilities import regressor_coefficients
# Suppress every warning category for the rest of the session
warnings.filterwarnings('ignore')

# Plot defaults: stock matplotlib style and a 12x8 default figure size
plt.style.use('default')
plt.rcParams.update({'figure.figsize': (12, 8)})


In [None]:
# Build two years of synthetic daily sales for a retail business
np.random.seed(42)  # fixed seed so the noise term is reproducible

# Daily calendar from 2022-01-01 through 2023-12-31, both ends included
start_date = datetime(2022, 1, 1)
end_date = datetime(2023, 12, 31)
dates = pd.date_range(start=start_date, end=end_date, freq='D')
n_days = len(dates)
day_index = np.arange(n_days)

# Individual signal components, one value per day
base_sales = 1000
trend = np.linspace(0, 500, n_days)                      # linear ramp from 0 to 500
seasonal = 200 * np.sin(2 * np.pi * day_index / 365.25)  # sine wave with a 365.25-day period
weekly = 100 * np.sin(2 * np.pi * day_index / 7)         # sine wave with a 7-day period
noise = np.random.normal(0, 50, n_days)                  # normal draws, mean 0, std 50

# Daily sales are the plain sum of the baseline and all components
sales = base_sales + trend + seasonal + weekly + noise

# Two-column frame: calendar date and the simulated sales figure
df = pd.DataFrame(dict(date=dates, sales=sales))

In [None]:
# Prophet expects the timestamp column to be called 'ds' and the target 'y';
# work on a copy so the original frame keeps its own column names
prophet_df = (
    df.copy()
    .rename(columns={'date': 'ds', 'sales': 'y'})
)

# Coerce 'ds' to a datetime dtype
prophet_df['ds'] = pd.to_datetime(prophet_df['ds'])

In [None]:
# Visualise the raw series before any modelling
raw_fig, raw_ax = plt.subplots(figsize=(14, 6))
raw_ax.plot(prophet_df['ds'], prophet_df['y'], linewidth=1, alpha=0.8)
raw_ax.set_title('Daily Sales Data - Original Time Series', fontsize=16, fontweight='bold')
raw_ax.set_xlabel('Date', fontsize=12)
raw_ax.set_ylabel('Sales ($)', fontsize=12)
raw_ax.grid(True, alpha=0.3)
plt.xticks(rotation=45)  # applies to raw_ax, which is the current axes
plt.tight_layout()
plt.show()

In [None]:
# Initialize the baseline Prophet model with weekly and yearly additive seasonality.
# The data has one observation per day, so there is no within-day signal for a
# daily seasonality term to learn: its Fourier features take the same value at
# every timestamp. Enabling it only adds unidentifiable parameters and a
# meaningless "daily" panel in the components plot, so it is switched off.
model = Prophet(
    daily_seasonality=False,
    weekly_seasonality=True,
    yearly_seasonality=True,
    seasonality_mode='additive'
)
model.fit(prophet_df)

In [None]:
# Forecast horizon in days; the future frame keeps the historical dates and
# appends `future_periods` further daily timestamps
future_periods = 90
future = model.make_future_dataframe(
    periods=future_periods,
    freq='D',
    include_history=True,
)

In [None]:
# Generate forecast for every row of `future` (history plus the new horizon)
forecast = model.predict(future)

# Key forecast columns: point estimate, uncertainty bounds, trend, and the
# summed additive seasonal contribution. Prophet's output has no 'seasonal'
# column; 'additive_terms' is the column that carries the combined seasonal part.
forecast_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper', 'trend', 'additive_terms']

# Display the last rows, i.e. the end of the forecast horizon
forecast[forecast_columns].tail()

In [None]:
# Prophet's built-in forecast chart, relabelled through its axes
fig1 = model.plot(forecast, figsize=(14, 8))
builtin_ax = plt.gca()
builtin_ax.set_title('Sales Forecast with Prophet', fontsize=16, fontweight='bold')
builtin_ax.set_xlabel('Date', fontsize=12)
builtin_ax.set_ylabel('Sales ($)', fontsize=12)
builtin_ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Split the forecast at the last observed date: rows up to it are the
# in-sample fit, rows after it are the out-of-sample forecast
last_observed = prophet_df['ds'].max()
historical_data = forecast[forecast['ds'] <= last_observed]
future_data = forecast[forecast['ds'] > last_observed]

# Custom chart: observations, fitted values, forecast and its interval band
detail_fig, detail_ax = plt.subplots(figsize=(16, 8))
detail_ax.plot(prophet_df['ds'], prophet_df['y'], 'ko',
               markersize=2, alpha=0.6, label='Historical Data')
detail_ax.plot(historical_data['ds'], historical_data['yhat'], 'b-',
               linewidth=2, label='Fitted Values')
detail_ax.plot(future_data['ds'], future_data['yhat'], 'r-',
               linewidth=2, label='Forecast')

# Shade the band between the lower and upper forecast bounds
detail_ax.fill_between(
    future_data['ds'],
    future_data['yhat_lower'],
    future_data['yhat_upper'],
    alpha=0.3, color='red', label='Confidence Interval',
)

detail_ax.set_title('Detailed Sales Forecast Analysis', fontsize=16, fontweight='bold')
detail_ax.set_xlabel('Date', fontsize=12)
detail_ax.set_ylabel('Sales ($)', fontsize=12)
detail_ax.legend(fontsize=10)
detail_ax.grid(True, alpha=0.3)
plt.xticks(rotation=45)  # applies to detail_ax, which is the current axes
plt.tight_layout()
plt.show()

In [None]:
# Plot forecast components
fig2 = model.plot_components(forecast, figsize=(14, 10))
plt.suptitle('Forecast Components Analysis', fontsize=16, fontweight='bold', y=0.98)
plt.tight_layout()
plt.show()

# Extract and display component insights
print("Component Analysis:")
print("=" * 50)

# Trend analysis: net change in the trend column between the first and last row
trend_start = forecast['trend'].iloc[0]
trend_end = forecast['trend'].iloc[-1]
trend_change = trend_end - trend_start
# N rows span N - 1 daily steps, so the per-day average divides by N - 1;
# max(..., 1) guards against a zero divisor on a single-row forecast
trend_steps = max(len(forecast) - 1, 1)
print(f"Overall Trend Change: ${trend_change:.2f}")
print(f"Average Daily Trend: ${trend_change / trend_steps:.2f}")

# Seasonality insights: peak-to-trough range over the last 365 rows (yearly)
# and the last 7 rows (weekly)
yearly_seasonal = forecast['yearly'].iloc[-365:].max() - forecast['yearly'].iloc[-365:].min()
weekly_seasonal = forecast['weekly'].iloc[-7:].max() - forecast['weekly'].iloc[-7:].min()
print(f"Yearly Seasonality Range: ${yearly_seasonal:.2f}")
print(f"Weekly Seasonality Range: ${weekly_seasonal:.2f}")

In [None]:
def _holiday_frame(name, dates, lower_window, upper_window):
    """Return a holiday table with one row per date for a single named holiday.

    Args:
        name: Label stored in the 'holiday' column of every row.
        dates: Date strings parsed with ``pd.to_datetime`` into the 'ds' column.
        lower_window: Value stored in 'lower_window' (negative = days before).
        upper_window: Value stored in 'upper_window' (days after).
    """
    return pd.DataFrame({
        'holiday': name,
        'ds': pd.to_datetime(dates),
        'lower_window': lower_window,
        'upper_window': upper_window,
    })


# Christmas: window from 2 days before through 1 day after
holidays = _holiday_frame('christmas', ['2022-12-25', '2023-12-25'], -2, 1)

# New Year: includes 2024-01-01; window of 1 day either side
new_year = _holiday_frame('new_year', ['2022-01-01', '2023-01-01', '2024-01-01'], -1, 1)

# Thanksgiving: window of 1 day either side
thanksgiving = _holiday_frame('thanksgiving', ['2022-11-24', '2023-11-23'], -1, 1)

# Black Friday: the day itself plus the 2 following days
black_friday = _holiday_frame('black_friday', ['2022-11-25', '2023-11-24'], 0, 2)

# Stack the per-holiday tables into a single frame with a fresh 0..n-1 index
all_holidays = pd.concat(
    [holidays, new_year, thanksgiving, black_friday],
    ignore_index=True,
)

In [None]:
# Simulate external drivers of sales (marketing spend, promotions, temperature)
np.random.seed(42)
n_obs = len(prophet_df)

# Marketing spend: normal draws (mean 5000, std 1000) floored at 1000
marketing_spend = np.random.normal(5000, 1000, n_obs).clip(min=1000)

# Promotion flag: 0/1 indicator drawn with probability 0.1 of being 1
promotion_days = np.random.choice([0, 1], size=n_obs, p=[0.9, 0.1])

# Temperature: sine wave around 70 with amplitude 20 and a 365.25-row period,
# plus normal jitter with std 5
annual_phase = 2 * np.pi * np.arange(n_obs) / 365.25
temperature = 70 + 20 * np.sin(annual_phase) + np.random.normal(0, 5, n_obs)

# New frame = original columns plus the three regressor columns
prophet_df_enhanced = prophet_df.assign(
    marketing_spend=marketing_spend,
    promotion=promotion_days,
    temperature=temperature,
)


In [None]:
# Initialize enhanced Prophet model with holidays and regressors.
# daily_seasonality is disabled: with one observation per day there is no
# within-day pattern to estimate, and the term's Fourier features are constant
# at every timestamp.
enhanced_model = Prophet(
    holidays=all_holidays,
    daily_seasonality=False,
    weekly_seasonality=True,
    yearly_seasonality=True,
    seasonality_mode='additive',
    holidays_prior_scale=10.0,    # Prophet's default value, stated explicitly
    seasonality_prior_scale=10.0  # Prophet's default value, stated explicitly
)

# Register the extra regressors; each must be present as a column in both the
# training frame and the frame passed to predict()
enhanced_model.add_regressor('marketing_spend', prior_scale=0.5)
enhanced_model.add_regressor('promotion', prior_scale=0.5)
enhanced_model.add_regressor('temperature', prior_scale=0.5)

In [None]:
# Fit the enhanced model
print("Fitting enhanced Prophet model...")
enhanced_model.fit(prophet_df_enhanced)
print("Enhanced model fitting completed!")

# Future frame: the historical dates followed by `future_periods` new days
future_enhanced = enhanced_model.make_future_dataframe(periods=future_periods)

# Simulated future regressor values (in practice, these would be planned/predicted values)
np.random.seed(42)
future_marketing = np.random.normal(5000, 1000, future_periods)
future_marketing = np.maximum(future_marketing, 1000)

future_promotion = np.random.choice([0, 1], size=future_periods, p=[0.85, 0.15])

# Continue the temperature sine pattern from where the historical rows stop
future_temp_base = np.arange(len(prophet_df_enhanced), len(prophet_df_enhanced) + future_periods)
future_temperature = 70 + 20 * np.sin(2 * np.pi * future_temp_base / 365.25) + np.random.normal(0, 5, future_periods)

# Every row of the future frame (history AND horizon) needs a regressor value,
# so each column is the historical values followed by the simulated future
# values, assigned to the whole column rather than to the future slice only.
future_regressors = {
    'marketing_spend': np.concatenate([prophet_df_enhanced['marketing_spend'].to_numpy(), future_marketing]),
    'promotion': np.concatenate([prophet_df_enhanced['promotion'].to_numpy(), future_promotion]),
    'temperature': np.concatenate([prophet_df_enhanced['temperature'].to_numpy(), future_temperature]),
}
for regressor_name, regressor_values in future_regressors.items():
    # Fail loudly if the future frame is not exactly history + horizon rows
    if len(regressor_values) != len(future_enhanced):
        raise ValueError(
            f"{regressor_name}: expected {len(future_enhanced)} values, got {len(regressor_values)}"
        )
    future_enhanced[regressor_name] = regressor_values

print("Future dataframe with regressors prepared")

In [None]:
# Predict with the enhanced model over history plus the future horizon
print("Generating enhanced forecast...")
enhanced_forecast = enhanced_model.predict(future_enhanced)
print("Enhanced forecast generated!")

# In-sample comparison of the basic and enhanced models
print("\nModel Comparison:")
print("=" * 50)

# Keep only forecast rows dated on or before the last observed date
history_end = prophet_df['ds'].max()
historical_mask = enhanced_forecast['ds'] <= history_end
historical_enhanced = enhanced_forecast[historical_mask]
historical_basic = forecast[forecast['ds'] <= history_end]

# Mean absolute error; the Series subtraction pairs rows by index label
actual_sales = prophet_df['y']
mae_basic = (historical_basic['yhat'] - actual_sales).abs().mean()
mae_enhanced = (historical_enhanced['yhat'] - actual_sales).abs().mean()

# Relative reduction in MAE, as a percentage of the basic model's MAE
improvement_pct = (mae_basic - mae_enhanced) / mae_basic * 100
print(f"Basic Model MAE: ${mae_basic:.2f}")
print(f"Enhanced Model MAE: ${mae_enhanced:.2f}")
print(f"Improvement: {improvement_pct:.1f}%")

In [None]:
# Prophet's built-in chart for the enhanced model, relabelled through its axes
fig3 = enhanced_model.plot(enhanced_forecast, figsize=(16, 8))
enhanced_ax = plt.gca()
enhanced_ax.set_title('Enhanced Sales Forecast with Holidays and Regressors', fontsize=16, fontweight='bold')
enhanced_ax.set_xlabel('Date', fontsize=12)
enhanced_ax.set_ylabel('Sales ($)', fontsize=12)
enhanced_ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Component breakdown of the enhanced forecast
fig4 = enhanced_model.plot_components(enhanced_forecast, figsize=(14, 12))
plt.suptitle('Enhanced Forecast Components with Holidays and Regressors', fontsize=16, fontweight='bold', y=0.98)
plt.tight_layout()
plt.show()

# Split both forecasts at the last observed date
comparison_cutoff = prophet_df['ds'].max()
historical_basic = forecast[forecast['ds'] <= comparison_cutoff]
future_basic = forecast[forecast['ds'] > comparison_cutoff]
historical_enhanced = enhanced_forecast[enhanced_forecast['ds'] <= comparison_cutoff]
future_enhanced_viz = enhanced_forecast[enhanced_forecast['ds'] > comparison_cutoff]

# One panel per model: (future rows, line format, band colour, legend label, title)
comparison_panels = [
    (future_basic, 'b-', 'blue', 'Basic Forecast',
     'Basic Prophet Model Forecast'),
    (future_enhanced_viz, 'r-', 'red', 'Enhanced Forecast',
     'Enhanced Prophet Model Forecast (with Holidays & Regressors)'),
]

# Stacked comparison: basic model on top, enhanced model below
compare_fig, compare_axes = plt.subplots(2, 1, figsize=(18, 10))
for panel_ax, (horizon, line_fmt, band_color, line_label, panel_title) in zip(compare_axes, comparison_panels):
    panel_ax.plot(prophet_df['ds'], prophet_df['y'], 'ko', markersize=2, alpha=0.6, label='Historical Data')
    panel_ax.plot(horizon['ds'], horizon['yhat'], line_fmt, linewidth=2, label=line_label)
    panel_ax.fill_between(horizon['ds'], horizon['yhat_lower'], horizon['yhat_upper'],
                          alpha=0.3, color=band_color)
    panel_ax.set_title(panel_title, fontsize=14, fontweight='bold')
    panel_ax.set_xlabel('Date')
    panel_ax.set_ylabel('Sales ($)')
    panel_ax.legend()
    panel_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
Subtask 3.7: Analyze Holiday and Regressor Effects
# Analyze holiday effects
print("Holiday Effects Analysis:")
print("=" * 50)

# A fitted Prophet model has no params['holidays'] entry. The contribution of
# each holiday is available as a forecast column named after the holiday; it is
# zero on dates outside that holiday's window, so those rows are excluded.
for holiday in all_holidays['holiday'].unique():
    if holiday not in enhanced_forecast.columns:
        continue
    effect = enhanced_forecast.loc[enhanced_forecast[holiday] != 0, holiday]
    if not effect.empty:
        avg_effect = effect.mean()
        print(f"{holiday.title()}: ${avg_effect:.2f} average impact")

# Analyze regressor effects
print("\nRegressor Effects Analysis:")
print("=" * 50)

# params['beta'] is an unlabelled array covering every model feature, so it
# cannot be looked up by regressor name. regressor_coefficients() returns one
# row per extra regressor; 'coef' is the estimated change in y per unit of the
# regressor. The result is a Series indexed by regressor name.
regressor_effects = regressor_coefficients(enhanced_model).set_index('regressor')['coef']
for regressor in ['marketing_spend', 'promotion', 'temperature']:
    if regressor in regressor_effects.index:
        effect = regressor_effects[regressor]
        print(f"{regressor.replace('_', ' ').title()}: {effect:.4f} coefficient")

# In the forecast frame, the column named after a regressor already holds that
# regressor's contribution to yhat (not the raw input value), so it is plotted
# directly instead of being multiplied by the coefficient again.
marketing_impact = enhanced_forecast['marketing_spend']
promotion_impact = enhanced_forecast['promotion']
temperature_impact = enhanced_forecast['temperature']

# One panel per regressor: (title, contribution series, line colour)
impact_panels = [
    ('Marketing Spend Impact', marketing_impact, 'g'),
    ('Promotion Impact', promotion_impact, 'orange'),
    ('Temperature Impact', temperature_impact, 'purple'),
]

# Create regressor impact visualization
impact_fig, impact_axes = plt.subplots(1, 3, figsize=(15, 5))
for impact_ax, (impact_title, impact_series, impact_color) in zip(impact_axes, impact_panels):
    impact_ax.plot(enhanced_forecast['ds'], impact_series, color=impact_color, linewidth=2)
    impact_ax.set_title(impact_title, fontweight='bold')
    impact_ax.set_xlabel('Date')
    impact_ax.set_ylabel('Sales Impact ($)')
    impact_ax.grid(True, alpha=0.3)
    impact_ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# Generate business insights
print("BUSINESS INSIGHTS AND RECOMMENDATIONS")
print("=" * 60)

# Future forecast summary: rows dated after the last observed date
history_end = prophet_df['ds'].max()
future_forecast = enhanced_forecast[enhanced_forecast['ds'] > history_end]
avg_future_sales = future_forecast['yhat'].mean()
total_future_sales = future_forecast['yhat'].sum()

print(f"Next {future_periods} Days Forecast Summary:")
print(f"- Average Daily Sales: ${avg_future_sales:,.2f}")
print(f"- Total Projected Sales: ${total_future_sales:,.2f}")
print(f"- Confidence Range: ${future_forecast['yhat_lower'].mean():,.2f} - ${future_forecast['yhat_upper'].mean():,.2f}")

# Identify peak and low periods within the forecast horizon
max_sales_date = future_forecast.loc[future_forecast['yhat'].idxmax(), 'ds']
min_sales_date = future_forecast.loc[future_forecast['yhat'].idxmin(), 'ds']
max_sales_value = future_forecast['yhat'].max()
min_sales_value = future_forecast['yhat'].min()

print(f"\nPeak Performance:")
print(f"- Highest Sales Day: {max_sales_date.strftime('%Y-%m-%d')} (${max_sales_value:,.2f})")
print(f"- Lowest Sales Day: {min_sales_date.strftime('%Y-%m-%d')} (${min_sales_value:,.2f})")

# Marketing recommendations.
# "Planned" spend refers to the future horizon only, so historical rows are
# excluded from the count; "high" means above the 75th percentile of the
# historical spend.
is_future_row = future_enhanced['ds'] > history_end
high_spend_threshold = future_enhanced.loc[~is_future_row, 'marketing_spend'].quantile(0.75)
high_marketing_days = future_enhanced[
    is_future_row & (future_enhanced['marketing_spend'] > high_spend_threshold)
]

# Estimated change in sales per unit of marketing spend, looked up by regressor
# name from the fitted model (params['beta'] cannot be indexed by name)
marketing_coef = (
    regressor_coefficients(enhanced_model)
    .set_index('regressor')
    .loc['marketing_spend', 'coef']
)
print(f"\nMarketing Insights:")
print(f"- High marketing spend planned for {len(high_marketing_days)} days")
print(f"- Expected ROI from marketing: {marketing_coef*1000:.2f}$ per $1000 spent")

# Seasonal recommendations
print(f"\nSeasonal Recommendations:")
print(f"- Weekly seasonality shows {weekly_seasonal:.0f}$ variation")
print(f"- Plan inventory and staffing around weekly patterns")
print(f"- Annual seasonality shows {yearly_seasonal:.0f}$ variation")

print("\nModel Performance Summary:")
print(f"- Enhanced model shows {((mae_basic - mae_enhanced) / mae_basic * 100):.1f}% improvement over basic model")
print(f"- Holiday effects successfully captured")
print(f"- External regressors provide additional predictive power")

'''Troubleshooting Common Issues

Issue 1: Date Format Problems
# If you encounter date parsing issues
# Ensure your date column is properly formatted
df['ds'] = pd.to_datetime(df['ds'], format='%Y-%m-%d')
# For different date formats, specify the format
# df['ds'] = pd.to_datetime(df['ds'], format='%m/%d/%Y')

Issue 2: Missing Values in Regressors
# Handle missing values in regressors by carrying the last known value forward
future_df['regressor_name'] = future_df['regressor_name'].ffill()
# Or use interpolation
future_df['regressor_name'] = future_df['regressor_name'].interpolate()

Issue 3: Model Convergence Issues
# If model doesn't converge, try adjusting parameters
model = Prophet(
    changepoint_prior_scale=0.01,  # Reduce for less flexible trend
    seasonality_prior_scale=1.0,   # Reduce for less flexible seasonality
    holidays_prior_scale=1.0       # Reduce for less flexible holidays
)'''