# 📈 Sales Forecasting with XGBoost vs ARIMA (Simulated Example)

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error

# --- Synthetic sales series -------------------------------------------------
# Seed NumPy's global RNG so every run reproduces the same random draws.
np.random.seed(42)
dates = pd.date_range(start='2022-01-01', periods=200)
n_obs = len(dates)

# Deterministic components: a linear ramp from 0 up to 1000, and a sine wave
# of amplitude 500 whose argument sweeps 0..3*pi (one and a half cycles).
trend = np.linspace(0, 1000, n_obs)
seasonality = 500 * np.sin(np.linspace(0, 3 * np.pi, n_obs))

# Random components. The draw order is significant: `sales` must be sampled
# before `noise` so the seeded stream yields the same values for each.
sales = np.random.normal(loc=8000, scale=1500, size=n_obs)
noise = np.random.normal(0, 300, n_obs)

# Final series = base level + trend + seasonality + extra noise.
sales_data = sales + trend + seasonality + noise
df = pd.DataFrame({'Date': dates, 'Sales': sales_data})

# --- Train/test split -------------------------------------------------------
# Positional split in row order: the first 80% of rows form the training
# frame, the remaining rows form the test frame (no shuffling).
train_size = int(0.8 * len(df))
train = df.iloc[:train_size]
test = df.iloc[train_size:]

# --- Simulated model outputs ------------------------------------------------
# NOTE: no model is fitted in this cell. Both "predictions" are synthetic and
# are constructed directly from the held-out test actuals; `train` is not used.
actual = test['Sales']
n_test = len(test)

# ARIMA-like baseline: a flat line at the mean of the test actuals, perturbed
# by Gaussian noise with standard deviation 500. This draw must come first so
# the global RNG stream is consumed in the same order as before.
arima_noise = np.random.normal(0, 500, n_test)
arima_pred = actual.mean() + arima_noise

# XGBoost-like predictions: the test actuals themselves plus Gaussian noise
# (standard deviation 300) scaled down by 0.7.
xgb_noise = np.random.normal(0, 300, n_test)
xgb_pred = actual + xgb_noise * 0.7

# --- Error metrics ----------------------------------------------------------
# MAE comes straight from scikit-learn; RMSE is the square root of its MSE.
mae_arima = mean_absolute_error(actual, arima_pred)
mae_xgb = mean_absolute_error(actual, xgb_pred)
rmse_arima = np.sqrt(mean_squared_error(actual, arima_pred))
rmse_xgb = np.sqrt(mean_squared_error(actual, xgb_pred))

# Report both metrics per simulated model, rounded to two decimals.
print(f"ARIMA -> MAE: {mae_arima:.2f}, RMSE: {rmse_arima:.2f}")
print(f"XGBoost -> MAE: {mae_xgb:.2f}, RMSE: {rmse_xgb:.2f}")


In [None]:

# Plot results
# Draw the test-period actuals and both simulated prediction series on one
# axes, using Matplotlib's explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(12, 6))
test_dates = test['Date']

# Actuals use a thicker solid line; both prediction series are dashed.
ax.plot(test_dates, test['Sales'], label='Actual Sales', linewidth=2)
ax.plot(test_dates, arima_pred, label='ARIMA Prediction', linestyle='--')
ax.plot(test_dates, xgb_pred, label='XGBoost Prediction', linestyle='--')

ax.set_title('Sales Forecasting: Actual vs Predicted')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.legend()
ax.grid(True)

# Adjust padding so the labels fit inside the figure, then display it.
fig.tight_layout()
plt.show()
