# Sales Forecasting using ARIMA
This notebook demonstrates sales trend analysis and forecasting with an ARIMA model.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the raw sales records, then parse the 'Date' column and promote it to
# the index so the frame can be plotted and modelled against dates.
df = pd.read_csv('sample_sales_data.csv')
df = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')
df.head()

In [None]:
# Line chart of the 'Sales' column against the frame's index.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['Sales'], label='Sales')
ax.set(title='Sales Over Time', xlabel='Date', ylabel='Sales')
ax.legend()
plt.show()

In [None]:
# Smooth the series with a rolling mean over 30 consecutive rows and store it
# as a new column. NOTE(review): the "30-Day" label assumes one row per day.
sales = df['Sales']
df['Sales_MA30'] = sales.rolling(window=30).mean()

# Overlay the smoothed curve (red) on the raw series.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(sales, label='Original Sales')
ax.plot(df['Sales_MA30'], label='30-Day Moving Average', color='red')
ax.set_title('Sales with Moving Average')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.legend()
plt.show()

In [None]:
# Always store the first difference of 'Sales'. Later cells read
# df['Sales_diff'] unconditionally, so creating it only when the test reports
# non-stationarity would leave them failing with a KeyError on a stationary
# series.
df['Sales_diff'] = df['Sales'].diff()

# Augmented Dickey-Fuller test on the raw series; result[0] is the test
# statistic and result[1] the p-value.
result = adfuller(df['Sales'].dropna())
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')

# A p-value above 0.05 is treated here as evidence of non-stationarity; in
# that case the test is repeated on the differenced series (its leading NaN
# is dropped first).
if result[1] > 0.05:
    print('Series is Non-Stationary, applying differencing...')
    result = adfuller(df['Sales_diff'].dropna())
    print(f'After Differencing ADF Statistic: {result[0]}')
    print(f'After Differencing p-value: {result[1]}')

In [None]:
# Correlograms of the differenced series, up to lag 30: autocorrelation first,
# then partial autocorrelation, each shown as its own figure.
diffed = df['Sales_diff'].dropna()
for draw_correlogram in (plot_acf, plot_pacf):
    draw_correlogram(diffed, lags=30)
    plt.show()

In [None]:
# Fit an ARIMA model with order (p=1, d=1, q=1) on the undifferenced 'Sales'
# series and print the fitted model's summary table.
model = ARIMA(endog=df['Sales'], order=(1, 1, 1))
model_fit = model.fit()
fit_report = model_fit.summary()
print(fit_report)

In [None]:
forecast_steps = 30  # horizon: forecast the next 30 days
forecast = model_fit.forecast(steps=forecast_steps)

# Label the forecasts with consecutive calendar days that start the day after
# the last entry of the index.
first_forecast_day = df.index[-1] + pd.Timedelta(days=1)
forecast_dates = pd.date_range(start=first_forecast_day, periods=forecast_steps)
forecast_df = pd.DataFrame(
    {
        'Date': forecast_dates,
        'Forecasted_Sales': forecast.values,
    }
)
forecast_df

In [None]:
# Historical series followed by the forecast (green) on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['Sales'], label='Historical Sales')
ax.plot(
    forecast_df['Date'],
    forecast_df['Forecasted_Sales'],
    label='Forecasted Sales',
    color='green',
)
ax.set(title='Actual vs Forecasted Sales', xlabel='Date', ylabel='Sales')
ax.legend()
plt.show()

In [None]:
# Hold-out evaluation: the last `forecast_steps` observations are kept aside
# as the test set and a separate model is fitted on the remaining rows only.
# Scoring predictions from a model that was trained on the very points being
# scored would report in-sample fit, not forecasting accuracy.
sales = df['Sales']
train = sales.iloc[:-forecast_steps]
y_true = sales.iloc[-forecast_steps:]

# Same (1, 1, 1) order as the model fitted on the full series; keep in sync.
eval_fit = ARIMA(train, order=(1, 1, 1)).fit()

# Multi-step forecast over the held-out window. No `typ` argument is passed:
# it belongs to the legacy ARIMA API and is not a documented parameter of
# statsmodels.tsa.arima.model results.
y_pred = eval_fit.forecast(steps=forecast_steps)

rmse = np.sqrt(mean_squared_error(y_true, y_pred))
mape = mean_absolute_percentage_error(y_true, y_pred)
print(f'RMSE: {rmse:.2f}')
print(f'MAPE: {mape*100:.2f}%')