<h1 style="text-align:center;">Store Sales Time Series Forecasting</h1>

This notebook implements time series forecasting for store sales data using SARIMA and Prophet models.

## 1. Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Select matplotlib's built-in 'default' style sheet for the static figures
# drawn later in the notebook.
plt.style.use('default')
# IPython magic (not plain Python): render matplotlib figures inline in the
# cell output instead of opening a separate window.
%matplotlib inline

## 2. Load and Merge Datasets

In [None]:
# Directory that holds the four CSV inputs. It is defined once so the notebook
# can be pointed at another copy of the data by editing a single line.
DATA_DIR = 'd:/intern/task3'

# Load all datasets
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
stores_df = pd.read_csv(f'{DATA_DIR}/stores.csv')
features_df = pd.read_csv(f'{DATA_DIR}/features.csv')

# Display basic information about datasets.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line
# after every report.
print("Train Dataset Info:")
train_df.info()
print("\nTest Dataset Info:")
test_df.info()
print("\nStores Dataset Info:")
stores_df.info()
print("\nFeatures Dataset Info:")
features_df.info()

## 3. Data Preprocessing

In [None]:
# Parse the 'Date' column of every frame that carries one into datetime values,
# so the frames can be joined and sorted on real dates rather than on strings.
for dated_frame in (train_df, test_df, features_df):
    dated_frame['Date'] = pd.to_datetime(dated_frame['Date'])

# Build the modelling table in one chain:
#   1. attach the store-level attributes (left join on 'Store'),
#   2. attach the features keyed by store and date (left join on both),
#   3. order the rows chronologically.
# Left joins keep every training row even when no match exists on the right.
train_data = (
    train_df
    .merge(stores_df, on='Store', how='left')
    .merge(features_df, on=['Store', 'Date'], how='left')
    .sort_values('Date')
)

# Show the first rows of the merged table as a quick sanity check
print("\nMerged Dataset Head:")
print(train_data.head())

## 4. Exploratory Data Analysis

In [None]:
# Time series plot of sales.
# train_data is expected to hold several rows for every date (the table has a
# 'Store' column and is later filtered to a single store), so the rows are
# first summed per date. Drawing the raw rows as one line would jump between
# stores at each date instead of showing how sales evolve over time.
total_sales_by_date = train_data.groupby('Date')['Weekly_Sales'].sum()

plt.figure(figsize=(15, 6))
plt.plot(total_sales_by_date.index, total_sales_by_date.values)
plt.title('Weekly Sales Over Time')
plt.xlabel('Date')
plt.ylabel('Total Weekly Sales')
plt.xticks(rotation=45)
plt.show()

# Sales distribution by store type (one box per value of the 'Type' column)
plt.figure(figsize=(10, 6))
sns.boxplot(data=train_data, x='Type', y='Weekly_Sales')
plt.title('Sales Distribution by Store Type')
plt.show()

# Monthly sales trends: average of the weekly figures, grouped by calendar
# month (1-12) regardless of year.
# The 'Month' column is kept on train_data so any later cell can reuse it.
train_data['Month'] = train_data['Date'].dt.month
monthly_sales = train_data.groupby('Month')['Weekly_Sales'].mean().reset_index()

plt.figure(figsize=(12, 6))
plt.bar(monthly_sales['Month'], monthly_sales['Weekly_Sales'])
plt.title('Average Monthly Sales')
plt.xlabel('Month')
plt.ylabel('Average Weekly Sales')
# Label every month that is present instead of matplotlib's automatic ticks
plt.xticks(monthly_sales['Month'])
plt.show()

## 5. Prepare Data for Store 1

In [None]:
# Filter data for Store 1
store_1_data = train_data[train_data['Store'] == 1].sort_values('Date')

# Create the time series for Store 1 with exactly one observation per date.
# Time series models need a unique, ordered date index. If the store has more
# than one row for the same date (for example one row per department -- TODO
# confirm against train.csv), those rows are summed into a store-level total.
# When every date already appears once this yields the same series as a plain
# set_index('Date'). min_count=1 keeps a date whose values are all missing as
# NaN instead of silently turning it into 0.
store_1_sales = store_1_data.groupby('Date')['Weekly_Sales'].sum(min_count=1)

# Plot Store 1 sales
plt.figure(figsize=(15, 6))
plt.plot(store_1_sales.index, store_1_sales.values)
plt.title('Store 1 Weekly Sales')
plt.xlabel('Date')
plt.ylabel('Weekly Sales')
plt.show()

## 6. SARIMA Model

In [None]:
# Specify and fit a seasonal ARIMA on the Store 1 series.
#   order          = (p, d, q)    -> non-seasonal AR order, differencing, MA order
#   seasonal_order = (P, D, Q, s) -> seasonal counterparts with a period of 52
#                                    observations (one year of weekly data)
sarima_model = SARIMAX(
    store_1_sales,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 52),
)
sarima_results = sarima_model.fit()

# Forecast the 8 periods that follow the last observation and keep the point
# forecast (the mean of the predictive distribution)
sarima_forecast = sarima_results.get_forecast(steps=8)
sarima_mean = sarima_forecast.predicted_mean

# Overlay the forecast (red dashed line) on the observed history
plt.figure(figsize=(15, 6))
plt.plot(store_1_sales.index, store_1_sales.values, label='Actual')
plt.plot(sarima_mean.index, sarima_mean.values,
         color='r', linestyle='--', label='SARIMA Forecast')
plt.title('Store 1 Sales - SARIMA Forecast')
plt.legend()
plt.show()

## 7. Prophet Model

In [None]:
# Prepare data for Prophet: one row per date, with the column names Prophet
# requires ('ds' for the timestamp, 'y' for the value).
# Rows sharing a date are summed into a single store-level observation; this
# is a no-op when every date already appears once. min_count=1 keeps an
# all-missing date as NaN rather than 0.
prophet_data = (
    store_1_data
    .groupby('Date')['Weekly_Sales']
    .sum(min_count=1)
    .reset_index()
    .rename(columns={'Date': 'ds', 'Weekly_Sales': 'y'})
)

# Initialize and train Prophet model.
# Weekly seasonality is switched off: with one observation per week every data
# point falls on the same weekday, so a day-of-week effect cannot be estimated
# and would only add an arbitrary component to the forecast.
prophet_model = Prophet(yearly_seasonality=True,
                       weekly_seasonality=False,
                       daily_seasonality=False)
prophet_model.fit(prophet_data)

# Create future dates for forecasting.
# '7D' steps forward in exact 7-day increments from the last observed date, so
# the forecast dates stay on the same weekday as the history. 'W' would snap
# them to Sundays regardless of the weekday the data is recorded on.
future_dates = prophet_model.make_future_dataframe(periods=8, freq='7D')
prophet_forecast = prophet_model.predict(future_dates)

# Plot Prophet results (history, fitted values and the 8-step forecast)
fig = prophet_model.plot(prophet_forecast)
plt.title('Store 1 Sales - Prophet Forecast')
plt.show()

## 8. Compare Model Performance

In [None]:
# Hold-out evaluation of both models.
#
# The models fitted earlier in the notebook were trained on the full history
# and forecast dates *after* the last observation, so no actual values exist
# to score those forecasts against. Comparing them with the last observed
# values would pair predictions and actuals from different periods, and
# subtracting two Series with different indexes yields NaN.
# To measure accuracy, each model is refitted here on everything except the
# final HOLDOUT_WEEKS observations and asked to predict exactly those dates.
# NOTE: this refits SARIMA, so the cell takes about as long as the original fit.
HOLDOUT_WEEKS = 8

# One observation per date in chronological order (no-op if already the case)
sales_history = store_1_sales.groupby(level=0).sum(min_count=1).sort_index()
train_sales = sales_history.iloc[:-HOLDOUT_WEEKS]
actual_values = sales_history.iloc[-HOLDOUT_WEEKS:]


def forecast_errors(actual, predicted):
    """Return (MAE, RMSE, MAPE in percent) for two equally long sequences.

    Both inputs are converted to plain float arrays and compared position by
    position, so differing pandas indexes cannot turn the result into NaN.
    MAPE divides by the actual values; a zero actual yields inf for that term.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    return mae, rmse, mape


# SARIMA metrics: same orders as the forecasting model, fitted on the training
# part only (disp=False silences the optimizer's iteration log)
sarima_backtest = SARIMAX(train_sales,
                          order=sarima_model.order,
                          seasonal_order=sarima_model.seasonal_order).fit(disp=False)
sarima_holdout_mean = sarima_backtest.get_forecast(steps=HOLDOUT_WEEKS).predicted_mean
sarima_mae, sarima_rmse, sarima_mape = forecast_errors(actual_values, sarima_holdout_mean)

print('SARIMA Metrics:')
print(f'MAE: {sarima_mae:.2f}')
print(f'RMSE: {sarima_rmse:.2f}')
print(f'MAPE: {sarima_mape:.2f}%\n')

# Prophet metrics: fitted on the training part and evaluated on the held-out
# dates themselves, so predictions and actuals refer to identical timestamps.
# Weekly seasonality stays off because it cannot be estimated from one
# observation per week.
prophet_train = train_sales.rename('y').rename_axis('ds').reset_index()
prophet_backtest = Prophet(yearly_seasonality=True,
                           weekly_seasonality=False,
                           daily_seasonality=False)
prophet_backtest.fit(prophet_train)
holdout_dates = pd.DataFrame({'ds': actual_values.index})
prophet_predictions = prophet_backtest.predict(holdout_dates)['yhat']
prophet_mae, prophet_rmse, prophet_mape = forecast_errors(actual_values, prophet_predictions)

print('Prophet Metrics:')
print(f'MAE: {prophet_mae:.2f}')
print(f'RMSE: {prophet_rmse:.2f}')
print(f'MAPE: {prophet_mape:.2f}%')

## 9. Visualize Forecasts Together

In [None]:
# Interactive comparison chart: observed history plus both 8-step forecasts.

# The last 8 rows of the Prophet output are the forecast rows appended after
# the historical dates.
prophet_tail = prophet_forecast.tail(8)

# One trace per series, listed in the order they should appear in the legend
comparison_traces = [
    # Observed sales (solid blue)
    go.Scatter(
        x=store_1_sales.index,
        y=store_1_sales.values,
        mode='lines',
        name='Actual Sales',
        line={'color': 'blue'},
    ),
    # SARIMA point forecast (dashed red)
    go.Scatter(
        x=sarima_mean.index,
        y=sarima_mean.values,
        mode='lines',
        name='SARIMA Forecast',
        line={'color': 'red', 'dash': 'dash'},
    ),
    # Prophet point forecast (dashed green)
    go.Scatter(
        x=prophet_tail['ds'],
        y=prophet_tail['yhat'],
        mode='lines',
        name='Prophet Forecast',
        line={'color': 'green', 'dash': 'dash'},
    ),
]

fig = go.Figure()
for comparison_trace in comparison_traces:
    fig.add_trace(comparison_trace)

fig.update_layout(
    title='Store 1 Sales Forecasts Comparison',
    xaxis_title='Date',
    yaxis_title='Weekly Sales',
    template='plotly_white',
)

fig.show()