<h1 style="text-align:center;">Store Sales Time Series Forecasting</h1>

This notebook implements time series forecasting for store sales data using multiple models (SARIMA and Prophet) to predict future trends based on historical data.

## 1. Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Set style for better visualizations.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and
# newer releases no longer accept the bare 'seaborn' name, so use whichever
# name this installation actually provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
%matplotlib inline

## 2. Load and Merge Datasets

In [None]:
# Load all datasets
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
stores_df = pd.read_csv('stores.csv')
features_df = pd.read_csv('features.csv')

# Display basic information about datasets.
# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line after each report.
print("Train Dataset Info:")
train_df.info()
print("\nTest Dataset Info:")
test_df.info()
print("\nStores Dataset Info:")
stores_df.info()
print("\nFeatures Dataset Info:")
features_df.info()

## 3. Data Preprocessing

In [None]:
# Convert date columns to datetime
train_df['Date'] = pd.to_datetime(train_df['Date'])
test_df['Date'] = pd.to_datetime(test_df['Date'])
features_df['Date'] = pd.to_datetime(features_df['Date'])

# Merge datasets.
# Both right-hand tables are used as lookups, so each key should match at most
# one row there. validate='many_to_one' makes pandas raise MergeError if a key
# is duplicated, instead of silently multiplying the training rows.
train_data = train_df.merge(stores_df, on='Store', how='left',
                            validate='many_to_one')
train_data = train_data.merge(features_df, on=['Store', 'Date'], how='left',
                              validate='many_to_one')

# Sort by date
train_data = train_data.sort_values('Date')

## 4. Exploratory Data Analysis

In [None]:
# Time series plot of sales
plt.figure(figsize=(15, 6))
sns.lineplot(data=train_data, x='Date', y='Weekly_Sales')
plt.title('Weekly Sales Over Time')
plt.xticks(rotation=45)
plt.show()

# Sales distribution by store type
plt.figure(figsize=(10, 6))
sns.boxplot(data=train_data, x='Type', y='Weekly_Sales')
plt.title('Sales Distribution by Store Type')
plt.show()

## 5. Feature Engineering

In [None]:
# Extract time-based features
train_data['Year'] = train_data['Date'].dt.year
train_data['Month'] = train_data['Date'].dt.month
train_data['Week'] = train_data['Date'].dt.isocalendar().week

# Calculate lag features
train_data['Sales_Lag1'] = train_data.groupby('Store')['Weekly_Sales'].shift(1)
train_data['Sales_Lag2'] = train_data.groupby('Store')['Weekly_Sales'].shift(2)

## 6. Model Development - SARIMA

In [None]:
def train_sarima(data, store_id=1, order=(1, 1, 1), seasonal_order=(1, 1, 1, 52)):
    """Fit a SARIMA model to the weekly sales of one store.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with 'Store', 'Date' and 'Weekly_Sales' columns.
    store_id : default 1
        Value of the 'Store' column selecting the rows to model.
    order : tuple, default (1, 1, 1)
        Passed to SARIMAX as ``order``.
    seasonal_order : tuple, default (1, 1, 1, 52)
        Passed to SARIMAX as ``seasonal_order``.

    Returns
    -------
    The fitted results object returned by ``SARIMAX(...).fit()``.

    Raises
    ------
    ValueError
        If ``data`` has no rows for ``store_id``.
    """
    # Filter data for specific store
    store_rows = data[data['Store'] == store_id]
    if store_rows.empty:
        raise ValueError(f'No rows found for store_id={store_id!r}')

    # Build a date-indexed series so the model carries real timestamps instead
    # of the frame's leftover integer row labels. Rows sharing a date are summed
    # into one observation; with one row per date this is a no-op.
    # NOTE(review): assumes same-date rows are partial figures (e.g. per
    # department) that add up to the store total - confirm against the data.
    dates = pd.to_datetime(store_rows['Date'])
    weekly_sales = store_rows['Weekly_Sales'].groupby(dates).sum()

    # Attach an explicit frequency when the dates are evenly spaced
    # (pd.infer_freq needs at least three timestamps).
    if len(weekly_sales) >= 3:
        freq = pd.infer_freq(weekly_sales.index)
        if freq is not None:
            weekly_sales = weekly_sales.asfreq(freq)

    # Fit SARIMA model
    model = SARIMAX(weekly_sales,
                    order=order,
                    seasonal_order=seasonal_order)
    # disp=False keeps the optimiser's iteration log out of the notebook output.
    results = model.fit(disp=False)

    return results

# Fit the SARIMA model on store 1's sales history
sarima_model = train_sarima(data=train_data, store_id=1)

## 7. Model Development - Prophet

In [None]:
def train_prophet(data, store_id=1, yearly_seasonality=True,
                  weekly_seasonality=True, daily_seasonality=False):
    """Fit a Prophet model to the weekly sales of one store.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with 'Store', 'Date' and 'Weekly_Sales' columns.
    store_id : default 1
        Value of the 'Store' column selecting the rows to model.
    yearly_seasonality, weekly_seasonality, daily_seasonality
        Forwarded unchanged to the ``Prophet`` constructor.

    Returns
    -------
    The ``Prophet`` instance after ``fit`` has been called on it.

    Raises
    ------
    ValueError
        If ``data`` has no rows for ``store_id``.
    """
    # Filter data for specific store
    store_rows = data[data['Store'] == store_id]
    if store_rows.empty:
        raise ValueError(f'No rows found for store_id={store_id!r}')

    # One observation per date, in date order, using the 'ds'/'y' column names
    # that Prophet's fit() expects. Rows sharing a date are summed; with one
    # row per date this is a no-op.
    # NOTE(review): assumes same-date rows are partial figures (e.g. per
    # department) that add up to the store total - confirm against the data.
    prophet_data = (
        store_rows.groupby('Date', as_index=False)['Weekly_Sales'].sum()
        .rename(columns={'Date': 'ds', 'Weekly_Sales': 'y'})
    )

    # Initialize and train Prophet model.
    # NOTE(review): with a single observation per week a day-of-week effect
    # presumably cannot be estimated; consider weekly_seasonality=False - confirm.
    model = Prophet(yearly_seasonality=yearly_seasonality,
                    weekly_seasonality=weekly_seasonality,
                    daily_seasonality=daily_seasonality)
    model.fit(prophet_data)

    return model

# Fit the Prophet model on store 1's sales history
prophet_model = train_prophet(data=train_data, store_id=1)

## 8. Model Evaluation

In [None]:
def evaluate_models(y_true, y_pred_sarima, y_pred_prophet):
    """Print and return MAE, RMSE and MAPE for the SARIMA and Prophet forecasts.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred_sarima, y_pred_prophet : array-like
        Forecasts, in the same order and of the same length as ``y_true``.

    Returns
    -------
    dict
        ``{'SARIMA': {...}, 'Prophet': {...}}`` where each inner dict has the
        keys 'MAE', 'RMSE' and 'MAPE' (MAPE in percent).

    Notes
    -----
    Values are compared by position. MAPE is averaged only over points whose
    actual value is non-zero, and is NaN when every actual value is zero.
    """
    # Work on plain arrays: pandas objects would otherwise be subtracted by
    # index label, which misaligns forecasts whose index differs from y_true's.
    actual = np.asarray(y_true, dtype=float)
    # A percentage error is undefined where the actual value is zero.
    nonzero = actual != 0

    # Calculate metrics for both models
    metrics = {}

    for name, pred in [('SARIMA', y_pred_sarima), ('Prophet', y_pred_prophet)]:
        forecast = np.asarray(pred, dtype=float)

        mae = mean_absolute_error(actual, forecast)
        rmse = np.sqrt(mean_squared_error(actual, forecast))
        if nonzero.any():
            pct_error = (actual[nonzero] - forecast[nonzero]) / actual[nonzero]
            mape = np.mean(np.abs(pct_error)) * 100
        else:
            mape = float('nan')

        metrics[name] = {
            'MAE': mae,
            'RMSE': rmse,
            'MAPE': mape
        }

        print(f'{name} Metrics:')
        print(f'MAE: {mae:.2f}')
        print(f'RMSE: {rmse:.2f}')
        print(f'MAPE: {mape:.2f}%\n')

    return metrics

## 9. Visualization of Results

In [None]:
def plot_forecasts(actual, sarima_pred, prophet_pred, dates):
    """Show actual sales and both model forecasts on one Plotly line chart.

    Parameters
    ----------
    actual, sarima_pred, prophet_pred
        Y-values of the three lines (observed sales, SARIMA forecast,
        Prophet forecast).
    dates
        X-values shared by all three lines.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # (legend label, y-values, line colour) for each line, in drawing order
    line_specs = (
        ('Actual Sales', actual, 'blue'),
        ('SARIMA Forecast', sarima_pred, 'red'),
        ('Prophet Forecast', prophet_pred, 'green'),
    )

    figure = go.Figure()
    for label, values, colour in line_specs:
        trace = go.Scatter(x=dates, y=values, mode='lines',
                           name=label, line={'color': colour})
        figure.add_trace(trace)

    layout_options = {
        'title': 'Sales Forecasts Comparison',
        'xaxis_title': 'Date',
        'yaxis_title': 'Weekly Sales',
        'template': 'plotly_white',
    }
    figure.update_layout(**layout_options)

    figure.show()