# Retail Sales Prediction & Dashboard
This notebook performs EDA and time-series forecasting on `RetailSales.csv` and creates outputs suitable for the internship submission.
Includes:
- Data loading & cleaning
- Exploratory Data Analysis (tables + plots)
- Time-series forecasting with Prophet (falls back to SARIMAX if Prophet cannot be imported)
- Save forecast to `forecast.csv`

Run all cells in order.

In [None]:
# 1. Load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import warnings
warnings.filterwarnings('ignore')

# Forecasting backend selection.
# Prophet is tried first; if importing it fails for any reason (the handler
# catches Exception, not just ImportError), SARIMAX from statsmodels is
# imported instead. PROPHET_AVAILABLE records which of the two happened.
# Note: SARIMAX is imported only on the fallback path, so the name is not
# defined when Prophet imports successfully.
try:
    from prophet import Prophet
    PROPHET_AVAILABLE = True
except Exception:
    PROPHET_AVAILABLE = False
    from statsmodels.tsa.statespace.sarimax import SARIMAX

# Show which backend was selected.
print('Prophet available:', PROPHET_AVAILABLE)


In [None]:
# 2. Load dataset
# Read the raw retail sales CSV into `df`, print its (rows, columns) shape,
# and preview the first rows as the cell output.
csv_path = '/mnt/data/RetailSales.csv'
df = pd.read_csv(csv_path)
print(f'Shape: {df.shape}')
df.head()


In [None]:
# 3. Preprocess
# Parse `Date` as day-first dates; values that cannot be parsed become NaT
# instead of raising. Rows are then ordered chronologically.
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
df = df.sort_values('Date')
# Ensure numeric: entries that are not numbers become NaN instead of raising.
df['Total_Sales'] = pd.to_numeric(df['Total_Sales'], errors='coerce')
# errors='coerce' hides bad input, so report how many values ended up missing
# (this count also includes values that were already empty in the file).
print('Missing/unparseable Date values:', int(df['Date'].isna().sum()))
print('Missing/non-numeric Total_Sales values:', int(df['Total_Sales'].isna().sum()))
print('Date range:', df['Date'].min(), 'to', df['Date'].max())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in display() only added a stray "None" to the cell output.
df.info()


In [None]:
# 4. Aggregate to daily sales
# Sum `Total_Sales` per calendar day. Grouping on the normalized date (time of
# day set to midnight) keeps rows that carry a time component in their day's
# total; grouping on the raw timestamp would give each distinct time its own
# group, and asfreq('D') would then drop the ones that are off the daily grid.
# Rows whose Date is NaT are excluded by groupby.
daily = (
    df.groupby(df['Date'].dt.normalize())['Total_Sales']
    .sum()
    .rename('Sales')
    .to_frame()
)
# Reindex to a continuous daily calendar; days with no rows are treated as
# zero sales.
daily = daily.asfreq('D').fillna(0)
daily.head()


In [None]:
# 5. EDA - plots
# Line chart of the aggregated daily sales series over time.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(daily.index, daily['Sales'])
ax.set_title('Daily Total Sales')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
fig.tight_layout()
plt.show()


In [None]:
# Category-wise summary
# Total sales per category, largest first, as a one-column frame.
cat = (
    df.groupby('Category')['Total_Sales']
    .sum()
    .sort_values(ascending=False)
    .to_frame(name='Total_Sales')
)
cat


In [None]:
# Top products
# The ten products with the highest summed sales, largest first.
top_products = (
    df.groupby('Product_Name')['Total_Sales']
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .to_frame(name='Total_Sales')
)
top_products


In [None]:
# 6. Forecasting - next 30 days
# Builds `forecast`: one row per future day (the H days after the last date in
# `daily`) with columns Forecast / Lower / Upper, and writes it to CSV.
# Both backends produce this same layout so that forecast.csv does not depend
# on which library happened to be installed.
H = 30
FORECAST_PATH = '/mnt/data/forecast.csv'
if PROPHET_AVAILABLE:
    m = Prophet()
    # Prophet requires the columns to be named `ds` (date) and `y` (value).
    prophet_df = daily.reset_index().rename(columns={'Date':'ds','Sales':'y'})
    m.fit(prophet_df)
    future = m.make_future_dataframe(periods=H)
    fcst = m.predict(future)[['ds','yhat','yhat_lower','yhat_upper']].set_index('ds')
    # `future` holds the historical dates followed by the H new ones, so only
    # the last H rows are the actual forecast. Column names and the unnamed
    # index are aligned with the SARIMAX branch below.
    # NOTE(review): Prophet's default interval width (80%) differs from the
    # default 95% interval returned by conf_int() in the SARIMAX branch —
    # confirm whether the bands should be put on the same level.
    forecast = (
        fcst.tail(H)
        .rename(columns={'yhat':'Forecast','yhat_lower':'Lower','yhat_upper':'Upper'})
        .rename_axis(None)
    )
else:
    # SARIMAX fallback: simple approach
    series = daily['Sales'].fillna(0)
    # Fit a simple SARIMAX(1,1,1)(1,1,1,7) to capture weekly seasonality
    model = SARIMAX(series, order=(1,1,1), seasonal_order=(1,1,1,7), enforce_stationarity=False, enforce_invertibility=False)
    res = model.fit(disp=False)
    pred = res.get_forecast(steps=H)
    # Compute the confidence bounds once and reuse them for both columns.
    bounds = pred.conf_int()
    # Future dates start the day after the last observed date.
    idx = pd.date_range(start=series.index[-1]+pd.Timedelta(days=1), periods=H, freq='D')
    forecast = pd.DataFrame({
        'Forecast': pred.predicted_mean.values,
        'Lower': bounds.iloc[:,0].values,
        'Upper': bounds.iloc[:,1].values
    }, index=idx)

forecast.to_csv(FORECAST_PATH)
print(f'Forecast saved to {FORECAST_PATH}')
forecast.head()


## Conclusions & Next steps
- The forecast for the next 30 days is saved as `forecast.csv`.
- To build a dashboard, see `app.py` included in the project zip.