In [1]:
# 🛒 FMCG Sales Forecasting - Streamlit Dashboard

# --- 1. Install dependencies: streamlit, prophet, xgboost (notebook-only shell command; remove the line below when saving as app.py) ---
!pip install streamlit prophet xgboost --quiet

# --- 2. Save the code below as app.py when deploying (start it with `streamlit run app.py`) ---
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import statsmodels.api as sm
from prophet import Prophet
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt

# --- 3. Load dataset ---
data_path = "/kaggle/input/fmcg-daily-sales-data-to-2022-2024/"


@st.cache_data
def load_sales(csv_path):
    """Read the sales CSV and aggregate it to one revenue figure per day.

    Streamlit re-runs this script from the top on every widget interaction;
    caching keeps the CSV parse and the group-by from being repeated on each
    rerun. The cache is keyed on ``csv_path`` only, so a file that changes on
    disk under the same path is not re-read until the cache is cleared.

    Args:
        csv_path: Path to a CSV with ``date``, ``units_sold`` and
            ``price_unit`` columns.

    Returns:
        Tuple ``(raw, per_day)``: the row-level frame with ``date`` parsed to
        datetime and a ``revenue`` column added, and a frame with one row per
        date holding ``date`` and ``daily_revenue`` (the summed revenue).
    """
    raw = pd.read_csv(csv_path)
    raw['date'] = pd.to_datetime(raw['date'])
    raw['revenue'] = raw['units_sold'] * raw['price_unit']

    per_day = (
        raw.groupby('date')['revenue']
        .sum()
        .reset_index()
        .rename(columns={'revenue': 'daily_revenue'})
    )
    return raw, per_day


df, daily = load_sales(os.path.join(data_path, "FMCG_2022_2024.csv"))

# --- 4. Streamlit UI ---
st.title("📊 FMCG Sales Forecasting Dashboard (2022–2024)")

# Sidebar controls: the model to run and the horizon length in days
# (30 to 180, defaulting to 90).
with st.sidebar:
    st.header("⚙️ Settings")
    model_choice = st.selectbox(
        "Select Forecast Model",
        ["ARIMA", "Prophet", "XGBoost"],
    )
    forecast_days = st.slider(
        "Forecast Horizon (days)",
        min_value=30,
        max_value=180,
        value=90,
    )

# Plot the full daily revenue history against its date.
st.write("### Historical Daily Revenue")
st.line_chart(daily.set_index("date")["daily_revenue"])

# --- 5. Train/Test Split ---
# Hold out the final `forecast_days` rows for evaluation; everything before
# them is used for fitting.
test = daily.tail(forecast_days)
train = daily.head(-forecast_days)

# --- 6. Forecasting Functions ---
def evaluate(y_true, y_pred):
    """Score a forecast against observed values.

    The two inputs are compared position by position. pandas objects are
    converted to plain arrays first, so an integer-indexed actual series and
    a date-indexed prediction are scored element-wise instead of being
    aligned on their (non-matching) index labels, which would yield NaN.

    Args:
        y_true: Observed values (1-D array-like).
        y_pred: Predicted values, same length and order as ``y_true``.

    Returns:
        Tuple ``(rmse, mae, mape)`` of floats. ``mape`` is a percentage
        computed only over observations whose actual value is non-zero; it
        is ``nan`` when every actual value is zero.

    Raises:
        ValueError: If the inputs are empty, differ in length, or contain
            NaN/infinite values.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.size == 0 or actual.shape != predicted.shape:
        raise ValueError(
            "y_true and y_pred must be non-empty and of equal length "
            f"(got {actual.size} and {predicted.size})"
        )
    if not (np.isfinite(actual).all() and np.isfinite(predicted).all()):
        raise ValueError("y_true and y_pred must not contain NaN or infinity")

    errors = actual - predicted
    rmse = float(np.sqrt(np.mean(errors ** 2)))
    mae = float(np.mean(np.abs(errors)))

    # A percentage error is undefined where the actual value is zero, so
    # those observations are left out rather than producing inf.
    nonzero = actual != 0
    if nonzero.any():
        mape = float(np.mean(np.abs(errors[nonzero] / actual[nonzero])) * 100)
    else:
        mape = float("nan")
    return rmse, mae, mape

# Fit the selected model on the training window and predict the hold-out
# window. Every branch leaves behind three positionally aligned objects:
#   actual_values - observed revenue for the scored rows (ndarray)
#   y_pred        - the model's predictions for those rows (ndarray)
#   chart_dates   - the dates of those rows, used as the chart index
# Plain arrays are handed to evaluate() so that scoring never depends on
# pandas index alignment (the Prophet predictions are date-indexed while the
# actuals are integer-indexed).
if model_choice == "ARIMA":
    model = sm.tsa.ARIMA(train['daily_revenue'], order=(5, 1, 2))
    fit = model.fit()
    forecast = fit.forecast(steps=forecast_days)
    y_pred = np.asarray(forecast)
    actual_values = test['daily_revenue'].to_numpy()
    chart_dates = test['date']
    chart_title = "ARIMA Forecast"

elif model_choice == "Prophet":
    # Prophet expects the columns to be named `ds` (date) and `y` (target).
    prophet_df = train.rename(columns={'date': 'ds', 'daily_revenue': 'y'})
    model = Prophet(yearly_seasonality=True)
    model.fit(prophet_df)
    future = model.make_future_dataframe(periods=forecast_days)
    forecast = model.predict(future)
    # Select the predictions for exactly the hold-out dates, in test order.
    y_pred = (
        forecast[['ds', 'yhat']]
        .set_index('ds')
        .loc[test['date']]['yhat']
        .to_numpy()
    )
    actual_values = test['daily_revenue'].to_numpy()
    chart_dates = test['date']
    chart_title = "Prophet Forecast"

elif model_choice == "XGBoost":
    # Features are the revenue 1, 7 and 30 days earlier; rows without a
    # complete set of lags are dropped.
    lag_df = daily.copy()
    for lag in [1, 7, 30]:
        lag_df[f"lag_{lag}"] = lag_df['daily_revenue'].shift(lag)
    lag_df = lag_df.dropna()
    train_lag = lag_df.iloc[:-forecast_days]
    test_lag = lag_df.iloc[-forecast_days:]
    # NOTE(review): the lags are shifted from the full `daily` series, so
    # hold-out rows are predicted from observed revenue inside the hold-out
    # window (one-step-ahead), unlike ARIMA/Prophet which forecast the whole
    # horizon from the training data alone. Metrics are not like-for-like.
    X_train, y_train = train_lag.drop(['date', 'daily_revenue'], axis=1), train_lag['daily_revenue']
    X_test, y_test = test_lag.drop(['date', 'daily_revenue'], axis=1), test_lag['daily_revenue']
    xgb = XGBRegressor(n_estimators=200, learning_rate=0.1, max_depth=5)
    xgb.fit(X_train, y_train)
    y_pred = xgb.predict(X_test)
    actual_values = y_test.to_numpy()
    chart_dates = test_lag['date']
    chart_title = "XGBoost Forecast"

# Score the hold-out predictions, then chart actual vs. forecast by date.
rmse, mae, mape = evaluate(actual_values, y_pred)
st.subheader(chart_title)
st.line_chart(pd.DataFrame({"Actual": actual_values,
                            "Forecast": y_pred},
                           index=chart_dates))

# --- 7. Display Metrics ---
st.write("### 📈 Model Performance")

# One line per metric, each rendered to two decimal places.
for template, metric_value in (
    ("**RMSE:** {:.2f}", rmse),
    ("**MAE:** {:.2f}", mae),
    ("**MAPE:** {:.2f}%", mape),
):
    st.write(template.format(metric_value))

st.success("✅ Forecasting complete!")


[0m[31mERROR: Could not find a version that satisfies the requirement streamlit (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for streamlit[0m[31m
[0m

ModuleNotFoundError: No module named 'streamlit'