In [1]:
!pip install pandas numpy joblib scikit-learn
!pip install prophet
!pip install streamlit pyngrok

Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.4.0-py3-none-any.whl.metadata (8.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m76.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.4.0-py3-none-any.whl (25 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m101.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, pydeck, streamlit
Successfully installed pydeck-0.9.1 pyngrok-7.4.0 streamlit-1.50.0


In [2]:
import pandas as pd
import numpy as np
from prophet import Prophet
import joblib
from sklearn.metrics import mean_absolute_percentage_error

try:
    df_train = pd.read_csv('/content/train.csv', low_memory=False)
    df_store = pd.read_csv('/content/store.csv', low_memory=False)
except FileNotFoundError:
    print(" ERROR: One or more data files (train.csv, store.csv) are missing.")
    exit()

df_merged = pd.merge(df_train, df_store, on='Store', how='left')

store_id = 1
df_prophet = df_merged[df_merged['Store'] == store_id].copy()

df_prophet = df_prophet[(df_prophet['Open'] == 1) & (df_prophet['Sales'] > 0)]

df_prophet = df_prophet.rename(columns={'Date': 'ds', 'Sales': 'y'})
df_prophet['ds'] = pd.to_datetime(df_prophet['ds'])

TEST_SIZE = 90
df_train_eval = df_prophet.iloc[:-TEST_SIZE]
df_test_eval = df_prophet.iloc[-TEST_SIZE:]

df_prophet.to_csv('full_store_data.csv', index=False)

print(f" Data prepared and split successfully. Training on {df_train_eval.shape[0]} days, testing on {df_test_eval.shape[0]} days.")

✅ Data prepared and split successfully. Training on 330 days, testing on 90 days.


In [3]:
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    y_true[y_true == 0] = 1e-6
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


model_eval = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    seasonality_mode='additive',
    n_changepoints=5
)

print("⏳ Starting model evaluation training (on Train_Eval Set)...")
model_eval.fit(df_train_eval)

future_test = df_test_eval[['ds']].copy()
forecast_test = model_eval.predict(future_test)

y_true = df_test_eval['y'].values
y_pred = forecast_test['yhat'].values
y_pred[y_pred < 0] = 0

mape = mean_absolute_percentage_error(y_true, y_pred)
rmse = sqrt(mean_squared_error(y_true, y_pred))
mae = mean_absolute_error(y_true, y_pred)

print("\n---  Model Performance Metrics on Test Set (Last 90 Days) ---")
print(f"RMSE (Root Mean Square Error): {rmse:.2f}")
print(f"MAE (Mean Absolute Error): {mae:.2f}")
print(f"MAPE (Mean Absolute Percentage Error): {mape:.2f}% (STABLE ADDITIVE MODEL)")
print("---------------------------------------------------------")



MAPE_SCORE = mape
joblib.dump(MAPE_SCORE, 'mape_score.pkl')

print("\n⏳ Retraining FINAL PRODUCTION model on FULL historical data...")
final_model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    seasonality_mode='additive',
    n_changepoints=5
)
final_model.fit(df_prophet)

MODEL_FILE_NAME = 'prophet_final_model_store_1.pkl'
joblib.dump(final_model, MODEL_FILE_NAME)

print(f" Final Production Model saved to: {MODEL_FILE_NAME}")
print(f" MAPE score saved to 'mape_score.pkl'.")

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmp0ha4ufof/_xunqk9a.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp0ha4ufof/lz19iauq.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.12/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=65304', 'data', 'file=/tmp/tmp0ha4ufof/_xunqk9a.json', 'init=/tmp/tmp0ha4ufof/lz19iauq.json', 'output', 'file=/tmp/tmp0ha4ufof/prophet_model1hkkoxzn/prophet_model-20250929235604.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
23:56:04 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
23:56:04 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


⏳ Starting model evaluation training (on Train_Eval Set)...


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmp0ha4ufof/xy8s5ro7.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp0ha4ufof/p4b17syq.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.12/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=92751', 'data', 'file=/tmp/tmp0ha4ufof/xy8s5ro7.json', 'init=/tmp/tmp0ha4ufof/p4b17syq.json', 'output', 'file=/tmp/tmp0ha4ufof/prophet_modelve_uuovz/prophet_model-20250929235604.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
23:56:04 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
23:56:04 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing



--- 📊 Model Performance Metrics on Test Set (Last 90 Days) ---
RMSE (Root Mean Square Error): 922.35
MAE (Mean Absolute Error): 787.05
MAPE (Mean Absolute Percentage Error): 17.74% (STABLE ADDITIVE MODEL)
---------------------------------------------------------

⏳ Retraining FINAL PRODUCTION model on FULL historical data...
✅ Final Production Model saved to: prophet_final_model_store_1.pkl
✅ MAPE score saved to 'mape_score.pkl'.


In [4]:
%%writefile app.py
import streamlit as st
import joblib
import pandas as pd
from prophet import Prophet
import plotly.graph_objects as go
import numpy as np

MODEL_FILE = 'prophet_final_model_store_1.pkl'
DATA_FILE = 'full_store_data.csv'
MAPE_FILE = 'mape_score.pkl'

@st.cache_resource
def load_assets():
    try:
        model = joblib.load(MODEL_FILE)
        df_history = pd.read_csv(DATA_FILE)
        df_history['ds'] = pd.to_datetime(df_history['ds'])
        mape_score = joblib.load(MAPE_FILE)
        return model, df_history, mape_score
    except Exception as e:
        st.error(f" ERROR loading assets: Ensure .pkl and .csv files exist after training. Error: {e}")
        return None, None, 0.0

model, df_history, mape_score = load_assets()

st.set_page_config(page_title="Sales Forecasting", layout="wide")

st.title("💰 Sales Forecast for Store 1 (Rossmann)")
st.markdown("This model provides a 60-day sales forecast, crucial for inventory and financial planning.")
st.markdown(f"**Model Performance (MAPE on Test Set): {mape_score:.2f}%**")
st.divider()

if model is not None and not df_history.empty:

    future_periods = 60
    future = model.make_future_dataframe(periods=future_periods, include_history=True)
    forecast = model.predict(future)


    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=df_history['ds'], y=df_history['y'], mode='lines', name='Actual Sales', line=dict(color='blue')
    ))

    fig.add_trace(go.Scatter(
        x=forecast['ds'], y=forecast['yhat'], mode='lines', name='Forecast', line=dict(color='red', dash='dash')
    ))

    fig.add_trace(go.Scatter(
        x=forecast['ds'], y=forecast['yhat_upper'], line=dict(color='rgba(255, 0, 0, 0)'), showlegend=False
    ))
    fig.add_trace(
        go.Scatter(
            x=forecast['ds'], y=forecast['yhat_lower'], fill='tonexty', fillcolor='rgba(255, 0, 0, 0.2)',
            line=dict(color='rgba(255, 0, 0, 0)'), name='80% Confidence Interval'
        )
    )

    last_historical_date = df_history['ds'].max()
    fig.add_vrect(
        x0=last_historical_date, x1=forecast['ds'].max(),
        fillcolor="yellow", opacity=0.1, line_width=0,
        annotation_text="Forecast Period", annotation_position="top left"
    )

    fig.update_layout(
        title='Historical Data vs. 60-Day Sales Forecast',
        yaxis_title="Sales",
        xaxis_title="Date",
        hovermode="x unified"
    )

    st.plotly_chart(fig, use_container_width=True)

    st.subheader("Numerical Forecast for the Next 60 Days")

    future_60_days = forecast[forecast['ds'] > last_historical_date]

    st.dataframe(
        future_60_days[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
        .rename(columns={'ds': 'Date', 'yhat': 'Forecasted Sales', 'yhat_lower': 'Lower Bound', 'yhat_upper': 'Upper Bound'}),
        hide_index=True
    )
else:
    st.warning(" Please ensure the model training and saving steps (Cells 2 & 3) have been executed successfully.")

Writing app.py


In [6]:
from pyngrok import ngrok
import subprocess
import time

ngrok.set_auth_token("33N8aDY3T5EQb72svd5Sy3jy3G8_4z8iguAK4mpfxkCd1Aah4") # تأكد أن هذا السطر يبدأ من أقصى اليسار

print(" Starting Streamlit deployment...")
p = subprocess.Popen(['streamlit', 'run', 'app.py'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

time.sleep(5)

try:
    public_url = ngrok.connect(8501)
    print(f"\n---  Streamlit App Deployed! ---")
    print(f"Public URL: {public_url}")
    print("\nClick the link above to access the forecasting app.")

except Exception as e:
    print(f"\n ngrok connection failed: {e}")
    print("Ensure you have a valid ngrok Authtoken and the line is uncommented.")

⏳ Starting Streamlit deployment...

---  Streamlit App Deployed! ---
Public URL: NgrokTunnel: "https://gratuitous-debroah-distraughtly.ngrok-free.dev" -> "http://localhost:8501"

Click the link above to access the forecasting app.
