In [1]:
!pip install statsmodels
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_percentage_error

data = pd.read_csv('2023-2024-Horana-cleaned.csv') 
data['DATE'] = pd.to_datetime(data['DATE'])
data['NetAmount'] = data['NetAmount'].abs()

daily_sales = data.groupby(['DATE', 'SalesPersonCode'])['NetAmount'].sum().reset_index()

def evaluate_model_daily(sales_code):
    person_data = daily_sales[daily_sales['SalesPersonCode'] == sales_code]
    person_data = person_data.set_index('DATE').asfreq('D').fillna(0)  # Ensure daily frequency

    train = person_data.iloc[:-7]  # Training data
    test = person_data.iloc[-7:]   # Last 7 days for evaluation

    try:
        model = ARIMA(train['NetAmount'], order=(5, 1, 0))
        model_fit = model.fit()
        forecast = model_fit.forecast(steps=7)

        results_df = pd.DataFrame({
            'DATE': pd.date_range(start=test.index[-1] + pd.Timedelta(days=1), periods=7, freq='D'),
            'Predicted NetAmount': forecast.values
        })

        mape = mean_absolute_percentage_error(test['NetAmount'], forecast) * 100
        print(f"Model Evaluation (ARIMA) for {sales_code}: MAPE: {mape:.2f}%")

        if mape > 40:
            print(f"High MAPE ({mape:.2f}%). Trying ETS model for {sales_code}.")
            return evaluate_ets_daily(sales_code, test)

        return results_df

    except Exception as e:
        print(f"ARIMA failed for {sales_code} due to {e}. Switching to ETS...")
        return evaluate_ets_daily(sales_code, test)

def evaluate_ets_daily(sales_code, test):
    person_data = daily_sales[daily_sales['SalesPersonCode'] == sales_code]
    
    if len(person_data) < 10:
        print(f"Not enough data for {sales_code}, using mean-based prediction.")
        mean_forecast = [person_data['NetAmount'].mean()] * 7
        return pd.DataFrame({'DATE': pd.date_range(start=test.index[-1] + pd.Timedelta(days=1), periods=7, freq='D'),
                             'Predicted NetAmount': mean_forecast})

    train = person_data.iloc[:-7]

    model = ExponentialSmoothing(train['NetAmount'], trend="add", seasonal=None)
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=7)

    results_df = pd.DataFrame({
        'DATE': pd.date_range(start=test.index[-1] + pd.Timedelta(days=1), periods=7, freq='D'),
        'Predicted NetAmount': forecast.values
    })

    mape = mean_absolute_percentage_error(test['NetAmount'], forecast) * 100
    print(f"Model Evaluation (ETS) for {sales_code}: MAPE: {mape:.2f}%")

    return results_df

sales_code = "254"  
forecast_results = evaluate_model_daily(sales_code)
print(forecast_results)


Model Evaluation (ARIMA) for 254: MAPE: 29.00%
        DATE  Predicted NetAmount
0 2025-01-01        173583.292294
1 2025-01-02        171408.384007
2 2025-01-03        179209.143717
3 2025-01-04        191182.312598
4 2025-01-05        178874.497916
5 2025-01-06        181174.418667
6 2025-01-07        179734.586818


In [3]:
import pandas as pd
import numpy as np
import shap
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_percentage_error

data = pd.read_csv('2023-2024-Horana-cleaned.csv') 
data['DATE'] = pd.to_datetime(data['DATE'])
data['NetAmount'] = data['NetAmount'].abs()

daily_sales = data.groupby(['DATE', 'SalesPersonCode'])['NetAmount'].sum().reset_index()

def evaluate_model_daily(sales_code):
    person_data = daily_sales[daily_sales['SalesPersonCode'] == sales_code]
    person_data = person_data.set_index('DATE').asfreq('D').fillna(0)  # Ensure daily frequency

    train = person_data.iloc[:-7] 
    test = person_data.iloc[-7:]   

    try:
        model = ARIMA(train['NetAmount'], order=(5, 1, 0))
        model_fit = model.fit()
        forecast = model_fit.forecast(steps=7)

        results_df = pd.DataFrame({
            'DATE': pd.date_range(start=test.index[-1] + pd.Timedelta(days=1), periods=7, freq='D'),
            'Predicted NetAmount': forecast.values
        })

        mape = mean_absolute_percentage_error(test['NetAmount'], forecast) * 100
        print(f"Model Evaluation (ARIMA) for {sales_code}: MAPE: {mape:.2f}%")

        if mape > 40:
            print(f"High MAPE ({mape:.2f}%). Trying ETS model for {sales_code}.")
            return evaluate_ets_daily(sales_code, test)

        generate_shap_report(model_fit, train, test)

        return results_df

    except Exception as e:
        print(f"ARIMA failed for {sales_code} due to {e}. Switching to ETS...")
        return evaluate_ets_daily(sales_code, test)

def evaluate_ets_daily(sales_code, test):
    person_data = daily_sales[daily_sales['SalesPersonCode'] == sales_code]
    
    if len(person_data) < 10:
        print(f"Not enough data for {sales_code}, using mean-based prediction.")
        mean_forecast = [person_data['NetAmount'].mean()] * 7
        return pd.DataFrame({'DATE': pd.date_range(start=test.index[-1] + pd.Timedelta(days=1), periods=7, freq='D'),
                             'Predicted NetAmount': mean_forecast})

    train = person_data.iloc[:-7]

    model = ExponentialSmoothing(train['NetAmount'], trend="add", seasonal=None)
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=7)

    results_df = pd.DataFrame({
        'DATE': pd.date_range(start=test.index[-1] + pd.Timedelta(days=1), periods=7, freq='D'),
        'Predicted NetAmount': forecast.values
    })

    mape = mean_absolute_percentage_error(test['NetAmount'], forecast) * 100
    print(f"Model Evaluation (ETS) for {sales_code}: MAPE: {mape:.2f}%")

    return results_df

def generate_shap_report(model_fit, train, test):
    explainer = shap.Explainer(model_fit.predict, train['NetAmount'])
    shap_values = explainer(test['NetAmount'])

    plt.figure(figsize=(10, 6))
    shap.summary_plot(shap_values, show=False)
    plt.title("SHAP Report for ARIMA Model")
    plt.show()

sales_code = "265"  
forecast_results = evaluate_model_daily(sales_code)
print(forecast_results)


Model Evaluation (ARIMA) for 265: MAPE: 34.30%
ARIMA failed for 265 due to tuple index out of range. Switching to ETS...
Model Evaluation (ETS) for 265: MAPE: 33.24%
        DATE  Predicted NetAmount
0 2025-01-01        149007.728762
1 2025-01-02        151650.037461
2 2025-01-03        154292.346159
3 2025-01-04        156934.654858
4 2025-01-05        159576.963557
5 2025-01-06        162219.272255
6 2025-01-07        164861.580954


  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


In [5]:
import shap

explainer = shap.DeepExplainer(model, X_train)
shap_values = explainer.shap_values(X_test)

shap.initjs()
shap.force_plot(explainer.expected_value[0], shap_values[0][0], X_test[0], feature_names=["NetAmount"])

shap.summary_plot(shap_values, X_test, feature_names=["NetAmount"])

shap.dependence_plot(0, shap_values[0], X_test, feature_names=["NetAmount"])

Expected: keras_tensor
Received: inputs=['Tensor(shape=(9, 12, 1))']
Expected: keras_tensor
Received: inputs=['Tensor(shape=(18, 12, 1))']


StagingError: in user code:

    File "/home/pasiya/sales_forcast/jupytercode/tf/lib/python3.10/site-packages/shap/explainers/_deep/deep_tf.py", line 269, in grad_graph  *
        x_grad = tape.gradient(out, shap_rAnD)

    LookupError: gradient registry has no entry for: shap_TensorListStack
