In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning
import os

# Suppress RuntimeWarning
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.simplefilter('ignore', ConvergenceWarning)

In [2]:
# Create directories to save the plots
trend_seasonal_combinations = [('add', 'add'), ('add', 'mul'), ('mul', 'add'), ('mul', 'mul')]
for trend, seasonal in trend_seasonal_combinations:
    folder_name = f'holt_winters_{trend}_{seasonal}'
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

In [3]:
# Load the data
data = pd.read_csv('/kaggle/input/inventory/inventory.csv')

In [4]:
data = data[(data['Type'] == 'Material') & (data['BFP'] == 0)].copy()

# Convert the 'Date' column to datetime
data['Date'] = pd.to_datetime(data['Date'])

# Set the 'Date' column as the index
data.set_index('Date', inplace=True)

In [5]:
# Get unique material codes
material_codes = data['Material Code'].unique()

# Create a dictionary to hold DataFrames for each material code
material_data = {material: data[data['Material Code'] == material] for material in material_codes}


In [6]:
# Initialize a DataFrame to store the evaluation results
evaluation_results = pd.DataFrame(columns=['Material Code', 'Trend', 'Seasonal', 'MAE', 'MSE', 'RMSE', 'MAPE'])

In [7]:
# Calculate and plot Holt-Winters Exponential Smoothing for each material code
for material, df in tqdm(material_data.items(), total=len(material_data), desc="Processing Material Codes"):
    df = df.dropna()
    if df.empty:
        print(f"No data available for material {material}")
        continue

    # Ensure the index has a frequency
    df = df.asfreq('D')

    # Split the data into training and test sets
    train_size = int(len(df) * 0.8)
    train = df.iloc[:train_size].copy()
    test = df.iloc[train_size:].copy()

    for trend, seasonal in trend_seasonal_combinations:
        # Ensure data is strictly positive for multiplicative components
        if 'mul' in [trend, seasonal]:
            train_positive = train.copy()
            test_positive = test.copy()
            min_value = train_positive['Material Issued'].min()
            if min_value <= 0:
                train_positive['Material Issued'] += abs(min_value) + 1
                test_positive['Material Issued'] += abs(min_value) + 1
        else:
            train_positive = train
            test_positive = test

        # Fit the Holt-Winters model on the training set
        model = ExponentialSmoothing(train_positive['Material Issued'], trend=trend, seasonal=seasonal, seasonal_periods=30)
        fit = model.fit()

        # Forecast using the Holt-Winters model
        forecast = fit.forecast(steps=len(test_positive))
        test_positive[f'Holt-Winters_{trend}_{seasonal}'] = forecast

        # Ensure there are no NaN, infinity, or excessively large values in the forecast
        if test_positive[f'Holt-Winters_{trend}_{seasonal}'].isnull().any() or \
           np.isinf(test_positive[f'Holt-Winters_{trend}_{seasonal}']).any() or \
           np.isnan(test_positive[f'Holt-Winters_{trend}_{seasonal}']).any():
            print(f"Invalid values found in forecast for material {material} with trend {trend} and seasonal {seasonal}")
            continue

        # Calculate evaluation metrics
        mae = mean_absolute_error(test_positive['Material Issued'], test_positive[f'Holt-Winters_{trend}_{seasonal}'])
        mse = mean_squared_error(test_positive['Material Issued'], test_positive[f'Holt-Winters_{trend}_{seasonal}'])
        rmse = np.sqrt(mse)
        mape = mean_absolute_percentage_error(test_positive['Material Issued'], test_positive[f'Holt-Winters_{trend}_{seasonal}'])

        # Append the results to the DataFrame
        result = pd.DataFrame([{
            'Material Code': material,
            'Trend': trend,
            'Seasonal': seasonal,
            'MAE': mae,
            'MSE': mse,
            'RMSE': rmse,
            'MAPE': mape
        }])
        evaluation_results = pd.concat([evaluation_results, result], ignore_index=True)

        # Plot the original time series data and the Holt-Winters forecast
        plt.figure(figsize=(12, 6))
        plt.plot(train['Material Issued'], label='Training Data')
        plt.plot(test['Material Issued'], label='Test Data')
        plt.plot(test_positive.index, test_positive[f'Holt-Winters_{trend}_{seasonal}'], label=f'Holt-Winters Forecast ({trend}, {seasonal})', color='grey')
        plt.title(f'Holt-Winters Exponential Smoothing for Material Code: {material} ({trend}, {seasonal})')
        plt.xlabel('Date')
        plt.ylabel('Material Issued')
        plt.legend()

        # Save the plot in the corresponding folder
        folder_name = f'holt_winters_{trend}_{seasonal}'
        plt.savefig(f'{folder_name}/holt_winters_material_{material}_{trend}_{seasonal}.png')
        plt.close()  # Close the plot to free up memory


Processing Material Codes:   0%|          | 0/642 [00:00<?, ?it/s]

Invalid values found in forecast for material 10002341 with trend mul and seasonal add
Invalid values found in forecast for material 10099477 with trend mul and seasonal add
Invalid values found in forecast for material 10115514 with trend mul and seasonal add
Invalid values found in forecast for material 10120308 with trend mul and seasonal add


In [8]:
# Display the evaluation results
display(evaluation_results)

Unnamed: 0,Material Code,Trend,Seasonal,MAE,MSE,RMSE,MAPE
0,10000033,add,add,1.197805e+00,6.254003e+00,2.500801e+00,2.099525e+15
1,10000033,add,mul,1.286513e+00,6.411562e+00,2.532106e+00,7.546383e-01
2,10000033,mul,add,1.167460e+00,6.225251e+00,2.495045e+00,5.981829e-01
3,10000033,mul,mul,1.161841e+00,6.179822e+00,2.485925e+00,5.838557e-01
4,10000304,add,add,3.233464e+00,1.406148e+02,1.185811e+01,4.979341e+15
...,...,...,...,...,...,...,...
2559,10136729,mul,mul,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
2560,10136730,add,add,1.801048e-07,3.324888e-14,1.823428e-07,8.111199e+08
2561,10136730,add,mul,3.804056e-15,1.924184e-29,4.386552e-15,3.804056e-15
2562,10136730,mul,add,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00


In [9]:
evaluation_results.describe()

Unnamed: 0,MAE,MSE,RMSE,MAPE
count,2564.0,2564.0,2564.0,2564.0
mean,5.642658e+173,inf,inf,2.9349850000000004e+173
std,inf,,,inf
min,0.0,0.0,0.0,0.0
25%,3.804056e-15,1.924184e-29,4.386552e-15,3.804056e-15
50%,0.09301903,0.0759333,0.27556,0.8135117
75%,2.752279,31.2319,5.588547,811119900.0
max,1.446778e+177,inf,inf,7.525301e+176


In [10]:
# Optionally, save the evaluation results to a CSV file
evaluation_results.to_csv('evaluation_results_holt_winters.csv', index=False)