In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from math import sqrt
import itertools
from tqdm.notebook import tqdm
from statsmodels.tsa.stattools import adfuller
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning

# Suppress specific warnings
warnings.simplefilter('ignore', ConvergenceWarning)
warnings.simplefilter('ignore', UserWarning)

In [2]:
# Load the data
data = pd.read_csv('/kaggle/input/inventory/inventory.csv')

In [3]:
# Filter data based on 'Material' Type and 'BFP' being 0
data = data[(data['Type'] == 'Material') & (data['BFP'] == 0)].copy()

# Convert the 'Date' column to datetime
data['Date'] = pd.to_datetime(data['Date'])

# Set the 'Date' column as the index
data.set_index('Date', inplace=True)

In [4]:
# Get unique material codes
material_codes = data['Material Code'].unique()

# Create a dictionary to hold DataFrames for each material code
material_data = {material: data[data['Material Code'] == material] for material in material_codes}


In [5]:
# Define the parameter grid for ARIMA
p = range(0, 5)
d = range(0, 2)
q = range(0, 5)
pdq = list(itertools.product(p, d, q))


In [6]:
# Initialize a DataFrame to store the results
arima_results = pd.DataFrame(columns=['Material Code', 'p', 'd', 'q', 'AIC', 'BIC', 'RMSE', 'MAE', 'MSE', 'MAPE'])

In [7]:
# Hyperparameter tuning for each material code
for material, df in tqdm(material_data.items(), total=len(material_data), desc="Processing Material Codes"):
    # Drop missing values
    df = df.dropna()
    if df.empty:
        print(f"No data available for material {material}")
        continue

    try:
        # Check for stationarity and apply differencing if necessary
        result = adfuller(df['Material Issued'])
        if result[1] > 0.05:
            print(f"Data for material {material} is non-stationary. Applying differencing.")
            df['Material Issued'] = df['Material Issued'].diff().dropna()
    except Exception as e:
        print(f"Error performing ADF test or differencing for material {material}: {e}")
        continue

    df = df.asfreq('D')
    # Initialize best metrics
    best_aic = float('inf')
    best_bic = float('inf')
    best_rmse = float('inf')
    best_mae = float('inf')
    best_mse = float('inf')
    best_mape = float('inf')
    best_params = None

    for param in pdq:
        try:
            # Fit ARIMA model
            model = ARIMA(df['Material Issued'], order=param)
            model_fit = model.fit()

            aic = model_fit.aic
            bic = model_fit.bic

            # Forecast the next steps
            forecast = model_fit.forecast(steps=len(df))

            # Calculate error metrics
            if len(df['Material Issued']) == len(forecast):
                rmse = np.sqrt(mean_squared_error(df['Material Issued'], forecast))
                mae = mean_absolute_error(df['Material Issued'], forecast)
                mse = mean_squared_error(df['Material Issued'], forecast)
                mape = mean_absolute_percentage_error(df['Material Issued'], forecast)

                # Update the best parameters if the current model is better
                if aic < best_aic:
                    best_aic = aic
                    best_bic = bic
                    best_rmse = rmse
                    best_mae = mae
                    best_mse = mse
                    best_mape = mape
                    best_params = param

        except Exception as e:
            print(f"Error fitting ARIMA model for material {material} with params {param}: {e}")
            continue

    # Check if best_params is not None before appending results
    if best_params is not None:
        # Store results
        result_dict = {
            'Material Code': material,
            'p': best_params[0],
            'd': best_params[1],
            'q': best_params[2],
            'AIC': best_aic,
            'BIC': best_bic,
            'RMSE': best_rmse,
            'MAE': best_mae,
            'MSE': best_mse,
            'MAPE': best_mape
        }

        # Print result for current material
        print(result_dict)

        # Append the results to the DataFrame
        arima_results = pd.concat([arima_results, pd.DataFrame([result_dict])], ignore_index=True)
    else:
        print(f"No valid ARIMA model found for material {material}")

Processing Material Codes:   0%|          | 0/642 [00:00<?, ?it/s]

{'Material Code': 10000033, 'p': 0, 'd': 0, 'q': 0, 'AIC': 3542.3834965371107, 'BIC': 3551.5336482183097, 'RMSE': 2.8535203565353724, 'MAE': 1.2459837512514422, 'MSE': 8.142578425161759, 'MAPE': 2735091133473816.0}


  arima_results = pd.concat([arima_results, pd.DataFrame([result_dict])], ignore_index=True)


{'Material Code': 10000304, 'p': 0, 'd': 1, 'q': 1, 'AIC': 5903.224929285138, 'BIC': 5912.372289619059, 'RMSE': 14.810530879823297, 'MAE': 2.4701253188991665, 'MSE': 219.3518249421994, 'MAPE': 5508491054411577.0}
{'Material Code': 10000314, 'p': 0, 'd': 0, 'q': 0, 'AIC': 2196.2064054776074, 'BIC': 2205.3565571588065, 'RMSE': 1.1160512172660322, 'MAE': 0.21879000686369482, 'MSE': 1.2455703195609922, 'MAPE': 492660064602233.06}
{'Material Code': 10000316, 'p': 0, 'd': 0, 'q': 0, 'AIC': 4383.526992851655, 'BIC': 4392.677144532855, 'RMSE': 5.1300988123315, 'MAE': 1.6384806871294297, 'MSE': 26.31791382428507, 'MAPE': 3630975101824885.5}
{'Material Code': 10000318, 'p': 4, 'd': 1, 'q': 4, 'AIC': 5419.849883654763, 'BIC': 5461.013005157409, 'RMSE': 10.817548192824972, 'MAE': 3.0343632187428446, 'MSE': 117.01934890409083, 'MAPE': 6630369617803515.0}
Error performing ADF test or differencing for material 10000320: Invalid input, x is constant
{'Material Code': 10000325, 'p': 0, 'd': 0, 'q': 0, 

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


{'Material Code': 10092088, 'p': 0, 'd': 0, 'q': 0, 'AIC': -1101.1621931072739, 'BIC': -1092.0120414260746, 'RMSE': 0.1119588968499353, 'MAE': 0.008351550071827, 'MSE': 0.012534794583854455, 'MAPE': 18794774391117.707}
{'Material Code': 10092104, 'p': 0, 'd': 0, 'q': 0, 'AIC': 181.8584028879822, 'BIC': 191.00855456918146, 'RMSE': 0.27392280010119396, 'MAE': 0.03332780359443067, 'MSE': 0.07503370041527868, 'MAPE': 75036310121984.11}
Error performing ADF test or differencing for material 10092237: Invalid input, x is constant
{'Material Code': 10093597, 'p': 0, 'd': 0, 'q': 2, 'AIC': 4124.553657870786, 'BIC': 4142.853961233184, 'RMSE': 5.908571529076196, 'MAE': 0.6322708968718181, 'MSE': 34.911217514209824, 'MAPE': 1421368848713953.2}
{'Material Code': 10093598, 'p': 0, 'd': 0, 'q': 2, 'AIC': 4024.5744685127593, 'BIC': 4042.874771875158, 'RMSE': 5.5905361642476326, 'MAE': 0.5888530273030508, 'MSE': 31.254094603760628, 'MAPE': 1324058874182884.2}
{'Material Code': 10093599, 'p': 2, 'd': 1

In [8]:
# Display the ARIMA model metrics DataFrame
display(arima_results)

# Optionally, save the DataFrame to a CSV file
arima_results.to_csv('arima_results.csv', index=False)


Unnamed: 0,Material Code,p,d,q,AIC,BIC,RMSE,MAE,MSE,MAPE
0,10000033,0,0,0,3542.383497,3551.533648,2.853520,1.245984,8.142578,2.735091e+15
1,10000304,0,1,1,5903.224929,5912.372290,14.810531,2.470125,219.351825,5.508491e+15
2,10000314,0,0,0,2196.206405,2205.356557,1.116051,0.218790,1.245570,4.926601e+14
3,10000316,0,0,0,4383.526993,4392.677145,5.130099,1.638481,26.317914,3.630975e+15
4,10000318,4,1,4,5419.849884,5461.013005,10.817548,3.034363,117.019349,6.630370e+15
...,...,...,...,...,...,...,...,...,...,...
387,10126369,0,0,0,-1890.872798,-1881.722646,0.064549,0.008328,0.004167,1.874239e+13
388,10128891,1,1,1,-2667.814954,-2654.093914,2.994260,2.993024,8.965594,1.346055e+16
389,10130819,2,0,4,997.754063,1034.354670,0.484240,0.068842,0.234488,1.549758e+14
390,10131757,3,0,0,2906.314064,2929.189443,1.898981,0.289305,3.606128,6.507432e+14
