# Loading Modules

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt
from tqdm.notebook import tqdm
import os

# Creating Folders

In [2]:
folders = ['seasonal_decomposition',  'differenced', 'adf', 'arima']
for folder in folders:
  if not os.path.exists(folder):
    os.makedirs(folder)

# Loading Data

In [3]:
data = pd.read_csv('/kaggle/input/inventory/inventory.csv')

# Preparing Data

In [4]:
data = data[(data['Type'] == 'Material') & (data['BFP'] == 0)].copy()

In [5]:
data['Date'] = pd.to_datetime(data['Date'])
data['Material Code'] = data['Material Code'].astype(str)

In [6]:
data.set_index('Date', inplace=True)

In [7]:
data.head()

Unnamed: 0_level_0,Material Code,Material Description,Unit,Open Stock,Material Issued,Material Received,Closing Stock,Prefix,Type,PDT,BFP,Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2023-01-17,10000033,STRUCTURE; PMT 250/500 KVA COMPLETE,EA,80.0,0.0,0.0,80.0,1,Material,90,0,163999.11
2023-01-17,10000304,TAPE; RUBBER ETHYLENE PROPYLENE,EA,731.0,0.0,0.0,731.0,1,Material,120,0,1278.07
2023-01-17,10000314,JOINT; BOX HT ST 300MM2,EA,27.0,0.0,0.0,27.0,1,Material,120,0,18634.49
2023-01-17,10000316,TERMINATION; HT OUTDOOR 300MM2,EA,185.0,0.0,0.0,185.0,1,Material,120,0,12087.65
2023-01-17,10000318,TERMINATION; HT INDOOR 300MM2,EA,212.0,0.0,0.0,212.0,1,Material,120,0,10159.79


# Creating Material Wise Dataframe

In [8]:
# Create a dictionary to hold DataFrames for each material code
material_codes = data['Material Code'].unique()
material_data = {}
for material in tqdm(material_codes, desc="Creating Material DataFrames"):  # Add tqdm here
    material_data[material] = data[data['Material Code'] == material].sort_index(ascending=True)

Creating Material DataFrames:   0%|          | 0/642 [00:00<?, ?it/s]

# Seasonal Decomposition of each Material Code

In [9]:
# Set the default figure size
plt.rcParams['figure.figsize'] = (14, 8)
plt.ioff()

for material in tqdm(material_data.keys(), desc="Processing Materials"):  # Wrap with tqdm
    df = material_data[material].asfreq('D')
    decomposition = seasonal_decompose(df['Material Issued'], model='additive')
    decomposition.plot()
    # Adjust the position of the suptitle
    plt.subplots_adjust(top=0.9)
    plt.suptitle(f'Seasonal Decomposition for Material Code: {material}', y=0.95)
    plt.savefig(f'seasonal_decomposition/seasonal_decomposition_{material}.png')
    #plt.show()
    plt.close()

Processing Materials:   0%|          | 0/642 [00:00<?, ?it/s]

# The Augmented Dickey-Fuller (ADF) Test

In [10]:
# Check for stationarity for each material code and save results
adf_results = pd.DataFrame(columns=['MaterialCode', 'ADF Statistic', 'p-value', 
                                   'Critical Value 1%', 'Critical Value 5%', 'Critical Value 10%'])

for material in tqdm(material_data.keys(), desc="Performing ADF Tests"):
    df = material_data[material]
    
    try:
        result = adfuller(df['Material Issued'].dropna())
        # print(f'Material Code: {material}')
        # print('ADF Statistic:', result[0])
        # print('p-value:', result[1])
        # print('Critical Values:', result[4])
        # print('\n')

        # Create a temporary DataFrame for the current result
        temp_df = pd.DataFrame({
            'MaterialCode': [material],
            'ADF Statistic': [result[0]],
            'p-value': [result[1]],
            'Critical Value 1%': [result[4]['1%']],
            'Critical Value 5%': [result[4]['5%']],
            'Critical Value 10%': [result[4]['10%']]
        })

        # Concatenate the temporary DataFrame to adf_results
        adf_results = pd.concat([adf_results, temp_df], ignore_index=True)

    except ValueError as e:
        print(f"Error with Material Code: {material} - {e}")
        # You can add more specific handling here, like logging the error

# Display the ADF test results DataFrame
display(adf_results.head())

Performing ADF Tests:   0%|          | 0/642 [00:00<?, ?it/s]

  adf_results = pd.concat([adf_results, temp_df], ignore_index=True)


Error with Material Code: 10000320 - Invalid input, x is constant
Error with Material Code: 10000328 - Invalid input, x is constant
Error with Material Code: 10000362 - Invalid input, x is constant
Error with Material Code: 10000924 - Invalid input, x is constant
Error with Material Code: 10000995 - Invalid input, x is constant
Error with Material Code: 10001027 - Invalid input, x is constant
Error with Material Code: 10001183 - Invalid input, x is constant
Error with Material Code: 10001212 - Invalid input, x is constant
Error with Material Code: 10001534 - Invalid input, x is constant
Error with Material Code: 10001745 - Invalid input, x is constant
Error with Material Code: 10001752 - Invalid input, x is constant
Error with Material Code: 10001844 - Invalid input, x is constant
Error with Material Code: 10002010 - Invalid input, x is constant
Error with Material Code: 10002148 - Invalid input, x is constant
Error with Material Code: 10002166 - Invalid input, x is constant
Error with

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


Error with Material Code: 10097997 - Invalid input, x is constant
Error with Material Code: 10098041 - Invalid input, x is constant
Error with Material Code: 10098284 - Invalid input, x is constant
Error with Material Code: 10098345 - Invalid input, x is constant
Error with Material Code: 10098346 - Invalid input, x is constant
Error with Material Code: 10098347 - Invalid input, x is constant
Error with Material Code: 10098850 - Invalid input, x is constant
Error with Material Code: 10099375 - Invalid input, x is constant
Error with Material Code: 10099378 - Invalid input, x is constant
Error with Material Code: 10099406 - Invalid input, x is constant
Error with Material Code: 10099452 - Invalid input, x is constant
Error with Material Code: 10099455 - Invalid input, x is constant
Error with Material Code: 10099458 - Invalid input, x is constant
Error with Material Code: 10099459 - Invalid input, x is constant
Error with Material Code: 10099462 - Invalid input, x is constant
Error with

Unnamed: 0,MaterialCode,ADF Statistic,p-value,Critical Value 1%,Critical Value 5%,Critical Value 10%
0,10000033,-26.205909,0.0,-3.439516,-2.865585,-2.568924
1,10000304,-26.923568,0.0,-3.439516,-2.865585,-2.568924
2,10000314,-26.989575,0.0,-3.439516,-2.865585,-2.568924
3,10000316,-26.52545,0.0,-3.439516,-2.865585,-2.568924
4,10000318,-6.807485,2.158159e-09,-3.439713,-2.865672,-2.56897


In [11]:
adf_results.to_csv('adf/adf_results.csv', index=False)

# ARIMA Model

In [12]:
# Build, forecast, and evaluate the ARIMA model for each material code
results_df = pd.DataFrame(columns=['Material Code', 'MSE', 'RMSE'])  # DataFrame for results

for material in tqdm(material_data.keys(), desc="Building and Evaluating ARIMA Models"):
    try:
        df = material_data[material]
        df = df.asfreq('D')
        # Model Building and Summary (optional, you can comment this out)
        model = ARIMA(df['Material Issued'], order=(5, 1, 0), freq='D')
        model_fit = model.fit()
        # print(f'ARIMA Model Summary for Material Code: {material}')
        # print(model_fit.summary())
        # print('\n')

        # Save the ARIMA model summary to a text file
        with open(f'arima/arima_summary_{material}.txt', 'w') as f:
            f.write(f'ARIMA Model Summary for Material Code: {material}\n')
            f.write(str(model_fit.summary()))  # Write the summary to the file
            f.write('\n')

        # Forecasting (optional, you can comment this out)
        forecast = model_fit.forecast(steps=10)
        # print(f'Forecast for Material Code: {material}')
        # print(forecast)
        # print('\n')
        plt.ioff()
        plt.figure(figsize=(10, 6))
        plt.plot(df['Material Issued'], label='Observed')
        plt.plot(pd.Series(forecast, index=pd.date_range(start=df.index[-1], periods=10, freq='D')), label='Forecast')
        plt.title(f'Time Series Forecast for Material Code: {material}')
        plt.xlabel('Date')
        plt.ylabel('Value')
        plt.legend()
        plt.savefig(f'arima/time_series_forecast_{material}.png')
        #plt.show()
        plt.close()

        # Model Evaluation
        train_size = int(len(df) * 0.8)
        train, test = df['Material Issued'][0:train_size], df['Material Issued'][train_size:]
        model = ARIMA(train, order=(5, 1, 0))
        model_fit = model.fit()
        forecast = model_fit.forecast(steps=len(test))
        mse = mean_squared_error(test, forecast)
        rmse = sqrt(mse)

        # Store results in the DataFrame
        results_df = pd.concat([results_df, pd.DataFrame({'MaterialCode': [material], 'MSE': [mse], 'RMSE': [rmse]})], ignore_index=True)

    except Exception as e:
        print(f"Error with Material Code: {material} - {e}")

# Display the evaluation results DataFrame
display(results_df)

Building and Evaluating ARIMA Models:   0%|          | 0/642 [00:00<?, ?it/s]

  results_df = pd.concat([results_df, pd.DataFrame({'MaterialCode': [material], 'MSE': [mse], 'RMSE': [rmse]})], ignore_index=True)
  return self.params / self.bse
  test_statistic = numer_squared_sum / denom_squared_sum
  acf = avf[: nlags + 1] / avf[0]
  test_statistic = numer_squared_sum / denom_squared_sum
  return self.params / self.bse
  test_statistic = numer_squared_sum / denom_squared_sum
  acf = avf[: nlags + 1] / avf[0]
  return self.params / self.bse
  test_statistic = numer_squared_sum / denom_squared_sum
  acf = avf[: nlags + 1] / avf[0]
  test_statistic = numer_squared_sum / denom_squared_sum
  return self.params / self.bse
  test_statistic = numer_squared_sum / denom_squared_sum
  acf = avf[: nlags + 1] / avf[0]
  return self.params / self.bse
  test_statistic = numer_squared_sum / denom_squared_sum
  acf = avf[: nlags + 1] / avf[0]
  return self.params / self.bse
  test_statistic = numer_squared_sum / denom_squared_sum
  acf = avf[: nlags + 1] / avf[0]
  return self.pa

Unnamed: 0,Material Code,MSE,RMSE,MaterialCode
0,,6.869630,2.620998,10000033
1,,129.336486,11.372620,10000304
2,,4.451389,2.109831,10000314
3,,3.840896,1.959820,10000316
4,,431.416667,20.770572,10000318
...,...,...,...,...
637,,0.000000,0.000000,10136726
638,,0.000000,0.000000,10136727
639,,0.000000,0.000000,10136728
640,,0.000000,0.000000,10136729


In [13]:
results_df.to_csv('arima/arima_results.csv', index=False)