In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

import os

In [2]:
# Create a directory to save the plots
if not os.path.exists('sma'):
    os.makedirs('sma')

In [3]:
# Load the data
data = pd.read_csv('/kaggle/input/inventory/inventory.csv')

In [4]:

data = data[(data['Type'] == 'Material') & (data['BFP'] == 0)].copy()

# Convert the 'Date' column to datetime
data['Date'] = pd.to_datetime(data['Date'])

# Set the 'Date' column as the index
data.set_index('Date', inplace=True)


In [5]:
# Get unique material codes
material_codes = data['Material Code'].unique()

# Create a dictionary to hold DataFrames for each material code
material_data = {material: data[data['Material Code'] == material] for material in material_codes}

# Define the window size for the Simple Moving Average
window_size = 30  # Adjust as needed


In [6]:
# Initialize a DataFrame to store the evaluation results
evaluation_results = pd.DataFrame(columns=['Material Code', 'MAE', 'MSE', 'RMSE', 'MAPE'])

In [7]:
# Calculate and plot Simple Moving Average for each material code
for material, df in tqdm(material_data.items(), total=len(material_data), desc="Processing Material Codes"):
    df = df.dropna()
    if df.empty:
        print(f"No data available for material {material}")
        continue

    # Ensure the index has a frequency
    df = df.asfreq('D')

    # Split the data into training and test sets
    train_size = int(len(df) * 0.8)
    train = df.iloc[:train_size].copy()
    test = df.iloc[train_size:].copy()

    # Calculate Simple Moving Average on the training set
    train.loc[:, 'SMA'] = train['Material Issued'].rolling(window=window_size).mean()

    # Forecast using the SMA model
    test.loc[:, 'SMA'] = train['SMA'].iloc[-1]  # Use the last SMA value from the training set

    # Calculate evaluation metrics
    mae = mean_absolute_error(test['Material Issued'], test['SMA'])
    mse = mean_squared_error(test['Material Issued'], test['SMA'])
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(test['Material Issued'], test['SMA'])

    # Append the results to the DataFrame
    result = pd.DataFrame([{
        'Material Code': material,
        'MAE': mae,
        'MSE': mse,
        'RMSE': rmse,
        'MAPE': mape
    }])
    evaluation_results = pd.concat([evaluation_results, result], ignore_index=True)
    
    # Plot the original time series data and the Simple Moving Average
    plt.figure(figsize=(12, 6))
    plt.plot(train['Material Issued'], label='Training Data')
    plt.plot(test['Material Issued'], label='Test Data')
    plt.plot(test['SMA'], label=f'SMA (Window={window_size})', color='orange')
    plt.title(f'Simple Moving Average for Material Code: {material}')
    plt.xlabel('Date')
    plt.ylabel('Material Issued')
    plt.legend()

    # Save the plot in the 'sma' folder
    plt.savefig(f'sma/sma_material_{material}_{window_size}.png')
    plt.close()  # Close the plot to free up memory

    # Print the first few rows of the DataFrame with SMA
    # print(f'SMA forecasting of {material} is done.')
    #display(df.head())


Processing Material Codes:   0%|          | 0/642 [00:00<?, ?it/s]

  evaluation_results = pd.concat([evaluation_results, result], ignore_index=True)


In [8]:
# Display the evaluation results
display(evaluation_results)

# Optionally, save the evaluation results to a CSV file
evaluation_results.to_csv(f'evaluation_results_{window_size}.csv', index=False)

Unnamed: 0,Material Code,MAE,MSE,RMSE,MAPE
0,10000033,1.169444,5.957222,2.440742,2.545785e+15
1,10000304,2.319444,129.564815,11.382654,2.376900e+15
2,10000314,0.380556,4.404444,2.098677,4.409775e+14
3,10000316,1.237500,3.676111,1.917319,1.970325e+15
4,10000318,3.811111,426.438148,20.650379,3.089970e+15
...,...,...,...,...,...
637,10136726,0.000000,0.000000,0.000000,0.000000e+00
638,10136727,0.000000,0.000000,0.000000,0.000000e+00
639,10136728,0.000000,0.000000,0.000000,0.000000e+00
640,10136729,0.000000,0.000000,0.000000,0.000000e+00
