In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import datetime
from prophet import Prophet,plot

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def forecast_accuracy(forecast, actual):
    """Return the mean absolute percentage error (MAPE) of a forecast.

    The error of each point is ``|forecast - actual| / |actual|``; the
    function returns the mean of those ratios. The result is a fraction
    (e.g. ``0.1`` for 10 %), it is not multiplied by 100.

    Parameters
    ----------
    forecast, actual
        Objects supporting element-wise ``-`` and ``/`` with each other and
        accepted by ``np.abs`` / ``np.mean`` (e.g. NumPy arrays or pandas
        Series).

    Returns
    -------
    The mean of the element-wise absolute relative errors.
    """
    # No guard against zeros in `actual`: those points divide by zero.
    absolute_error = np.abs(forecast - actual)
    relative_error = absolute_error / np.abs(actual)
    return np.mean(relative_error)

In [3]:
# Base directory for the paths built in this notebook: the process's current
# working directory. The bare expression below echoes it as the cell output.
directory = os.getcwd()
directory

'c:\\Users\\Shraddha.Mishra\\OneDrive - Shell\\Career_development\\Projects\\Ultimate Potential South Africa'

In [4]:
# Sub-folder and file name of the input CSV, joined onto `directory`.
folder = 'EDA'
filename = 'ProcessedData.csv'
# NOTE(review): despite its name, `write_path` is only read from (pd.read_csv)
# in the visible cells, never written to.
write_path = os.path.join(directory,folder,filename)

In [5]:
# Load the CSV located at `write_path` into the raw frame `df`.
df = pd.read_csv(write_path)

In [6]:
# Reducer applied to each column when the rows of one
# ('Material Number', 'date') group are collapsed into a single row.
# The key order also fixes the column order of the aggregated frame.
agg = {
    'Sales Volumes in L15': 'sum',
    'Holiday Flag': 'max',
    'Holiday Desc': 'max',
    'LockDown Flag': 'max',
    'Fuel Price': 'mean',
    'Loyalty Flag': 'max',
    'Estimated Budget in ZAR': 'sum',
    'Marketing Flag': 'max',
    'Estimated Budget in USD': 'sum',
}

# One row per material and date: aggregate, turn the group keys back into
# columns, parse 'date' as datetime, then order by material and date.
sales_data = (
    df.groupby(['Material Number', 'date'])
    .agg(agg)
    .reset_index()
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format="%Y-%m-%d"))
    .sort_values(by=['Material Number', 'date'])
)
sales_data

Unnamed: 0,Material Number,date,Sales Volumes in L15,Holiday Flag,Holiday Desc,LockDown Flag,Fuel Price,Loyalty Flag,Estimated Budget in ZAR,Marketing Flag,Estimated Budget in USD
0,400003139,2019-01-01,1432540.057,1.0,New Year's Day,0,14.945,1,2.127600e+09,1,1.517810e+08
1,400003139,2019-01-02,2818215.807,0.0,,0,13.715,1,9.536400e+09,1,6.803179e+08
2,400003139,2019-01-03,2392044.042,0.0,,0,13.715,1,2.408400e+09,1,1.718130e+08
3,400003139,2019-01-04,2434184.613,0.0,,0,13.715,1,2.332800e+09,1,1.664198e+08
4,400003139,2019-01-05,2805953.994,0.0,,0,13.715,1,2.613600e+09,1,1.864518e+08
...,...,...,...,...,...,...,...,...,...,...,...
9386,400006090,2024-03-14,978908.307,0.0,,0,22.620,0,0.000000e+00,1,2.320809e+08
9387,400006090,2024-03-15,1090319.624,0.0,,0,22.620,0,0.000000e+00,1,2.378035e+08
9388,400006090,2024-03-16,893323.043,0.0,,0,22.620,0,0.000000e+00,1,2.346243e+08
9389,400006090,2024-03-17,601858.116,0.0,,0,22.620,0,0.000000e+00,1,2.231792e+08


In [None]:
# 'Fuel Price L1': the fuel price of the preceding row within each
# 'Material Number' group. This is the previous day's price only when the
# material has a row for every consecutive date — TODO confirm there are no gaps.
sales_data['Fuel Price L1'] = sales_data.groupby(['Material Number'])['Fuel Price'].shift(1)

# 'Fuel Price PCT Change': percentage change against 'Fuel Price L1',
#   ((current price - lagged price) / lagged price) * 100
# The first row of each material has no lagged price (NaN); those are set to 0.
# The result is assigned back explicitly instead of calling
# `fillna(..., inplace=True)` on the selected column: that chained in-place
# form is deprecated and does not update `sales_data` under pandas Copy-on-Write.
sales_data['Fuel Price PCT Change'] = (
    ((sales_data['Fuel Price'] - sales_data['Fuel Price L1']) / sales_data['Fuel Price L1']) * 100
).fillna(0)

In [None]:
# Backtest windows. Position k of each list describes one window:
#   train_date[k] - last date included in the training data
#   test_date[k]  - first date of the evaluation range
#   end_date[k]   - last date of the evaluation range
train_date = ['2023-11-30', '2023-12-31', '2024-01-31', '2024-02-29']
test_date = ['2023-12-01', '2024-01-01', '2024-02-01', '2024-03-01']
end_date = ['2023-12-31', '2024-01-31', '2024-02-29', '2024-03-17']

# Number of days in each evaluation range, both ends included
# (evaluates to [31, 31, 29, 17]).
periods = [
    (pd.Timestamp(last_day) - pd.Timestamp(first_day)).days + 1
    for first_day, last_day in zip(test_date, end_date)
]

# Distinct material numbers present in the aggregated sales data.
material_list = sales_data['Material Number'].unique()

In [None]:
# Building Baseline Prophet Model
#
# For every backtest window i and every material x:
#   1. fit a default Prophet model on that material's sales up to train_date[i],
#   2. forecast periods[i] rows beyond the training data,
#   3. keep the forecast rows within [test_date[i], end_date[i]] and place them
#      side by side with the actual rows of the same range.
# All per-(window, material) frames are stacked into `predicted_df`.
window_forecasts = []  # collected here and concatenated once after the loops
for i in range(len(train_date)):
    for x in material_list:
        train_data = sales_data[(sales_data['Material Number'] == x) & (sales_data['date'] <= train_date[i])]

        # Prophet expects the columns 'ds' (date) and 'y' (target).
        # A dedicated name is used so the raw frame `df` read from the CSV is
        # not overwritten; `rename` returns a new frame instead of mutating a slice.
        prophet_train = train_data[['date', 'Sales Volumes in L15']].rename(
            columns={'date': 'ds', 'Sales Volumes in L15': 'y'}
        )

        # Prophet model
        m = Prophet()
        m.fit(prophet_train)

        # Forecasting: training dates plus periods[i] future rows
        # (make_future_dataframe defaults to daily frequency).
        future = m.make_future_dataframe(periods=periods[i])

        # Predictions
        forecast = m.predict(future)
        pred = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
        # Each comparison needs its own parentheses: `&` binds tighter than
        # `>=` / `<=`, so without them the string would be and-ed with the mask.
        pred = pred[(pred['ds'] >= test_date[i]) & (pred['ds'] <= end_date[i])].reset_index()
        test_data = sales_data[
            (sales_data['Material Number'] == x)
            & (sales_data['date'] >= test_date[i])
            & (sales_data['date'] <= end_date[i])
        ].reset_index()

        # Positional, column-wise join of actuals and predictions. Both frames
        # keep their previous index as an 'index' column, so the result holds
        # two columns with that name.
        # NOTE(review): rows line up by position, which assumes the material has
        # one actual row for every forecast date in the range — confirm.
        forecast = pd.concat([test_data, pred], axis=1)
        window_forecasts.append(forecast)

# Single concatenation instead of growing the frame on every iteration.
predicted_df = pd.concat(window_forecasts, axis=0) if window_forecasts else pd.DataFrame()