In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv('/Users/harshitgupta/Desktop/vs /VS-Data-Den/rm_dataset_filtered.csv')


In [4]:
from prophet import Prophet

In [5]:
df['date_field'] = pd.to_datetime(df['date_field'])


In [6]:
data = df.rename(columns={'date_field': 'ds', 'sales': 'y'})

In [7]:
best_smape = 10000
best_params = {}


In [8]:
for changepoint_prior_scale in [0.01, 0.1, 0.5, 1, 10]:
    for seasonality_prior_scale in [0.01, 0.1, 1, 10]:
        for seasonality_mode in ['additive', 'multiplicative']:
            # Initialize and fit Prophet model with adjusted parameters
            model = Prophet(changepoint_prior_scale=changepoint_prior_scale,
                            seasonality_mode=seasonality_mode,
                            seasonality_prior_scale=seasonality_prior_scale)
            model.fit(data)
            
            # Calculate SMAPE
            forecast = model.predict(data)
            actual = data['y'].values
            forecast_values = forecast['yhat'].values
            numerator = np.abs(actual - forecast_values)
            denominator = (np.abs(actual) + np.abs(forecast_values)) / 2
            smape = np.mean(numerator / denominator) * 100
            
            # Update best SMAPE and parameters if SMAPE is better
            if smape < best_smape:
                best_smape = smape
                best_params = {'changepoint_prior_scale': changepoint_prior_scale,
                               'seasonality_mode': seasonality_mode,
                               'seasonality_prior_scale': seasonality_prior_scale}

11:29:07 - cmdstanpy - INFO - Chain [1] start processing
11:29:08 - cmdstanpy - INFO - Chain [1] done processing
11:29:11 - cmdstanpy - INFO - Chain [1] start processing
11:29:12 - cmdstanpy - INFO - Chain [1] done processing
11:29:15 - cmdstanpy - INFO - Chain [1] start processing
11:29:15 - cmdstanpy - INFO - Chain [1] done processing
11:29:18 - cmdstanpy - INFO - Chain [1] start processing
11:29:18 - cmdstanpy - INFO - Chain [1] done processing
11:29:21 - cmdstanpy - INFO - Chain [1] start processing
11:29:22 - cmdstanpy - INFO - Chain [1] done processing
11:29:24 - cmdstanpy - INFO - Chain [1] start processing
11:29:25 - cmdstanpy - INFO - Chain [1] done processing
11:29:28 - cmdstanpy - INFO - Chain [1] start processing
11:29:29 - cmdstanpy - INFO - Chain [1] done processing
11:29:31 - cmdstanpy - INFO - Chain [1] start processing
11:29:32 - cmdstanpy - INFO - Chain [1] done processing
11:29:34 - cmdstanpy - INFO - Chain [1] start processing
11:29:36 - cmdstanpy - INFO - Chain [1]

In [9]:
final_model = Prophet(**best_params)
final_model.fit(data)


11:31:59 - cmdstanpy - INFO - Chain [1] start processing
11:32:02 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x136a25a30>

In [11]:
import pickle

In [15]:
model_path = 'prophet_model.pkl'
with open(model_path, 'wb') as f:
    pickle.dump(final_model, f)

In [16]:
with open(model_path, 'rb') as f:
    model = pickle.load(f)


In [None]:
import os

In [17]:
regional_masters = df['regional_master'].unique()

In [18]:
forecasted_sales = {}

In [19]:
for regional_master in regional_masters:
    # Filter data for the current regional master
    regional_data = data[data['regional_master'] == regional_master]
    
    # Make prediction for the regional master
    forecast = model.predict(regional_data)
    
    # Extract forecasted sales
    forecasted_sales[regional_master] = forecast[['ds', 'yhat']]


In [20]:
for regional_master, forecast in forecasted_sales.items():
    print(f"Forecasted sales for {regional_master}:")
    print(forecast)
    print()

Forecasted sales for 50000001.0:
            ds           yhat
0   2018-01-07  220205.494525
1   2018-01-14  225976.241169
2   2018-01-21  230807.170343
3   2018-01-28  235284.503015
4   2018-02-04  239956.957774
..         ...            ...
307 2023-12-03  347329.372823
308 2023-12-10  335246.858950
309 2023-12-17  326160.994103
310 2023-12-24  323519.379753
311 2023-12-31  326074.193959

[312 rows x 2 columns]

Forecasted sales for 50000002.0:
            ds           yhat
0   2018-01-07  220205.494525
1   2018-01-14  225976.241169
2   2018-01-21  230807.170343
3   2018-01-28  235284.503015
4   2018-02-04  239956.957774
..         ...            ...
307 2023-12-03  347329.372823
308 2023-12-10  335246.858950
309 2023-12-17  326160.994103
310 2023-12-24  323519.379753
311 2023-12-31  326074.193959

[312 rows x 2 columns]

Forecasted sales for 50000004.0:
            ds           yhat
0   2018-01-07  220205.494525
1   2018-01-14  225976.241169
2   2018-01-21  230807.170343
3   2018-01

In [14]:
date = '2024-01-01'  # Example date
forecast = forecast_sales(date)
if forecast is not None:
    print(f"Forecasted sales on {date}: {forecast}")
else:
    print("No model found")

Forecasted sales on 2024-01-01: 326661.0778083757
