# **Neural Prophet Parameters**
### ******Input Parameters from Interface******

In [23]:
# 1. We have two files (Sampledata.csv and actualData.csv)  I want to merge these two and have only one input file
# Split the data into training and testing data
# 2. Add a parameters prediction_days

In [24]:
import os
import sys
print(os.getcwd())
os.chdir('/home/ajaz/DemandForecasting/models')
sys.path.append('/home/ajaz/DemandForecasting')

/home/ajaz/DemandForecasting/models


In [26]:
#NeuralProphet
 
#1. **Growth Parameters:**
detectGrowth=True #bool
growth='off'  #Literal['off', 'linear', 'discontinuous']

#2. **Changepoints Parameters:**
detectChangepoints=True #bool
changepoints= None #Optional[list]
n_changepoints =None    #0, #int
changepoints_range=None     #0.8 #float

#3. **Seasonality Parameters:**
#To Control Seasonality
yearly_seasonality= None  #'auto'ss
weekly_seasonality= None   #'auto'
daily_seasonality = None   #'auto'

seasonality_mode='multiplicative' #['additive', 'multiplicative']
seasonality_reg= None    #float 0

#4. **Confidence Interval Parameters:**
confidence_lv = 0.9
#quantiles  = None      #[]   #List[float]
quantiles = [round(((1 - confidence_lv) / 2), 2), round((confidence_lv + (1 - confidence_lv) / 2), 2)]

#5. **Missing Data Handling:**
impute_missing= None,     #bool
impute_linear = None,    #int
impute_rolling= None,  #int
drop_missing  = None   #bool

#6. **Normalization Parameters:**
normalize=None         #'off'     # Literal['auto', 'soft', 'soft1', 'minmax', 'standardize', 'off']

#7. **Lags and Forecasts:**
n_lags=None        #0    # int 0    
n_forecasts=None   #0 #int 1

#8. **Autoregression Parameters:**  
ar_layers=None     #[]    #Optional[list]   
ar_reg= None       #Optional[float]     
lagged_reg_layers= None    #[]   #Optional[list]       
learning_rate= 0.1   #Optional[float]

#9. **Training Parameters:**
epochs= None      #Optional[int]
batch_size= None   #Optional[int]
loss_func=None     #'Huber'
optimizer=None     #'AdamW'

#10. **Global/Local Parameters:**
season_global_local=None #['global', 'local']
trend_global_local= None   #str 'global', 'local'

#11. **Trend Parameters:**
trend_reg= None  #Optional[float]
trend_reg_threshold =None  #Optional[Union[bool, float]]
newer_samples_weight= None  # float
newer_samples_start =None   #float

#12. **Additional Configuration:**
collect_metrics= None      #Union [bool, list, dict]

global_normalization=None
global_time_normalization=None
unknown_data_normalization=None

accelerator=None  #Optional[str] None
trainer_config=None # dict {},
prediction_frequency=None  # Optional[dict]

In [27]:
#Additional  Seasonality regressors

#Custom holidays
country_name= 'SA'   #   'SA' # Country Code  (ISO 3166-2) for holidayss

yearly_add_seasonality=True
yearly_season_period=365.25
yearly_season_fourier_order=2

quarterly_add_seasonality=False
quarterly_season_period=None
quarterly_season_fourier_order=None

monthly_add_seasonality=False
monthly_season_period=None
monthly_season_fourier_order=None

# Weekend days (0-6, Mon-Sun)
weekend_days = [4]  # 4 is Friday

Weekend_add_seasonality=False
weekendDaysCount=1
Weekends_fourier_order=5

WorkingDays_add_seasonality=False
workingDaysCount=6
WorkingDays_fourier_order=5

ramadan_add_seasonality=False
ramadan_period=29.33
ramadan_fourier_order=10

#### ******Parameters used in other calc, other than the model******


In [28]:
# Penalty sensitivity for PELT algorithm: 'High', 'Medium', 'Low' : Used to determine the penalty value for the PELT algo which is used for changepoint detection
PenaltySensitivity ="High"  

# Model type for changepoint detection: 'l1' (linear 1), 'l2' (linear 2), 'rbf' (radial basis function)
pltModelType = "l2"  # "l2", "rbf"

detectOutliers =False # If True, outliers are detected and removed from the data else outliers are not detected and not removed from the data

#IQR stands for Interquartile Range, which is a measure of statistical dispersion of data
#IQR Range for outlier detection (1.5 is default) 3 is too high ,  upper_bound = Q3 + IQRRange * IQR and lower_bound = Q1 - IQRRange * IQR 

IQRRange=1.5

# ****Prophet Algorithm****

### ****Importing Libraries****

In [29]:
from neuralprophet import NeuralProphet, set_log_level
import pandas as pd

import matplotlib.pyplot as plt

import math
import ruptures as rpt
import warnings
import holidays
from hijri_converter import convert
from datetime import date,datetime, timedelta
from prophet.diagnostics import performance_metrics, cross_validation

from Utitlies.fileIO import loadCsvExcelFile

from Utitlies.dataAnalysis import detectGrowth 
from Utitlies.dataAnalysis import detectChangepoints
from Utitlies.dataAnalysis import detectOutliers
from Utitlies.dateGeneration import generateRamadanDates
from Utitlies.dateGeneration import generateWeekends

warnings.filterwarnings("ignore")

### ****Importing the dataset****

##### Importing Data using the Function

In [31]:
prediction_days=30

#Excel file Path
file_path= '/home/ajaz/DemandForecasting/Data/data.csv'  


def split_data(data, prediction_days):
    """
    Splits the given data into training and testing sets.

    Parameters:
    - data: The input data to be split.
    - prediction_days: The number of days to be used for testing.

    Returns:
    - train_data: The training data containing all but the last `prediction_days` rows.
    - test_data: The testing data containing the last `prediction_days` rows.
    """
    return data.iloc[:-prediction_days], data.iloc[-prediction_days:]

trainData , testData = split_data(loadCsvExcelFile(file_path), prediction_days)


#Load Data 
#data = loadCsvExcelFile(file_path)

trainData['ds'] = pd.to_datetime(trainData['ds'])
""" trainData.rename(columns={'ds':'ds','y':'y'},inplace=True) """
# Get the first and last dates of the filtered data
startDate = pd.to_datetime( trainData['ds'].iloc[0])
endDate = pd.to_datetime( trainData['ds'].iloc[-1])

startYear = startDate.year
endYear = endDate.year

trainData.head()

Unnamed: 0,ds,y
0,2018-01-01,50930.49
1,2018-01-02,73204.65
2,2018-01-03,60450.73
3,2018-01-04,101558.6
4,2018-01-05,0.0


In [32]:
#Growth Detection
if detectGrowth:
    growth = detectGrowth(trainData)
    print("Growth Detected : ",growth)
else:
    print("Manual, Growth Detection is Off")

Growth Detected :  linear


In [33]:
#detect Change points
if detectChangepoints:
    changepoints= detectChangepoints(trainData, pltModelType, PenaltySensitivity)
    print("Sucessfully detected Change points")
    #print("Change points : ",changepoints)
else:
    print("Manual, Change points Detection is Off")

Sucessfully detected Change points


In [34]:
#detect Outliers
if detectOutliers:
    lower_bound, upper_bound = detectOutliers(trainData, IQRRange)
    outliers = trainData[((trainData['y'] < lower_bound) | (trainData['y'] > upper_bound))]
    trainData.loc[outliers.index, 'y'] = trainData['y'].mean()
    print("Sucessfully Removed the Outliers")
print("Outlier Detection  is disabled")

count      2008.000000
mean      66824.202083
std       59345.057409
min           0.000000
25%       13335.772500
50%       54712.280000
75%      111361.900000
max      347483.000000
Name: y, dtype: float64
Sucessfully Removed the Outliers
Outlier Detection  is disabled


In [35]:
#Add Ramadan Seasonality

if ramadan_add_seasonality:
    ramadan_df = generateRamadanDates(startYear, endYear)
    trainData['is_ramadan'] = trainData['ds'].isin(ramadan_df['ds']).astype(int)
    print("Sucessfully Added Ramadan dates in prophet Training Data")
    print(trainData)
else:
    print("Ramadan Seasonality is disabled")

Ramadan Seasonality is disabled


In [36]:
#Variables for Weekend and Working days must be imported from input parameters
startDate = '2018-01-01'
endDate = '2023-07-31'
weekendDays = [4]  # 4 is Friday

if Weekend_add_seasonality:
    df_weekends = generateWeekends(startDate, endDate, *weekendDays)
    trainData['is_weekend'] = trainData['ds'].isin(df_weekends['ds']).astype(int)  
    print("Sucessfully Added Weekend dates in prophet Training Data")
else:
    print("Weekend Seasonality is disabled")

if WorkingDays_add_seasonality:
    trainData['is_weekday'] = (trainData['is_weekend'] == 0).astype(int)
    print("Sucessfully Added Working days in prophet Training Data")
else:
    print("Working days Seasonality is disabled")

Weekend Seasonality is disabled
Working days Seasonality is disabled


# Create a Prophet model with flexible parameters

In [37]:
neuralprophet_params = {
'growth':growth,
'changepoints':changepoints,
'n_changepoints':n_changepoints,
'changepoints_range':changepoints_range,

'yearly_seasonality':yearly_seasonality,
'weekly_seasonality':weekly_seasonality,
'daily_seasonality':daily_seasonality,

'seasonality_mode':seasonality_mode,
'seasonality_reg':seasonality_reg,

'quantiles':quantiles,

'impute_missing':impute_missing,
'impute_linear':impute_linear,
'impute_rolling':impute_rolling,
'drop_missing':drop_missing,
'normalize':normalize,

'n_lags':n_lags,
'n_forecasts':n_forecasts,

'ar_layers':ar_layers,
'ar_reg':ar_reg,
'lagged_reg_layers':lagged_reg_layers,
'learning_rate':learning_rate,

'epochs':epochs,
'batch_size':batch_size,
'loss_func':loss_func,
'optimizer':optimizer,

'season_global_local':season_global_local,

'trend_reg':trend_reg,
'trend_reg_threshold':trend_reg_threshold,
'trend_global_local':trend_global_local,

'newer_samples_weight':newer_samples_weight,
'newer_samples_start':newer_samples_start,

'collect_metrics':collect_metrics,

'global_normalization':global_normalization,
'global_time_normalization':global_time_normalization,
'unknown_data_normalization':unknown_data_normalization,

'accelerator':accelerator,
'trainer_config':trainer_config,
'prediction_frequency':prediction_frequency

}
print(neuralprophet_params)
# Remove parameters with value None
neuralprophet_params = {key: value for key, value in neuralprophet_params.items() if value is not None}
print(neuralprophet_params)

{'growth': 'linear', 'changepoints': 0      2018-01-01
1      2018-01-02
2      2018-01-03
3      2018-01-04
4      2018-01-05
          ...    
1951   2023-06-23
1952   2023-06-24
1953   2023-06-25
1954   2023-06-26
1955   2023-07-01
Name: ds, Length: 1956, dtype: datetime64[ns], 'n_changepoints': None, 'changepoints_range': None, 'yearly_seasonality': None, 'weekly_seasonality': None, 'daily_seasonality': None, 'seasonality_mode': 'multiplicative', 'seasonality_reg': None, 'quantiles': [0.05, 0.95], 'impute_missing': (None,), 'impute_linear': (None,), 'impute_rolling': (None,), 'drop_missing': None, 'normalize': None, 'n_lags': None, 'n_forecasts': None, 'ar_layers': None, 'ar_reg': None, 'lagged_reg_layers': None, 'learning_rate': 0.1, 'epochs': None, 'batch_size': None, 'loss_func': None, 'optimizer': None, 'season_global_local': None, 'trend_reg': None, 'trend_reg_threshold': None, 'trend_global_local': None, 'newer_samples_weight': None, 'newer_samples_start': None, 'collect_metr

# **Training Model**

#### ****Initialize the Model**** ####

In [38]:
""" print(neuralprophet_params) """

' print(neuralprophet_params) '

In [39]:
model = NeuralProphet (**neuralprophet_params)

In [40]:
""" trainData.head(2) """

' trainData.head(2) '

### ****Custom  Seasonalties****

In [41]:
#Custom seasonality
#if the  passed condition is True or Not none then it  execute the below code
#Check the names of the variables from the variables

if country_name:
    model.add_country_holidays(country_name=country_name)

if yearly_add_seasonality:
    model.add_seasonality(name='yearly_season' ,period=yearly_season_period ,fourier_order=yearly_season_fourier_order )

if quarterly_add_seasonality:
    model.add_seasonality(name='quarterly_season' ,period=quarterly_season_period ,fourier_order=quarterly_season_fourier_order )

if monthly_add_seasonality:
    model.add_seasonality(name='monthly_season' ,period=monthly_season_period ,fourier_order=monthly_season_fourier_order )

if Weekend_add_seasonality:
    model.add_seasonality(name='Weekends_season' ,period=weekendDaysCount ,fourier_order=Weekends_fourier_order ,condition_name="is_weekend")

if WorkingDays_add_seasonality:
    model.add_seasonality(name='WorkingDays_season' ,period=workingDaysCount ,fourier_order=WorkingDays_fourier_order ,condition_name="is_weekday")

if ramadan_add_seasonality:
    model.add_seasonality(name='ramadan_season' ,period=ramadan_period ,fourier_order=ramadan_fourier_order ,condition_name="is_ramadan")

#### ****Fit the model to the data**** ####

In [42]:
# Use static plotly in notebooks
""" model.set_plotting_backend("plotly-static") """
model.fit(trainData)

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.95% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 64
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 158
Missing logger folder: /home/ajaz/DemandForecasting/models/lightning_logs


Training: 0it [00:00, ?it/s]

Unnamed: 0,MAE,RMSE,Loss,RegLoss,epoch
0,62743.882812,80887.531250,0.284113,0.0,0
1,46821.507812,61369.121094,0.167408,0.0,1
2,36720.859375,47320.765625,0.122335,0.0,2
3,32122.310547,41927.324219,0.095330,0.0,3
4,30535.824219,39852.500000,0.084812,0.0,4
...,...,...,...,...,...
153,22157.884766,30964.023438,0.045843,0.0,153
154,22101.423828,31034.095703,0.045814,0.0,154
155,22065.798828,30970.031250,0.045937,0.0,155
156,22029.527344,30765.593750,0.045423,0.0,156


## Generate future Dataframe Dates

In [43]:
# Create a new dataframe reaching 365 into the future for our forecast, n_historic_predictions also shows historic data
df_future = model.make_future_dataframe(trainData, n_historic_predictions=True, periods=30)
 # Predict the future
forecast = model.predict(df_future)

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.95% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.951% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.951% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.data.processing._handle_missing_data) - Dropped 30 rows at the end with NaNs in 'y' column.


Predicting: 32it [00:00, ?it/s]

INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column


In [44]:
forecast

Unnamed: 0,ds,y,yhat1,yhat1 5.0%,yhat1 95.0%,trend,season_yearly,season_weekly,season_yearly_season,events_additive,event_A National Day,event_Arafat Day (estimated),event_Eid al-Adha Holiday (estimated),"event_Eid al-Adha Holiday (observed, estimated)",event_Eid al-Fitr Holiday (estimated),"event_Eid al-Fitr Holiday (observed, estimated)",event_Founding Day Holiday,event_National Day Holiday,event_National Day Holiday (observed)
0,2018-01-01,50930.49,110445.500000,64738.070312,160527.281250,90863.281250,-36566.890625,10304.182617,45844.933594,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2018-01-02,73204.65,112275.703125,66012.187500,164302.187500,90965.328125,-36320.773438,12067.197266,45563.957031,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2018-01-03,60450.73,110386.476562,65260.292969,160266.140625,91097.093750,-36079.386719,10099.584961,45269.187500,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2018-01-04,101558.60,109221.914062,63495.968750,152848.609375,91266.007812,-35845.042969,8835.786133,44965.160156,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2018-01-05,0.00,9005.727539,-3612.971191,16597.900391,91445.546875,-35605.015625,-91472.335938,44637.523438,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2033,2023-07-27,,35265.914062,19512.695312,57649.066406,34662.382812,9438.405273,3347.832275,-12182.708984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2034,2023-07-28,,-2930.459229,-7250.537109,2466.309326,34668.660156,9556.372070,-34678.417969,-12477.073242,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2035,2023-07-29,,44296.351562,25686.595703,69025.671875,34674.933594,9677.266602,12711.673828,-12767.522461,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2036,2023-07-30,,37671.894531,22751.193359,57972.500000,34681.207031,9800.680664,6243.208008,-13053.203125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Visualize the forecast
""" model.plot(forecast) """

In [None]:
""" model.plot_components(forecast) """

In [None]:
""" model.plot_parameters() """

In [None]:
""" model.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast) """

In [None]:
""" #last 30 days of the forecast
forecast.tail(2) """

### Comparision of actual data and forecasted data

In [None]:
""" #testData = pd.read_csv('/home/ajaz/DemandForecasting/Data/actualdata.csv')
testData """

In [48]:
""" testData = testData.rename(columns={'date': 'ds', 'GroupCostPrice': 'actual'})
#Convert a dataframe column to date only
testData['ds']=pd.to_datetime(testData['ds'])  """
forecast = forecast[['ds','yhat1']].tail(30)

forecast.to_csv('/home/ajaz/DemandForecasting/Data/Output/forecast.csv', index=False)

print("Sucessfully Generated the forecast Output for NeuralProphet")

Sucessfully Generated the forecast Output for NeuralProphet


In [None]:
""" #Exporting the data 

# Assuming 'actualdata' and 'forecast' are already defined and preprocessed as per your snippet

# Step 2: Filter 'actualdata' to only include dates that are in 'forecast'
filtered_actualdata = testData[testData['ds'].isin(forecast['ds'])]

# Step 3: Merge 'filtered_actualdata' and 'forecast' on the 'ds' column
combined_df = pd.merge(filtered_actualdata, forecast, on='ds', how='inner')

# Rename columns for clarity
combined_df.rename(columns={'yhat1': 'forecast', 'y': 'actual'}, inplace=True)

# Now 'combined_df' contains the date ('ds'), the actual values, and the forecasted values ('forecast')

# Export to CSV

combined_df.to_csv('/home/ajaz/DemandForecasting/Data/Output/NeuralProphetforecast.csv', index=False)


# Export to Excel
#combined_df.to_excel('/home/ajaz/DemandForecasting/Data/forecast_vs_actual.xlsx', index=False, engine='openpyxl')
 """

In [None]:
""" fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(forecast['ds'], forecast['yhat1'], label='forecast')
ax.plot(testData['ds'], testData['value'], label='actual')
ax.legend(loc='upper left')

plt.show() """

## Validation and Reproducibility

In [None]:
""" df = trainData.copy()
df_train, df_test = model.split_df(df=df, freq="D", valid_p=0.2)
# Split the dataset into training and validation sets
forecast_test = model.predict(df=df_test)
metrics_test = model.test(df=df_test)
metrics_test[['MAE_val']] """