In [None]:
import pandas as pd
from fbprophet import Prophet
from sklearn.model_selection import ParameterGrid, TimeSeriesSplit
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

# Load the dataset (expects a 'ds' timestamp column and a 'y' value column)
data = pd.read_csv("sales_data.csv")

# Preprocess the dataset: parse timestamps and coerce the target to numbers
data['ds'] = pd.to_datetime(data['ds'])
data['y'] = pd.to_numeric(data['y'])

# The split below is purely positional, so the rows must be in chronological
# order for "the last 20%" to really be the most recent period. Sort with a
# stable algorithm so rows sharing a timestamp keep their original order.
data = data.sort_values('ds', kind='mergesort').reset_index(drop=True)

# Split the dataset into train (first 80%) and test (last 20%) sets
train_size = int(len(data) * 0.8)
train_data = data.iloc[:train_size]
test_data = data.iloc[train_size:]

# Hyperparameter tuning: exhaustive grid search scored by the mean MAE over
# expanding-window time-series folds of the training data.
param_grid = {
    'changepoint_prior_scale': [0.01, 0.1, 1, 10],
    'seasonality_prior_scale': [0.01, 0.1, 1, 10],
    'seasonality_mode': ['additive', 'multiplicative']
}

# The fold boundaries depend only on the number of training rows, so a single
# splitter can be shared by every parameter combination.
tscv = TimeSeriesSplit(n_splits=5)

best_mae = float('inf')
best_params = {}

for params in ParameterGrid(param_grid):
    mae_scores = []

    for train_index, val_index in tscv.split(train_data):
        train_subset = train_data.iloc[train_index]
        val_subset = train_data.iloc[val_index]

        # A Prophet object can only be fit once; re-fitting raises an
        # exception. Build a fresh model for every fold.
        model = Prophet(**params)
        model.fit(train_subset)
        forecast = model.predict(val_subset[['ds']])

        # Both sides are compared positionally, in validation-row order.
        mae = mean_absolute_error(val_subset['y'], forecast['yhat'])
        mae_scores.append(mae)

    avg_mae = sum(mae_scores) / len(mae_scores)

    # Keep the first combination that achieves the lowest average error.
    if avg_mae < best_mae:
        best_mae = avg_mae
        best_params = params

# Train the best model on the entire training set with optimized hyperparameters
best_model = Prophet(**best_params)
best_model.fit(train_data)

# Forecast on the exact timestamps of the held-out rows. Generating dates with
# make_future_dataframe(periods=...) assumes daily, gap-free data by default
# and would produce dates that do not match the test set otherwise.
future_dates = test_data[['ds']]

# Perform the forecasting
forecast = best_model.predict(future_dates)

# Extract the predicted sales values for the test set period as a plain array
# so they are compared and plotted positionally against the test rows.
# NOTE(review): assumes test_data is in chronological order, since Prophet
# returns its forecast ordered by 'ds' - confirm against the loading step.
predictions = forecast['yhat'].values

# Evaluate the model against a naive baseline that always predicts the
# training mean. mean_absolute_error needs one prediction per observation,
# so the scalar mean is repeated for every test row.
actual_values = test_data['y']
baseline_predictions = [train_data['y'].mean()] * len(actual_values)
mae_baseline = mean_absolute_error(actual_values, baseline_predictions)
mae_forecast = mean_absolute_error(actual_values, predictions)

# Calculate improvement in forecast accuracy (percentage reduction in MAE).
# A zero baseline error leaves the ratio undefined, so report NaN instead of
# dividing by zero.
if mae_baseline:
    improvement = (mae_baseline - mae_forecast) / mae_baseline * 100
else:
    improvement = float('nan')

# Draw the observed and forecast sales for the test period on one set of axes
fig, ax = plt.subplots(figsize=(10, 6))
for series_label, series_values in (('Actual', actual_values),
                                    ('Predicted', predictions)):
    ax.plot(test_data['ds'], series_values, label=series_label)
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.set_title('Actual vs. Predicted Sales')
ax.legend()
plt.show()

# Report the accuracy gain over the baseline and the winning hyperparameters
improvement_template = "Improvement in Forecast Accuracy: {:.2f}%"
print(improvement_template.format(improvement))
print("Best Hyperparameters:", best_params)
