In [1]:
# STEP 1: Install Prophet
!pip install prophet

# STEP 2: Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
from prophet.plot import plot_plotly
import plotly.graph_objs as go
import plotly.express as px
from statsmodels.tsa.seasonal import seasonal_decompose
# STEP 3: Load and Preprocess Data
df = pd.read_csv('/kaggle/input/sample-sales-data/sales_data_sample.csv', encoding='latin1')

# Rename for clarity
df.rename(columns={'ORDERDATE': 'OrderDate', 'SALES': 'Sales', 'PRODUCTLINE': 'ProductLine'}, inplace=True)

# Convert date
df['OrderDate'] = pd.to_datetime(df['OrderDate'])

# Check unique product lines
print(df['ProductLine'].unique())
# STEP 4: Group Monthly Sales for One Product Line (Change as needed)
category = 'Classic Cars'
df_group = df[df['ProductLine'] == category].groupby(pd.Grouper(key='OrderDate', freq='M'))['Sales'].sum().reset_index()

# Rename for Prophet
df_group.rename(columns={'OrderDate': 'ds', 'Sales': 'y'}, inplace=True)

# Drop missing
df_group.dropna(inplace=True)

# Plot original data
plt.figure(figsize=(10,5))
sns.lineplot(data=df_group, x='ds', y='y', linewidth=2.5)
plt.title(f"Monthly Sales for {category}")
plt.xlabel("Date")
plt.ylabel("Sales")
plt.grid(True)
plt.show()
# STEP 5: Time Series Decomposition
decomposition = seasonal_decompose(df_group.set_index('ds')['y'], model='multiplicative')
decomposition.plot()
plt.tight_layout()
plt.show()
# STEP 6: Train Prophet with Multiplicative Seasonality
model = Prophet(seasonality_mode='multiplicative')
model.fit(df_group)

# Predict future
future = model.make_future_dataframe(periods=6, freq='M')
forecast = model.predict(future)

# Plotly interactive forecast
plot_plotly(model, forecast)
# STEP 7: Model Evaluation – MAE, RMSE, MAPE
# Trim prediction to match known values
predicted = forecast.iloc[:len(df_group)]

y_true = df_group['y'].values
y_pred = predicted['yhat'].values

mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAPE: {mape:.2f}%")
# STEP 8: Actual vs Predicted Plot
plt.figure(figsize=(10,6))
sns.lineplot(x=predicted['ds'], y=y_true, label='Actual', linewidth=2.5)
sns.lineplot(x=predicted['ds'], y=y_pred, label='Predicted', linestyle='--', linewidth=2.5)
plt.title(f'Actual vs Predicted Sales - {category}')
plt.xlabel("Date")
plt.ylabel("Sales")
plt.legend()
plt.grid(True)
plt.show()
# Find month with highest predicted sales
peak_month = forecast[['ds', 'yhat']].sort_values(by='yhat', ascending=False).iloc[0]

print(f"🔍 Forecast shows {category} sales will peak around: {peak_month['ds'].date()} with approx ${peak_month['yhat']:.2f}")

# YoY growth (if enough data)
if len(df_group) >= 24:
    this_year = df_group['y'].values[-12:]
    last_year = df_group['y'].values[-24:-12]
    growth = ((sum(this_year) - sum(last_year)) / sum(last_year)) * 100
    print(f"📈 YoY Growth in Sales: {growth:.2f}%")



FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/sample-sales-data/sales_data_sample.csv'