<a href="https://colab.research.google.com/github/Gajeshgif/Gajesh/blob/main/sales_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Data-source setup cell: run it first so the Kaggle dataset is available.
# Note: this notebook environment is not Kaggle's Python environment, so
# some libraries the notebook relies on may be missing here.
import kagglehub

# Kaggle dataset handle in "<owner>/<dataset>" form.
DATASET_HANDLE = 'kyanyoga/sample-sales-data'

# Keep whatever dataset_download returns (presumably the local download
# location, going by the variable name -- confirm against kagglehub docs).
kyanyoga_sample_sales_data_path = kagglehub.dataset_download(DATASET_HANDLE)

print('Data source import complete.')

Import the necessary libraries.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
from prophet.plot import plot_plotly
from statsmodels.tsa.seasonal import seasonal_decompose

Data loading and Preprocessing

In [None]:
# Load the raw sales table from the directory returned by the kagglehub
# download cell. The previous hard-coded '/kaggle/input/...' location is a
# Kaggle-specific mount, so the read failed when the notebook ran elsewhere
# (for example from the Colab badge at the top of this notebook).
sales_csv_path = f"{kyanyoga_sample_sales_data_path}/sales_data_sample.csv"

# The file is read as latin1, matching the original cell.
sales_df = pd.read_csv(sales_csv_path, encoding='latin1')

# Preview the first rows as the cell output.
sales_df.head()


In [None]:
# Parse the ORDERDATE column with pandas and store the result back on the frame.
order_dates = pd.to_datetime(sales_df['ORDERDATE'])
sales_df['ORDERDATE'] = order_dates

# Show the distinct PRODUCTLINE values so a product can be picked below.
product_lines = sales_df['PRODUCTLINE'].unique()
print(product_lines)

Monthly Sales Analysis for Vintage Cars

In [None]:
# Product line analysed in the rest of the notebook.
product = 'Vintage Cars'

# Sum SALES per month-end bucket for the chosen product line.
product_rows = sales_df.loc[sales_df['PRODUCTLINE'] == product]
monthly_totals = product_rows.groupby(pd.Grouper(key='ORDERDATE', freq='ME'))['SALES'].sum()

# Reshape into the two-column ds/y layout that the Prophet cell is fitted on,
# then drop any rows holding missing values.
sales_group = (
    monthly_totals
    .reset_index()
    .rename(columns={'ORDERDATE': 'ds', 'SALES': 'y'})
    .dropna()
)

# Line chart of the monthly totals.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(sales_group['ds'], sales_group['y'])
ax.set_title(f'Monthly Sales for {product}')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.grid(True)

Time Series Decomposition

In [None]:

# Index the monthly totals by date; no explicit period is passed to
# seasonal_decompose, only the multiplicative model choice.
monthly_series = sales_group.set_index('ds')['y']
decomposition = seasonal_decompose(monthly_series, model='multiplicative')

# Draw the decomposition panels and render them.
decomposition.plot()
plt.show()

Train model with Multiplicative Seasonality

In [None]:
# Build the Prophet model with multiplicative seasonality and fit it on the
# monthly ds/y frame in one step (fit hands back the fitted model).
model = Prophet(seasonality_mode='multiplicative').fit(sales_group)

# Ask Prophet for a frame extended by six month-end periods, then predict on it.
future = model.make_future_dataframe(freq='ME', periods=6)
forecast = model.predict(future)

# Last expression of the cell: the interactive Plotly forecast figure.
plot_plotly(model, forecast)

Model Evaluation

In [None]:
# In-sample evaluation: compare the model's fitted values with the observed
# monthly totals. The first len(sales_group) forecast rows are taken as the
# ones that line up with the history (positional match, so both frames are
# assumed to be in the same date order -- confirm if the inputs change).
predicted = forecast.iloc[:len(sales_group)]

y_true = sales_group['y'].values
y_pred = predicted['yhat'].values

mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

# MAPE is undefined where the actual value is zero. Average only over months
# with non-zero sales so a single empty month cannot turn the metric into
# inf/nan; with no usable month at all the metric is reported as nan.
nonzero = y_true != 0
if nonzero.any():
    mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
else:
    mape = float('nan')

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAPE: {mape:.2f}%")



Actual vs Predicted Sales

In [None]:
# Overlay observed and fitted monthly sales on a single, explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
dates = predicted['ds']

sns.lineplot(x=dates, y=y_true, label='Actual', linewidth=2.5, ax=ax)
sns.lineplot(x=dates, y=y_pred, label='Predicted', linestyle='--', linewidth=2, color='r', ax=ax)

ax.set_title(f'Actual vs Predicted Sales - {product}')
ax.set_xlabel("Date")
ax.set_ylabel("Sales")
ax.legend()
ax.grid(True)
plt.show()

Month with Highest Sales

In [None]:
# Restrict the peak search to dates after the last observed month, so the
# reported peak is a genuine forecast rather than an in-sample fitted value
# (the message below says sales "will peak"). If the forecast frame holds no
# rows past the history, fall back to searching the whole frame.
last_observed = sales_group['ds'].max()
future_forecast = forecast.loc[forecast['ds'] > last_observed, ['ds', 'yhat']]
peak_candidates = future_forecast if not future_forecast.empty else forecast[['ds', 'yhat']]
high_sales = peak_candidates.loc[peak_candidates['yhat'].idxmax()]

print(f"🔍 Forecast shows {product} sales will peak around: {high_sales['ds'].date()} with approx ${high_sales['yhat']:.2f}")


# Year-over-year growth: the most recent 12 monthly totals against the 12
# before them. Needs at least 24 months of history.
if len(sales_group) >= 24:
    this_year = sales_group['y'].values[-12:]
    last_year = sales_group['y'].values[-24:-12]
    last_year_total = last_year.sum()
    if last_year_total != 0:
        growth = ((this_year.sum() - last_year_total) / last_year_total) * 100
        print(f"📈 YoY Growth in Sales: {growth:.2f}%")
    else:
        # A zero baseline makes the percentage undefined; say so instead of
        # printing inf/nan.
        print("📈 YoY Growth in Sales: n/a (no sales in the prior 12 months)")