<h2>Sales Prediction using Simple Linear Regression Model</h2>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error 

# Simple linear-regression sales forecast.
#
# Steps: build a synthetic year of daily sales, fit Sales ~ Days on the first
# 80% of the year, score the model on the final 20%, then extrapolate 30 days
# past the end of the data.

# Synthetic data: Poisson noise around 100 units/day plus a +0.5/day trend.
np.random.seed(42)  # fixed seed so every run produces the same series
dates = pd.date_range(start="2023-01-01", periods=365, freq="D")  # one year, daily
sales = np.random.poisson(lam=100, size=len(dates)) + np.arange(len(dates)) * 0.5

data = pd.DataFrame({"Date": dates, "Sales": sales})
data.set_index("Date", inplace=True)

# Express each date as the number of days elapsed since the first observation.
data["Days"] = (data.index - data.index.min()).days

# Features and target: the model predicts Sales from Days.
X = data[["Days"]]
y = data["Sales"]

# Chronological hold-out: train on the first 80% of days, test on the last 20%.
# shuffle=False matters here. A random split would scatter test days between
# training days, so the scores would describe interpolation, whereas this model
# is used below to extrapolate beyond the observed range.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Fit the linear model on the training period only.
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions for both periods; the training-period predictions are kept for inspection.
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

# Out-of-sample error on the held-out final stretch of the year.
mae = mean_absolute_error(y_test, y_pred_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

# Actual vs predicted sales over the held-out period.
plt.figure(figsize=(10, 6))
plt.plot(X_test, y_test, "b.", label="Actual Sales")
plt.plot(X_test, y_pred_test, "r-", label="Predicted Sales")
plt.title("Actual vs Predicted Sales")
plt.xlabel("Days")
plt.ylabel("Sales")
plt.legend()
plt.show()

# Extrapolate the fitted line over the 30 days that follow the last observation.
future_days = pd.DataFrame({"Days": np.arange(data["Days"].max() + 1, data["Days"].max() + 31)})
future_sales = model.predict(future_days)

# Historical series together with the 30-day forecast on a shared date axis.
plt.figure(figsize=(10, 6))
plt.plot(data.index, data["Sales"], label="Historical Sales")
plt.plot(pd.date_range(data.index[-1] + pd.Timedelta(days=1), periods=30), future_sales, label="Forecasted Sales")
plt.title("Sales Forecast")
plt.xlabel("Date")
plt.ylabel("Sales")
plt.legend()
plt.show()


<h2>Creating Synthetic Sales Data</h2>

In [None]:
import pandas as pd
import numpy as np

# Build one year of synthetic daily sales and write it to a CSV file.
np.random.seed(42)  # fixed seed so the generated file is reproducible
dates = pd.date_range(start="2023-01-01", periods=365, freq="D")

# Sales = Poisson-distributed daily noise (mean 100) + a linear trend of 0.5 per day.
n_days = len(dates)
daily_noise = np.random.poisson(lam=100, size=n_days)
linear_trend = np.arange(n_days) * 0.5
sales = daily_noise + linear_trend

data = pd.DataFrame({"Date": dates, "Sales": sales})

# Persist without the positional index so the file has exactly two columns.
output_path = "synthetic_sales_data.csv"
data.to_csv(output_path, index=False)

print("CSV file 'synthetic_sales_data.csv' has been created.")


<h2>Sales Prediction using Prophet model</h2>

In [None]:
import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt

# Prophet sales forecast: load the CSV, fit Prophet, predict 30 further
# periods, then plot the forecast and its components.

# Load the sales history from the CSV file.
data = pd.read_csv("synthetic_sales_data.csv")

# Prophet requires the timestamp column to be named "ds" and the target "y".
prophet_columns = {"Date": "ds", "Sales": "y"}
data = data.rename(columns=prophet_columns)

# The CSV stores dates as text; convert them to real datetimes.
data['ds'] = pd.to_datetime(data['ds'])

# Create and fit the Prophet model.
model = Prophet()
model.fit(data)

# Frame of dates to predict for, extended by 30 periods.
future = model.make_future_dataframe(periods=30)
forecast = model.predict(future)

# Show the last few rows: point forecast plus its lower/upper bounds.
summary_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
print(forecast[summary_columns].tail())

# Forecast plot.
fig1 = model.plot(forecast)
plt.title("Sales Forecast with Prophet")
plt.xlabel("Date")
plt.ylabel("Sales")
plt.show()

# Component plot of the fitted model.
fig2 = model.plot_components(forecast)
plt.show()


<h2>Using the ARIMA (AutoRegressive Integrated Moving Average) model</h2>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

# ARIMA sales forecast: load the CSV, fit ARIMA(5, 1, 0), compare in-sample
# one-step predictions with the actuals, and forecast the following 30 days.

# Load the sales history and index it by date.
data = pd.read_csv("synthetic_sales_data.csv")
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

# Raw series, for a first visual check.
plt.figure(figsize=(10, 6))
plt.plot(data.index, data['Sales'], label="Sales")
plt.title("Sales Over Time")
plt.xlabel("Date")
plt.ylabel("Sales")
plt.legend()
plt.show()

# Define the ARIMA model: 5 autoregressive lags, 1 difference, no moving-average terms.
arima_order = (5, 1, 0)
model = ARIMA(data['Sales'], order=arima_order)

# Fit the model
model_fit = model.fit()

# Summary of the model
print(model_fit.summary())

# In-sample one-step-ahead predictions. With d differences the first d
# observations have no earlier value to difference against, so their
# predictions are not meaningful (statsmodels reports roughly 0 there). Start
# at d so that artefact neither spikes the plot nor inflates the RMSE; the
# skipped rows are left as NaN in the 'Predicted' column.
first_valid = arima_order[1]
data['Predicted'] = model_fit.predict(start=first_valid, end=len(data) - 1, dynamic=False)

# Forecast future sales for the next 30 days
forecast_steps = 30
forecast = model_fit.forecast(steps=forecast_steps)

# The forecast horizon begins the day AFTER the last observation.
forecast_dates = pd.date_range(
    start=data.index[-1] + pd.Timedelta(days=1),
    periods=forecast_steps,
    freq='D',
    name='Date',
)

# Attach the forecast values positionally; the Series returned by forecast()
# carries its own index, which must not be aligned against forecast_dates.
forecast_df = pd.DataFrame({'Forecast': np.asarray(forecast)}, index=forecast_dates)

print(forecast_df.head())

# Plot the actual vs predicted sales
plt.figure(figsize=(10, 6))
plt.plot(data.index, data['Sales'], label="Actual Sales")
plt.plot(data.index, data['Predicted'], label="In-sample Predictions", color='red')
plt.plot(forecast_df.index, forecast_df['Forecast'], label="Out-of-sample Forecast", color='green')
plt.title("Sales Forecast with ARIMA")
plt.xlabel("Date")
plt.ylabel("Sales")
plt.legend()
plt.show()

# Root Mean Squared Error over the rows that have an in-sample prediction.
in_sample = data[['Sales', 'Predicted']].dropna()
rmse = np.sqrt(mean_squared_error(in_sample['Sales'], in_sample['Predicted']))
print(f"RMSE: {rmse}")
