**Project 1: Sales Forecasting with Linear Regression**

***Importing necessary libraries***

In [None]:
# %pip install pandas
# %pip install numpy
# %pip install matplotlib
# %pip install scikit-learn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

***Dataset from Kaggle***

In [None]:
# Load the raw sales records from the CSV file in the working directory.
df = pd.read_csv("sales_data.csv")

# Show a short preview and the overall size instead of printing the whole frame.
print(df.head())
print(f"Rows: {df.shape[0]}, Columns: {df.shape[1]}")

***Data Preprocessing***

In [None]:
# Normalize the headers: remove stray leading/trailing whitespace from every column name.
df.columns = [name.strip() for name in df.columns]

# Parse the order dates; values that cannot be parsed become NaT instead of raising.
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")

# Derive turnover (unit price x quantity) only when the file does not already provide it.
if "turnover" not in df.columns:
    df["turnover"] = df["Price Each"] * df["Quantity Ordered"]

# Calendar features taken from the parsed date.
order_dates = df["Date"].dt
df["month"] = order_dates.month
df["year"] = order_dates.year

***Handling Null Values, Features and Target***

In [None]:
# Discard every row that lacks a value in any column the model uses (features or target).
required_columns = ["month", "year", "Quantity Ordered", "turnover"]
df.dropna(subset=required_columns, inplace=True)

# Feature matrix (calendar fields plus quantity) and the regression target.
feature_columns = ["month", "year", "Quantity Ordered"]
X = df[feature_columns]
y = df["turnover"]

***Training Linear Regression Model***

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training portion only.
model = LinearRegression()
model.fit(X_train, y_train)

***Predictions and Evaluation of Model***

In [None]:

# Predict turnover for the held-out rows.
y_pred = model.predict(X_test)

# Performance metrics: error magnitudes and goodness of fit on the test split.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report every metric with two decimals, in the order MAE, MSE, R^2.
metric_report = (
    ("Mean Absolute Error", mae),
    ("Mean Squared Error", mse),
    ("R^2 Score", r2),
)
for metric_label, metric_value in metric_report:
    print(f"{metric_label}: {metric_value:.2f}")

***Actual vs Predicted Plot***

In [None]:
# Scatter of predicted against actual turnover for the test split.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(y_test, y_pred, color="blue", alpha=0.5, label="Predictions")

# Diagonal reference line spanning the full target range: points on it are exact predictions.
target_range = [y.min(), y.max()]
ax.plot(target_range, target_range, linestyle="--", color="red", label="Perfect Fit")

ax.set_xlabel("Actual Revenue")
ax.set_ylabel("Predicted Revenue")
ax.set_title("Actual vs Predicted Sales")
ax.legend()
fig.tight_layout()
plt.show()

***Graphical Forecast: Plot actual vs predicted revenue***

In [None]:
# Attach the test-row labels to the predictions so both series can be ordered the same way.
y_pred_series = pd.Series(y_pred, index=y_test.index)

# Sort both series by their index directly. A stable sort keeps equal labels in their
# original relative order, so the two series stay row-aligned even if labels repeat
# (re-selecting with .loc on a sorted index would duplicate rows in that case).
actual_sorted = y_test.sort_index(kind="stable")
pred_sorted = y_pred_series.sort_index(kind="stable")
sorted_idx = actual_sorted.index

# Positive residual: the model under-predicted; negative: it over-predicted.
residuals = actual_sorted - pred_sorted


In [None]:
# One x position per held-out sample, following the sorted-index order of the series.
x = np.arange(len(actual_sorted))
bar_width = 0.4  # not referenced by the plots in this cell

# Two stacked panels sharing the x-axis: trend on top, residuals underneath.
fig, axs = plt.subplots(
    nrows=2,
    ncols=1,
    figsize=(14, 14),
    sharex=True,
    gridspec_kw={"hspace": 0.4},
    constrained_layout=True,
)
trend_ax, error_ax = axs

# Top panel: actual and predicted values drawn as two lines.
actual_values = actual_sorted.values
predicted_values = pred_sorted.values
trend_ax.plot(x, actual_values, label="Actual", color="green", marker="o", linestyle="-", linewidth=2)
trend_ax.plot(x, predicted_values, label="Predicted", color="darkorange", marker="x", linestyle="--", linewidth=2)
trend_ax.set_title("Forecast Trend: Actual vs Predicted", fontsize=14, fontweight="bold")
trend_ax.set_ylabel("Revenue")
trend_ax.legend(loc="upper left")
trend_ax.grid(True, linestyle="--", alpha=0.5)

# Bottom panel: residuals (actual minus predicted) shaded around a zero baseline.
residual_values = residuals.values
error_ax.fill_between(x, residual_values, 0, color="gray", alpha=0.3, label="Residuals")
error_ax.plot(x, residual_values, color="purple", linestyle="-", marker="d", label="Error Line")
error_ax.axhline(0, color="red", linestyle="--")
error_ax.set_title("Forecast Error (Residuals)", fontsize=14, fontweight="bold")
error_ax.set_xlabel("Sample Index")
error_ax.set_ylabel("Error")
error_ax.legend(loc="lower left")
error_ax.grid(True, linestyle="--", alpha=0.4)

# pyplot tick calls act on the current axes only.
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
fig.suptitle("Graphical Forecast Dashboard", fontsize=16, fontweight="bold")
plt.show()


***Tabular Forecasts: A sample of actual vs predicted in a table***

In [None]:
# Pair each actual test value with its prediction, then renumber the rows from zero.
actual_vs_predicted = {"Actual": y_test, "Predicted": y_pred}
forecast_df = pd.DataFrame(actual_vs_predicted)
forecast_df = forecast_df.reset_index(drop=True)

# Print only the first 20 rows as a sample.
table_preview = forecast_df.head(20)
print("\nTabular Forecasts  (first 20 rows):")
print(table_preview)

***Forecast for upcoming year 2026***

In [None]:
# Year being forecast. Defined once so the feature rows, the chart title and the
# printed header always agree (the title previously said 2020 for a 2026 forecast).
forecast_year = 2026
month_numbers = list(range(1, 13))

# Future quantities are unknown, so the historical mean quantity is used for every month.
average_quantity = df["Quantity Ordered"].mean()

# One feature row per month, with the same columns and order used to train the model.
future_months = pd.DataFrame({
    "month": month_numbers,
    "year": [forecast_year] * len(month_numbers),
    "Quantity Ordered": [average_quantity] * len(month_numbers),
})

# Select the feature columns explicitly so the prediction never sees any extra column.
future_predictions = model.predict(future_months[["month", "year", "Quantity Ordered"]])
future_months["Predicted Turnover"] = future_predictions

# Bar chart of the predicted turnover per month.
plt.figure(figsize=(14, 6))
plt.bar(future_months["month"], future_months["Predicted Turnover"], color='cornflowerblue')
plt.xlabel("Month")
plt.ylabel("Predicted Revenue")
plt.title(f"Forecasted Sales Revenue for {forecast_year} (Jan–Dec)")
plt.xticks(future_months["month"])
plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()

# Text listing of the same forecast.
print(f"Forecast for upcoming year {forecast_year}:\n")
for month, prediction in zip(future_months["month"], future_months["Predicted Turnover"]):
    print(f"Month {month}: Predicted Turnover = {prediction:,.2f}")

# Save the forecast to CSV; index=False leaves the row index out of the file
# (set index=True to include it).
future_months.to_csv("future_sales_forecast.csv", index=False)