In [2]:
!pip install xgboost

Collecting xgboost
  Obtaining dependency information for xgboost from https://files.pythonhosted.org/packages/24/ec/ad387100fa3cc2b9b81af0829b5ecfe75ec5bb19dd7c19d4fea06fb81802/xgboost-2.0.3-py3-none-win_amd64.whl.metadata
  Downloading xgboost-2.0.3-py3-none-win_amd64.whl.metadata (2.0 kB)
Downloading xgboost-2.0.3-py3-none-win_amd64.whl (99.8 MB)
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB 262.6 kB/s eta 0:06:20
   ---------------------------------------- 0.1/99.8 MB 328.2 kB/s eta 0:05:04
   ---------------------------------------- 0.1/99.8 MB 711.9 kB/s eta 0:02:20
   ---------------------------------------- 0.1/99.8 MB 711.9 kB/s eta 0:02:20
   ---------------------------------------- 0.4/99.8

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
import datetime
import pickle
# Load the raw sales records.
# Raw string literal: in a normal string "\p" is an invalid escape sequence,
# which recent Python versions warn about and which only works by accident
# (a file name starting with e.g. "t" or "n" would silently corrupt the path).
data = pd.read_csv(r"E:\product_salesdata.csv")

# Parse the Date column so it can be used for time-based grouping.
data['Date'] = pd.to_datetime(data['Date'])

# Aggregate to one row per (ProductID, week), using weekly bins anchored on
# Mondays (freq='W-MON').
data_weekly = (
    data
    .groupby(['ProductID', pd.Grouper(key='Date', freq='W-MON')])
    .agg({
        'Sales': 'sum',               # total sales over the week
        'Price': 'first',             # assumes price remains constant within a week
        'CompetitionPrice': 'first',  # assumes competition price remains constant within a week
        'Holiday': 'sum',             # summed over the week
        'StartingWeek': 'sum',        # summed over the week
        'Weekend': 'sum',             # summed over the week
    })
    .reset_index()
)

# Lag features: sales of the same product 1, 2 and 3 weeks earlier.
# Shifting inside the per-product group keeps one product's history from
# leaking into another product's lags.
for lag in range(1, 4):
    data_weekly[f'Sales_lag_{lag}'] = data_weekly.groupby('ProductID')['Sales'].shift(lag)

# Drop rows with missing values (the first weeks of each product have no lags).
# Reassignment instead of inplace=True keeps the step explicit; the original
# index labels are retained, so they are no longer contiguous.
data_weekly = data_weekly.dropna()

# Preview rows 250..299 by position (.iloc makes the positional slice explicit).
data_weekly.iloc[250:300]

Unnamed: 0,ProductID,Date,Sales,Price,CompetitionPrice,Holiday,StartingWeek,Weekend,Sales_lag_1,Sales_lag_2,Sales_lag_3
256,2,2020-11-30,355.5,174.87,209.3,0,0,2,359.0,349.0,312.5
257,2,2020-12-07,371.0,233.98,251.02,0,7,2,355.5,359.0,349.0
258,2,2020-12-14,340.0,256.87,249.83,0,0,2,371.0,355.5,359.0
259,2,2020-12-21,339.5,288.7,279.61,0,0,2,340.0,371.0,355.5
260,2,2020-12-28,389.0,229.92,258.28,1,0,2,339.5,340.0,371.0
261,2,2021-01-04,300.0,213.26,179.23,0,4,2,389.0,339.5,340.0
262,2,2021-01-11,369.5,231.79,229.47,0,3,2,300.0,389.0,339.5
263,2,2021-01-18,348.0,290.01,329.45,0,0,2,369.5,300.0,389.0
264,2,2021-01-25,360.0,245.42,285.48,0,0,2,348.0,369.5,300.0
265,2,2021-02-01,346.0,263.09,219.32,0,1,2,360.0,348.0,369.5


In [5]:
# Select features and target variable.
X = data_weekly.drop(['Date', 'Sales'], axis=1)  # Features: everything except Date and Sales
y = data_weekly['Sales']  # Target variable: weekly Sales

# Split the data into training and testing sets (80% / 20%).
# NOTE(review): train_test_split shuffles rows by default, so test weeks are
# interleaved with training weeks even though the features contain lagged
# sales. A chronological hold-out would likely give a more honest error
# estimate -- confirm whether the random split is intentional.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)

# Define the XGBoost model.
model = XGBRegressor(objective ='reg:squarederror', n_estimators=100, max_depth=3, learning_rate=0.1, random_state=42)

# Train the model.
model.fit(X_train, y_train)

# Persist the fitted model. The context manager guarantees the file is flushed
# and closed (the original left the handle open, relying on garbage collection).
with open('ProductForecast.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Make predictions on the held-out rows.
y_pred = model.predict(X_test)

# Evaluate the model.
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Calculate Root Mean Squared Percentage Error (RMSPE).
# Percentage error is undefined where the actual value is 0, so those rows are
# excluded; otherwise a single zero-sales week turns the metric into inf/nan.
# With no zero actuals the result is identical to the unmasked formula.
actual = y_test.to_numpy()
nonzero = actual != 0
rmspe = np.sqrt(np.mean(np.square((actual[nonzero] - y_pred[nonzero]) / actual[nonzero]))) * 100

print(f"Root Mean Squared Percentage Error: {rmspe:.2f}%")

Mean Squared Error: 439.9384756500165
Root Mean Squared Percentage Error: 10.54%


# **For forecasting**

In [6]:
# Reload the persisted model. The context manager closes the file handle
# (the original left it open).
# NOTE: pickle.load executes arbitrary code from the file -- only load pickles
# produced by a trusted source.
with open("ProductForecast.pkl", 'rb') as model_file:
    model = pickle.load(model_file)

# Define the product ID and week numbers of the new month
new_month_product_id = 5  # Example: Product 5
num_weeks_in_month = 4  # Adjust based on the number of weeks in the month
week_numbers = list(range(1, num_weeks_in_month + 1))

# Placeholder frame describing the weeks to forecast.
# NOTE(review): this frame is never passed to the model below.
new_month_data = pd.DataFrame({
    'ProductID': [new_month_product_id] * num_weeks_in_month,
    'WeekNumber': week_numbers
})

# Filter features based on the product ID
# NOTE(review): X holds the historical feature rows used for training and
# evaluation, so the predictions below are for past weeks of this product,
# not for future weeks. A genuine forecast needs feature rows for the new
# month (price, competition price, calendar flags and lags) -- confirm the
# intended inputs.
product_features = X[X['ProductID'] == new_month_product_id]

# Predict sales for every selected historical row of the product.
predicted_sales = model.predict(product_features)

# Display the predicted sales.
# zip() stops at the shorter sequence, so only the first num_weeks_in_month
# predictions (the earliest rows in product_features) are printed.
print("Predicted Sales for Product", new_month_product_id, "in the New Month:")
for week, sales in zip(week_numbers, predicted_sales):
    print(f"Week {week}: {sales}")


Predicted Sales for Product 5 in the New Month:
Week 1: 139.12161254882812
Week 2: 141.47061157226562
Week 3: 141.73562622070312
Week 4: 122.6465072631836
