In [8]:
#Import Required Libraries

import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor

import warnings
warnings.filterwarnings("ignore")


In [9]:
#Load Dataset
df = pd.read_csv(r'C:\Users\pooji\OneDrive\Attachments\Desktop\Priceoptima\based_pricing.csv', parse_dates=["Date"])
df = df.sort_values("Date").reset_index(drop=True)

df.head()


Unnamed: 0,Date,Product ID,Product Name,Category,Pricing_Type,Price,Cost Price,Units Sold,Visitors,Conversion Rate %,...,rolling_units_30,rolling_volatility_7,price_change_pct,pct_change_units,elasticity,elasticity_class,profit_per_unit,profit_margin_clean,weekend_price_interaction,season_discount_interaction
0,2024-01-01,P039,Rice 5kg,Grocery,Discounted,267,242,347,2718,12.77,...,,,,,,,25,0.09,0,26.7
1,2024-01-02,P029,Rice 5kg,Snacks,Surge,462,346,14,1461,0.96,...,,,0.730337,-0.959654,-1.313988,Highly Elastic,116,0.25,0,0.0
2,2024-01-03,P015,Shampoo,Snacks,Surge,466,166,413,3330,12.4,...,,,0.008658,28.5,3291.75,Highly Inelastic,300,0.64,0,0.0
3,2024-01-04,P043,Shampoo,Personal Care,Surge,365,35,352,321,109.66,...,,,-0.216738,-0.1477,0.681466,Inelastic,330,0.9,0,0.0
4,2024-01-05,P008,Chocolate,Dairy,Discounted,266,138,135,192,70.31,...,,,-0.271233,-0.616477,2.272871,Highly Inelastic,128,0.48,0,26.6


In [10]:
#Handle Missing Values
df.fillna(method="ffill", inplace=True)
df.fillna(method="bfill", inplace=True)

df.isnull().sum()


Date                           0
Product ID                     0
Product Name                   0
Category                       0
Pricing_Type                   0
Price                          0
Cost Price                     0
Units Sold                     0
Visitors                       0
Conversion Rate %              0
Revenue                        0
Profit                         0
Profit Margin %                0
Stock Level                    0
Inventory Turnover             0
Competitor Price               0
competitor_price_diff          0
competitor_cheaper             0
YEAR                           0
MONTH                          0
DAY                            0
day_of_week                    0
is_weekend                     0
Season                         0
price_lag_1                    0
price_lag_7                    0
units_lag_1                    0
units_lag_7                    0
rolling_units_7                0
rolling_units_30               0
rolling_vo

In [11]:
#Encode Categorical Variables
cat_cols = df.select_dtypes(include="object").columns

le = LabelEncoder()
for col in cat_cols:
    df[col] = le.fit_transform(df[col])

print("Categorical encoding completed")


Categorical encoding completed


In [23]:
#Define Features (X) and Target (y)
target = "Units Sold"
features = [c for c in df.columns if c not in ["Units Sold", "Revenue", "Date"]]

X = df[features]
y = df[target]

print("Total features:", len(features))


Total features: 36


In [24]:
#Time-Based Train-Test Split
split_date = df["Date"].quantile(0.80)

train_idx = df["Date"] <= split_date
test_idx  = df["Date"] > split_date

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]


In [14]:
#Train XGBoost Regressor
from sklearn.ensemble import GradientBoostingRegressor

gbr = GradientBoostingRegressor(random_state=42)

gbr.fit(X_train, y_train)
gbr_preds = gbr.predict(X_test)

gbr_mse = mean_squared_error(y_test, gbr_preds)
gbr_rmse = np.sqrt(gbr_mse)
gbr_mae = mean_absolute_error(y_test, gbr_preds)

print(f"GBR RMSE: {gbr_rmse:.2f}")
print(f"GBR MAE : {gbr_mae:.2f}")


GBR RMSE: 28.89
GBR MAE : 20.01


In [15]:
#Train LightGBM Regressor
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(random_state=42)

rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)

rf_mse = mean_squared_error(y_test, rf_preds)
rf_rmse = np.sqrt(rf_mse)
rf_mae = mean_absolute_error(y_test, rf_preds)

print(f"RF RMSE: {rf_rmse:.2f}")
print(f"RF MAE : {rf_mae:.2f}")


RF RMSE: 33.02
RF MAE : 20.41


In [16]:
#ML-Based Dynamic Pricing Logic
test_data = df.loc[test_idx].copy()


In [17]:
def rule_based_price(price, stock):
    if stock < 50:
        return price * 1.10
    elif stock > 200:
        return price * 0.90
    else:
        return price

test_data["Rule_Based_Price"] = test_data.apply(
    lambda x: rule_based_price(x["Price"], x["Stock Level"]),
    axis=1
)


In [18]:
test_data["Predicted_Demand"] = rf_preds  # or gbr_preds

avg_demand = test_data["Predicted_Demand"].mean()

def ml_price(price, demand):
    if demand > avg_demand:
        return price * 1.05
    else:
        return price * 0.95

test_data["ML_Price"] = test_data.apply(
    lambda x: ml_price(x["Price"], x["Predicted_Demand"]),
    axis=1
)


In [19]:
#Backtesting Revenue Calculation
test_data["Static_Revenue"] = test_data["Price"] * test_data["Units Sold"]
test_data["Rule_Based_Revenue"] = test_data["Rule_Based_Price"] * test_data["Units Sold"]
test_data["ML_Revenue"] = test_data["ML_Price"] * test_data["Units Sold"]


In [20]:
#Revenue Lift Calculation
static_rev = test_data["Static_Revenue"].sum()
rule_rev   = test_data["Rule_Based_Revenue"].sum()
ml_rev     = test_data["ML_Revenue"].sum()

revenue_lift = ((ml_rev - static_rev) / static_rev) * 100

print(f"Static Revenue     : ₹{static_rev:,.2f}")
print(f"Rule-Based Revenue : ₹{rule_rev:,.2f}")
print(f"ML-Based Revenue   : ₹{ml_rev:,.2f}")
print(f"Revenue Lift (%)   : {revenue_lift:.2f}%")


Static Revenue     : ₹14,423,871.00
Rule-Based Revenue : ₹13,311,618.40
ML-Based Revenue   : ₹14,832,347.35
Revenue Lift (%)   : 2.83%


In [21]:
#Revenue Comparison Table
pd.DataFrame({
    "Pricing Strategy": ["Static", "Rule-Based", "ML-Based"],
    "Revenue": [static_rev, rule_rev, ml_rev]
})


Unnamed: 0,Pricing Strategy,Revenue
0,Static,14423871.0
1,Rule-Based,13311618.4
2,ML-Based,14832347.35
