# How can we develop a Python-based dynamic pricing model that adjusts product or service prices in real time—based on factors like demand, supply, competitor pricing, and historical sales—to maximize revenue and profit?

In [11]:
# 📦 Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_squared_error

In [13]:
# Load dataset
df = pd.read_csv(r"C:\Users\tajud\OneDrive\Desktop\NRIT\sudents and projects\S1\Regression\dynamic_pricing.csv")
df.dropna()

Unnamed: 0,Number_of_Riders,Number_of_Drivers,Location_Category,Customer_Loyalty_Status,Number_of_Past_Rides,Average_Ratings,Time_of_Booking,Vehicle_Type,Expected_Ride_Duration,Historical_Cost_of_Ride
0,90,45,Urban,Silver,13,4.47,Night,Premium,90,284.257273
1,58,39,Suburban,Silver,72,4.06,Evening,Economy,43,173.874753
2,42,31,Rural,Silver,0,3.99,Afternoon,Premium,76,329.795469
3,89,28,Rural,Regular,67,4.31,Afternoon,Premium,134,470.201232
4,78,22,Rural,Regular,74,3.77,Afternoon,Economy,149,579.681422
...,...,...,...,...,...,...,...,...,...,...
995,33,23,Urban,Gold,24,4.21,Morning,Premium,11,91.389526
996,84,29,Urban,Regular,92,4.55,Morning,Premium,94,424.155987
997,44,6,Suburban,Gold,80,4.13,Night,Premium,40,157.364830
998,53,27,Suburban,Regular,78,3.63,Night,Premium,58,279.095048


In [15]:
# 🔁 Encode all categorical columns
df = pd.get_dummies(df, drop_first=True)  # Handles 'Silver', 'Gold', etc.

In [17]:
# 🎯 Define features and target
X = df.drop(columns=['Historical_Cost_of_Ride'])  # Replace with your target column
y = df['Historical_Cost_of_Ride']

In [19]:
# 📏 Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [21]:
# 🔀 Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [23]:
# Define regression models
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "XGBoost": XGBRegressor(eval_metric='rmse', use_label_encoder=False, random_state=42)
}

In [25]:
# Train and evaluate
results = []
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    r2 = r2_score(y_test, preds)
    rmse = mean_squared_error(y_test, preds, squared=False)
    results.append((name, round(r2, 4), round(rmse, 2)))

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [27]:
# Display results
results_df = pd.DataFrame(results, columns=["Model", "R2_Score", "RMSE"]).sort_values(by="R2_Score", ascending=False)
print(results_df)

               Model  R2_Score   RMSE
2   Lasso Regression    0.8756  67.34
1   Ridge Regression    0.8753  67.44
0  Linear Regression    0.8752  67.44
3      Random Forest    0.8525  73.33
4            XGBoost    0.8352  77.51
