In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
df=pd.read_csv('dynamic_pricing.csv')

In [3]:
print("shape:",df.shape)

shape: (1000, 10)


In [4]:
mc=df.isnull().sum()
dc=df.duplicated().sum()
print("missing value: ",mc)
print("duplicate value: ",dc)

missing value:  Number_of_Riders           0
Number_of_Drivers          0
Location_Category          0
Customer_Loyalty_Status    0
Number_of_Past_Rides       0
Average_Ratings            0
Time_of_Booking            0
Vehicle_Type               0
Expected_Ride_Duration     0
Historical_Cost_of_Ride    0
dtype: int64
duplicate value:  0


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Number_of_Riders         1000 non-null   int64  
 1   Number_of_Drivers        1000 non-null   int64  
 2   Location_Category        1000 non-null   object 
 3   Customer_Loyalty_Status  1000 non-null   object 
 4   Number_of_Past_Rides     1000 non-null   int64  
 5   Average_Ratings          1000 non-null   float64
 6   Time_of_Booking          1000 non-null   object 
 7   Vehicle_Type             1000 non-null   object 
 8   Expected_Ride_Duration   1000 non-null   int64  
 9   Historical_Cost_of_Ride  1000 non-null   float64
dtypes: float64(2), int64(4), object(4)
memory usage: 78.3+ KB


In [6]:
print(df.columns)
df.describe()

Index(['Number_of_Riders', 'Number_of_Drivers', 'Location_Category',
       'Customer_Loyalty_Status', 'Number_of_Past_Rides', 'Average_Ratings',
       'Time_of_Booking', 'Vehicle_Type', 'Expected_Ride_Duration',
       'Historical_Cost_of_Ride'],
      dtype='object')


Unnamed: 0,Number_of_Riders,Number_of_Drivers,Number_of_Past_Rides,Average_Ratings,Expected_Ride_Duration,Historical_Cost_of_Ride
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,60.372,27.076,50.031,4.25722,99.588,372.502623
std,23.701506,19.068346,29.313774,0.435781,49.16545,187.158756
min,20.0,5.0,0.0,3.5,10.0,25.993449
25%,40.0,11.0,25.0,3.87,59.75,221.365202
50%,60.0,22.0,51.0,4.27,102.0,362.019426
75%,81.0,38.0,75.0,4.6325,143.0,510.497504
max,100.0,89.0,100.0,5.0,180.0,836.116419


In [7]:
num_cols=df.select_dtypes(include=np.number).columns.tolist()
cat_cols=df.select_dtypes(exclude=np.number).columns.tolist()
print("Numerical Columns:")
for num in num_cols:
    print(num)
print("\nCategorical Columns:")
for cat in cat_cols:
    print(cat)

Numerical Columns:
Number_of_Riders
Number_of_Drivers
Number_of_Past_Rides
Average_Ratings
Expected_Ride_Duration
Historical_Cost_of_Ride

Categorical Columns:
Location_Category
Customer_Loyalty_Status
Time_of_Booking
Vehicle_Type


In [8]:
# IQR Method
num_cols = df.select_dtypes(include=["int64","float64"]).columns

outlier_summary = {}

for col in num_cols:
    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    outliers = df[(df[col] < lower) | (df[col] > upper)][col]
    outlier_summary[col] = len(outliers)

outlier_summary



{'Number_of_Riders': 0,
 'Number_of_Drivers': 10,
 'Number_of_Past_Rides': 0,
 'Average_Ratings': 0,
 'Expected_Ride_Duration': 0,
 'Historical_Cost_of_Ride': 0}

In [9]:
for col in num_cols:
    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    df[col] = np.where(df[col] < lower, lower,
                       np.where(df[col] > upper, upper, df[col]))
    print(f"Outliers in {col}: {len(df[(df[col] < lower) | (df[col] > upper)])}")


Outliers in Number_of_Riders: 0
Outliers in Number_of_Drivers: 0
Outliers in Number_of_Past_Rides: 0
Outliers in Average_Ratings: 0
Outliers in Expected_Ride_Duration: 0
Outliers in Historical_Cost_of_Ride: 0


In [10]:


df["Price"] = df["Historical_Cost_of_Ride"]
df["Completed_Rides"] = df[["Number_of_Riders", "Number_of_Drivers"]].min(axis=1)
df["Booking_Intents"] = df["Number_of_Riders"]
df["Cancelled_Rides"] = df["Booking_Intents"] - df["Completed_Rides"]
df["Cost"] = df["Price"] * 0.7   # assume 70% of fare is operating cost




df["Revenue"] = df["Price"] * df["Completed_Rides"]
df["Profit"] = (df["Price"] - df["Cost"]) * df["Completed_Rides"]


baseline_price = df["Price"].mean()
df["Baseline_Revenue"] = baseline_price * df["Completed_Rides"]
df["Revenue_Lift_%"] = ((df["Revenue"] - df["Baseline_Revenue"]) / df["Baseline_Revenue"]) * 100


df["Gross_Margin_%"] = ((df["Revenue"] - (df["Cost"] * df["Completed_Rides"])) / df["Revenue"]) * 100


df["Conversion_Rate_%"] = (df["Completed_Rides"] / df["Booking_Intents"]) * 100
df["Cancellation_Rate_%"] = (df["Cancelled_Rides"] / df["Booking_Intents"]) * 100


df["Price_Shift"] = df["Price"].shift(1)
df["Price_Change_%"] = ((df["Price"] - df["Price_Shift"]) / df["Price_Shift"]) * 100
df["Price_Change_Flag"] = np.where(df["Price_Change_%"].abs() > 15, 1, 0)


kpi_summary = {
    "Revenue (₹)": df["Revenue"].sum(),
    "Profit (₹)": df["Profit"].sum(),
    "Revenue Lift (%)": df["Revenue_Lift_%"].mean(),
    "Gross Margin (%)": df["Gross_Margin_%"].mean(),
    "Conversion Rate (%)": df["Conversion_Rate_%"].mean(),
    "Price Change Rate (%)": df["Price_Change_Flag"].mean() * 100,
    "Cancellation Rate (%)": df["Cancellation_Rate_%"].mean()
}

kpi_table = pd.DataFrame([kpi_summary])


print("KPI Summary Table:")
print(kpi_table.to_string(index=False))



KPI Summary Table:
 Revenue (₹)   Profit (₹)  Revenue Lift (%)  Gross Margin (%)  Conversion Rate (%)  Price Change Rate (%)  Cancellation Rate (%)
1.013314e+07 3.039942e+06     -4.206413e-15              30.0            43.763674                   86.6              56.236326


In [17]:
from getpass import getpass

# Enter your GitHub token (hidden input)
token = getpass('Enter your GitHub token: ')

username = "Deepikadhinakaran"
repo = "AI_price_optima"

# Build the repo URL with token
repo_url = f"https://{username}:{token}@github.com/{username}/{repo}.git"

# Clone using Python variable (note the curly braces)
!git clone {repo_url}


Enter your GitHub token: ··········
Cloning into 'AI_price_optima'...
