In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [5]:
df = pd.read_csv("/content/dynamic_pricing.csv")
print(df.head())

   Number_of_Riders  Number_of_Drivers Location_Category  \
0                90                 45             Urban   
1                58                 39          Suburban   
2                42                 31             Rural   
3                89                 28             Rural   
4                78                 22             Rural   

  Customer_Loyalty_Status  Number_of_Past_Rides  Average_Ratings  \
0                  Silver                    13             4.47   
1                  Silver                    72             4.06   
2                  Silver                     0             3.99   
3                 Regular                    67             4.31   
4                 Regular                    74             3.77   

  Time_of_Booking Vehicle_Type  Expected_Ride_Duration  \
0           Night      Premium                      90   
1         Evening      Economy                      43   
2       Afternoon      Premium                      76  

# Explotary Data Analysis

In [6]:
print(df.describe())

       Number_of_Riders  Number_of_Drivers  Number_of_Past_Rides  \
count       1000.000000        1000.000000           1000.000000   
mean          60.372000          27.076000             50.031000   
std           23.701506          19.068346             29.313774   
min           20.000000           5.000000              0.000000   
25%           40.000000          11.000000             25.000000   
50%           60.000000          22.000000             51.000000   
75%           81.000000          38.000000             75.000000   
max          100.000000          89.000000            100.000000   

       Average_Ratings  Expected_Ride_Duration  Historical_Cost_of_Ride  
count      1000.000000              1000.00000              1000.000000  
mean          4.257220                99.58800               372.502623  
std           0.435781                49.16545               187.158756  
min           3.500000                10.00000                25.993449  
25%           3.8

In [7]:
first_fig = px.scatter(df,  x= 'Expected_Ride_Duration', y= 'Historical_Cost_of_Ride', title = 'Expected Ride Duration vs Historical cost of Ride', trendline = 'ols')

first_fig.show()

In [8]:
second_fig = px.box(df, x = 'Vehicle_Type', y= 'Historical_Cost_of_Ride', title = 'Historical Cost of Ride Distribution by vehicle Type')
second_fig.show()

In [9]:
print(df.columns)


Index(['Number_of_Riders', 'Number_of_Drivers', 'Location_Category',
       'Customer_Loyalty_Status', 'Number_of_Past_Rides', 'Average_Ratings',
       'Time_of_Booking', 'Vehicle_Type', 'Expected_Ride_Duration',
       'Historical_Cost_of_Ride'],
      dtype='object')


In [10]:
for col in df.columns:
    try:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    except:
        pass
correlation_matrix = df.corr()

In [11]:
third_fig = go.Figure(data = go.Heatmap(z = correlation_matrix.values, x = correlation_matrix.columns, y = correlation_matrix.columns, colorscale='Viridis')
)
third_fig.update_layout(title='Correlation Matrix Heatmap')
third_fig.show()

Dynamic Pricing Strategy

In [12]:
import numpy as np

high_demand_percentile = 75
low_demand_percentile = 25

df['Demand_Multiplier'] = np.where(df['Number_of_Riders'] > np.percentile(df['Number_of_Riders'], high_demand_percentile),
                                   df['Number_of_Riders'] / np.percentile(df['Number_of_Riders'], high_demand_percentile),
                                   df['Number_of_Riders'] / np.percentile(df['Number_of_Riders'], low_demand_percentile),)


In [13]:

low_supply_percentile = 25
high_supply_percentile = 75

df['supply_multiplier'] = np.where(df['Number_of_Drivers'] > np.percentile(df['Number_of_Drivers'], low_supply_percentile),
                                   np.percentile(df['Number_of_Drivers'], high_supply_percentile) / df['Number_of_Drivers'],
                                   np.percentile(df['Number_of_Drivers'], low_supply_percentile) / df['Number_of_Drivers'])

In [14]:
demand_threshold_high = 1.2
demand_threshold_low = 0.8
supply_threshold_high = 0.8
supply_threshold_low = 1.2

df['Adjusted_ride_cost'] = df['Historical_Cost_of_Ride'] * (np.maximum(df['Demand_Multiplier'],demand_threshold_low)* np.maximum(df['supply_multiplier'], supply_threshold_high))

In [15]:
df.head()


Unnamed: 0,Number_of_Riders,Number_of_Drivers,Location_Category,Customer_Loyalty_Status,Number_of_Past_Rides,Average_Ratings,Time_of_Booking,Vehicle_Type,Expected_Ride_Duration,Historical_Cost_of_Ride,Demand_Multiplier,supply_multiplier,Adjusted_ride_cost
0,90,45,,,13,4.47,,,90,284.257273,1.111111,0.844444,266.710528
1,58,39,,,72,4.06,,,43,173.874753,1.45,0.974359,245.653817
2,42,31,,,0,3.99,,,76,329.795469,1.05,1.225806,424.478684
3,89,28,,,67,4.31,,,134,470.201232,1.098765,1.357143,701.155452
4,78,22,,,74,3.77,,,149,579.681422,1.95,1.727273,1952.472427


In [16]:
df['profit_percentage'] = ((df['Adjusted_ride_cost'] - df['Historical_Cost_of_Ride']) / df['Historical_Cost_of_Ride']) * 100


profitable_rides = df[df['profit_percentage'] > 0]

loss_rides = df[df['profit_percentage']<0]



## Calculate the count of profitable and loss rides

In [17]:
profitableCount = len(profitable_rides)
lossCount = len(loss_rides)



## Pie Chart for profitable and loss rides

In [21]:
import plotly.graph_objects as go

labels = ['Profitable Rides', 'Loss Rides']
values = [profitableCount, lossCount]

fourth_fig = go.Figure(data = [go.Pie(labels = labels, values = values, hole = 0.4)])
fourth_fig.update_layout(title = 'Profitable of Rides ')
fourth_fig.show()

In [22]:
fifth_fig = px.scatter(df, x = 'Expected_Ride_Duration', y = 'Adjusted_ride_cost', title = 'Expected Ride vs Cost of ride', trendline = 'ols')
fifth_fig.show()

# Training a Machine Learning

In [23]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler



In [24]:
def preprocessing(df):

  numeric_features = df.select_dtypes(include = ['float', 'int']).columns
  categorical_features= df.select_dtypes(include = ['objects']).columns
  df[numeric_features]= df[numeric_features].fillna(df[numeric_features].mean())

  for feature in numeric_features:
    a = df[feature].quantile(0.25)
    b = df[feature].quantile(0.75)

    IQR = b - a
    lower_bound = a - (1.5 * IQR)
    upper_bound = b + (1.5 * IQR)

    df[feature] = np.where((df[feature] < lower_bound)| (df[feature] > upper_bound), df[feature].mean(), df[feature])
  df[categorical_features] = df[categorical_features].fillna(df[categorical_features].mode().iloc[0])
  return df

In [25]:
df["Vehicle_Type"] = df["Vehicle_Type"].map({"Premium" : 1, "Economy" :0})

# Model for prediction of cost of ride

In [28]:
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

x = np.array(df[["Number_of_Riders", "Number_of_Drivers", "Vehicle_Type","Expected_Ride_Duration"]])
y = np.array(df[["Adjusted_ride_cost"]])

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

y_train = y_train.ravel()
y_test = y_test.ravel()

imputer = SimpleImputer(strategy='mean')
x_train = imputer.fit_transform(x_train)
x_test = imputer.transform(x_test)

from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()
model.fit(x_train, y_train)

##Testing Model

In [36]:
def get_vehicle_type_numeric(vehicle_type):
  vehicle_type_mapping = {
      "Premium" : 1,
      "Economy" : 0
  }
  vehicle_type_numeric = vehicle_type_mapping.get(vehicle_type.capitalize())
  return vehicle_type_numeric


def pricePRediction(number_of_riders, number_of_drivers, vehicle_type, Excepted_Ride_Duration):
  vehicle_type_numeric = get_vehicle_type_numeric(vehicle_type)
  if vehicle_type_numeric is None:
    raise ValueError("Invalid vehicle type")
  Data_input = np.array([[number_of_riders, vehicle_type_numeric, Excepted_Ride_Duration]])
  predicted_price = model.predict(Data_input)
  return predicted_price

input_no_of_Riders = 30
input_no_of_Drivers = 25

input_vehicle_type = "Economy"

Expected_Ride_Duration = 30

predicted_price = pricePRediction(input_no_of_Riders, input_no_of_Drivers, input_vehicle_type, Expected_Ride_Duration )
print("Predicted Price for vehicle Ride : ", predicted_price)

Predicted Price for vehicle Ride :  [267.36491453]


## Comparison Between actual and predicted

In [40]:
y_pred = model.predict(x_test)

fifth_fig = go.Figure()

fifth_fig.add_trace(go.Scatter(
    x = y_test.flatten(),
    y = y_pred,

    mode = 'markers',
    name = 'Actual va Predicted'
))

fifth_fig.add_trace(go.Scatter(
    x = [min(y_test.flatten()), max(y_test.flatten())],
    y = [min(y_test.flatten()), max(y_test.flatten())],

    mode = 'lines',
    name = 'Ideal',
    line = dict(color = 'black', dash='dash')

))

fifth_fig.update_layout(
    title = 'Actual vs Predicted price of Ride',
    xaxis_title = 'Actual Values',
    yaxis_title = 'Predicted Values',
    showlegend = True,
)

fifth_fig.show()