# In [None]:
#Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

# Reading the Data and Performing EDA
df=pd.read_csv('dynamic_pricing.csv')
# Bare expressions are discarded when the cells run as one script, so the
# summaries are printed explicitly. df.info() writes its own report.
print(df.head())
print(df.describe())
df.info()
# Frequency of each category in the categorical columns.
for _column in ('Location_Category', 'Customer_Loyalty_Status',
                'Time_of_Booking', 'Vehicle_Type'):
    print(df[_column].value_counts())

#Visualization
# Both charts are drawn with plotly, so no matplotlib figure is created here
# (the previous plt.figure() call only produced an empty, unused figure).
# Columns are passed by name so plotly labels the axes from the DataFrame.
fig=px.scatter(df,x='Expected_Ride_Duration',y='Historical_Cost_of_Ride')
fig.show()
# Distribution of the historical ride cost per vehicle type.
fig=px.box(df,x='Vehicle_Type',y='Historical_Cost_of_Ride')
fig.show()
# Implementing Pricing Strategy
# The *_multiplier constants below are percentile ranks (0-100) handed to
# np.percentile, not multipliers themselves.
high_demand_multiplier=75
low_demand_multiplier=25
_riders=df['Number_of_Riders']
_riders_high_cut=np.percentile(_riders,high_demand_multiplier)
_riders_low_cut=np.percentile(_riders,low_demand_multiplier)
# Riders above the high cut are scaled by the high cut; all others by the low cut.
df['demand_multiplier']=np.where(
    _riders>_riders_high_cut,
    _riders/_riders_high_cut,
    _riders/_riders_low_cut,
)

high_supply_multiplier=25
low_supply_multiplier=75
_drivers=df['Number_of_Drivers']
_drivers_high_cut=np.percentile(_drivers,high_supply_multiplier)
_drivers_low_cut=np.percentile(_drivers,low_supply_multiplier)
# NOTE(review): this ratio grows with the number of drivers, so more supply
# raises the price - confirm that this direction is intended.
df['supplier_multiplier']=np.where(
    _drivers>_drivers_high_cut,
    _drivers/_drivers_high_cut,
    _drivers/_drivers_low_cut,
)

high_demand_threshold=1.2
low_demand_threshold=0.8
high_supply_threshold=1.2
low_supply_threshold=0.8
# Each multiplier is floored at its low threshold before scaling the historical
# cost; the high thresholds are defined but not used in this computation.
_demand_factor=np.maximum(df['demand_multiplier'],low_demand_threshold)
_supply_factor=np.maximum(df['supplier_multiplier'],low_supply_threshold)
df['dynamic_price']=df['Historical_Cost_of_Ride']*(_demand_factor*_supply_factor)

#Calculating the Profit and Loss Percentage
# Percentage change of the dynamic price relative to the historical cost.
df['profit_percentage']=((df['dynamic_price']-df['Historical_Cost_of_Ride'])/df['Historical_Cost_of_Ride'])*100
# Rides whose price is exactly unchanged (0 %) fall into neither group.
profitable_rides=df[df['profit_percentage']>0]
loss_rides=df[df['profit_percentage']<0]

profit_count=len(profitable_rides)
loss_count=len(loss_rides)

labels=['profit','loss']
values=[profit_count,loss_count]
#visualization
# Donut chart with the share of profitable vs. loss-making rides.
fig=go.Figure(data=[go.Pie(labels=labels,values=values,hole=0.3)])
fig.update_layout(title='profitability of rides')
fig.show()

# Dynamic price against expected ride duration; columns are passed by name.
fig=px.scatter(df,x='Expected_Ride_Duration',y='dynamic_price')
fig.show()

# Data Pre-Processing
def pre_processing(df):
    """Impute missing values and replace outliers in ``df``, in place.

    Numeric columns (float/int dtypes): missing values are filled with the
    column mean, then every value outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]``
    is replaced by the column mean (computed after the fill, outliers
    included).

    Categorical columns (object dtype): missing values are filled with the
    column's most frequent value.

    Args:
        df: DataFrame to clean. It is modified in place.

    Returns:
        The same ``df`` object, for call chaining.
    """
    numeric_features = df.select_dtypes(include=['float', 'int']).columns
    categorical_features = df.select_dtypes(include=['object']).columns

    if not numeric_features.empty:
        df[numeric_features] = df[numeric_features].fillna(df[numeric_features].mean())
    for feature in numeric_features:
        column = df[feature]
        q1 = column.quantile(0.25)
        q3 = column.quantile(0.75)
        iqr = q3 - q1
        lower = q1 - (1.5 * iqr)
        upper = q3 + (1.5 * iqr)
        # Each comparison needs its own parentheses: `|` binds tighter than
        # `<` / `>`, and the mask must be np.where's first argument.
        is_outlier = (column < lower) | (column > upper)
        df[feature] = np.where(is_outlier, column.mean(), column)

    if not categorical_features.empty:
        modes = df[categorical_features].mode()
        # mode() is empty when every categorical value is missing; there is
        # nothing to fill with in that case.
        if not modes.empty:
            df[categorical_features] = df[categorical_features].fillna(modes.iloc[0])
    return df
# Encode the vehicle type as a number so it can be used as a model feature.
# Values other than "Premium"/"Economy" become NaN under Series.map.
df['Vehicle_Type'] = df['Vehicle_Type'].map({"Premium": 1,
                                             "Economy": 0})
#Splitting the Data
# The feature columns are listed in the same order in which predict_price()
# builds its input row: riders, drivers, vehicle type, expected duration.
x = np.array(df[["Number_of_Riders", "Number_of_Drivers", "Vehicle_Type", "Expected_Ride_Duration"]])
y = np.array(df[['dynamic_price']])
# Hold out 20 % of the rows for evaluation.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
# Flatten the (n, 1) targets to 1-D, the shape the regressor expects.
y_train = y_train.ravel()
y_test = y_test.ravel()
#Model Selection
# The name `dtree` is kept because other code refers to it; the estimator is
# a random forest.
dtree = RandomForestRegressor()
dtree.fit(x_train, y_train)
def get_vehicle_type_numeric(vehicle_type):
    """Return the numeric code for a vehicle type label.

    "Premium" maps to 1 and "Economy" maps to 0; any other label yields None.
    """
    codes = dict(Premium=1, Economy=0)
    try:
        return codes[vehicle_type]
    except KeyError:
        # Unknown label: report it with None rather than raising.
        return None
#Predictions
def predict_price(number_of_riders, number_of_drivers, vehicle_type, expected_ride_duration):
    """Predict the ride price for a single ride with the fitted model.

    Args:
        number_of_riders: Number of riders for the ride.
        number_of_drivers: Number of drivers for the ride.
        vehicle_type: Vehicle type label, "Premium" or "Economy".
        expected_ride_duration: Expected duration of the ride.

    Returns:
        The array returned by ``dtree.predict`` for the single input row.

    Raises:
        ValueError: If ``vehicle_type`` is not a known label.
    """
    vehicle_type_numeric = get_vehicle_type_numeric(vehicle_type)
    # Validate the lookup result, not the raw argument: an unknown label maps
    # to None and must be rejected before it reaches the model.
    if vehicle_type_numeric is None:
        raise ValueError("Invalid vehicle type")

    # The model needs the numeric code, not the label string.
    # NOTE(review): the feature order here must match the column order the
    # model was fitted on - verify against the training code.
    input_data = np.array([[number_of_riders, number_of_drivers, vehicle_type_numeric, expected_ride_duration]])
    predicted_price = dtree.predict(input_data)
    return predicted_price

#Comparison between actual and predicted results

# Predict with the fitted model; the DataFrame itself has no predict method.
y_pred=dtree.predict(x_test)
fig=go.Figure()
# Actual test targets on the x axis, model predictions on the y axis
# (go.Scatter takes `x`/`y`; `s` is not a valid property).
fig.add_trace(go.Scatter(x=y_test.flatten(),y=y_pred,mode='markers',name='Actual'))
fig.update_layout(xaxis_title='Actual values',yaxis_title='Predicted values')
fig.show()






