**Step 1 -> Loading dataset**

In [17]:
import pandas as pd
import plotly.express as plx
import plotly.graph_objects as pgo
import numpy as np

In [18]:
# Load the dataset from the CSV file located in the current working directory.
DATASET_PATH = r"./dataset.csv"
data = pd.read_csv(DATASET_PATH)

In [19]:
# Preview the first five rows to sanity-check the columns that were loaded.
data.head(n=5)

Unnamed: 0,Number_of_Riders,Number_of_Drivers,Location_Category,Customer_Loyalty_Status,Number_of_Past_Rides,Average_Ratings,Time_of_Booking,Vehicle_Type,Expected_Ride_Duration,Historical_Cost_of_Ride
0,90,45,Urban,Silver,13,4.47,Night,Premium,90,284.257273
1,58,39,Suburban,Silver,72,4.06,Evening,Economy,43,173.874753
2,42,31,Rural,Silver,0,3.99,Afternoon,Premium,76,329.795469
3,89,28,Rural,Regular,67,4.31,Afternoon,Premium,134,470.201232
4,78,22,Rural,Regular,74,3.77,Afternoon,Economy,149,579.681422


**Step 2 -> Exploration of Data**

In [20]:
# Inspect each column's dtype to separate numeric columns from object (text) columns.
data.dtypes

Number_of_Riders             int64
Number_of_Drivers            int64
Location_Category           object
Customer_Loyalty_Status     object
Number_of_Past_Rides         int64
Average_Ratings            float64
Time_of_Booking             object
Vehicle_Type                object
Expected_Ride_Duration       int64
Historical_Cost_of_Ride    float64
dtype: object

**Step 3 -> Data Cleaning**

*Searching for Null or Missing Values*

In [21]:
# Count missing values per column (isna is the same check as isnull).
data.isna().sum()

Number_of_Riders           0
Number_of_Drivers          0
Location_Category          0
Customer_Loyalty_Status    0
Number_of_Past_Rides       0
Average_Ratings            0
Time_of_Booking            0
Vehicle_Type               0
Expected_Ride_Duration     0
Historical_Cost_of_Ride    0
dtype: int64

*Searching for Duplicates and Removing Duplicates if any.*

In [22]:
# drop_duplicates() returns a new DataFrame and leaves the original untouched,
# so the result has to be assigned back for duplicate rows to actually be
# removed from `data` before the later steps use it.
data = data.drop_duplicates()
data

Unnamed: 0,Number_of_Riders,Number_of_Drivers,Location_Category,Customer_Loyalty_Status,Number_of_Past_Rides,Average_Ratings,Time_of_Booking,Vehicle_Type,Expected_Ride_Duration,Historical_Cost_of_Ride
0,90,45,Urban,Silver,13,4.47,Night,Premium,90,284.257273
1,58,39,Suburban,Silver,72,4.06,Evening,Economy,43,173.874753
2,42,31,Rural,Silver,0,3.99,Afternoon,Premium,76,329.795469
3,89,28,Rural,Regular,67,4.31,Afternoon,Premium,134,470.201232
4,78,22,Rural,Regular,74,3.77,Afternoon,Economy,149,579.681422
...,...,...,...,...,...,...,...,...,...,...
995,33,23,Urban,Gold,24,4.21,Morning,Premium,11,91.389526
996,84,29,Urban,Regular,92,4.55,Morning,Premium,94,424.155987
997,44,6,Suburban,Gold,80,4.13,Night,Premium,40,157.364830
998,53,27,Suburban,Regular,78,3.63,Night,Premium,58,279.095048


In [23]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()

Unnamed: 0,Number_of_Riders,Number_of_Drivers,Number_of_Past_Rides,Average_Ratings,Expected_Ride_Duration,Historical_Cost_of_Ride
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,60.372,27.076,50.031,4.25722,99.588,372.502623
std,23.701506,19.068346,29.313774,0.435781,49.16545,187.158756
min,20.0,5.0,0.0,3.5,10.0,25.993449
25%,40.0,11.0,25.0,3.87,59.75,221.365202
50%,60.0,22.0,51.0,4.27,102.0,362.019426
75%,81.0,38.0,75.0,4.6325,143.0,510.497504
max,100.0,89.0,100.0,5.0,180.0,836.116419


**Step 4 -> Visualization**

*Relationships Between Historical_Cost_of_Ride and other variables*

Relationship Between Historical Cost of Ride and Number of Riders

In [24]:
# Scatter of rider count against historical ride cost, with an OLS trendline.
figure = plx.scatter(
    data_frame=data,
    x="Historical_Cost_of_Ride",
    y="Number_of_Riders",
    trendline="ols",
    title="Relationship Between Historical Cost of Ride and Number of Riders",
)
figure.show()

Relationship Between Historical Cost of Ride and Number of Drivers

In [25]:
# Scatter of driver count against historical ride cost, with an OLS trendline.
# The figure title previously read "Number ofDrivers" (missing space); corrected
# so the rendered chart is labelled properly.
figure = plx.scatter(
    data,
    x="Historical_Cost_of_Ride",
    y="Number_of_Drivers",
    title="Relationship Between Historical Cost of Ride and Number of Drivers",
    trendline="ols",
)
figure.show()

Relationship Between Historical Cost of Ride and Number of Past Rides

In [26]:
# Scatter of past-ride count against historical ride cost, with an OLS trendline.
figure = plx.scatter(
    data_frame=data,
    x="Historical_Cost_of_Ride",
    y="Number_of_Past_Rides",
    trendline="ols",
    title="Relationship Between Historical Cost of Ride and Number of Past Rides",
)
figure.show()

Relationship Between Historical Cost of Ride and Average Ratings

In [27]:
# Scatter of average rating against historical ride cost, with an OLS trendline.
figure = plx.scatter(
    data_frame=data,
    x="Historical_Cost_of_Ride",
    y="Average_Ratings",
    trendline="ols",
    title="Relationship Between Historical Cost of Ride and Average Ratings",
)
figure.show()

Relationship Between Historical Cost of Ride and Expected_Ride_Duration

In [28]:
# Scatter of expected ride duration against historical ride cost, with an OLS trendline.
figure = plx.scatter(
    data_frame=data,
    x="Historical_Cost_of_Ride",
    y="Expected_Ride_Duration",
    trendline="ols",
    title="Relationship Between Historical Cost of Ride and Expected_Ride_Duration",
)
figure.show()

Relationship Between Historical Cost of Ride and Location_Category

In [29]:
# Box plot of historical ride cost, one box per location category.
figure = plx.box(
    data_frame=data,
    y="Historical_Cost_of_Ride",
    x="Location_Category",
    title="Relationship Between Historical Cost of Ride and Location_Category",
)
figure.show()

Relationship Between Historical Cost of Ride and Customer_Loyalty_Status

In [30]:
# Box plot of historical ride cost, one box per customer loyalty status.
# The title previously said "Expected_Ride_Duration" (copied from an earlier
# cell); it now names the variable that is actually plotted on the x-axis.
figure = plx.box(
    data,
    x="Customer_Loyalty_Status",
    y="Historical_Cost_of_Ride",
    title="Relationship Between Historical Cost of Ride and Customer_Loyalty_Status",
)
figure.show()

Relationship Between Historical Cost of Ride and Time_of_Booking

In [31]:
# Box plot of historical ride cost, one box per time-of-booking value.
figure = plx.box(
    data_frame=data,
    y="Historical_Cost_of_Ride",
    x="Time_of_Booking",
    title="Relationship Between Historical Cost of Ride and Time_of_Booking",
)
figure.show()

Relationship Between Historical Cost of Ride and Vehicle_Type

In [32]:
# Box plot of historical ride cost, one box per vehicle type.
figure = plx.box(
    data_frame=data,
    y="Historical_Cost_of_Ride",
    x="Vehicle_Type",
    title="Relationship Between Historical Cost of Ride and Vehicle_Type",
)
figure.show()