In [6]:
import pandas as pd
import statistics
import numpy as np

# Declaration
data = pd.read_csv("ncr_ride_bookings.csv")

target_col = ['Driver Ratings','Customer Rating','Ride Distance','Avg CTAT','Avg VTAT','Booking Value']

for dt in target_col:
    # Convert to int
    data[dt] = pd.to_numeric(data[dt], errors='coerce')
    
    # Remove NaN
    data = data.dropna(subset=[dt])

    max_val = data[dt].max()
    min_val = data[dt].min()
    range_val = max_val - min_val
    mean = round(data[dt].mean(),2)
    mode = round(statistics.mode(data[dt]),2)
    var = round(np.nanvar(data[dt],ddof=1),2)
    std = round(var ** 0.5,2)

    print(f"Max of {dt.title()} : {max_val}")
    print(f"Min of {dt.title()} : {min_val}")
    print(f"Range of {dt.title()} : {range_val}")
    print(f"Mean of {dt.title()} : {mean}")
    print(f"Mode of {dt.title()} : {mode}")
    print(f"Variance of {dt.title()} : {var}")
    print(f"Standard Deviance of {dt.title()} : {std}\n")

Max of Driver Ratings : 5.0
Min of Driver Ratings : 3.0
Range of Driver Ratings : 2.0
Mean of Driver Ratings : 4.23
Mode of Driver Ratings : 4.3
Variance of Driver Ratings : 0.19
Standard Deviance of Driver Ratings : 0.44

Max of Customer Rating : 5.0
Min of Customer Rating : 3.0
Range of Customer Rating : 2.0
Mean of Customer Rating : 4.4
Mode of Customer Rating : 4.9
Variance of Customer Rating : 0.19
Standard Deviance of Customer Rating : 0.44

Max of Ride Distance : 50.0
Min of Ride Distance : 2.0
Range of Ride Distance : 48.0
Mean of Ride Distance : 26.0
Mode of Ride Distance : 9.61
Variance of Ride Distance : 191.11
Standard Deviance of Ride Distance : 13.82

Max of Avg Ctat : 45.0
Min of Avg Ctat : 15.0
Range of Avg Ctat : 30.0
Mean of Avg Ctat : 30.03
Mode of Avg Ctat : 42.8
Variance of Avg Ctat : 74.83
Standard Deviance of Avg Ctat : 8.65

Max of Avg Vtat : 15.0
Min of Avg Vtat : 2.0
Range of Avg Vtat : 13.0
Mean of Avg Vtat : 8.51
Mode of Avg Vtat : 5.1
Variance of Avg Vtat :

In [13]:
import pandas as pd
import statistics
import numpy as np

# Declaration
# Descriptive Statistic for Average Driver Rating, Customer Rating, Ride Distance, Avg CTAT, Avg VTAT, and Booking Value
data = pd.read_csv("ncr_ride_bookings.csv")

# Unique values for Vehicle Type
types_groups = data['Vehicle Type'].unique().tolist()

avg_columns = ['Driver Ratings','Customer Rating','Ride Distance','Avg CTAT','Avg VTAT','Booking Value']

results = []
for types in types_groups:
    types_data = data[data['Vehicle Type'] == types]
    total_booking = types_data['Booking ID'].count()
    
    result_row = {
        'Vehicle Type': types,
        'Total Booking': total_booking,
    }

    for col in avg_columns:
        result_row[f'Average {col}'] = round(types_data[col].mean(), 2)

    results.append(result_row)

# Sort
res_df = pd.DataFrame(results)
res_df = res_df.sort_values(by='Total Booking', ascending=False)
res_df

Unnamed: 0,Vehicle Type,Total Booking,Average Driver Ratings,Average Customer Rating,Average Ride Distance,Average Avg CTAT,Average Avg VTAT,Average Booking Value
2,Auto,37419,4.23,4.4,24.62,29.14,8.45,506.73
5,Go Mini,29806,4.23,4.4,24.61,29.16,8.47,507.68
1,Go Sedan,27141,4.23,4.41,24.61,29.04,8.4,511.5
4,Bike,22517,4.23,4.4,24.65,29.2,8.5,510.2
3,Premier Sedan,18111,4.23,4.4,24.6,29.22,8.44,509.57
0,eBike,10557,4.23,4.4,24.99,29.18,8.48,503.9
6,Uber XL,4449,4.24,4.4,24.4,29.21,8.58,501.82
