In [1]:
import pandas as pd

In [2]:
# Sample fixture: ten trip requests spread over three consecutive days.
trips_df = pd.DataFrame({
    'id': list(range(1, 11)),
    'client_id': [1, 2, 3, 4, 1, 2, 3, 2, 3, 4],
    'driver_id': [10, 11, 12, 13, 10, 11, 12, 12, 10, 13],
    'city_id': [1, 1, 6, 6, 1, 6, 6, 12, 12, 12],
    'status': [
        'completed',
        'cancelled_by_driver',
        'completed',
        'cancelled_by_client',
        'completed',
        'completed',
        'completed',
        'completed',
        'completed',
        'cancelled_by_driver',
    ],
    # Four requests on the 1st, three on the 2nd, three on the 3rd.
    'request_at': ['2013-10-01'] * 4 + ['2013-10-02'] * 3 + ['2013-10-03'] * 3,
})

# User lookup table: ids 1-4 are clients, ids 10-13 are drivers; only client 2 is banned.
users_df = pd.DataFrame({
    'users_id': [1, 2, 3, 4, 10, 11, 12, 13],
    'banned': [False, True, False, False, False, False, False, False],
    'role': ['client'] * 4 + ['driver'] * 4,
})

trips_df

Unnamed: 0,id,client_id,driver_id,city_id,status,request_at
0,1,1,10,1,completed,2013-10-01
1,2,2,11,1,cancelled_by_driver,2013-10-01
2,3,3,12,6,completed,2013-10-01
3,4,4,13,6,cancelled_by_client,2013-10-01
4,5,1,10,1,completed,2013-10-02
5,6,2,11,6,completed,2013-10-02
6,7,3,12,6,completed,2013-10-02
7,8,2,12,12,completed,2013-10-03
8,9,3,10,12,completed,2013-10-03
9,10,4,13,12,cancelled_by_driver,2013-10-03


In [3]:
# Show the user lookup table (users_id, banned flag, role) used to filter trips.
users_df

Unnamed: 0,users_id,banned,role
0,1,False,client
1,2,True,client
2,3,False,client
3,4,False,client
4,10,False,driver
5,11,False,driver
6,12,False,driver
7,13,False,driver


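How can we calculate the cancellation rate of requests involving only unbanned users (both the client and the driver must not be banned) for each day between October 1, 2013, and October 3, 2013? The cancellation rate for a day is the number of cancelled requests (by client or by driver) with unbanned users divided by the total number of requests with unbanned users on that day. Round the cancellation rate to two decimal places.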

In [4]:
# Check column dtypes and null counts; note that request_at is loaded as object (string), not datetime.
trips_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   id          10 non-null     int64 
 1   client_id   10 non-null     int64 
 2   driver_id   10 non-null     int64 
 3   city_id     10 non-null     int64 
 4   status      10 non-null     object
 5   request_at  10 non-null     object
dtypes: int64(4), object(2)
memory usage: 608.0+ bytes


In [5]:
# Parse the date strings so request_at becomes a proper datetime column.
request_dates = pd.to_datetime(trips_df['request_at'])
trips_df['request_at'] = request_dates

In [6]:
# Attach each trip's client record (users_id, banned, role) by matching client_id to users_id.
if_banned = trips_df.merge(
    users_df,
    left_on='client_id',
    right_on='users_id',
)

In [7]:
# Inspect the trips after joining the client-side user record.
if_banned

Unnamed: 0,id,client_id,driver_id,city_id,status,request_at,users_id,banned,role
0,1,1,10,1,completed,2013-10-01,1,False,client
1,5,1,10,1,completed,2013-10-02,1,False,client
2,2,2,11,1,cancelled_by_driver,2013-10-01,2,True,client
3,6,2,11,6,completed,2013-10-02,2,True,client
4,8,2,12,12,completed,2013-10-03,2,True,client
5,3,3,12,6,completed,2013-10-01,3,False,client
6,7,3,12,6,completed,2013-10-02,3,False,client
7,9,3,10,12,completed,2013-10-03,3,False,client
8,4,4,13,6,cancelled_by_client,2013-10-01,4,False,client
9,10,4,13,12,cancelled_by_driver,2013-10-03,4,False,client


In [8]:
# Attach both the client's and the driver's user record to every trip.
#
# The frame is rebuilt from trips_df in one chain rather than merging if_banned into
# itself: a self-referential merge is not re-runnable, because a second execution adds
# extra unsuffixed users_id/banned/role columns that leak into later cells.
#
# validate='many_to_one' makes pandas raise MergeError if users_id is ever duplicated
# in users_df, which would otherwise silently duplicate trips and skew the daily rates.
if_banned = (
    trips_df
    .merge(
        users_df,
        left_on='client_id',
        right_on='users_id',
        validate='many_to_one',
    )
    .merge(
        users_df,
        suffixes=('_client', '_driver'),  # first join's columns -> *_client, second -> *_driver
        left_on='driver_id',
        right_on='users_id',
        validate='many_to_one',
    )
)

In [9]:
# Inspect the trips with both client (*_client) and driver (*_driver) user columns attached.
if_banned

Unnamed: 0,id,client_id,driver_id,city_id,status,request_at,users_id_client,banned_client,role_client,users_id_driver,banned_driver,role_driver
0,1,1,10,1,completed,2013-10-01,1,False,client,10,False,driver
1,5,1,10,1,completed,2013-10-02,1,False,client,10,False,driver
2,9,3,10,12,completed,2013-10-03,3,False,client,10,False,driver
3,2,2,11,1,cancelled_by_driver,2013-10-01,2,True,client,11,False,driver
4,6,2,11,6,completed,2013-10-02,2,True,client,11,False,driver
5,8,2,12,12,completed,2013-10-03,2,True,client,12,False,driver
6,3,3,12,6,completed,2013-10-01,3,False,client,12,False,driver
7,7,3,12,6,completed,2013-10-02,3,False,client,12,False,driver
8,4,4,13,6,cancelled_by_client,2013-10-01,4,False,client,13,False,driver
9,10,4,13,12,cancelled_by_driver,2013-10-03,4,False,client,13,False,driver


In [10]:
# Keep only trips where neither party is banned AND the request date lies inside the
# reporting window the question asks for (inclusive on both ends). The sample data
# happens to sit entirely inside the window, but the filter must be explicit so the
# answer stays correct when trips outside the window are present.
# request_at holds date-only values (parsed earlier with pd.to_datetime), so comparing
# against midnight timestamps is sufficient.
window_start = pd.Timestamp('2013-10-01')
window_end = pd.Timestamp('2013-10-03')

both_unbanned = ~if_banned['banned_client'] & ~if_banned['banned_driver']
in_window = if_banned['request_at'].between(window_start, window_end)

# Select the needed columns explicitly instead of dropping a list of unwanted ones,
# so stray extra columns upstream cannot leak through.
unbanned = if_banned.loc[
    both_unbanned & in_window,
    ['id', 'status', 'request_at', 'banned_client', 'banned_driver'],
]
unbanned

Unnamed: 0,id,status,request_at,banned_client,banned_driver
0,1,completed,2013-10-01,False,False
1,5,completed,2013-10-02,False,False
2,9,completed,2013-10-03,False,False
6,3,completed,2013-10-01,False,False
7,7,completed,2013-10-02,False,False
8,4,cancelled_by_client,2013-10-01,False,False
9,10,cancelled_by_driver,2013-10-03,False,False


In [11]:
# Per-day count of cancelled requests (status contains 'cancelled', by client or driver).
is_cancelled = unbanned['status'].str.contains('cancelled')
cancellation = (
    unbanned.loc[is_cancelled]
    .groupby('request_at')
    .size()
    .reset_index(name='cancellation_count')
)

# Per-day count of all requests between unbanned users.
total = (
    unbanned
    .groupby('request_at')
    .size()
    .reset_index(name='total_trips')
)
total

Unnamed: 0,request_at,total_trips
0,2013-10-01,3
1,2013-10-02,2
2,2013-10-03,2


In [12]:
# Combine cancelled counts with totals per day; the outer join keeps days that had
# no cancellations (their cancellation_count shows up as NaN).
cancellation_df = cancellation.merge(total, how='outer', on='request_at')
cancellation_df

Unnamed: 0,request_at,cancellation_count,total_trips
0,2013-10-01,1.0,3
1,2013-10-03,1.0,2
2,2013-10-02,,2


In [13]:
# Build the final Day / Cancellation Rate table in one non-mutating chain.
#
# The table is derived from the `cancellation` and `total` aggregates rather than by
# editing cancellation_df in place: dropping columns with inplace=True and then renaming
# onto the same name makes the cell fail with a KeyError on a second execution.
cancellation_df = (
    pd.merge(cancellation, total, on='request_at', how='outer')
    .assign(**{
        # Days without any cancellation have NaN counts after the outer join -> rate 0.0.
        'Cancellation Rate': lambda daily: (
            daily['cancellation_count'] / daily['total_trips']
        ).fillna(0.0).round(2)
    })
    .drop(columns=['cancellation_count', 'total_trips'])
    .rename(columns={'request_at': 'Day'})
)

In [14]:
# Order the result chronologically (ascending is the sort_values default).
cancellation_df = cancellation_df.sort_values('Day')

In [15]:
# Final answer: one row per day with its cancellation rate rounded to two decimals.
cancellation_df

Unnamed: 0,Day,Cancellation Rate
0,2013-10-01,0.33
2,2013-10-02,0.0
1,2013-10-03,0.5
