In [33]:
import pandas as pd

In [34]:
trips_df = pd.DataFrame({
    'id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'client_id': [1, 2, 3, 4, 1, 2, 3, 2, 3, 4],
    'driver_id': [10, 11, 12, 13, 10, 11, 12, 12, 10, 13],
    'city_id': [1, 1, 6, 6, 1, 6, 6, 12, 12, 12],
    'status': ['completed', 'cancelled_by_driver', 'completed', 'cancelled_by_client', 'completed', 'completed', 'completed', 'completed', 'completed', 'cancelled_by_driver'],
    'request_at': ['2013-10-01', '2013-10-01', '2013-10-01', '2013-10-01', '2013-10-02', '2013-10-02', '2013-10-02', '2013-10-03', '2013-10-03', '2013-10-03']
})
trips_df

Unnamed: 0,id,client_id,driver_id,city_id,status,request_at
0,1,1,10,1,completed,2013-10-01
1,2,2,11,1,cancelled_by_driver,2013-10-01
2,3,3,12,6,completed,2013-10-01
3,4,4,13,6,cancelled_by_client,2013-10-01
4,5,1,10,1,completed,2013-10-02
5,6,2,11,6,completed,2013-10-02
6,7,3,12,6,completed,2013-10-02
7,8,2,12,12,completed,2013-10-03
8,9,3,10,12,completed,2013-10-03
9,10,4,13,12,cancelled_by_driver,2013-10-03


In [35]:
trips_df['request_at'].unique()

array(['2013-10-01', '2013-10-02', '2013-10-03'], dtype=object)

In [50]:
users_df = pd.DataFrame({
    'users_id': [1, 2, 3, 4, 10, 11, 12, 13],
    'banned': [False, True, False, False, False, False, False, False],
    'role': ['client', 'client', 'client', 'client', 'driver', 'driver', 'driver', 'driver']
})
users_df['client_id'] = users_df['users_id']

How to calculate the cancellation rate of requests with unbanned users (client and driver both not banned) each day between October 1, 2013, and October 3, 2013? Ensure the solution rounds the cancellation rate to two decimal points.

In [37]:
output = pd.DataFrame({
    "Day": ["2013-10-01", "2013-10-02", "2013-10-03"],
    "Cancellation Rate": [0.33, 0.00, 0.50]
})
output

Unnamed: 0,Day,Cancellation Rate
0,2013-10-01,0.33
1,2013-10-02,0.0
2,2013-10-03,0.5


Explanation:

On 2013-10-01: Out of 4 total requests, 2 were canceled. Discounting a request from a banned client, there were 3 unbanned requests, with a cancellation rate of 0.33.

On 2013-10-02: Among 3 total requests, none were canceled. Ignoring a request from a banned client, there were 2 unbanned requests, resulting in a cancellation rate of 0.00.

On 2013-10-03: Out of 3 total requests, 1 was canceled. Disregarding a request from a banned client, there were 2 unbanned requests, leading to a cancellation rate of 0.50.

In [51]:
merged_df = pd.merge(trips_df, users_df, on='client_id')
# Show the resulting
merged_df

Unnamed: 0,id,client_id,driver_id,city_id,status,request_at,users_id,banned,role
0,1,1,10,1,completed,2013-10-01,1,False,client
1,5,1,10,1,completed,2013-10-02,1,False,client
2,2,2,11,1,cancelled_by_driver,2013-10-01,2,True,client
3,6,2,11,6,completed,2013-10-02,2,True,client
4,8,2,12,12,completed,2013-10-03,2,True,client
5,3,3,12,6,completed,2013-10-01,3,False,client
6,7,3,12,6,completed,2013-10-02,3,False,client
7,9,3,10,12,completed,2013-10-03,3,False,client
8,4,4,13,6,cancelled_by_client,2013-10-01,4,False,client
9,10,4,13,12,cancelled_by_driver,2013-10-03,4,False,client


In [52]:
merged_df['request_at'].value_counts()

2013-10-01    4
2013-10-02    3
2013-10-03    3
Name: request_at, dtype: int64

In [53]:
merged_df[merged_df['request_at'] == '2013-10-01']
# On 2013-10-01: Out of 4 total requests, 2 were canceled.
#Discounting a request from a banned client, there were 3 unbanned requests
#, with a cancellation rate of 0.33.

Unnamed: 0,id,client_id,driver_id,city_id,status,request_at,users_id,banned,role
0,1,1,10,1,completed,2013-10-01,1,False,client
2,2,2,11,1,cancelled_by_driver,2013-10-01,2,True,client
5,3,3,12,6,completed,2013-10-01,3,False,client
8,4,4,13,6,cancelled_by_client,2013-10-01,4,False,client


In [54]:
merged_df[merged_df['request_at'] == '2013-10-02']
# On 2013-10-02: Among 3 total requests, none were canceled. Ignoring a request from a banned client,
# there were 2 unbanned requests, resulting in a cancellation rate of 0.00.

Unnamed: 0,id,client_id,driver_id,city_id,status,request_at,users_id,banned,role
1,5,1,10,1,completed,2013-10-02,1,False,client
3,6,2,11,6,completed,2013-10-02,2,True,client
6,7,3,12,6,completed,2013-10-02,3,False,client


In [55]:
merged_df[merged_df['request_at'] == '2013-10-03']
# On 2013-10-03: Out of 3 total requests, 1 was canceled. Disregarding a request from a banned client,
# there were 2 unbanned requests, leading to a cancellation rate of 0.50.

Unnamed: 0,id,client_id,driver_id,city_id,status,request_at,users_id,banned,role
4,8,2,12,12,completed,2013-10-03,2,True,client
7,9,3,10,12,completed,2013-10-03,3,False,client
9,10,4,13,12,cancelled_by_driver,2013-10-03,4,False,client


In [60]:
contains_cancelled = merged_df['status'].str.contains('cancelled')
merged_df['cancellation'] = contains_cancelled
merged_df

Unnamed: 0,id,client_id,driver_id,city_id,status,request_at,users_id,banned,role,cancellation
0,1,1,10,1,completed,2013-10-01,1,False,client,False
1,5,1,10,1,completed,2013-10-02,1,False,client,False
2,2,2,11,1,cancelled_by_driver,2013-10-01,2,True,client,True
3,6,2,11,6,completed,2013-10-02,2,True,client,False
4,8,2,12,12,completed,2013-10-03,2,True,client,False
5,3,3,12,6,completed,2013-10-01,3,False,client,False
6,7,3,12,6,completed,2013-10-02,3,False,client,False
7,9,3,10,12,completed,2013-10-03,3,False,client,False
8,4,4,13,6,cancelled_by_client,2013-10-01,4,False,client,True
9,10,4,13,12,cancelled_by_driver,2013-10-03,4,False,client,True


In [61]:
not_banned_df = merged_df[merged_df['banned'] == False]

In [62]:
not_banned_df

Unnamed: 0,id,client_id,driver_id,city_id,status,request_at,users_id,banned,role,cancellation
0,1,1,10,1,completed,2013-10-01,1,False,client,False
1,5,1,10,1,completed,2013-10-02,1,False,client,False
5,3,3,12,6,completed,2013-10-01,3,False,client,False
6,7,3,12,6,completed,2013-10-02,3,False,client,False
7,9,3,10,12,completed,2013-10-03,3,False,client,False
8,4,4,13,6,cancelled_by_client,2013-10-01,4,False,client,True
9,10,4,13,12,cancelled_by_driver,2013-10-03,4,False,client,True


In [64]:
not_banned_df[not_banned_df['request_at'] == '2013-10-03']
# On 2013-10-03: Out of 3 total requests, 1 was canceled. Disregarding a request from a banned client,
# there were 2 unbanned requests, leading to a cancellation rate of 0.50.

Unnamed: 0,id,client_id,driver_id,city_id,status,request_at,users_id,banned,role,cancellation
7,9,3,10,12,completed,2013-10-03,3,False,client,False
9,10,4,13,12,cancelled_by_driver,2013-10-03,4,False,client,True


In [65]:
not_banned_df[not_banned_df['request_at'] == '2013-10-02']


Unnamed: 0,id,client_id,driver_id,city_id,status,request_at,users_id,banned,role,cancellation
1,5,1,10,1,completed,2013-10-02,1,False,client,False
6,7,3,12,6,completed,2013-10-02,3,False,client,False


In [98]:
target = not_banned_df.groupby('request_at')['cancellation'].mean()

In [99]:
target

request_at
2013-10-01    0.333333
2013-10-02    0.000000
2013-10-03    0.500000
Name: cancellation, dtype: float64