# Ride Hailing Service: Model Building
### Benjamen Simon

This document simulates the month-by-month process of the Ride Hailing service, using the acceptance probability for each Pay level estimated in the Data Modelling document, in order to compare different strategies for maximising profit.

## Import Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from numpy import random

## Load the data

In [2]:
# Load the Pay -> acceptance-probability lookup table.
# header=0 treats the first line of the file as a header row, and `names`
# supplies the column labels used throughout this notebook (PAY, PROB).
pay_prob_df = pd.read_csv('Data/probs.csv', names = ['PAY', 'PROB'], header = 0)

# Display the table
pay_prob_df

Unnamed: 0,PAY,PROB
0,0.00,0.001807
1,0.01,0.001812
2,0.02,0.001816
3,0.03,0.001821
4,0.04,0.001826
...,...,...
5996,59.96,0.999864
5997,59.97,0.999864
5998,59.98,0.999865
5999,59.99,0.999865


## The process for 1 month

In [29]:
# Number of rows (riders) to pre-allocate in the users table built below
num_riders = 12000

In [30]:
# Per-user tracking table: an id, an activity flag, twelve monthly request
# counters (REQ_M1..REQ_M12) and thirteen monthly request rates
# (RATE_M1..RATE_M13). Everything except the id starts at zero.
users = {'USER_ID': np.arange(1, num_riders+1),
         'ACTIVE': np.repeat(0, num_riders)}

# Monthly request counters
users.update({f'REQ_M{m}': np.repeat(0, num_riders) for m in range(1, 13)})

# Monthly request rates (one extra month, written by the acceptance step)
users.update({f'RATE_M{m}': np.repeat(0, num_riders) for m in range(1, 14)})

users_df = pd.DataFrame(users)

users_df

Unnamed: 0,USER_ID,ACTIVE,REQ_M1,REQ_M2,REQ_M3,REQ_M4,REQ_M5,REQ_M6,REQ_M7,REQ_M8,...,RATE_M4,RATE_M5,RATE_M6,RATE_M7,RATE_M8,RATE_M9,RATE_M10,RATE_M11,RATE_M12,RATE_M13
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11995,11996,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11996,11997,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11997,11998,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11998,11999,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [126]:
# Empty ride-level results table; one row per requested ride is appended later.
rides = {'USER_ID': [],
          'MONTH': [],
          'RIDE_ID': [],
          'ACCEPTED': [],
          'PAY': [],
          'PROFIT': [],
          }

# Create the columns as float64 rather than the default object dtype of an
# empty frame: the monthly results appended with pd.concat are float arrays,
# and concatenating them onto empty object columns relies on deprecated
# "ignore empty frames" dtype inference.
rides_df = pd.DataFrame(rides, dtype=float)

rides_df

Unnamed: 0,USER_ID,MONTH,RIDE_ID,ACCEPTED,PAY,PROFIT


## Month 1

### Generate new active users

In [32]:
# Flag new users as active
# Flag new users as active.
# A single .loc assignment is used instead of chained indexing
# (users_df.ACTIVE[...] = ...), which can write to a temporary object and
# leave users_df itself unchanged.
users_df.loc[users_df.index[0:1000], 'ACTIVE'] = 1

# This is the rate at which the new users request rides
users_df.loc[users_df.index[0:1000], 'RATE_M1'] = 1

### Find active users

In [33]:
# These users are active and will generate requests
# Row labels of every user whose ACTIVE flag is 1; only these generate requests
is_active = users_df['ACTIVE'].eq(1)
active_users = users_df.index[is_active]

# Count of active users
num_active_users = len(active_users)


### Generate requests

In [34]:
# This is the rate at which active users request new rides
# This is the rate at which active users request new rides
rate = users_df.loc[active_users, 'RATE_M1']

# These are the number of requests per user this month (one Poisson draw per
# active user). Written with .loc so the assignment lands in users_df itself
# rather than in a temporary produced by chained indexing.
users_df.loc[active_users, 'REQ_M1'] = random.poisson(lam=rate, size = num_active_users)

# These are the active users with 0 requests
non_returning_users = active_users[(users_df.loc[active_users, 'REQ_M1'] == 0).to_numpy()]

# Set these users to never active again
users_df.loc[non_returning_users, 'ACTIVE'] = -1

# These are the users with requests greater than 0
users_requesting = users_df.index[users_df.REQ_M1 > 0]

# These are the requests that we need to generate acceptances for
requests_oi = users_df.loc[users_requesting, 'REQ_M1']

### Generate acceptances

In [None]:
# Generate the acceptances
# Generate the acceptances: each user's accepted-ride count is a binomial draw
# over their requests, stored as next month's request rate. Written with .loc
# so the assignment lands in users_df itself rather than in a temporary
# produced by chained indexing.
users_df.loc[users_requesting, 'RATE_M2'] = random.binomial(size = len(requests_oi), n = requests_oi, p = 0.7)

# These are the users that had none of their ride requests accepted this month
non_returning_users_2 = users_requesting[(users_df.loc[users_requesting, 'RATE_M2'] == 0).to_numpy()]

# Set these users to never active again
users_df.loc[non_returning_users_2, 'ACTIVE'] = -1


users_df[['USER_ID', 'ACTIVE', 'REQ_M1', 'REQ_M2', 'RATE_M1', 'RATE_M2']]


Index(['USER_ID', 'MONTH', 'RIDE_ID', 'ACCEPTED', 'PAY', 'PROFIT'], dtype='object')

### Generate acceptances (individually)

In [156]:
# Extract the number of rides of each user
# Pull out the id and month-1 request count of every requesting user
ids_and_requests = users_df.loc[users_requesting, ['USER_ID', "REQ_M1"]]

# Total rides requested across all of those users
num_rides = sum(ids_and_requests.REQ_M1)

# Pre-allocated results array: one row per ride, one column per rides_df column
rides_M1 = np.zeros((num_rides, len(rides_df.columns)))

# Index of the next row of rides_M1 to fill
ride_row = 0

# For every ride of every user, draw whether it is accepted (75% chance)
# and record it with a fixed Pay of 25 in the temp array
for user_id_j, requests_j in zip(ids_and_requests.USER_ID, ids_and_requests.REQ_M1):
    for ride in range(1, requests_j+1):
        acc_j_r = random.random() < 0.75
        rides_M1[ride_row, :] = [user_id_j, 1, ride, acc_j_r, 25, 30-25]
        ride_row += 1

# Wrap the temp array in a data frame with the rides_df column names
rides_M1_df = pd.DataFrame(rides_M1, columns=rides_df.columns)

# Append this month's rides to the rides results df
rides_df = pd.concat([rides_df, rides_M1_df], ignore_index=True)

rides_df

Unnamed: 0,USER_ID,MONTH,RIDE_ID,ACCEPTED,PAY,PROFIT
0,1.0,1.0,1.0,1.0,25.0,5.0
1,1.0,1.0,2.0,0.0,25.0,5.0
2,2.0,1.0,1.0,1.0,25.0,5.0
3,3.0,1.0,1.0,1.0,25.0,5.0
4,4.0,1.0,1.0,0.0,25.0,5.0
...,...,...,...,...,...,...
5010,995.0,1.0,1.0,0.0,25.0,5.0
5011,996.0,1.0,1.0,1.0,25.0,5.0
5012,997.0,1.0,1.0,1.0,25.0,5.0
5013,998.0,1.0,1.0,1.0,25.0,5.0


## Wrapping it in a function

In [167]:
# Initialise the results dataframes

def create_users_df(num_riders, num_months=12):
    """Create the zero-initialised per-user tracking table.

    Parameters
    ----------
    num_riders : int
        Number of rows; users are numbered 1..num_riders in USER_ID.
    num_months : int, optional
        Number of simulated months (default 12, the original layout).

    Returns
    -------
    pandas.DataFrame
        Columns, in order: USER_ID, ACTIVE, REQ_M1..REQ_M<num_months>,
        RATE_M1..RATE_M<num_months + 1>. Every column except USER_ID is 0.
    """
    users = {'USER_ID': np.arange(1, num_riders+1),
             'ACTIVE': np.repeat(0, num_riders)}

    # One request counter per simulated month
    for m in range(1, num_months + 1):
        users[f'REQ_M{m}'] = np.repeat(0, num_riders)

    # One request rate per month, plus one extra: the acceptance step of
    # month m writes into RATE_M<m+1>
    for m in range(1, num_months + 2):
        users[f'RATE_M{m}'] = np.repeat(0, num_riders)

    users_df = pd.DataFrame(users)

    return(users_df)


def create_rides_df():
    """Create the empty ride-level results table.

    Returns
    -------
    pandas.DataFrame
        Zero rows, with float64 columns USER_ID, MONTH, RIDE_ID, ACCEPTED,
        PAY and PROFIT.
    """
    rides = {'USER_ID': [],
          'MONTH': [],
          'RIDE_ID': [],
          'ACCEPTED': [],
          'PAY': [],
          'PROFIT': []
          }

    # Use float64 instead of the default object dtype of an empty frame: the
    # monthly results appended with pd.concat are float arrays, and
    # concatenating onto empty object columns relies on deprecated dtype
    # inference that ignores empty frames.
    rides_df = pd.DataFrame(rides, dtype=float)

    return(rides_df)

In [215]:
def generate_new_active_users(month, users_df, new_users_per_month=1000):
    """Activate this month's cohort of new users.

    The cohort for month m is the block of rows at positions
    [(m - 1) * new_users_per_month, m * new_users_per_month).

    Parameters
    ----------
    month : int
        1-based month number.
    users_df : pandas.DataFrame
        User table with ACTIVE and RATE_M<month> columns; modified in place.
    new_users_per_month : int, optional
        Cohort size (default 1000, the original hard-coded value).

    Returns
    -------
    pandas.DataFrame
        The same users_df object, for call-chaining.
    """
    lower = (month - 1) * new_users_per_month
    upper = month * new_users_per_month

    # Positional slice of the row labels, so the cohort is selected by position
    # (as the original slice was) even if the index is not 0..n-1
    cohort = users_df.index[lower:upper]

    # Flag new users as active. A single .loc assignment is used because
    # chained indexing (users_df.ACTIVE[...] = ...) can write to a temporary
    # object and leave users_df unchanged.
    users_df.loc[cohort, "ACTIVE"] = 1

    # This is the rate at which the new users request rides
    users_df.loc[cohort, f"RATE_M{month}"] = 1

    return(users_df)


def find_active_users(users_df):
    """Return the row labels of active users, their count, and users_df.

    A user is active when their ACTIVE flag equals 1. users_df is passed
    through unchanged as the third element of the returned tuple.
    """
    # Boolean mask of users that will generate requests
    is_active = users_df["ACTIVE"].eq(1)

    # Row labels of those users, and how many there are
    active_users = users_df.index[is_active]
    num_active_users = len(active_users)

    return(active_users, num_active_users, users_df)


def generate_requests(active_users, num_active_users, month, users_df):
    """Draw this month's ride requests for every active user.

    Each active user's request count is a Poisson draw whose mean is that
    user's RATE_M<month>. Active users who draw 0 requests are marked
    ACTIVE = -1.

    Parameters
    ----------
    active_users : pandas.Index
        Row labels of the active users.
    num_active_users : int
        Length of active_users.
    month : int
        1-based month number.
    users_df : pandas.DataFrame
        User table; REQ_M<month> and ACTIVE are modified in place.

    Returns
    -------
    tuple
        (users_requesting, requests_oi, users_df): the row labels of users with
        at least one request this month, their request counts as a Series, and
        the same users_df object.
    """
    rate_col = f"RATE_M{month}"
    req_col = f"REQ_M{month}"

    # This is the rate at which active users request new rides
    rate = users_df.loc[active_users, rate_col]

    # These are the number of requests per user this month. Written with .loc
    # because chained indexing (users_df[col][rows] = ...) can write to a
    # temporary object and leave users_df unchanged.
    users_df.loc[active_users, req_col] = random.poisson(lam=rate, size = num_active_users)

    # These are the active users with 0 requests
    no_requests = (users_df.loc[active_users, req_col] == 0).to_numpy()
    non_returning_users = active_users[no_requests]

    # Set these users to never active again
    users_df.loc[non_returning_users, "ACTIVE"] = -1

    # These are the users with requests greater than 0
    users_requesting = users_df.index[users_df[req_col] > 0]

    # These are the requests that we need to generate acceptances for
    requests_oi = users_df.loc[users_requesting, req_col]

    return(users_requesting, requests_oi, users_df)
    
    
def generate_acceptances(users_requesting, requests_oi, month, users_df, prob_acc=0.9):
    """Draw how many of each user's requests are accepted this month.

    The accepted count per user is a binomial draw over their requests and is
    stored as next month's request rate (RATE_M<month + 1>). Users with no
    accepted ride are marked ACTIVE = -1.

    Parameters
    ----------
    users_requesting : pandas.Index
        Row labels of the users with at least one request this month.
    requests_oi : array-like of int
        Request counts, aligned with users_requesting.
    month : int
        1-based month number.
    users_df : pandas.DataFrame
        User table; RATE_M<month + 1> and ACTIVE are modified in place.
    prob_acc : float, optional
        Per-ride acceptance probability (default 0.9, the original
        hard-coded value).

    Returns
    -------
    pandas.DataFrame
        The same users_df object.
    """
    next_rate_col = f"RATE_M{month+1}"

    # Generate the acceptances. Written with .loc because chained indexing
    # (users_df[col][rows] = ...) can write to a temporary object and leave
    # users_df unchanged.
    users_df.loc[users_requesting, next_rate_col] = random.binomial(size = len(requests_oi), n = requests_oi, p = prob_acc)

    # These are the users that had none of their ride requests accepted this month
    none_accepted = (users_df.loc[users_requesting, next_rate_col] == 0).to_numpy()
    non_returning_users_2 = users_requesting[none_accepted]

    # Set these users to never active again
    users_df.loc[non_returning_users_2, "ACTIVE"] = -1

    return(users_df)


def generate_acceptances_individually(users_requesting, pay, pay_probs, month, users_df, rides_df, fare=30):
    """Simulate every requested ride of the month at one fixed Pay.

    Each ride is accepted independently with the probability that pay_probs
    lists for `pay`. One row per ride is appended to rides_df, each user's
    accepted-ride count is stored in RATE_M<month + 1>, and users with no
    accepted ride are marked ACTIVE = -1.

    Parameters
    ----------
    users_requesting : pandas.Index
        Row labels of the users with at least one request this month.
    pay : float
        Pay offered for every ride; must be present in pay_probs.PAY.
    pay_probs : pandas.DataFrame
        Lookup table with PAY and PROB columns.
    month : int
        1-based month number.
    users_df : pandas.DataFrame
        User table; RATE_M<month + 1> and ACTIVE are modified in place.
    rides_df : pandas.DataFrame
        Ride results so far (columns USER_ID, MONTH, RIDE_ID, ACCEPTED, PAY,
        PROFIT); not modified.
    fare : float, optional
        Amount the Pay is subtracted from to get PROFIT (default 30, the
        original hard-coded value).

    Returns
    -------
    tuple
        (users_df, rides_df) where rides_df has this month's rides appended.

    Raises
    ------
    IndexError
        If `pay` is not present in pay_probs.PAY.

    Notes
    -----
    NOTE(review): PROFIT is recorded as fare - pay for every ride, including
    rides with ACCEPTED == 0, exactly as before; confirm that downstream
    analysis filters on ACCEPTED.
    """
    req_col = f"REQ_M{month}"
    next_rate_col = f"RATE_M{month+1}"

    # Look up the acceptance probability of the Pay. A small tolerance is used
    # instead of exact float equality so a computed Pay still finds its row.
    pay_matches = pay_probs.loc[
        np.isclose(pay_probs["PAY"].to_numpy(), pay, rtol=0.0, atol=1e-6), "PROB"
    ]
    if pay_matches.empty:
        raise IndexError(f"pay {pay!r} not found in pay_probs.PAY")
    prob_acc = pay_matches.iloc[0]

    # Ids and number of rides of each requesting user
    user_ids = users_df.loc[users_requesting, "USER_ID"].to_numpy()
    requests = users_df.loc[users_requesting, req_col].to_numpy()

    # Total number of rides requested
    num_rides = int(requests.sum())

    # One uniform draw per ride, in ride order, decides acceptance
    accepted = random.random(num_rides) < prob_acc

    # For every ride: which user (by position) it belongs to, and its 1-based
    # number within that user's rides
    owner = np.repeat(np.arange(len(requests)), requests)
    first_ride_row = np.cumsum(requests) - requests
    ride_ids = np.arange(num_rides) - first_ride_row[owner] + 1

    # One float row per ride, in the rides_df column order
    rides_this_month = np.column_stack([
        user_ids[owner],
        np.full(num_rides, month),
        ride_ids,
        accepted,
        np.full(num_rides, pay),
        np.full(num_rides, fare - pay),
    ]).astype(float)

    # Accepted rides per user become next month's request rate. Written with
    # .loc because chained indexing can write to a temporary object.
    total_acc = np.bincount(owner, weights=accepted, minlength=len(requests)).astype(int)
    users_df.loc[users_requesting, next_rate_col] = total_acc

    # Users that had none of their ride requests accepted never return
    users_df.loc[users_requesting[total_acc == 0], "ACTIVE"] = -1

    # Convert the temp ride array to a data frame
    rides_this_month_df = pd.DataFrame(rides_this_month, columns=rides_df.columns)

    # Append this month's rides. Empty frames are skipped rather than
    # concatenated, so an empty (object-dtype) rides_df cannot influence the
    # dtype of the result.
    if rides_df.empty:
        rides_df = rides_this_month_df
    elif not rides_this_month_df.empty:
        rides_df = pd.concat([rides_df, rides_this_month_df], ignore_index=True)

    return(users_df, rides_df)


def generate_acceptances_adaptive(users_requesting, pays, probs, pay_probs, month, users_df, rides_df, fare=30):
    """Simulate the month's rides with a Pay that adapts to each user's history.

    Within the month, a user's rides are offered at pays[0] (accepted with
    probability probs[0]) until one is accepted; every later ride of that user
    is offered at pays[1] (accepted with probability probs[1]). One row per
    ride is appended to rides_df, each user's accepted-ride count is stored in
    RATE_M<month + 1>, and users with no accepted ride are marked ACTIVE = -1.

    Parameters
    ----------
    users_requesting : pandas.Index
        Row labels of the users with at least one request this month.
    pays : sequence of float
        [pay before the first acceptance, pay after it].
    probs : sequence of float
        Acceptance probabilities matching `pays`.
    pay_probs : object
        Unused; kept so existing call sites keep working.
    month : int
        1-based month number.
    users_df : pandas.DataFrame
        User table; RATE_M<month + 1> and ACTIVE are modified in place.
    rides_df : pandas.DataFrame
        Ride results so far; not modified.
    fare : float, optional
        Amount the Pay is subtracted from to get PROFIT (default 30, the
        original hard-coded value).

    Returns
    -------
    tuple
        (users_df, rides_df) where rides_df has this month's rides appended.

    Notes
    -----
    NOTE(review): PROFIT is recorded as fare - pay for every ride, including
    rides with ACCEPTED == 0, exactly as before; confirm that downstream
    analysis filters on ACCEPTED.
    """
    req_col = f"REQ_M{month}"
    next_rate_col = f"RATE_M{month+1}"

    # Ids and number of rides of each requesting user
    user_ids = users_df.loc[users_requesting, "USER_ID"].to_numpy()
    requests = users_df.loc[users_requesting, req_col].to_numpy()

    # Total number of rides requested
    num_rides = int(requests.sum())

    # Set up an array to store the results (one row per ride)
    rides_this_month = np.zeros((num_rides, len(rides_df.columns)))

    # Accepted rides per user, filled in the loop and written back once
    total_acc = np.zeros(len(requests), dtype=int)

    # A count of the number of rides processed
    ride_row = 0

    # Loop through each ride, generate whether it is accepted and at what
    # Pay, and store the results in the temp array
    for pos, (user_id_j, requests_j) in enumerate(zip(user_ids, requests)):
        total_acc_j = 0

        for ride in range(1, requests_j+1):
            # Switch to the second pay tier once this user has had a ride accepted
            tier = 1 if total_acc_j > 0 else 0
            pay = pays[tier]

            acc_j_r = random.random() < probs[tier]
            rides_this_month[ride_row, :] = [user_id_j, month, ride, acc_j_r, pay, fare-pay]
            ride_row += 1
            total_acc_j += int(acc_j_r)

        total_acc[pos] = total_acc_j

    # Accepted rides per user become next month's request rate. Written with a
    # single .loc assignment because chained indexing can write to a temporary
    # object and leave users_df unchanged.
    users_df.loc[users_requesting, next_rate_col] = total_acc

    # Users that had none of their ride requests accepted never return
    users_df.loc[users_requesting[total_acc == 0], "ACTIVE"] = -1

    # Convert the temp ride array to a data frame
    rides_this_month_df = pd.DataFrame(rides_this_month, columns=rides_df.columns)

    # Append this month's rides. Empty frames are skipped rather than
    # concatenated, so an empty (object-dtype) rides_df cannot influence the
    # dtype of the result.
    if rides_df.empty:
        rides_df = rides_this_month_df
    elif not rides_this_month_df.empty:
        rides_df = pd.concat([rides_df, rides_this_month_df], ignore_index=True)

    return(users_df, rides_df)



def generate_acceptances_half(users_requesting, pays, probs, pay_probs, month_change, month, users_df, rides_df, fare=30):
    """Simulate the month's rides with a Pay that switches after a given month.

    Every ride in months up to and including month_change is offered at
    pays[0] (accepted with probability probs[0]); every ride in later months is
    offered at pays[1] (probability probs[1]). One row per ride is appended to
    rides_df, each user's accepted-ride count is stored in RATE_M<month + 1>,
    and users with no accepted ride are marked ACTIVE = -1.

    Parameters
    ----------
    users_requesting : pandas.Index
        Row labels of the users with at least one request this month.
    pays : sequence of float
        [pay up to month_change, pay after month_change].
    probs : sequence of float
        Acceptance probabilities matching `pays`.
    pay_probs : object
        Unused; kept so existing call sites keep working.
    month_change : int
        Last month in which the first pay tier applies.
    month : int
        1-based month number.
    users_df : pandas.DataFrame
        User table; RATE_M<month + 1> and ACTIVE are modified in place.
    rides_df : pandas.DataFrame
        Ride results so far; not modified.
    fare : float, optional
        Amount the Pay is subtracted from to get PROFIT (default 30, the
        original hard-coded value).

    Returns
    -------
    tuple
        (users_df, rides_df) where rides_df has this month's rides appended.

    Notes
    -----
    NOTE(review): PROFIT is recorded as fare - pay for every ride, including
    rides with ACCEPTED == 0, exactly as before; confirm that downstream
    analysis filters on ACCEPTED.
    """
    req_col = f"REQ_M{month}"
    next_rate_col = f"RATE_M{month+1}"

    # The pay tier depends only on the month, so pick it once for all rides
    tier = 1 if month > month_change else 0
    pay = pays[tier]
    prob_acc = probs[tier]

    # Ids and number of rides of each requesting user
    user_ids = users_df.loc[users_requesting, "USER_ID"].to_numpy()
    requests = users_df.loc[users_requesting, req_col].to_numpy()

    # Total number of rides requested
    num_rides = int(requests.sum())

    # One uniform draw per ride, in ride order, decides acceptance
    accepted = random.random(num_rides) < prob_acc

    # For every ride: which user (by position) it belongs to, and its 1-based
    # number within that user's rides
    owner = np.repeat(np.arange(len(requests)), requests)
    first_ride_row = np.cumsum(requests) - requests
    ride_ids = np.arange(num_rides) - first_ride_row[owner] + 1

    # One float row per ride, in the rides_df column order
    rides_this_month = np.column_stack([
        user_ids[owner],
        np.full(num_rides, month),
        ride_ids,
        accepted,
        np.full(num_rides, pay),
        np.full(num_rides, fare - pay),
    ]).astype(float)

    # Accepted rides per user become next month's request rate. Written with
    # .loc because chained indexing can write to a temporary object.
    total_acc = np.bincount(owner, weights=accepted, minlength=len(requests)).astype(int)
    users_df.loc[users_requesting, next_rate_col] = total_acc

    # Users that had none of their ride requests accepted never return
    users_df.loc[users_requesting[total_acc == 0], "ACTIVE"] = -1

    # Convert the temp ride array to a data frame
    rides_this_month_df = pd.DataFrame(rides_this_month, columns=rides_df.columns)

    # Append this month's rides. Empty frames are skipped rather than
    # concatenated, so an empty (object-dtype) rides_df cannot influence the
    # dtype of the result.
    if rides_df.empty:
        rides_df = rides_this_month_df
    elif not rides_this_month_df.empty:
        rides_df = pd.concat([rides_df, rides_this_month_df], ignore_index=True)

    return(users_df, rides_df)
    

## Testing

### Experiment 1: Hard coded probability (90%)

In [28]:
# Fresh result tables for this experiment
users_df_test = create_users_df(12000)
rides_df_test = create_rides_df()


# Run the monthly process for months 1..12 using the aggregate (binomial)
# acceptance step; rides_df_test is not filled by this variant
for m in range(1, 13):
    
    # Activate this month's new users
    users_df_test = generate_new_active_users(m, users_df_test)
    
    # Find everyone currently active
    active_users_m, num_active_users_m, users_df_test = find_active_users(users_df_test)
    
    # Draw each active user's requests for the month
    users_requesting_m, requests_oi_m, users_df_test = generate_requests(active_users_m, num_active_users_m, m, users_df_test)
    
    # Draw how many of those requests are accepted
    users_df_test = generate_acceptances(users_requesting_m, requests_oi_m, m, users_df_test)


# Show the first 10 users; the positional columns 0, 1, 2, 14, 3, 15 are
# USER_ID, ACTIVE, REQ_M1, RATE_M1, REQ_M2, RATE_M2 in the create_users_df layout
users_df_test.iloc[0:10, [0,1,2,14,3,15]]


Unnamed: 0,USER_ID,ACTIVE,REQ_M1,RATE_M1,REQ_M2,RATE_M2
0,1,1,1,1,2,1
1,2,-1,0,1,0,0
2,3,-1,2,1,2,2
3,4,-1,0,1,0,0
4,5,-1,1,1,0,0
5,6,-1,2,1,2,2
6,7,-1,1,1,0,1
7,8,-1,0,1,0,0
8,9,-1,1,1,2,1
9,10,1,2,1,2,2


### Experiment 2: Fixed Pay point

In [218]:
# Fresh result tables for this experiment
users_df_test_2 = create_users_df(12000)
rides_df_test_2 = create_rides_df()

# Run the monthly process for months 1..12, simulating every ride
# individually at a fixed Pay of 25
for m in range(1, 13):
    
    # Activate this month's new users
    users_df_test_2 = generate_new_active_users(m, users_df_test_2)
    
    # Find everyone currently active
    active_users_m, num_active_users_m, users_df_test_2 = find_active_users(users_df_test_2)
    
    # Draw each active user's requests for the month
    users_requesting_m, requests_oi_m, users_df_test_2 = generate_requests(active_users_m, num_active_users_m, m, users_df_test_2)
    
    # Simulate each ride; the acceptance probability for Pay 25 is looked up in pay_prob_df
    users_df_test_2, rides_df_test_2 = generate_acceptances_individually(users_requesting_m, 25, pay_prob_df, m, users_df_test_2, rides_df_test_2)


# Display the ride-level results
rides_df_test_2


Unnamed: 0,USER_ID,MONTH,RIDE_ID,ACCEPTED,PAY,PROFIT
0,1.0,1.0,1.0,1.0,25.0,5.0
1,3.0,1.0,1.0,0.0,25.0,5.0
2,3.0,1.0,2.0,1.0,25.0,5.0
3,5.0,1.0,1.0,1.0,25.0,5.0
4,7.0,1.0,1.0,0.0,25.0,5.0
...,...,...,...,...,...,...
21595,11996.0,12.0,1.0,0.0,25.0,5.0
21596,11997.0,12.0,1.0,0.0,25.0,5.0
21597,11998.0,12.0,1.0,1.0,25.0,5.0
21598,11998.0,12.0,2.0,0.0,25.0,5.0


### Experiment 3: Adaptive to accepted status

In [217]:
# Fresh result tables for this experiment
users_df_test_3 = create_users_df(12000)
rides_df_test_3 = create_rides_df()

# First row of pay_prob_df whose acceptance probability exceeds 0.9, unpacked
# as (PAY, PROB); this relies on pay_prob_df having exactly those two columns
pay_first, prob_acc_first = pay_prob_df[pay_prob_df.PROB > 0.9].iloc[0]
# First row of pay_prob_df whose Pay exceeds 20, unpacked the same way
pay_extra, prob_acc_extra = pay_prob_df[pay_prob_df.PAY > 20].iloc[0]

# Index 0 is used until a user has a ride accepted in the month, index 1 afterwards
pays = [pay_first, pay_extra]
probs = [prob_acc_first, prob_acc_extra]

# Run the monthly process for months 1..12 with the adaptive Pay
for m in range(1, 13):
    
    # Activate this month's new users
    users_df_test_3 = generate_new_active_users(m, users_df_test_3)
    
    # Find everyone currently active
    active_users_m, num_active_users_m, users_df_test_3 = find_active_users(users_df_test_3)
    
    # Draw each active user's requests for the month
    users_requesting_m, requests_oi_m, users_df_test_3 = generate_requests(active_users_m, num_active_users_m, m, users_df_test_3)
    
    # Simulate each ride with the two-tier Pay
    users_df_test_3, rides_df_test_3 = generate_acceptances_adaptive(users_requesting_m, pays, probs, pay_prob_df, m, users_df_test_3, rides_df_test_3)


# Display the ride-level results
rides_df_test_3


Unnamed: 0,USER_ID,MONTH,RIDE_ID,ACCEPTED,PAY,PROFIT
0,2.0,1.0,1.0,1.0,33.54,-3.54
1,3.0,1.0,1.0,1.0,33.54,-3.54
2,3.0,1.0,2.0,0.0,20.01,9.99
3,3.0,1.0,3.0,0.0,20.01,9.99
4,4.0,1.0,1.0,1.0,33.54,-3.54
...,...,...,...,...,...,...
29210,11994.0,12.0,1.0,0.0,33.54,-3.54
29211,11996.0,12.0,1.0,1.0,33.54,-3.54
29212,11996.0,12.0,2.0,0.0,20.01,9.99
29213,12000.0,12.0,1.0,1.0,33.54,-3.54


### Experiment 4: Lock in

In [221]:
# Fresh result tables for this experiment
users_df_test_4 = create_users_df(12000)
rides_df_test_4 = create_rides_df()

# First row of pay_prob_df whose acceptance probability exceeds 0.95, unpacked
# as (PAY, PROB); this relies on pay_prob_df having exactly those two columns
pay_first_half, prob_acc_first_half = pay_prob_df[pay_prob_df.PROB > 0.95].iloc[0]
# First row of pay_prob_df whose Pay exceeds 5, unpacked the same way
pay_last_half, prob_acc_last_half = pay_prob_df[pay_prob_df.PAY > 5].iloc[0]

# Index 0 applies up to and including month_change, index 1 in later months
pays = [pay_first_half, pay_last_half]
probs = [prob_acc_first_half, prob_acc_last_half]

# Last month in which the first (higher-probability) Pay is offered
month_change = 9

# Run the monthly process for months 1..12 with the Pay switch
for m in range(1, 13):
    
    # Activate this month's new users
    users_df_test_4 = generate_new_active_users(m, users_df_test_4)
    
    # Find everyone currently active
    active_users_m, num_active_users_m, users_df_test_4 = find_active_users(users_df_test_4)
    
    # Draw each active user's requests for the month
    users_requesting_m, requests_oi_m, users_df_test_4 = generate_requests(active_users_m, num_active_users_m, m, users_df_test_4)
    
    # Simulate each ride at the Pay that applies to month m
    users_df_test_4, rides_df_test_4 = generate_acceptances_half(users_requesting_m, pays, probs, pay_prob_df, month_change, m, users_df_test_4, rides_df_test_4)


# Display the ride-level results
rides_df_test_4


Unnamed: 0,USER_ID,MONTH,RIDE_ID,ACCEPTED,PAY,PROFIT
0,1.0,1.0,1.0,1.0,36.49,-6.49
1,3.0,1.0,1.0,1.0,36.49,-6.49
2,3.0,1.0,2.0,1.0,36.49,-6.49
3,5.0,1.0,1.0,1.0,36.49,-6.49
4,6.0,1.0,1.0,1.0,36.49,-6.49
...,...,...,...,...,...,...
50850,11996.0,12.0,2.0,0.0,5.01,24.99
50851,11997.0,12.0,1.0,0.0,5.01,24.99
50852,11998.0,12.0,1.0,0.0,5.01,24.99
50853,11999.0,12.0,1.0,0.0,5.01,24.99
