In [1]:
# %%bash
# source $VIRTUAL_ENV_DIR/python3/bin/activate
 
# # Install latest mxpkg version (to specify version, use syntax: pip install mxpkg==1.1.7)
# pip install dataclasses
# pip install matching-ds-tools
 
# deactivate

In [2]:
import json
import datetime
import re

import logging
logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from queryrunner_client import Client
USER_EMAIL = 'thai@uber.com'
qclient = Client(user_email=USER_EMAIL)
CONSUMER_NAME = 'intelligentdispatch'

import os
import warnings
warnings.filterwarnings('ignore')
import multiprocessing
from joblib import Parallel, delayed
#num_cores = multiprocessing.cpu_count()  -- 48
n_cores = 4

In [3]:
from dataclasses import dataclass
import itertools
from typing import *
import numpy as np
import pandas as pd
from queryrunner_client import Client as QRClient
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.optimize import linear_sum_assignment

In [4]:
import mdstk
from mdstk.data_fetcher.data_fetcher import DataFetcher
from mdstk.data_fetcher.cached_data_fetcher import CachedDataFetcher

In [5]:
%pip install bayesian-optimization

Looking in indexes: http://artifactory.uber.internal:4587/artifactory/api/pypi/pypi/simple/
Note: you may need to restart the kernel to use updated packages.


In [6]:
# import numpy as np
from bayes_opt import BayesianOptimization
from scipy.optimize import linear_sum_assignment
from scipy.stats.mstats import gmean
# import matplotlib.pyplot as plt

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [7]:
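# Data collection: SQL template for one (city, vehicle view, date) slice.
# The placeholders {datestr}, {city_id}, {vvid} and {digits} are filled in
# with str.format by the Query dataclass defined in the next cell.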

QUERY = """
with dispatch as (
select 
    datestr,
    msg.cityid,
    msg.ctplangenrequestuuid as plangen_uuid,
    msg.ctrequestuuid as scan_uuid,
    j as job_uuid,
    msg.supplyuuid,
    msg.planactiontype
from 
    rawdata_user.kafka_hp_multileg_dispatched_plan_nodedup
cross join 
    unnest(msg.jobuuid) jobs(j)
where 
    datestr = '{datestr}'
    and msg.cityid = {city_id}
    and msg.vehicleviewid = {vvid} 
    and msg.tenancy = 'uber/production'
    and CARDINALITY(msg.jobuuid) > 0
    and substr(msg.ctrequestuuid, 1, length('{digits}')) = '{digits}'
),
plangen as (
select 
    msg.scanuuid as plangen_uuid, 
    p.uuid as job_uuid,
    j.supplyuuid
from 
    rawdata_user.kafka_hp_multileg_matching_observability_proposals_v2_nodedup
cross join 
    unnest(msg.proposals) as job(j)
cross join 
    unnest(j.jobs) as plan(p)
where 
    datestr = '{datestr}'
    and msg.cityid = {city_id}
    and msg.flowtype = 'solo_batch'
    and msg.tenancy = 'uber/production'
    and j.status = 'eligible'
),
mgv as (
select datestr,
    msg.city_id,
    msg.job_uuid,
    msg.client_uuid,
    msg.ct_request_uuid as plangen_uuid,
    msg.supply_uuid,
    msg.supply_plan_uuid as plan_uuid,
    msg.unadjusted_eta as eta,
    (CASE
      WHEN msg.adjustedeta > 1500 THEN 1500.0
      WHEN msg.adjustedeta < 0 THEN 0.0
      ELSE msg.adjustedeta
    END) as adjustedeta,
    round(msg.job_surge, 4) as surge_mul,
    round(msg.eventual_completion_probability, 4) as eventual_comp_prob,
    msg.ranking_metric,
    round(1 - msg.solo_cancel_model_driver_accept_prob, 4) as d_proba,
    round(1 - msg.solo_cancel_model_rider_accept_prob, 4) as r_proba,
    round(1 - msg.spinner_survive_prob_before_next_scan, 4) as s_proba,
    msg.preferred_destination_adjustment,
    msg.objective_value as of_value,
    msg.inconvenience_etd - msg.ranking_metric as trip_length
from   
    rawdata.kafka_hp_multileg_mgv_log_nodedup
where  
    datestr = '{datestr}'
    and msg.city_id = {city_id}
    and msg.tenancy = 'uber/production'
    and msg.vehicle_view_id = {vvid} 
    and msg.flow_type = 'solo_batch'
    and msg.job_uuid <> msg.client_uuid
    and msg.calculator_type = 'markov_eta_v2'
),
test as (
select 
    mgv.datestr,
    mgv.city_id,
    dispatch.scan_uuid,
    mgv.plangen_uuid,
    mgv.job_uuid,
    dispatch.planactiontype,
    mgv.supply_uuid,
    case when dispatch.supplyuuid = mgv.supply_uuid then 1 else 0 end as is_selected,
    mgv.eta,
    mgv.adjustedeta,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1), 4) as eta_one,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.05), 4) as eta_one_five,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.10), 4) as eta_one_ten,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.15), 4) as eta_one_fifteen,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.20), 4) as eta_one_twenty,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.25), 4) as eta_one_quarter,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.30), 4) as eta_one_thirty,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.35), 4) as eta_one_thirty_five,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.40), 4) as eta_one_forty,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.45), 4) as eta_one_forty_five,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.50), 4) as eta_one_fifty,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.55), 4) as eta_one_fifty_five,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.60), 4) as eta_one_sixty,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.65), 4) as eta_one_sixty_five,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.70), 4) as eta_one_seventy,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.75), 4) as eta_one_seventy_five,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.80), 4) as eta_one_eighty,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.85), 4) as eta_one_eighty_five,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.90), 4) as eta_one_ninety,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 1.95), 4) as eta_one_ninety_five,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 2), 4) as eta_square,
    round(POWER(1 - mgv.adjustedeta / 1500.0, 3), 4) as eta_cube,
    mgv.surge_mul,
    mgv.eventual_comp_prob,
    round(1.0 / (1.0 + POWER(mgv.surge_mul, 2)), 4) as network_contention_2,
    round(1.0 / (1.0 + POWER(mgv.surge_mul, 3)), 4) as network_contention_3,
    round(1.0 / (1.0 + POWER(mgv.surge_mul, 5)), 4) as network_contention_5,
    mgv.ranking_metric,
    mgv.d_proba,
    mgv.r_proba,
    mgv.s_proba,
    round((1.0 - mgv.d_proba) * (1.0 - mgv.r_proba) * (1.0 - mgv.s_proba) + mgv.eventual_comp_prob * mgv.d_proba, 4) as cr_ratio,
    round((1.0 - mgv.d_proba) * (1.0 - mgv.r_proba) + mgv.eventual_comp_prob * mgv.d_proba, 4) as crof_ratio,
    mgv.preferred_destination_adjustment,
    mgv.of_value,
    mgv.trip_length,
    fare.est_rider_quoted_final_fare as fare,
    fare.est_rider_quoted_final_fare * 1.0 / fare.usd_fx_rate as fare_usd
from
    mgv
join
    plangen
on 
    mgv.plangen_uuid = plangen.plangen_uuid
    and mgv.job_uuid = plangen.job_uuid
    and mgv.supply_uuid = plangen.supplyuuid
join
    dispatch
on
    mgv.plangen_uuid = dispatch.plangen_uuid
    and mgv.job_uuid = dispatch.job_uuid
join
    dwh.fact_trip_fare fare 
on
    mgv.job_uuid = fare.trip_uuid
    and fare.datestr = '{datestr}'
    and fare.city_id = {city_id}
)
select * from test
"""

In [8]:
@dataclass
class Query:
    """One parameterised instance of the module-level ``QUERY`` template.

    The five fields are the values used to build the query.  After
    construction the instance additionally carries two derived attributes:

    * ``qry``  -- ``QUERY`` with its placeholders substituted;
    * ``name`` -- a label built from all five fields.
    """
    prefix: str
    hex_digits: str
    city_id: int
    vvid: str
    datestr: str

    def __post_init__(self):
        # Collect the template substitutions in one place; note that the
        # template calls the hex prefix ``digits`` rather than ``hex_digits``.
        substitutions = {
            'datestr': self.datestr,
            'city_id': self.city_id,
            'vvid': self.vvid,
            'digits': self.hex_digits,
        }
        self.qry = QUERY.format(**substitutions)
        self.name = f'{self.prefix}_city{self.city_id}_{self.vvid}_{self.datestr}_segment{self.hex_digits}'

In [9]:
class MyDataFetcher(DataFetcher):
    """Notebook-local ``DataFetcher`` subclass.

    It adds no behaviour of its own: ``query_many_presto`` hands every
    argument straight to the parent implementation.
    """

    def query_many_presto(self, *args, **kwargs):
        """Forward all positional and keyword arguments to the parent method."""
        parent_result = super().query_many_presto(*args, **kwargs)
        return parent_result

In [10]:
# Calculate new objective function
def clean_df(df):
    """Return a cleaned copy of the raw scan rows.

    Steps, in order:
      1. drop rows whose ``fare`` is null;
      2. raise every ``trip_length`` that is <= 100 up to 100;
      3. keep the first row of each ``(job_uuid, supply_uuid)`` pair;
      4. drop rows that still contain any null value.

    Args:
        df: DataFrame with at least the columns ``fare``, ``trip_length``,
            ``job_uuid`` and ``supply_uuid``.

    Returns:
        A new DataFrame; the input frame is never modified.
    """
    # Take an explicit copy so the assignment below targets a frame we own.
    cleaned = df[df['fare'].notnull()].copy()

    # A single .loc assignment replaces the former chained indexing
    # (df['trip_length'][mask] = 100), which wrote through an intermediate
    # Series and is a silent no-op when pandas Copy-on-Write is active.
    cleaned.loc[cleaned['trip_length'] <= 100, 'trip_length'] = 100

    cleaned = cleaned.drop_duplicates(subset=['job_uuid', 'supply_uuid'])
    return cleaned.dropna()

# def compute_new_of(df):
    
#     # Baseline (Markov)

# {'total_jobs': 6076,
#  'match_rate': 0.945,
#  'overwrite': 0.0,
#  'Average Matched ETA': 487.91,
#  'P90 Matched ETA': 1122.0,
#  'Driver AR': 0.496,
#  'Rider cancel': 0.154,
#  'Average trip length': 829.1,
#  'Average Matched Fare': 16.0,
#  'Total GB': 38381}

#     # EFOF
#     df['new_of'] = - df['eta_square'] * df['cr_ratio'] * df['fare']

# {'total_jobs': 6076,
#  'match_rate': 0.985,
#  'overwrite': 0.164,
#  'Average Matched ETA': 531.9,
#  'P90 Matched ETA': 1219.8,
#  'Driver AR': 0.493,
#  'Rider cancel': 0.171,
#  'Average trip length': 835.03,
#  'Average Matched Fare': 16.1,
#  'Total GB': 39531}

#     # CROF
#     df['new_of'] = - df['eta_square'] * df['crof_ratio']

# {'total_jobs': 6076,
#  'match_rate': 0.984,
#  'overwrite': 0.134,
#  'Average Matched ETA': 530.09,
#  'P90 Matched ETA': 1217.0,
#  'Driver AR': 0.493,
#  'Rider cancel': 0.169,
#  'Average trip length': 833.92,
#  'Average Matched Fare': 16.08,
#  'Total GB': 39573}

############################################
#             GUB as label                 #
#              Unit: USD                   #
############################################

#     # gamma = 1.00 - with Intercept - MAIN II - Use local currency with a fixed exchange rate (which is ~ 1 at the median value)
#     df['new_of'] = - (0.4019 * df['d_proba'] \
#                       - 0.9627 * df['eventual_comp_prob'] \
#                       - 1.3453 * df['eta_one'] * df['cr_ratio'] \
#                       + 0.6210 * df['eta_one'] * (1 - df['network_contention_2']) * df['cr_ratio'] * df['fare'] \
#                       - 0.6435 * df['eta_one_quarter'] * (1 - df['network_contention_5']) * df['cr_ratio'] * df['fare'] \
#                       - 1.1098 * df['eta_one'] * (1 - df['network_contention_2']) * df['cr_ratio'] * df['fare'] / df['surge_mul'] \
#                       + 4.1085 * df['eta_one'] * (1 - df['network_contention_5']) * df['cr_ratio'] * df['fare'] / df['surge_mul'] \
#                       + 0.15
#                      )

# {'total_jobs': 6076,
#  'match_rate': 0.981,
#  'overwrite': 0.19,
#  'Average Matched ETA': 531.33,
#  'P90 Matched ETA': 1212.0,
#  'Driver AR': 0.501,
#  'Rider cancel': 0.17,
#  'Average trip length': 834.63,
#  'Average Matched Fare': 16.12,
#  'Total GB': 40213}

#     return df

# Baseline coefficients used by ``global_new_of``.  The keyword defaults of
# ``compute_new_of`` carry the same eight values.
_BASELINE_OF_COEFFS = dict(
    a=0.4019,
    b=-0.9627,
    c=-1.3453,
    d=0.6210,
    e=-0.6435,
    f=-1.1098,
    g=4.1085,
    h=1.3591,
)


def _negated_objective(df, a, b, c, d, e, f, g, h):
    """Return the negated weighted objective as a Series; ``df`` is not modified.

    The expression is kept term-for-term (and in the same evaluation order)
    as the formula that used to be written out separately in both public
    functions, so the floating-point results are unchanged.
    """
    return - (a * df['d_proba']
              + b * df['eventual_comp_prob']
              + c * df['eta_one'] * df['cr_ratio']
              + d * df['eta_one'] * (1 - df['network_contention_2']) * df['cr_ratio'] * df['fare']
              + e * df['eta_one_quarter'] * (1 - df['network_contention_5']) * df['cr_ratio'] * df['fare']
              + f * df['eta_one'] * (1 - df['network_contention_2']) * df['cr_ratio'] * df['fare'] / df['surge_mul']
              + g * df['eta_one'] * (1 - df['network_contention_5']) * df['cr_ratio'] * df['fare'] / df['surge_mul']
              + h)


def global_new_of(df):
    """Add the fixed-coefficient objective as column ``global_new_of``.

    Args:
        df: DataFrame holding the columns ``d_proba``, ``eventual_comp_prob``,
            ``eta_one``, ``eta_one_quarter``, ``cr_ratio``,
            ``network_contention_2``, ``network_contention_5``, ``fare`` and
            ``surge_mul``.

    Returns:
        The same ``df`` object; the column is written in place.
    """
    df['global_new_of'] = _negated_objective(df, **_BASELINE_OF_COEFFS)
    return df


def compute_new_of(
        df,
        a = 0.4019,
        b = -0.9627,
        c = -1.3453,
        d = 0.6210,
        e = -0.6435,
        f = -1.1098,
        g = 4.1085,
        h = 1.3591
    ):
    """Add the objective for the given coefficients as column ``new_of``.

    Args:
        df: DataFrame with the same columns required by ``global_new_of``.
        a: weight on ``d_proba``.
        b: weight on ``eventual_comp_prob``.
        c: weight on ``eta_one * cr_ratio``.
        d, e, f, g: weights on the four fare-scaled interaction terms
            (``f`` and ``g`` are additionally divided by ``surge_mul``).
        h: constant intercept.

    Returns:
        The same ``df`` object; the column is written in place, overwriting
        any previous ``new_of``.
    """
    df['new_of'] = _negated_objective(df, a, b, c, d, e, f, g, h)
    return df


In [11]:
# local solver
def solve_dict(
    scan: dict, 
    cost_col: str, 
    job_singleton: float = 1500,
    infinity: float = 1000000
):
    """Solve one scan as a minimum-cost assignment of jobs to supplies.

    Args:
        scan: mapping whose keys are ``(job, supply)`` tuples and whose values
            are per-pair records (dicts) containing ``cost_col``.
        cost_col: name of the record entry used as the pair's cost.
        job_singleton: cost charged for leaving a job unmatched.
        infinity: cost given to every pair that does not appear in ``scan``.

    Returns:
        A set with exactly one tuple per job: ``(job, supply)`` when the job
        is matched and ``(job,)`` when it is left unmatched.
    """
    if not scan:
        return set()

    # De-duplicate in first-seen order.  A plain set() would order string
    # ids by their randomised hash, making the row/column layout -- and
    # therefore how ties between equal-cost assignments are broken --
    # differ from one interpreter start to the next.
    job_list = list(dict.fromkeys(k[0] for k in scan))
    supply_list = list(dict.fromkeys(k[1] for k in scan))
    job_idx = {j: i for i, j in enumerate(job_list)}
    supply_idx = {s: i for i, s in enumerate(supply_list)}
    job_count = len(job_list)
    supply_count = len(supply_list)

    # Columns [0, supply_count) are real supplies; column supply_count + i is
    # the private "stay unmatched" slot of job i.  float32 storage is kept
    # from the original implementation.
    utility = np.full((job_count, supply_count + job_count), infinity, dtype=np.float32)
    for key, record in scan.items():
        utility[job_idx[key[0]], supply_idx[key[1]]] = record[cost_col]
    own_slot = np.arange(job_count)
    utility[own_slot, supply_count + own_slot] = job_singleton

    # solve
    job_sol, supply_sol = linear_sum_assignment(utility)

    result = set()
    for jidx, sidx in zip(job_sol, supply_sol):
        job = job_list[jidx]
        if sidx >= supply_count:
            result.add((job,))
        else:
            result.add((job, supply_list[sidx]))

    assert len(result) == job_count
    return result

In [12]:
from dataclasses import field

@dataclass
class ScanMetrics:
    """Additive accumulator of per-scan matching metrics.

    Instances can be combined with ``+`` (returns a new object) or ``+=``
    (updates the left operand in place); every numeric field is summed and
    the ETA lists are concatenated.
    """
    total_jobs: int = 0              # number of jobs in the matching
    total_eta: float = 0.            # sum of 'eta' over matched pairs
    total_offer: float = 0.          # number of matched (job, supply) pairs
    total_ar: float = 0.             # sum of (1 - d_proba) over matched pairs
    total_rc: float = 0.             # sum of r_proba over matched pairs
    total_trip: float = 0.           # sum of trip_length over matched pairs
    total_gb: float = 0.             # sum of the acceptance-weighted fare_usd
    total_fare: float = 0.           # sum of fare_usd over matched pairs
    total_overwrite: int = 0         # count of overwritten decisions
    total_global_new_of: float = 0.  # sum of the weighted global_new_of
    list_etas: list = field(default_factory = list)  # individual matched ETAs

    def __add__(self, o: 'ScanMetrics') -> 'ScanMetrics':
        """Return a new ``ScanMetrics`` holding the field-wise sum."""
        # Keyword arguments keep each total attached to its own field; the
        # field order above places total_overwrite after total_fare, so
        # passing the sums positionally is easy to get wrong.
        return ScanMetrics(
            total_jobs=self.total_jobs + o.total_jobs,
            total_eta=self.total_eta + o.total_eta,
            total_offer=self.total_offer + o.total_offer,
            total_ar=self.total_ar + o.total_ar,
            total_rc=self.total_rc + o.total_rc,
            total_trip=self.total_trip + o.total_trip,
            total_gb=self.total_gb + o.total_gb,
            total_fare=self.total_fare + o.total_fare,
            total_overwrite=self.total_overwrite + o.total_overwrite,
            total_global_new_of=self.total_global_new_of + o.total_global_new_of,
            # list + list builds a fresh list, so neither operand is aliased.
            list_etas=self.list_etas + o.list_etas,
        )

    def __iadd__(self, o: 'ScanMetrics') -> 'ScanMetrics':
        """Add ``o`` into this instance in place and return ``self``."""
        self.total_jobs += o.total_jobs
        self.total_eta += o.total_eta
        self.total_offer += o.total_offer
        self.total_ar += o.total_ar
        self.total_rc += o.total_rc
        self.total_trip += o.total_trip
        self.total_gb += o.total_gb
        self.total_fare += o.total_fare
        self.total_overwrite += o.total_overwrite
        self.total_global_new_of += o.total_global_new_of
        self.list_etas += o.list_etas

        return self

In [13]:
# Metric Summary
def metric_summary_dict(
    scan_dict: Dict[str, Dict[str, Any]],
    matching: set, 
    overwrite: int,
) -> ScanMetrics:
    """Summarise one scan's matching as a ``ScanMetrics``.

    Args:
        scan_dict: per-pair records; it is indexed here with
            ``(job, supply)`` tuples.
        matching: set of tuples; only 2-tuples ``(job, supply)`` contribute
            to the matched totals, every element counts toward ``total_jobs``.
        overwrite: value stored unchanged as ``total_overwrite``.

    Returns:
        A freshly built ``ScanMetrics`` for this scan.
    """
    summary = ScanMetrics()
    summary.total_jobs = len(matching)
    summary.total_overwrite = overwrite

    for assignment in matching:
        if len(assignment) != 2:
            # Not a (job, supply) pair: nothing to add to the matched totals.
            continue

        job, supply = assignment
        record = scan_dict[(job, supply)]
        driver_accept = 1 - record['d_proba']
        rider_keep = 1 - record['r_proba']

        summary.total_offer += 1
        summary.total_eta += record['eta']
        summary.total_ar += driver_accept
        summary.total_rc += record['r_proba']

        # Trip lengths of 7200 or more are left out of the trip total.
        if record['trip_length'] < 7200:
            summary.total_trip += record['trip_length']

        # Only strictly positive fares count toward bookings and fare totals.
        if record['fare_usd'] > 0:
            summary.total_gb += driver_accept * rider_keep * record['fare_usd']
            summary.total_fare += record['fare_usd']

        summary.total_global_new_of += driver_accept * rider_keep * record['global_new_of']
        summary.list_etas.append(record['eta'])

    return summary

def solve_all_dict(df, solver: Callable[[dict], set]):
    """Solve every scan in ``df`` and report aggregate metrics.

    ``df`` is grouped by ``scan_uuid``; each group is turned into a dict
    keyed by ``(job_uuid, supply_uuid)`` and handed to ``solver``, which is
    unpacked here as a ``(matching, overwrite)`` pair.  The per-scan
    summaries are accumulated and returned as a dict of rounded figures.
    """
    totals = ScanMetrics()
    for _scan_uuid, scan_rows in df.groupby('scan_uuid'):
        scan = scan_rows.set_index(['job_uuid', 'supply_uuid']).to_dict(orient='index')
        matching, overwrite = solver(scan)
        totals += metric_summary_dict(scan, matching, overwrite)

    def rounded_ratio(numerator, denominator, digits):
        # Same arithmetic as writing round(x * 1.0 / y, digits) inline.
        return round(numerator * 1.0 / denominator, digits)

    jobs = totals.total_jobs
    offers = totals.total_offer
    return {
        'total_jobs': round(jobs),
        'match_rate': rounded_ratio(offers, jobs, 3),
        # different decisions compared to Markov
        'overwrite': rounded_ratio(totals.total_overwrite, jobs, 3),
        'average_matched_eta': rounded_ratio(totals.total_eta, offers, 2),
        'p90_matched_eta': round(np.percentile(totals.list_etas, 90), 2),
        'driver_ar': rounded_ratio(totals.total_ar, offers, 3),
        'rider_cancel': rounded_ratio(totals.total_rc, offers, 3),
        'average_trip_length': rounded_ratio(totals.total_trip, offers, 2),
        'average_matched_fare': rounded_ratio(totals.total_fare, offers, 2),
        'total_gb': round(totals.total_gb),
        'global_new_of': rounded_ratio(totals.total_global_new_of, jobs, 3),
    }

In [14]:
def different_matching_decision(m1,m2):
    """Return ``(in m1 but not m2, in m2 but not m1)`` for two matchings."""
    only_in_first = m1.difference(m2)
    only_in_second = m2.difference(m1)
    return only_in_first, only_in_second

def supply_cost_solve_dict(scan, is_markov = False, secondary_singleton = 0.0):
    """Solve one scan and count how far the result departs from Markov.

    The Markov matching (cost column ``of_value``, unmatched cost 1500) is
    always computed.  With ``is_markov`` true it is returned together with a
    count of 0.  Otherwise the scan is solved again on ``new_of`` with
    ``secondary_singleton`` as the unmatched cost, and that matching is
    returned along with the number of Markov assignments it does not contain.
    """
    markov_matching = solve_dict(scan, 'of_value', job_singleton = 1500)

    if not is_markov:
        # SCA solve
        candidate_matching = solve_dict(scan, 'new_of', job_singleton = secondary_singleton)
        dropped_markov = markov_matching.difference(candidate_matching)
        return candidate_matching, len(dropped_markov)

    return markov_matching, 0


In [15]:
# Build one Query per (city, vehicle view, date) combination and load the
# results through the caching fetcher.

# Label prefix used in every Query.name.
prefix = 'replay'
# The dispatch CTE of QUERY keeps only rows whose ctrequestuuid starts with
# these characters.
hex_digits = '36'

# city_id -> vehicle view id.  The id is a parenthesised string because it is
# substituted verbatim into the SQL text.
city_id_vvids = {38: '(3298)', 37: '(5235)', 36: '(570)'}

datestrs = [  # 1 week
    '2022-09-13',
    '2022-09-14',
    '2022-09-15',
    '2022-09-16',
    '2022-09-17',
    '2022-09-18',
    '2022-09-19'
]

# Cartesian product: 3 cities x 7 dates = 21 queries.
queries = [
    Query(prefix=prefix, hex_digits=hex_digits, city_id=city_id, vvid=vvid, datestr=datestr)
    for (city_id, vvid), datestr in itertools.product(city_id_vvids.items(), datestrs)
]

# Query label -> rendered SQL text.
cache_qry_map = {
    q.name: q.qry 
    for q in queries
}

cdf = CachedDataFetcher(
    data_fetcher=MyDataFetcher(
        user_email=USER_EMAIL,
        consumer_name=CONSUMER_NAME,
    ),
    cache_qry_map=cache_qry_map,
    datacenter='dca1',
    datasource='presto-secure',
)

# On the first run set bust_cache to True; on later runs set it to False.
cdf.fetch(bust_cache=False)

Loaded 21/21 dataframes from cache!


In [16]:
# Clean data: stack every fetched per-query frame into one table, then apply
# clean_df.  `scans` keeps the uncleaned concatenation; `df` is the cleaned
# frame used by all later cells.
scans = pd.concat(cdf.dfs.values(), axis=0, ignore_index=True) 
df = scans
df = clean_df(df)

In [17]:
# BayesOpt

In [18]:
# Generate global_new_of: the fixed-coefficient objective column that
# metric_summary_dict reads when scoring a matching.
df = global_new_of(df)

In [19]:
# NOTE(review): a `global` statement at module (cell) level has no effect --
# `df` is already a module-level name.  This cell can be deleted.
global df

In [20]:
def optimized_function(
        a = 0.4019,
        b = -0.9627,
        c = -1.3453,
        d = 0.6210,
        e = -0.6435,
        f = -1.1098,
        g = 4.1085,
        h = 1.3591,
        seed = 96,
        training = True,
        iter_df = df
    ):
    """Reward for one coefficient vector ``a..h``.

    ``new_of`` is recomputed on ``iter_df`` with the given coefficients, every
    scan is re-solved on it, and the aggregate metrics are folded into a
    single score.  When ``training`` is true, 0.005 times the sum of squared
    coefficients is subtracted from that score.

    Note: the default ``iter_df`` is whatever object the module-level ``df``
    referred to when this ``def`` was executed.
    """
    np.random.seed(seed)

    def solve_scan(scan):
        return supply_cost_solve_dict(scan, is_markov = False)

    # Solve is based on 'new_of'
    scored_df = compute_new_of(iter_df, a = a, b = b, c = c, d = d, e = e, f = f, g = g, h = h)
    metrics = solve_all_dict(scored_df, solve_scan)

    # Evaluation is based on 'global_new_of'
    reward = - metrics['global_new_of'] - 0.015 * metrics['average_matched_eta'] + 25 * metrics['match_rate']

    if not training:
        return reward

    squared_norm = sum(coef ** 2 for coef in (a, b, c, d, e, f, g, h))
    return reward - 0.005 * squared_norm


In [21]:
# Bounded region of parameter space: one (lower, upper) pair per coefficient
# of optimized_function.  Every baseline coefficient lies inside its interval.
pbounds = {'a': (0, 1),
           'b': (-2, 0),
           'c': (-2, 0),
           'd': (0, 2),
           'e': (-2, 0),
           'f': (-2, 0),
           'g': (3, 5),
           'h': (0, 2)
          }

In [22]:
# Optimizer over the coefficients a..h.  Only the keys of `pbounds` are
# searched, so optimized_function keeps its defaults for seed, training and
# iter_df.
optimizer = BayesianOptimization(
    f=optimized_function,
    pbounds=pbounds,
    random_state=1,
)

In [23]:
# Queue the baseline coefficients as an explicit point to evaluate.  With
# lazy=True nothing is evaluated here; in the log printed by maximize() this
# point appears as iteration 1.
optimizer.probe(
    params={'a': 0.4019,
            'b': -0.9627,
            'c': -1.3453,
            'd': 0.6210,
            'e': -0.6435,
            'f': -1.1098,
            'g': 4.1085,
            'h': 1.3591},
    lazy=True,
)

In [24]:
# Run the search: the probed baseline, 2 random initial points, then 25
# guided iterations.
# NOTE(review): `acq` and `xi` are passed straight to maximize(); newer
# bayesian-optimization releases may expect the acquisition function to be
# configured elsewhere -- confirm against the installed (unpinned) version.
optimizer.maximize(
    init_points=2,
    n_iter=25, acq="poi", xi=1e-4,
)

|   iter    |  target   |     a     |     b     |     c     |     d     |     e     |     f     |     g     |     h     |
-------------------------------------------------------------------------------------------------------------------------


INFO:jaeger_tracing:Tracing sampler started with sampling refresh interval 60 sec


| [0m 1       [0m | [0m 32.2    [0m | [0m 0.4019  [0m | [0m-0.9627  [0m | [0m-1.345   [0m | [0m 0.621   [0m | [0m-0.6435  [0m | [0m-1.11    [0m | [0m 4.109   [0m | [0m 1.359   [0m |
| [0m 2       [0m | [0m 27.56   [0m | [0m 0.417   [0m | [0m-0.5594  [0m | [0m-2.0     [0m | [0m 0.6047  [0m | [0m-1.706   [0m | [0m-1.815   [0m | [0m 3.373   [0m | [0m 0.6911  [0m |
| [95m 3       [0m | [95m 32.31   [0m | [95m 0.3968  [0m | [95m-0.9224  [0m | [95m-1.162   [0m | [95m 1.37    [0m | [95m-1.591   [0m | [95m-0.2438  [0m | [95m 3.055   [0m | [95m 1.341   [0m |
| [0m 4       [0m | [0m 32.18   [0m | [0m 0.9087  [0m | [0m-0.0963  [0m | [0m-0.1637  [0m | [0m 0.6454  [0m | [0m-0.3824  [0m | [0m-0.1678  [0m | [0m 3.734   [0m | [0m 1.38    [0m |
| [0m 5       [0m | [0m 32.28   [0m | [0m 0.0972  [0m | [0m-1.043   [0m | [0m-1.267   [0m | [0m 1.453   [0m | [0m-1.215   [0m | [0m-0.2768  [0m | [0m 3.169   [0m | 

In [25]:
def LSR_Eval(
        params,
        seed = 96,
        iter_df = df
    ):
    """Return the full metrics dict for one coefficient vector.

    ``params`` must unpack into exactly eight values, taken positionally as
    ``a, b, c, d, e, f, g, h``.  ``new_of`` is recomputed on ``iter_df`` and
    every scan is re-solved on it; the dict produced by ``solve_all_dict`` is
    returned unchanged.

    Note: the default ``iter_df`` is whatever object the module-level ``df``
    referred to when this ``def`` was executed.
    """
    np.random.seed(seed)

    a, b, c, d, e, f, g, h = params

    def solve_scan(scan):
        return supply_cost_solve_dict(scan, is_markov = False)

    # Solve is based on 'new_of'
    scored_df = compute_new_of(iter_df, a = a, b = b, c = c, d = d, e = e, f = f, g = g, h = h)
    metrics = solve_all_dict(scored_df, solve_scan)
    return metrics

In [26]:
# Metrics for the baseline coefficients, listed in a..h order.
baseline = LSR_Eval(
        params = [0.4019, -0.9627, -1.3453, 0.6210, -0.6435, -1.1098, 4.1085, 1.3591],
        seed = 96,
        iter_df = df
    )
baseline

{'total_jobs': 6076,
 'match_rate': 0.988,
 'overwrite': 0.199,
 'average_matched_eta': 539.81,
 'p90_matched_eta': 1231.0,
 'driver_ar': 0.501,
 'rider_cancel': 0.172,
 'average_trip_length': 835.6,
 'average_matched_fare': 16.13,
 'total_gb': 40498,
 'global_new_of': -15.718}

In [27]:
# Metrics for the best coefficients found by the optimizer.
# LSR_Eval unpacks `params` positionally as a..h, so the list is built by
# explicit key rather than from the dict's iteration order, which would
# silently mis-assign coefficients if that order ever differed.
best_params = optimizer.max["params"]
new = LSR_Eval(
    params = [best_params[name] for name in "abcdefgh"],
    seed = 96,
    iter_df = df
    )
new

{'total_jobs': 6076,
 'match_rate': 0.989,
 'overwrite': 0.208,
 'average_matched_eta': 541.72,
 'p90_matched_eta': 1233.0,
 'driver_ar': 0.503,
 'rider_cancel': 0.172,
 'average_trip_length': 835.62,
 'average_matched_fare': 16.14,
 'total_gb': 40696,
 'global_new_of': -15.805}

In [28]:
# Best coefficient values found, in the iteration order of the params dict.
list(optimizer.max["params"].values())

[0.3444705740718891,
 -0.886766111974594,
 -1.1853201082277138,
 1.3662973896250004,
 -1.6049984362797407,
 -0.25883503674061675,
 3.0404645312380407,
 1.3040092284736122]