In [2]:
from flipr_client.clients.remote_client import RemoteClient
from queryrunner_client import Client as QueryRunnerClient
import yaml
from typing import *
import copy
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

import pandas as pd

from mdstk import (
    SimulationJob,
    MultiverseClient,
    DataFetcher,
)

from batch_utils import helpers
from batch_utils.study_run import StudyRun
from batch_utils.batch_querier import BatchQuerier
from batch_utils.batch_querier_sim_x import create_x_batch_querier, study_run_to_cache_name
from batch_utils.batch_runner import BatchRunner

In [32]:
# Identity passed to MultiverseClient when the simulation client is created.
# NOTE(review): personal identifiers are hardcoded here; consider loading them
# from the environment before sharing this notebook.
USER_EMAIL = 'mehrdadb@uber.com'
USER_UUID = '115cfaf7-2e91-4ff7-ad2e-9009cbd18727'
# Consumer name passed to QueryRunnerClient.
QR_CONSUMER = 'intelligentdispatch'

In [4]:
# Metric columns that aggregate_func combines by plain summation (via agg_sum).
sum_cols = [
    "Number of Unfulfilled Trips",
    "Number of Rider Canceled Trips",
    "Number of Driver Canceled Trips",
    "Number of Completed Trips",
    "Number of Prematch Trips",
    "Number of Trip Swaps",
    "Number of Trips Without Fares",
    "Number Of Jobs With Greedy Fallback",
    "Number of Spinner Canceled Trips",
    "Number of ReRequest Trips",
    # Remaining summed columns; "n" is also the default weight column of agg_avg.
    "Total Hours",
    "Full Bookings. (Local Fare Calc.)",
    "n"
]

# Metric columns that aggregate_func combines as a weighted mean (via agg_avg),
# weighted by the "n" column.
avg_cols = [
    "Overall Avg. Pickup Time",
    "Driver Offer Rejection Rate",
    "Overall Avg. Cost (Local Fare Calc.)",
    "Cancel Probability Avg. Rider",
    "Cancel Probability Avg. Driver",
    "Overall Avg. Trip Duration",
]

In [5]:
def agg_sum(df, c, total_col='n'):
    """Return the sum of column ``c`` of ``df``.

    ``total_col`` is accepted but not used; it keeps the signature parallel to
    ``agg_avg`` so both aggregators can be called the same way.
    """
    column = df[c]
    return column.sum()

def agg_avg(df, c, total_col='n'):
    """Return the mean of column ``c`` of ``df`` weighted by ``total_col``.

    Rows where either the value or the weight is missing are excluded from
    BOTH the numerator and the denominator. (Summing the raw weights would
    count the weight of rows whose value is missing and bias the mean
    towards zero.)

    Args:
        df: DataFrame holding the metric column and the weight column.
        c: Name of the column to average.
        total_col: Name of the weight column (defaults to ``'n'``).

    Returns:
        The weighted mean, or NaN when the usable weights sum to zero
        (including an empty frame).
    """
    values = df[c]
    weights = df[total_col]

    # Only rows with both a value and a weight contribute to the average.
    usable = values.notna() & weights.notna()
    total_weight = weights[usable].sum()

    # Avoid dividing by zero; NaN marks "no data" explicitly.
    if total_weight == 0:
        return float('nan')

    return (values[usable] * weights[usable]).sum() / total_weight

def aggregate_func(df):
    """Collapse ``df`` into a one-row DataFrame of aggregated metrics.

    Every column named in ``sum_cols`` is aggregated with ``agg_sum`` and every
    column named in ``avg_cols`` with ``agg_avg``; the result has one column per
    metric, summed columns first.
    """
    aggregated = {}
    for col in sum_cols:
        aggregated[col] = [agg_sum(df, col)]
    for col in avg_cols:
        aggregated[col] = [agg_avg(df, col)]
    return pd.DataFrame(aggregated)


In [143]:
# Query Runner client; handed to create_x_batch_querier as `qr`.
#qr_client = QueryRunnerClient(user_email=USER_EMAIL)
qr_client = QueryRunnerClient(consumer_name=QR_CONSUMER)

# NOTE(review): CACHE_DIR is not referenced by any visible cell; presumably
# batch_utils writes to a directory of the same name — confirm or remove.
CACHE_DIR = 'cache'
# Study prefix passed to BatchRunner and to br.start().
STUDY_PREFIX = 'partitioning_preXP'
# multileg-sim version passed to br.start(); currently the same value as the
# default of create_sim's multileg_sim_version parameter.
# MULTILEG_VERSION = '04e41cd6baa8'
MULTILEG_VERSION = 'cd511031ba1485dfb784dfb2db3c2fcb5375f0b7'

# Maps city id -> x_vvid; looked up when each StudyRun is built.
X_VVIDS = {
    90: 651,
    146: 1934,
    803: 10369,
    218: 837,
    1379: 10002430,
    799: 11047,

}
# Cities included in the study; each one needs an entry in X_VVIDS.
city_ids = [90, 146, 218, 803, 799, 1379]
# NOTE(review): city_id is not read by any visible cell (the study loop uses
# its own _city_id) — confirm it is still needed.
city_id = 90

In [144]:
# Flipr client. create_sim reads this module-level `flipr` directly (it is not
# passed in as an argument) when fetching radars and spinner settings.
# NOTE(review): host/port point at a local endpoint — presumably a local
# proxy/tunnel must be running before this notebook is executed; confirm.
flipr = RemoteClient(
    host='localhost',
    port=14570,
    application_identifier='autolaszlo'
)
# Simulation client handed to BatchRunner and, through create_sim, to each SimulationJob.
sim_client = MultiverseClient(
    user_email=USER_EMAIL, 
    user_uuid=USER_UUID,
)

In [145]:
# Pipeline names rt-sim05 .. rt-sim31.
# NOTE(review): `pipelines` is rebound in the BatchRunner cell further down
# (starting at rt-sim06) before any visible use, so this definition looks redundant.
pipelines = [f'rt-sim{idx:02d}' for idx in range(5, 32)]

In [146]:
# Reference multileg partitioning settings, parsed from an inline YAML document.
# yaml.safe_load is used because yaml.load() without an explicit Loader is
# deprecated (it emits a warning on PyYAML 5.x and is rejected by PyYAML 6);
# this document only contains plain scalars and a list, so the result is the same.
# NOTE(review): no visible cell reads partitioning_settings; get_partitioning_multileg
# builds the overrides that are actually sent — confirm this is still needed.
partitioning_settings = yaml.safe_load('''
gb_centric_of.lambda: 1
polysolve.graph_partitioning_enabled: true
polysolve.plangen_parallel_of_enabled: true
polysolve.plangen_parallel_of_names:
    - gb_centric_of
polysolve.plangen_parallel_of_override_is_enabled: true
polysolve.plangen_parallel_of_solving_enabled: true
objective_function.use_compound_completion_rate_of: false
''')

  # Remove the CWD from sys.path while we load stuff.


In [147]:
def get_partitioning_multileg(enabled: bool,
                              percentage_based_override=False,
                              percentage_based_override_value: float = 0.1):
    """Build the flipr override dict for the partitioning experiment.

    Args:
        enabled: Value written to the graph-partitioning, parallel-OF override
            and parallel-OF solving flags.
        percentage_based_override: Value written to
            ``polysolve.should_use_percentage_based_override``.
        percentage_based_override_value: Value written to
            ``polysolve.percentage_based_override_value``. Defaults to ``0.1``,
            the value that used to be hard-coded.

    Returns:
        A new dict of flipr keys to values; a fresh dict (and a fresh names
        list) is created on every call, so callers may mutate it freely.
    """
    return {
        'gb_centric_of.lambda': 1,
        'polysolve.graph_partitioning_enabled': enabled,
        'polysolve.plangen_parallel_of_enabled': True,
        'polysolve.plangen_parallel_of_names': ['gb_centric_of'],
        'polysolve.plangen_parallel_of_override_is_enabled': enabled,
        'polysolve.plangen_parallel_of_solving_enabled': enabled,
        'objective_function.use_compound_completion_rate_of': False,
        'polysolve.should_use_percentage_based_override': percentage_based_override,
        'polysolve.percentage_based_override_value': percentage_based_override_value,
    }

In [148]:
def create_sim(
    r: StudyRun,
    bq: BatchQuerier,
    study_prefix: str,
    sim_client,
    mitm_perf_version='v5.0.1',
    multileg_sim_version='cd511031ba1485dfb784dfb2db3c2fcb5375f0b7',
    rt_control_tower_sim_version='main',
) -> SimulationJob:
    """Build a configured (not yet started here) SimulationJob for one study run.

    Args:
        r: Study run; its ``city_id``, ``x_vvid``, ``name`` and
            ``params['partitioning_enabled']`` /
            ``params['percentage_based_override']`` are read.
        bq: Batch querier whose ``dfs`` mapping holds the supply and demand
            inputs, keyed by ``study_run_to_cache_name``.
        study_prefix: Prefix used in the job description and as first batch id.
        sim_client: Client handed to the SimulationJob.
        mitm_perf_version: Version set for the ``mitm-perf`` dependency.
        multileg_sim_version: Version set for the ``multileg-sim`` dependency.
        rt_control_tower_sim_version: Version set for the
            ``rt-control-tower-sim`` dependency.

    Returns:
        The configured SimulationJob.

    Note:
        Radars and spinner settings are fetched through the module-level
        ``flipr`` client, which is not a parameter of this function.
    """
    # Inputs previously fetched by the batch querier.
    supply_input = bq.dfs[study_run_to_cache_name('supply', r)]
    demand_input = bq.dfs[study_run_to_cache_name('demand', r)]

    # City-level configuration pulled via the global flipr client.
    radars = helpers.get_radars(r.city_id, flipr)
    _spinner_rates, spinner_params = helpers.get_spinners(
        city_id=r.city_id,
        vvid=r.x_vvid,
        flipr_client=flipr,
    )

    sim_job = SimulationJob(
        sim_uuid=None,
        flow_type='solo',
        description=f'{study_prefix}_{r.name}',
        batch_ids=[study_prefix, str(r.city_id)],
        sim_client=sim_client,
        sim_object=None
    )

    # Pin the version of each simulator dependency.
    for dep_name, dep_version in (
        ('mitm-perf', mitm_perf_version),
        ('multileg-sim', multileg_sim_version),
        ('rt-control-tower-sim', rt_control_tower_sim_version),
    ):
        sim_job.get_dependencies()[dep_name] = dep_version

    sim_job.set_demand_input(demand_input)
    sim_job.set_supply_input(supply_input)
    sim_job.set_city_id(r.city_id)
    helpers.fix_michelangelo_keys(sim_job, r.city_id)

    # Existing flipr overrides are cleared first, then the study's own are applied.
    helpers.clear_fliprs(sim_job)
    control_tower_overrides = {
        'ct_radars_v1': helpers.get_polysolve_radars(radars, r.x_vvid),
        'ct_radars_v2': radars
    }
    multileg_overrides = get_partitioning_multileg(
        enabled=r.params['partitioning_enabled'],
        percentage_based_override=r.params['percentage_based_override'],
    )
    disco_overrides = {
        'objective_value.calculator_type': 'markov_eta_v2',
    }
    sim_job.update_fliprs({
        'rt-control-tower': control_tower_overrides,
        'multileg': multileg_overrides,
        'disco': disco_overrides,
    })

    sim_job.update_settings({
        'spinner_cancel_params': spinner_params,
        'use_production_fliprs': True,
    })

    return sim_job

In [149]:

# (start, end) local-time windows simulated for every city.
time_periods = [
    ("2021-08-23 09:00:00", "2021-08-23 10:00:00"),
    ("2021-08-25 11:00:00", "2021-08-25 11:30:00"),
]

# One StudyRun per (city, time window), cities in the outer position so the
# order matches city_ids x time_periods. Each run gets its own params dict.
sim_study = [
    StudyRun(
        city_id=_city_id,
        x_vvid=X_VVIDS[_city_id],
        start_timestamp_local=start,
        end_timestamp_local=end,
        params={
            "partitioning_enabled": True,
            "percentage_based_override": True,
        },
    )
    for _city_id in city_ids
    for start, end in time_periods
]


In [150]:
# Batch querier for the study; create_sim later reads each run's supply and
# demand inputs from bq.dfs.
bq = create_x_batch_querier(
    sim_study=sim_study,
    qr=qr_client,
)

In [151]:
# Submit the batch query jobs. In the recorded run every entry was reported as
# already loaded from cache (bust_cache = False).
bq.submit_jobs()


zz_data_cache_demand_90_651_2021-08-23 09:00:00_2021-08-23 10:00:00.csv is already loaded from cache and bust_cache = False!
zz_data_cache_supply_90_651_2021-08-23 09:00:00_2021-08-23 10:00:00.csv is already loaded from cache and bust_cache = False!
zz_data_cache_demand_90_651_2021-08-25 11:00:00_2021-08-25 11:30:00.csv is already loaded from cache and bust_cache = False!
zz_data_cache_supply_90_651_2021-08-25 11:00:00_2021-08-25 11:30:00.csv is already loaded from cache and bust_cache = False!
zz_data_cache_demand_146_1934_2021-08-23 09:00:00_2021-08-23 10:00:00.csv is already loaded from cache and bust_cache = False!
zz_data_cache_supply_146_1934_2021-08-23 09:00:00_2021-08-23 10:00:00.csv is already loaded from cache and bust_cache = False!
zz_data_cache_demand_146_1934_2021-08-25 11:00:00_2021-08-25 11:30:00.csv is already loaded from cache and bust_cache = False!
zz_data_cache_supply_146_1934_2021-08-25 11:00:00_2021-08-25 11:30:00.csv is already loaded from cache and bust_cache =

In [152]:
# Wait for the submitted jobs before continuing; the recorded run printed "Done!".
bq.block_until_complete()


Done!


In [153]:
# Stop the notebook if any batch query job failed. The failed jobs are fetched
# once so that the value raised is exactly the value that was checked.
failed_jobs = bq.get_failed_jobs()
if len(failed_jobs) > 0:
    raise ValueError(failed_jobs)

In [154]:
# Pipeline names rt-sim06 .. rt-sim31. This rebinds `pipelines`, which an
# earlier cell builds starting at rt-sim05; this is the list BatchRunner receives.
pipelines = [f'rt-sim{idx:02d}' for idx in range(6, 32)]

# Runner that creates one simulation per StudyRun via create_sim.
br = BatchRunner(
    sim_client=sim_client,
    sim_study=sim_study,
    create_sim=create_sim,
    study_prefix=STUDY_PREFIX,
    pipelines=pipelines,
)

# The keyword arguments match create_sim's parameters — presumably br.start
# forwards them to it; confirm against BatchRunner.
br.start(
    bq=bq,
    study_prefix=STUDY_PREFIX,
    sim_client=sim_client,
    # bust_cache=True,
    multileg_sim_version=MULTILEG_VERSION,
)

Failed to load from cache/partitioning_preXP_90_651_2021-08-23 09:00:00_2021-08-23 10:00:00_partitioning_enabled_True_percentage_based_override_True.sim_info: [Errno 2] No such file or directory: 'cache/partitioning_preXP_90_651_2021-08-23 09:00:00_2021-08-23 10:00:00_partitioning_enabled_True_percentage_based_override_True.sim_info'
Failed to load from cache/partitioning_preXP_90_651_2021-08-25 11:00:00_2021-08-25 11:30:00_partitioning_enabled_True_percentage_based_override_True.sim_info: [Errno 2] No such file or directory: 'cache/partitioning_preXP_90_651_2021-08-25 11:00:00_2021-08-25 11:30:00_partitioning_enabled_True_percentage_based_override_True.sim_info'
Failed to load from cache/partitioning_preXP_146_1934_2021-08-23 09:00:00_2021-08-23 10:00:00_partitioning_enabled_True_percentage_based_override_True.sim_info: [Errno 2] No such file or directory: 'cache/partitioning_preXP_146_1934_2021-08-23 09:00:00_2021-08-23 10:00:00_partitioning_enabled_True_percentage_based_override_Tru

In [128]:
# Inspect the first cached simulation: keep its job in `j` and print the job's
# parameters as YAML (the lookup is done once and reused).
# NOTE(review): this cell's execution count (128) precedes the BatchRunner cell
# (154), so the recorded output likely comes from an earlier study — re-run it.
j = list(br.cache.values())[0].sim_job
print(yaml.dump(j.get_params()))


batchIds:
- PARTITIONING_V1
- '90'
cityId: 90
createdAt: '2021-11-19T07:17:00.585Z'
dependencyGitSha:
  mitm-perf: 773ba4d3d9ed2b60bb59cf41c170a7a9873b9032
  multileg-sim: 2a93d29691ffbd89017ae3fad15c64e34268e20e
  rt-control-tower-sim: 8e836e61dd98b7a23ccae85b613c2b154f542dab
dependencySpecs:
  mitm-perf: v5.0.1
  multileg-sim: 2a93d29691ffbd89017ae3fad15c64e34268e20e
  rt-control-tower-sim: main
description: partitioning_v1_90_651_2021-08-23 09:00:00_2021-08-23 10:00:00_partitioning_enabled_False
errorMessage: 'java.lang.Exception: simulation 34f9f1d3-c366-402b-873b-6d18b52089b5
  terminated'
errorType: SIM_ERROR_UNKNOWN
fliprOverrides:
  disco:
    objective_value.calculator_type: markov_eta_v2
  dispatch:
    jit.radarScanIntervalMs: 10000.0
  motown: {}
  multileg:
    gb_centric_of.lambda: 1.0
    objective_function.use_compound_completion_rate_of: false
    polysolve.graph_partitioning_enabled: false
    polysolve.plangen_parallel_of_enabled: true
    polysolve.plangen_parallel_

In [37]:
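# Disabled cleanup snippet: cancels every cached sim job whose state is not one
# of 'postproc_completed', 'sim_failed' or 'canceled'. Uncomment to run.
# for j in br.cache.values():
#     if j.sim_job.get_state() not in ('postproc_completed', 'sim_failed', 'canceled'):
#         j.sim_job.cancel()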