### Skims (Wait Times) - Preprocessing, Step 1
**Author:** Carlos Guirado
**Date:** Feb 2023
**Objective:** Download skims in bulk and pre-process them to obtain merged RH solo and RH pooled wait-time summary files, one per time period.

In [44]:
# Import libraries
import os
import pandas as pd
import numpy as np
import geopandas as gpd
import h5py
import boto.s3
import glob
import boto3
from zipfile import ZipFile
import shutil
import io

In [45]:
import pyreadr
import openmatrix as omx

In [46]:
# Note: see https://docs.google.com/spreadsheets/d/1tiAiqI2Wvvmu-7C5RAOsYyOXYbHEpoAa2RGs10IQRM8/edit#gid=121792557
# For naming convention
# This numeric system follows the rows in the spreadsheet

In [47]:
# Full S3 URLs of the PILATES output folder for each model run.
# The list order matters: index 0 corresponds to row 3 of the master
# spreadsheet (see the download cell, which names files skims_{index + 3}.omx).
links = [
    's3://beam-outputs/pilates-outputs/sfbay_1fleet_100price_100fleet_30pct_20230222/',
    's3://beam-outputs/pilates-outputs/sfbay_baseline_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_100price_100fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_100price_164fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_100price_200fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_100price_400fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_100price_1000fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_2fleets_100price_164fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_2fleets_100price_200fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_2fleets_100price_400fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_2fleets_100price_1000fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_2fleets_47price_100fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_47price_100fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_47price_164fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_47price_200fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_47price_400fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_47price_1000fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_2fleets_27price_100fleet_30pct_20230221/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_27price_100fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_27price_164fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_27price_200fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_27price_400fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_27price_1000fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_2fleets_12_5price_100fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_12_5price_100fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_12_5price_164fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_12_5price_200fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_12_5price_400fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_12_5price_1000fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_2fleets_6_25_price_100fleet_30pct_20230223/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_6_25price_100fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_6_25price_164fleet_30pct_20230218/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_6_25price_200fleet_30pct_20230223/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_6_25_price_400fleet_30pct_20230223/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_6_25_price_1000fleet_30pct_20230223/',
    's3://beam-outputs/pilates-outputs/sfbay_1fleet_mix_price_100fleet_30pct_20230223/',
    's3://beam-outputs/pilates-outputs/sfbay_2fleets_mix_price_100fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_mixprice_100fleet_30pct_20230223/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_mix_price_164fleet_30pct_20230226/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_mixprice_200fleet_30pct_20230223/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_mixprice_400fleet_30pct_20230223/',
    's3://beam-outputs/pilates-outputs/sfbay_5fleets_mixprice_1000fleet_30pct_20230223/',
]

In [48]:
# S3 object-key prefixes (bucket name stripped) for the same runs as `links`,
# in the same order. Derived from `links` instead of being maintained by hand:
# the previous hand-copied list was exactly `links` minus the
# 's3://beam-outputs/' prefix, and two 42-entry copies are easy to let drift.
# Requires the cell defining `links` to have been run first.
S3_BUCKET_URL = 's3://beam-outputs/'
links2 = [
    url[len(S3_BUCKET_URL):] if url.startswith(S3_BUCKET_URL) else url
    for url in links
]

Step 1: Download ALL skims (works better to have them locally) - only run this bit of code once!

In [49]:
#print(links2[0])

pilates-outputs/sfbay_1fleet_100price_100fleet_30pct_20230222/


In [50]:
#links_to_skims = [x + 'activitysim/data/skims.omx' for x in links]

In [51]:
#links2_to_skims = [x + 'activitysim/data/skims.omx' for x in links2]

In [52]:
#print(links2_to_skims[0])

pilates-outputs/sfbay_1fleet_100price_100fleet_30pct_20230222/activitysim/data/skims.omx


In [53]:
#s3 = boto3.client("s3")
#for i in range(0,len(links2_to_skims)):
#    j = i +3 # row 0 in list corresponds to row 3 in master spreadsheet
#    key = links2_to_skims[i]
#    s3.download_file(Filename=f"C:\\Users\\Administrator\\Downloads\\Feb2023Runs\\skims_{j}.omx",Bucket="beam-outputs", Key=key)

In [54]:
#s3.download_file(Filename="C:\\Users\\Administrator\\Downloads\\Feb2023Runs\\skims1.omx",Bucket="beam-outputs", Key=key)

Step 2: Extract RH solo and pooled wait times by time period

Assumes skims have been downloaded locally

In [58]:
#key = r"C:\Users\Administrator\Downloads\base-skims.omx"
def grep(l,s):
    """Return, in their original order, the items of `l` for which `s in item` is true."""
    return list(filter(lambda item: s in item, l))
import matplotlib.pyplot as plt

In [64]:
# Time-period codes; each is used as the suffix of the skim matrix names
# (RH_POOLED_WAIT__{period}, RH_SOLO_WAIT__{period}) and of the output CSV names.
time_periods = ['AM', 'PM', 'MD', 'EA', 'EV']

In [61]:
# Row numbers 3-44 follow the master spreadsheet (see Anna's proposal tab):
# https://docs.google.com/spreadsheets/d/1tiAiqI2Wvvmu-7C5RAOsYyOXYbHEpoAa2RGs10IQRM8/edit#gid=121792557
# NOTE(review): the original comment here said "3: baseline", but the download
# cell maps list index 0 (sfbay_1fleet_100price_...) to skims_3.omx, which would
# put the baseline run in skims_4.omx -- confirm against the spreadsheet.


def _summarize_wait_times(wait_matrix, stats_csv, figure_png):
    """Write summary statistics and a histogram for one wait-time skim matrix.

    Parameters
    ----------
    wait_matrix : array-like
        Matrix object read from the OMX file; it is passed to ``pd.DataFrame``.
    stats_csv : str
        Path of the CSV that receives ``Series.describe()`` of the kept values.
    figure_png : str
        Path of the histogram image (mean drawn in red, median in yellow).
    """
    # Only column index 1 of the matrix is analysed.
    # NOTE(review): confirm a single column is intended rather than every cell.
    waits = pd.DataFrame(wait_matrix).iloc[:, 1]
    # Values equal to 6 are excluded -- presumably a default/placeholder wait
    # time; TODO confirm the meaning of this constant.
    waits = waits[waits != 6]
    waits.describe().to_csv(stats_csv)

    fig, ax = plt.subplots()
    ax.hist(waits, bins=100)
    ax.axvline(waits.mean(), color='r', linestyle='dashed', linewidth=1)
    ax.axvline(waits.median(), color='y', linestyle='dashed', linewidth=1)
    fig.savefig(figure_png)
    # Close the figure so hundreds of figures do not accumulate in memory.
    plt.close(fig)


# Loop-invariant, so read once instead of on every iteration. Neither `df` nor
# `list_vars` is used by the extraction below; they are kept in case other
# cells rely on them.
df = pd.read_csv(r"C:\Users\Administrator\Documents\Carlos\skim_vars.txt", sep=',')
list_vars = list(df.columns.values.tolist())

for row in range(3, 45):
    # Open each skim file once (not once per time period) and always close it.
    myfile = omx.open_file(f'C:/Users/Administrator/Downloads/Feb2023Runs/skims_{row}.omx')
    try:
        for period in time_periods:
            # The figure names now include the period; previously every period
            # wrote to the same waits_*_{row} image and overwrote the last one.
            # CSV names are unchanged because the merge cells read them back.
            _summarize_wait_times(
                myfile[f'RH_POOLED_WAIT__{period}'],
                f'pooled-waits-{row}_{period}.csv',
                f'waits_pooled_{row}_{period}.png',
            )
            _summarize_wait_times(
                myfile[f'RH_SOLO_WAIT__{period}'],
                f'solo-waits{row}_{period}.csv',
                f'waits_solo_{row}_{period}.png',
            )
    finally:
        myfile.close()

<Figure size 640x480 with 0 Axes>

In [71]:
print(time_periods)

['AM', 'PM', 'MD', 'EA', 'EV']


In [99]:
# Names of the per-run summary CSVs, grouped by service (pooled / solo) and
# time period. Row numbers 3-44 follow the master-spreadsheet convention.
# Note the naming asymmetry: pooled files have a hyphen before the row number
# ('pooled-waits-3_AM.csv'), solo files do not ('solo-waits3_AM.csv').
run_rows = range(3, 45)

all_filenames_pooled_am = [f'pooled-waits-{i}_AM.csv' for i in run_rows]
all_filenames_pooled_pm = [f'pooled-waits-{i}_PM.csv' for i in run_rows]
all_filenames_pooled_md = [f'pooled-waits-{i}_MD.csv' for i in run_rows]
all_filenames_pooled_ea = [f'pooled-waits-{i}_EA.csv' for i in run_rows]
all_filenames_pooled_ev = [f'pooled-waits-{i}_EV.csv' for i in run_rows]

all_filenames_solo_am = [f'solo-waits{i}_AM.csv' for i in run_rows]
all_filenames_solo_pm = [f'solo-waits{i}_PM.csv' for i in run_rows]
all_filenames_solo_md = [f'solo-waits{i}_MD.csv' for i in run_rows]
all_filenames_solo_ea = [f'solo-waits{i}_EA.csv' for i in run_rows]
all_filenames_solo_ev = [f'solo-waits{i}_EV.csv' for i in run_rows]

In [100]:
def _concat_wait_summaries(filenames):
    """Read each per-run summary CSV in `filenames` and stack them in list order."""
    return pd.concat([pd.read_csv(name) for name in filenames])


# One stacked table per service (pooled / solo) and time period.
combined_csv_pooled_am = _concat_wait_summaries(all_filenames_pooled_am)
combined_csv_solo_am = _concat_wait_summaries(all_filenames_solo_am)

combined_csv_pooled_pm = _concat_wait_summaries(all_filenames_pooled_pm)
combined_csv_solo_pm = _concat_wait_summaries(all_filenames_solo_pm)

# Fixed: the EA tables were previously built from the AM filename lists, so the
# EA outputs were silent copies of the AM ones.
combined_csv_pooled_ea = _concat_wait_summaries(all_filenames_pooled_ea)
combined_csv_solo_ea = _concat_wait_summaries(all_filenames_solo_ea)

combined_csv_pooled_ev = _concat_wait_summaries(all_filenames_pooled_ev)
combined_csv_solo_ev = _concat_wait_summaries(all_filenames_solo_ev)

combined_csv_pooled_md = _concat_wait_summaries(all_filenames_pooled_md)
combined_csv_solo_md = _concat_wait_summaries(all_filenames_solo_md)

In [101]:
# Write the merged pooled wait-time summaries, one CSV per time period.
pooled_outputs = {
    'all_waits_pooled_am.csv': combined_csv_pooled_am,
    'all_waits_pooled_pm.csv': combined_csv_pooled_pm,
    'all_waits_pooled_ea.csv': combined_csv_pooled_ea,
    'all_waits_pooled_ev.csv': combined_csv_pooled_ev,
    'all_waits_pooled_md.csv': combined_csv_pooled_md,
}
for pooled_csv_name, pooled_table in pooled_outputs.items():
    pooled_table.to_csv(pooled_csv_name)

In [102]:
# Write the merged solo wait-time summaries, one CSV per time period.
solo_outputs = {
    'all_waits_solo_am.csv': combined_csv_solo_am,
    'all_waits_solo_pm.csv': combined_csv_solo_pm,
    'all_waits_solo_ea.csv': combined_csv_solo_ea,
    'all_waits_solo_ev.csv': combined_csv_solo_ev,
    'all_waits_solo_md.csv': combined_csv_solo_md,
}
for solo_csv_name, solo_table in solo_outputs.items():
    solo_table.to_csv(solo_csv_name)

Output: at this point, there are 5 RH solo and 5 RH pooled files (one per time period), each condensing all the model runs.