In [1]:
import pandas as pd
import numpy as np
!pip install s3fs
!pip install google-cloud-storage 
!pip install --upgrade google-cloud-storage
import gzip
import io
from google.cloud import storage
import pandas as pd


Collecting s3fs
  Using cached s3fs-2023.3.0-py3-none-any.whl (27 kB)
Collecting fsspec==2023.3.0
  Using cached fsspec-2023.3.0-py3-none-any.whl (145 kB)
Collecting aiobotocore~=2.4.2
  Using cached aiobotocore-2.4.2-py3-none-any.whl (66 kB)
Collecting aiohttp!=4.0.0a0,!=4.0.0a1
  Using cached aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
Collecting botocore<1.27.60,>=1.27.59
  Using cached botocore-1.27.59-py3-none-any.whl (9.1 MB)
Collecting wrapt>=1.10.10
  Using cached wrapt-1.15.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (78 kB)
Collecting aioitertools>=0.5.1
  Using cached aioitertools-0.11.0-py3-none-any.whl (23 kB)
Collecting multidict<7.0,>=4.5
  Using cached multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)
Collecting frozenlist>=1.1.1
  Using cached frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_6

In [3]:
# create an empty DataFrame to store the results
EPASummary = pd.DataFrame()

# create a client object for interacting with the Google Cloud Storage API
client = storage.Client(project="BEAM CORE")

# set the prefix and bucket name for the files to download
prefix = "output/sfbay/sfbay-pilates-base__2023-03-16_22-27-58_xmg"
bucket_name = "beam-core-outputs"

# set the name of the file to download and create locally
blob_name = prefix + "/ITERS/it.0/0.events.csv.gz"
gz_file = 'example.csv.gz'

# get a reference to the CSV file in the bucket
bucket = client.get_bucket(bucket_name)
blob = bucket.blob(blob_name)

# download the gzipped file to local storage
blob.download_to_filename(gz_file)

# unzip the file and decode the contents into a text stream
with gzip.open(gz_file, 'rb') as f_in:
    with io.TextIOWrapper(f_in, encoding='utf-8') as f_in_text:
        # read the text stream into a pandas DataFrame
        df = pd.read_csv(f_in_text)
        
# set a scale factor to convert the output to the desired units
scalefactor = (0.3)

# set some variables to store the output data
name = 'baseline'
data_name = 'baseline'
carData = {}

# create a new DataFrame containing only the path traversal data
pathTraversal = df.loc[df.type == 'PathTraversal',:].dropna(how='all', axis=1)

# create some additional columns in the path traversal DataFrame
pathTraversal['mode_extended'] = pathTraversal['mode']
pathTraversal['isRH'] = ((pathTraversal['driver'].str.contains('rideHail')== True))
pathTraversal.loc[pathTraversal['isRH'], 'mode_extended'] += '_RH'
pathTraversal['gallons'] = (pathTraversal['primaryFuel'] + pathTraversal['secondaryFuel']) * 8.3141841e-9
pathTraversal['trueOccupancy'] = pathTraversal['numPassengers']
pathTraversal.loc[pathTraversal['mode_extended'] == 'car', 'trueOccupancy'] += 1
pathTraversal.loc[pathTraversal['mode_extended'] == 'walk', 'trueOccupancy'] += 1
pathTraversal.loc[pathTraversal['mode_extended'] == 'bike', 'trueOccupancy'] += 1
pathTraversal['vehicleMiles'] = pathTraversal['length']/1609.34
pathTraversal['passengerMiles'] = (pathTraversal['length'] * pathTraversal['trueOccupancy'])/1609.34
pathTraversal['vehicleHours'] = (pathTraversal['arrivalTime'] - pathTraversal['departureTime'])/3600.
pathTraversal['passengerHours'] = pathTraversal['vehicleHours'] * pathTraversal['trueOccupancy']

# group the path traversal data by mode and calculate some summary statistics
byType = pathTraversal.groupby(['mode_extended']).agg({'vehicleMiles':'sum','vehicleHours':'sum','passengerMiles':'sum','passengerHours':'sum','gallons':'sum'})
byType = byType

# store the results in a dictionary
carData[name] = byType

# calculate some additional summary statistics and store the results in the EPASummary DataFrame
EPASummary.at[name, 'LDV VMT (million)'] = carData[name].loc['car']['vehicleMiles'] / scalefactor / 1000000                                                                   
EPASummary.at[name, 'LDV Energy ((million gallon eq.))'] = carData[name].loc['car']['gallons'] / scalefactor / 1000000

# reading data
bucket_name = "beam-core-outputs"
blob_name = prefix + "/ITERS/it.0/0.realizedMode.csv"
storage_client = storage.Client(project="BEAM CORE")
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(blob_name)

with blob.open("r") as f:
    transit = pd.read_csv(f)


transit_trips = (transit['bike_transit'].sum() + transit['drive_transit'].sum() + transit['ride_hail_transit'].sum() + transit['walk_transit'].sum()) / scalefactor / 1000000
EPASummary.at[name, 'Transit trips (million)'] = transit_trips

# reading data
bucket_name = "beam-core-outputs"
blob_name = prefix + "/ITERS/it.0/0.modeChoice.csv"
storage_client = storage.Client(project="BEAM CORE")
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(blob_name)

with blob.open("r") as f:
    total_transit = pd.read_csv(f)
    
number_of_trips = 0  # initialize a variable to store the number of trips

# loop through 24 bins and add up the number of trips for each mode of transit
for i in range(24):
    # bike transit trips
    number_of_trips += total_transit[total_transit['Modes'] == 'bike_transit']['Bin_' + str(i)].reset_index()['Bin_' + str(i)][0]
    # drive transit trips
    number_of_trips += total_transit[total_transit['Modes'] == 'drive_transit']['Bin_' + str(i)].reset_index()['Bin_' + str(i)][0]
    # ride hail transit trips
    number_of_trips += total_transit[total_transit['Modes'] == 'ride_hail_transit']['Bin_' + str(i)].reset_index()['Bin_' + str(i)][0]
    # walk transit trips
    number_of_trips += total_transit[total_transit['Modes'] == 'walk_transit']['Bin_' + str(i)].reset_index()['Bin_' + str(i)][0]

# calculate the number of denied transit trips and update the EPASummary dataframe
denied_transit_trips = number_of_trips / scalefactor / 1000000 - transit_trips
EPASummary.at[name, 'Denied Transit trips (million)'] = denied_transit_trips
EPASummary.at[name, 'Percentage of still choosing transit'] = 1.00

# extract bikeshare data and update the EPASummary dataframe
pathTraversal = df[df['type'] == 'PathTraversal']
bikePathTraversal = pathTraversal[pathTraversal['mode'] == 'bike']
sharedbike = bikePathTraversal[bikePathTraversal['vehicle'].str.contains('bay')]
EPASummary.at[name, 'Bikeshare Trips (000s)'] = len(sharedbike) / scalefactor / 1000    

# extract mode choice data and update the EPASummary dataframe with average distance traveled for each mode of transit
modeChoice = df[df['type'] == 'ModeChoice']
EPASummary.at[name, 'walk average distance'] = modeChoice[modeChoice['mode'] == 'walk']['length'].mean() * 0.000621371
EPASummary.at[name, 'car average distance'] = modeChoice[modeChoice['mode'] == 'car']['length'].mean() * 0.000621371
EPASummary.at[name, 'walk_transit average distance'] = modeChoice[modeChoice['mode'] == 'walk_transit']['length'].mean() * 0.000621371
EPASummary.at[name, 'drive_transit average distance'] = modeChoice[modeChoice['mode'] == 'drive_transit']['length'].mean() * 0.000621371
EPASummary.at[name, 'ride_hail_pooled average distance'] = modeChoice[modeChoice['mode'] == 'ride_hail_pooled']['length'].mean() * 0.000621371
EPASummary.at[name, 'ride_hail_transit average distance'] = modeChoice[modeChoice['mode'] == 'ride_hail_transit']['length'].mean() * 0.000621371
EPASummary.at[name, 'ride_hail average distance'] = modeChoice[modeChoice['mode'] == 'ride_hail']['length'].mean() * 0.000621371
                                                                                                
EPASummary.at[name, 'bike_transit average distance'] = modeChoice[modeChoice['mode'] == 'bike_transit']['length'].mean() * 0.000621371
bikeModeChoice = modeChoice[modeChoice['mode'] == 'bike_transit']
bikeshareToTransit = bikeModeChoice[bikeModeChoice['legVehicleIds'].str.contains('bay')]
EPASummary.at[name, 'Bikeshare-to-Transit Trips (000s)'] = len(bikeshareToTransit) / scalefactor / 1000    
EPASummary.at[name, 'Bikeshare VMT (000s)'] = sharedbike['length'].sum() / 1609.34 / scalefactor / 1000
nonsharedbike = bikePathTraversal[bikePathTraversal['vehicle'].str.contains('bay') == False]
EPASummary.at[name, 'Personal Bike Trips (000s)'] = len(nonsharedbike) / scalefactor / 1000
personalToTransit = bikeModeChoice[bikeModeChoice['legVehicleIds'].str.contains('bay') == False]
EPASummary.at[name, 'Personal Bike-to-Transit Trips (000s)'] = len(personalToTransit) / scalefactor / 1000
EPASummary.at[name, 'Personal Bike VMT(000s)'] = nonsharedbike['length'].sum() / 1609.34 / scalefactor / 1000
travel_time = (df[(df['type'] == 'PathTraversal') & (df['mode'] == 'car') & (df['departureTime'] != df['arrivalTime'])]['arrivalTime'] - df[(df['type'] == 'PathTraversal') & (df['mode'] == 'car') & (df['departureTime'] != df['arrivalTime'])]['departureTime']).mean() / 60
EPASummary.at[name, 'LDV Avg Travel Time (min)'] = travel_time
bikeData = pathTraversal.loc[pathTraversal['mode'] == 'bike']
walkData = pathTraversal.loc[pathTraversal['mode'] == 'walk']
walkData.reset_index(inplace=True)
bikeData.reset_index(inplace=True)
bikeData = bikeData[bikeData.vehicle.str.startswith('bay')]
bikeData.reset_index(inplace=True)
bikeData = bikeData.drop('level_0', axis=1)

walkArrivalTime = walkData['arrivalTime']
count = 0
length = 0
for i in range(len(bikeData)):
    if (bikeData['departureTime'][i] in walkArrivalTime.tolist()):
        walkRecord = walkData.loc[walkData['arrivalTime'] == bikeData['departureTime'][i]]
        walkRecord.reset_index(inplace=True)
        walkRecord = walkRecord.drop('level_0', axis=1)    
        for j in range(len(walkRecord)):    
            # probably we could filter out the distance equal to 0
            if (bikeData['driver'][i] == walkRecord['driver'][j]):
                count += 1
                length += walkRecord['length'][j] / 1609.34
            else:
                continue
if count == 0:
    avg_distance = 0
else:
    avg_distance = length / count
EPASummary.at[name, 'average walk distance to shared bikes(mile)'] = avg_distance

# reading data
bucket_name = "beam-core-outputs"
blob_name = prefix + "/ITERS/it.0/0.realizedMode.csv"
storage_client = storage.Client(project="BEAM CORE")
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(blob_name)

with blob.open("r") as f:
    realizedModeChoice = pd.read_csv(f)
    
# reading data
bucket_name = "beam-core-outputs"
blob_name = prefix + "/ITERS/it.0/0.modeChoice.csv"
storage_client = storage.Client(project="BEAM CORE")
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(blob_name)

with blob.open("r") as f:
    modeChoice = pd.read_csv(f)

Realized_Car_Trips = realizedModeChoice['car'].sum() / scalefactor
EPASummary.at[name, 'Realized Car Trips'] = Realized_Car_Trips
number_of_car_trips = 0
number_of_car_trips = 0
for i in range(30):
    number_of_car_trips += modeChoice[modeChoice['Modes'] == 'car']['Bin_' + str(i)].reset_index()['Bin_' + str(i)][0]    
number_of_car_trips = number_of_car_trips / scalefactor
Realized_RH_Trips = realizedModeChoice['ride_hail'].sum() +  realizedModeChoice['ride_hail_pooled'].sum()
Realized_RH_Trips = Realized_RH_Trips / scalefactor
EPASummary.at[name, 'Realized Ridehail Trips'] = Realized_RH_Trips
number_of_RH_trips = 0
for i in range(30):
    number_of_RH_trips += modeChoice[modeChoice['Modes'] == 'ride_hail']['Bin_' + str(i)].reset_index()['Bin_' + str(i)][0]
    number_of_RH_trips += modeChoice[modeChoice['Modes'] == 'ride_hail_pooled']['Bin_' + str(i)].reset_index()['Bin_' + str(i)][0]
number_of_RH_trips = number_of_RH_trips / scalefactor
Denied_RH_Trips = number_of_RH_trips - Realized_RH_Trips
EPASummary.at[name, 'Denied Ridehail Trips'] = Denied_RH_Trips
carData = {}
pathTraversal = df.loc[df.type == 'PathTraversal',:].dropna(how='all', axis=1)
pathTraversal['mode_extended'] = pathTraversal['mode']
pathTraversal['isRH'] = ((pathTraversal['driver'].str.contains('rideHail')== True))
pathTraversal.loc[pathTraversal['isRH'], 'mode_extended'] += '_RH'

pathTraversal.loc[pathTraversal.primaryFuelType == 'Gasoline', 'emission'] = pathTraversal['primaryFuel'] * 8.3141841e-9 * 8.89e-3
pathTraversal.loc[pathTraversal.primaryFuelType == 'Diesel', 'emission'] = pathTraversal['primaryFuel'] * 8.3141841e-9 * 10.180e-3
pathTraversal.loc[pathTraversal.primaryFuelType == 'Biod mnmsdxciesel', 'emission'] = pathTraversal['primaryFuel'] * 8.3141841e-9 * 10.180e-3
pathTraversal.loc[pathTraversal.primaryFuelType == 'Electricity', 'emission'] = pathTraversal['primaryFuel'] * 2.77778e-10 * 947.2 * 0.0005
pathTraversal.loc[pathTraversal.primaryFuelType == 'Food', 'emission'] = pathTraversal['primaryFuel'] * 8.3141841e-9 * 0

pathTraversal.loc[pathTraversal.secondaryFuelType == 'Gasoline', 'emission'] = pathTraversal['emission'] + pathTraversal['secondaryFuel'] * 8.3141841e-9 * 8.89e-3

byType = pathTraversal.groupby(['mode_extended']).agg({'emission':'sum'})
byType = byType
carData[name] = byType
# pd.concat(carData).emission.unstack().to_csv('out/emission0.3pop-higher-transit.csv')
EPASummary.at[name, 'CO2 Emissions. (1000s tons)'] = carData[name]['emission'].sum() / 1000

filePath = data_name

# reading data
bucket_name = "beam-core-outputs"
blob_name = prefix + "/realizedModeChoice.csv"
storage_client = storage.Client(project="BEAM CORE")
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(blob_name)

with blob.open("r") as f:
    realizedmode = pd.read_csv(f)


realizedmode['transit'] = realizedmode['bike_transit'] + realizedmode['drive_transit'] + realizedmode['ride_hail_transit'] + realizedmode['walk_transit']
# realizedmode['ride_hail2'] = realizedmode['ride_hail_pooled'] + realizedmode['ride_hail']
# modesplit = realizedmode[['bike', 'car', 'ride_hail2', 'walk', 'transit']]
modesplit = realizedmode
# modeSplitTable = modesplit.div(modesplit.sum(axis=1), axis=0)
modeSplitTable = modesplit / scalefactor

# EPASummary.at[name, 'bike'] = modeSplitTable['bike'][0]
# EPASummary.at[name, 'bike_transit'] = modeSplitTable['bike_transit'][0]
EPASummary.at[name, 'car'] = modeSplitTable['car'][0]
EPASummary.at[name, 'drive_transit'] = modeSplitTable['drive_transit'][0]
EPASummary.at[name, 'ride_hail'] = modeSplitTable['ride_hail'][0]
EPASummary.at[name, 'ride_hail_pooled'] = modeSplitTable['ride_hail_pooled'][0]
EPASummary.at[name, 'ride_hail_transit'] = modeSplitTable['ride_hail_transit'][0]
EPASummary.at[name, 'walk'] = modeSplitTable['walk'][0]
EPASummary.at[name, 'walk_transit'] = modeSplitTable['walk_transit'][0]


EPASummary.at[name, 'Bikeshare Trips'] = len(sharedbike) / scalefactor
EPASummary.at[name, 'Personalbike Trips'] = modeSplitTable['bike'][0] - len(sharedbike) / scalefactor
EPASummary.at[name, 'Bikeshare-to-Transit Trips'] = len(bikeshareToTransit) / scalefactor
EPASummary.at[name, 'Personalbike-to-Transit Trips'] = modeSplitTable['bike_transit'][0] - len(bikeshareToTransit) / scalefactor
EPASummary.at[name, 'transit'] = modeSplitTable['drive_transit'][0] + modeSplitTable['ride_hail_transit'][0] + modeSplitTable['walk_transit'][0] + modeSplitTable['bike_transit'][0]


del df
EPASummary.to_csv('EPA_summarytablez_0.3pop_scaling_factor.csv')

  df = pd.read_csv(f_in_text)
