In [1]:
import pandas as pd
import numpy as np
import copy

# Download Employment Center Data

In [2]:
ec_list = pd.read_csv('ec_list.csv')

# Trip Summary Data Excel

In [3]:
replica = pd.read_csv("replica-first_study-03_21_23-trips_dataset.csv")#, nrows=10000)
replica['dummy_val'] = 1

In [4]:
replica['trip_duration_minutes'] = replica['trip_duration_minutes'].replace(0,1)
replica['trip_duration'] = pd.cut(x=replica['trip_duration_minutes'], bins=[0,5,15,30,60,np.inf], labels=['<=5 mins', '>5-15 mins', '>15-30 mins','>30-60 mins','>60 mins'], ordered =True)

In [5]:
replica['trip_distance'] = pd.cut(x=replica['trip_distance_miles'], bins=[-1,1,2,5,10,25,np.inf], labels=['<=1 mile','>1-2 miles','>2-5 miles','>5-10 miles','>10-25 miles','>25 miles'], ordered =True)

In [6]:
input_dictionaries = {
    'trips_by_primary_mode': {'agg_column':'primary_mode', 'column_header': 'Mode'},
    'trips_by_purpose': {'agg_column': 'trip_purpose', 'column_header': 'Trip Purpose'},
    'trips_by_previous_trip': {'agg_column':'previous_trip_purpose', 'column_header': 'Prev Trip Purpose'},
    'trips_by_vehicle_type': {'agg_column': 'vehicle_type', 'column_header': 'Vehicle Type'},
    'trips_by_origin_land_use': {'agg_column': 'origin_land_use', 'column_header': 'Origin Land Use'}, 
    'trips_by_destination_land_use': {'agg_column': 'destination_land_use', 'column_header': 'Destination Land Use'},
    'trips_by_destination_builidng_use': {'agg_column': 'destination_building_use', 'column_header': 'Destination Building Use'},
    'trips_by_duration': {'agg_column': 'trip_duration', 'column_header': 'Trip Duration'},
    'trips_by_distance': {'agg_column': 'trip_distance', 'column_header': 'Trip Distance'},
    'trips_by_origin_building_use': {'agg_column': 'origin_building_use', 'column_header': 'Origin Building Use'}
}

In [7]:
def number_of_trips_by_manipulation(agregation_column):
    groupby_vals = ['destination_custom', agregation_column] 
    temp_df = replica[groupby_vals + ['dummy_val']]
    grouped = temp_df.groupby(groupby_vals).agg({'count'})
    grouped = grouped.reset_index()
    grouped.columns = ['employment_center', 'agg_column', 'count']
    return grouped

In [8]:
def pivot_table_output(df):
    output = pd.pivot_table(df, values='count', index=['employment_center'], columns=['agg_column'])
    output.columns.name = ''
    return output

In [9]:
def add_header(df, column_header):
    header = pd.MultiIndex.from_product([[column_header], df.columns])
    df.columns = header
    return df

In [10]:
def create_output(key):
    grouped_data = number_of_trips_by_manipulation(agregation_column=input_dictionaries[key]['agg_column'])
    
    pivoted_table = pivot_table_output(grouped_data)
    if key == 'trips_by_previous_trip':
        pivoted_table = pivoted_table.rename(columns={'\\N': 'unknown'})
        #pivoted_table = pivoted_table.drop('\\N', axis = 1)

    header_added = add_header(df=pivoted_table, column_header=input_dictionaries[key]['column_header'])

    return header_added

In [11]:
output_df = pd.DataFrame()

for input_key in input_dictionaries.keys():
    if output_df.empty:
        output_df = create_output(key=input_key)
        output_df['Total'] = output_df.sum(axis=1)
    else:
        output_df = output_df.merge(create_output(key=input_key), how='inner', left_index=True, right_index=True)
    print(f"{input_key}: {create_output(key=input_key).shape}")

# Merge output with the Employment Centers data 
output_df = output_df.merge(ec_list, how='left', left_index=True, right_on='EC_Name')

# Sort by EC ID values
output_df['EC_ID'] = output_df['EC_ID'].astype(int)
output_df = output_df.sort_values(['EC_ID']).reset_index(drop=True)
output_df = output_df.set_index(['EC_ID', 'EC_Name', 'Tier'])

# Turn back into a multiindexed columns
output_df.columns = pd.MultiIndex.from_tuples(output_df.columns)

# Move the "Total" column to the front of the dataframe
col = output_df.pop(('Total',''))
output_df.insert(0, ('Total',''), col)

trips_by_primary_mode: (102, 8)
trips_by_purpose: (102, 13)
trips_by_previous_trip: (102, 13)
trips_by_vehicle_type: (102, 4)
trips_by_origin_land_use: (102, 15)
trips_by_destination_land_use: (102, 15)
trips_by_destination_builidng_use: (102, 14)
trips_by_duration: (102, 5)
trips_by_distance: (102, 6)
trips_by_origin_building_use: (102, 14)


  return merge(


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Mode,Mode,Mode,Mode,Mode,Mode,Mode,Mode,Total,Trip Purpose,...,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,biking,carpool,commercial,on_demand_auto,other_travel_mode,private_auto,public_transit,walking,Unnamed: 11_level_1,commercial,...,industrial,multi_family,non_retail_attraction,office,open_space,other,retail,single_family,transportation_utilities,unknown
EC_ID,EC_Name,Tier,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
1,San Diego Airport,3,426.0,34410.0,4567.0,4583.0,25987.0,86488.0,2606.0,16581.0,175648.0,4661.0,...,1957.0,14106.0,6142.0,9891.0,2035.0,69.0,72347.0,39118.0,11417.0,5192.0
2,Alpine,4,75.0,4283.0,1510.0,316.0,288.0,14130.0,62.0,2427.0,23091.0,1523.0,...,322.0,1099.0,644.0,2016.0,167.0,45.0,6662.0,8025.0,224.0,1539.0
3,Barrio Logan,4,153.0,5949.0,1846.0,1129.0,609.0,25423.0,3580.0,5840.0,44529.0,1909.0,...,1455.0,5043.0,1176.0,2731.0,383.0,26.0,14261.0,12268.0,653.0,1926.0
4,Carlsbad Palomar Airport,2,502.0,30703.0,4802.0,524.0,1131.0,46240.0,350.0,8234.0,92486.0,4854.0,...,4264.0,8576.0,3449.0,6692.0,580.0,39.0,19495.0,38933.0,1096.0,5022.0
5,Carlsbad State Beach,3,137.0,21568.0,1749.0,659.0,1562.0,28484.0,335.0,4873.0,59367.0,1858.0,...,1281.0,4613.0,3686.0,5173.0,620.0,31.0,19003.0,18768.0,964.0,2036.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,Jamul Casino,6,4.0,772.0,293.0,78.0,95.0,3449.0,29.0,324.0,5044.0,300.0,...,53.0,280.0,227.0,378.0,37.0,20.0,1467.0,1729.0,53.0,301.0
105,Pala Casino Spa Resort,6,24.0,2713.0,130.0,55.0,429.0,2837.0,18.0,770.0,6976.0,132.0,...,94.0,379.0,293.0,417.0,54.0,140.0,2444.0,2205.0,224.0,193.0
106,Sycuan Casino Resort,6,5.0,1586.0,123.0,325.0,269.0,4269.0,,818.0,7395.0,123.0,...,59.0,612.0,362.0,473.0,159.0,40.0,2590.0,2336.0,91.0,143.0
107,Valley View Casino & Hotel,6,2.0,1160.0,442.0,71.0,134.0,1798.0,14.0,209.0,3830.0,485.0,...,43.0,166.0,161.0,201.0,28.0,17.0,1010.0,1419.0,46.0,490.0


In [23]:
# Move the "Total" column to the front of the dataframe
col = output_df.pop(('Total',''))
output_df.insert(0, ('Total',''), col)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Total,Mode,Mode,Mode,Mode,Mode,Mode,Mode,Mode,Trip Purpose,...,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,biking,carpool,commercial,on_demand_auto,other_travel_mode,private_auto,public_transit,walking,commercial,...,industrial,multi_family,non_retail_attraction,office,open_space,other,retail,single_family,transportation_utilities,unknown
EC_ID,EC_Name,Tier,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
1,San Diego Airport,3,175648.0,426.0,34410.0,4567.0,4583.0,25987.0,86488.0,2606.0,16581.0,4661.0,...,1957.0,14106.0,6142.0,9891.0,2035.0,69.0,72347.0,39118.0,11417.0,5192.0
2,Alpine,4,23091.0,75.0,4283.0,1510.0,316.0,288.0,14130.0,62.0,2427.0,1523.0,...,322.0,1099.0,644.0,2016.0,167.0,45.0,6662.0,8025.0,224.0,1539.0
3,Barrio Logan,4,44529.0,153.0,5949.0,1846.0,1129.0,609.0,25423.0,3580.0,5840.0,1909.0,...,1455.0,5043.0,1176.0,2731.0,383.0,26.0,14261.0,12268.0,653.0,1926.0
4,Carlsbad Palomar Airport,2,92486.0,502.0,30703.0,4802.0,524.0,1131.0,46240.0,350.0,8234.0,4854.0,...,4264.0,8576.0,3449.0,6692.0,580.0,39.0,19495.0,38933.0,1096.0,5022.0
5,Carlsbad State Beach,3,59367.0,137.0,21568.0,1749.0,659.0,1562.0,28484.0,335.0,4873.0,1858.0,...,1281.0,4613.0,3686.0,5173.0,620.0,31.0,19003.0,18768.0,964.0,2036.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,Jamul Casino,6,5044.0,4.0,772.0,293.0,78.0,95.0,3449.0,29.0,324.0,300.0,...,53.0,280.0,227.0,378.0,37.0,20.0,1467.0,1729.0,53.0,301.0
105,Pala Casino Spa Resort,6,6976.0,24.0,2713.0,130.0,55.0,429.0,2837.0,18.0,770.0,132.0,...,94.0,379.0,293.0,417.0,54.0,140.0,2444.0,2205.0,224.0,193.0
106,Sycuan Casino Resort,6,7395.0,5.0,1586.0,123.0,325.0,269.0,4269.0,,818.0,123.0,...,59.0,612.0,362.0,473.0,159.0,40.0,2590.0,2336.0,91.0,143.0
107,Valley View Casino & Hotel,6,3830.0,2.0,1160.0,442.0,71.0,134.0,1798.0,14.0,209.0,485.0,...,43.0,166.0,161.0,201.0,28.0,17.0,1010.0,1419.0,46.0,490.0


In [14]:
output_df.to_csv('trip_summary_data_integers.csv')

In [None]:
output_df.head()

# Percent Output

In [26]:
percent_df = copy.deepcopy(output_df)

# define the column to divide by
col_to_divide_by = ('Total','')

# divide all columns by the column defined above
df_result = percent_df.div(percent_df[col_to_divide_by], axis=0)*100
df_result = df_result.round(1)

# update the original DataFrame by replacing all columns except the original one with the divided values
percent_df[percent_df.columns.difference([col_to_divide_by])] = df_result[df_result.columns.difference([col_to_divide_by])]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Total,Mode,Mode,Mode,Mode,Mode,Mode,Mode,Mode,Trip Purpose,...,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use,Origin Building Use
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,biking,carpool,commercial,on_demand_auto,other_travel_mode,private_auto,public_transit,walking,commercial,...,industrial,multi_family,non_retail_attraction,office,open_space,other,retail,single_family,transportation_utilities,unknown
EC_ID,EC_Name,Tier,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
1,San Diego Airport,3,175648.0,0.2,19.6,2.6,2.6,14.8,49.2,1.5,9.4,2.7,...,1.1,8.0,3.5,5.6,1.2,0.0,41.2,22.3,6.5,3.0
2,Alpine,4,23091.0,0.3,18.5,6.5,1.4,1.2,61.2,0.3,10.5,6.6,...,1.4,4.8,2.8,8.7,0.7,0.2,28.9,34.8,1.0,6.7
3,Barrio Logan,4,44529.0,0.3,13.4,4.1,2.5,1.4,57.1,8.0,13.1,4.3,...,3.3,11.3,2.6,6.1,0.9,0.1,32.0,27.6,1.5,4.3
4,Carlsbad Palomar Airport,2,92486.0,0.5,33.2,5.2,0.6,1.2,50.0,0.4,8.9,5.2,...,4.6,9.3,3.7,7.2,0.6,0.0,21.1,42.1,1.2,5.4
5,Carlsbad State Beach,3,59367.0,0.2,36.3,2.9,1.1,2.6,48.0,0.6,8.2,3.1,...,2.2,7.8,6.2,8.7,1.0,0.1,32.0,31.6,1.6,3.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,Jamul Casino,6,5044.0,0.1,15.3,5.8,1.5,1.9,68.4,0.6,6.4,5.9,...,1.1,5.6,4.5,7.5,0.7,0.4,29.1,34.3,1.1,6.0
105,Pala Casino Spa Resort,6,6976.0,0.3,38.9,1.9,0.8,6.1,40.7,0.3,11.0,1.9,...,1.3,5.4,4.2,6.0,0.8,2.0,35.0,31.6,3.2,2.8
106,Sycuan Casino Resort,6,7395.0,0.1,21.4,1.7,4.4,3.6,57.7,,11.1,1.7,...,0.8,8.3,4.9,6.4,2.2,0.5,35.0,31.6,1.2,1.9
107,Valley View Casino & Hotel,6,3830.0,0.1,30.3,11.5,1.9,3.5,46.9,0.4,5.5,12.7,...,1.1,4.3,4.2,5.2,0.7,0.4,26.4,37.0,1.2,12.8


In [None]:
percent_df.to_csv('trip_summary_data_percents.csv')

In [None]:
percent_df.head()

# Employment Centers Trips by Block Group CSV

In [12]:
replica_2 = pd.read_csv("replica-num_trips_blockgrps_to_ecs_fall_19_thusrday-03_21_23-trips_dataset.csv", nrows=10000)

In [13]:
trips_bgrp_to_EC = replica_2.groupby(['origin_bgrp','destination_custom']).agg({'count'}).reset_index()[['origin_bgrp','destination_custom','origin_cty']].rename(columns={'origin_cty':'trips'})

In [14]:
subset = trips_bgrp_to_EC[trips_bgrp_to_EC['origin_bgrp'].str.contains('San Diego')]
subset.columns = ['origin_bgrp', 'destination_custom', 'trip_count']
output = subset.merge(ec_list, how='left', left_on='destination_custom', right_on='EC_Name')
output['Tract'] = output['origin_bgrp'].str.extract(r'Tract\s(\d+(?:\.\d+)?)')[0]
output['BG'] = output['origin_bgrp'].str.extract(r'(\d+)')
output = output[['EC_ID', 'destination_custom', 'Tract', 'BG', 'trip_count']]
output['Tract'] = output['Tract'].astype(float)
output['BG'] = output['BG'].astype(int)
output['EC_ID'] = output['EC_ID'].astype(int)
output = output.sort_values(['EC_ID', 'Tract']).reset_index(drop=True)
output.columns = ['EC ID', 'Emp Ctr', 'Tract', 'BG', 'Trips']

Unnamed: 0,EC ID,Emp Ctr,Tract,BG,Trips
0,1,San Diego Airport,191.07,1,3
1,1,San Diego Airport,208.01,2,1
2,3,Barrio Logan,191.07,1,1
3,4,Carlsbad Palomar Airport,171.09,2,1
4,4,Carlsbad Palomar Airport,178.11,2,1
...,...,...,...,...,...
1019,107,Valley View Casino & Hotel,191.06,3,18
1020,107,Valley View Casino & Hotel,191.07,1,20
1021,107,Valley View Casino & Hotel,201.03,1,3
1022,107,Valley View Casino & Hotel,207.09,1,1


In [None]:
output.to_csv('EC_Trips_by_BG.csv')

In [None]:
output.head()