In [1]:
import pandas as pd
pd.set_option("display.max_columns", 100)
import os
import geopandas as gpd
from shapely.geometry import Point, LineString
from pyproj import CRS
import numpy as np
import openpyxl
import datetime

start_time = datetime.datetime.now()

In [2]:
### Define working directory
# NOTE(review): absolute, machine-specific path; every input/output path below is
# built by string concatenation from it, so it must keep its trailing slash.
dir_path = "E:/Projects/Clients/sandag/TO21_Recalibration/ExternalNodeProcessing/"

### Set working directory
# Changes the process-wide current directory for the rest of the notebook.
os.chdir(dir_path)

## Data paths or locations

In [3]:
### Data from before SPA processing
# One place file per day, numbered 1 through 7.
place_path = dir_path + "SPA_before/"
place_files = ["place_day%d.csv" % day_no for day_no in range(1, 8)]

In [4]:
### Existing geocoded trips file
trips_path = dir_path + "SPA_geocoded/"
trips_file = "trips_debug.csv"

### Existing geocoded tours file
tours_path = dir_path + "SPA_geocoded/"
tours_file = "tours_debug.csv"

In [5]:
### Shp file containing SANDAG TAZ layer
taz_shp_path = dir_path + "SANDAG_TAZ/"
taz_shp_file = "SANDAG_TAZ.shp"

### Shp file containing SANDAG MAZ layer
maz_shp_path = dir_path + "MGRA13/"
maz_shp_file = "MGRA13.shp"

### Shp file containing external stations
ext_shp_path = dir_path + "externalNodes/"
ext_shp_file = "externalNodes.shp"

In [6]:
### Persons file
per_path = dir_path + "SPA_geocoded/"
per_file = "per_debug.csv"

In [7]:
### Read pre-SPA place files
# Keyed by the file's position in place_files (0-based), as later cells expect.
temp_df = {
    file_no: pd.read_csv(place_path + file_name)
    for file_no, file_name in enumerate(place_files)
}

  temp_df[i] = pd.read_csv(place_path+place_files[i])
  temp_df[i] = pd.read_csv(place_path+place_files[i])


In [8]:
### Merge 7 day place files
place_columns = ['SAMPN', 'PERNO', 'PLANO', 'DAYNO', 'TAZ', 'MAZ']

# Stack the daily files in file order; the per-file row indices are kept as-is.
daily_places = [temp_df[file_no][place_columns] for file_no in range(len(place_files))]
place_df = pd.concat(daily_places)

# Composite numeric key: household, person, place number and day packed into one value.
place_df['uid'] = (place_df['SAMPN']*10000
                   + place_df['PERNO']*1000
                   + place_df['PLANO']*10
                   + place_df['DAYNO'])

print("Place File Shape:", place_df.shape)

Place File Shape: (232420, 7)


In [9]:
### Read trip and tour files
trips_df = pd.read_csv(trips_path+trips_file)
tours_df = pd.read_csv(tours_path+tours_file)

  tours_df = pd.read_csv(tours_path+tours_file)


In [10]:
### Read shp files
taz_gdf = gpd.read_file(taz_shp_path+taz_shp_file)
maz_gdf = gpd.read_file(maz_shp_path+maz_shp_file)
ext_gdf = gpd.read_file(ext_shp_path+ext_shp_file)

In [11]:
### Read persons file
per_df = pd.read_csv(per_path+per_file)

## Fix TAZ and MAZ columns for trip/tour ends and stops - Internal only

In [12]:
def add_taz_maz(df_orig, place_col, taz_maz_cols):
    """Attach the place-file TAZ and MAZ of one place reference to a trip/tour table.

    Parameters
    ----------
    df_orig : DataFrame with 'HH_ID', 'PER_ID', 'DAYNO' and `place_col` columns.
    place_col : name of the column in `df_orig` holding the place number to look up.
    taz_maz_cols : two column names, [TAZ column, MAZ column], written (or
        overwritten) in the returned frame.

    Returns
    -------
    A new DataFrame; `df_orig` is not modified. Rows without a matching place
    record get NaN in both target columns.

    Reads the module-level `place_df` (columns SAMPN, PERNO, PLANO, DAYNO, TAZ, MAZ).
    """
    # Join on the actual key columns rather than an arithmetic composite id:
    # HH*10000 + PER*1000 + PLACE*10 + DAY collides once a person number reaches
    # 10 or a place number reaches 100.
    key_cols = ['HH_ID', 'PER_ID', place_col, 'DAYNO']

    # Temporary names keep the lookup values from clashing with columns that
    # may already exist in df_orig (e.g. an existing 'TAZ' or 'MAZ').
    tmp_taz, tmp_maz = '_place_taz_tmp', '_place_maz_tmp'
    lookup = place_df[['SAMPN', 'PERNO', 'PLANO', 'DAYNO', 'TAZ', 'MAZ']].rename(
        columns={'SAMPN': 'HH_ID',
                 'PERNO': 'PER_ID',
                 'PLANO': place_col,
                 'TAZ': tmp_taz,
                 'MAZ': tmp_maz})

    df = pd.merge(df_orig, lookup, how="left", on=key_cols)

    # Positional assignment: first target column gets TAZ, second gets MAZ.
    df[taz_maz_cols] = df[[tmp_taz, tmp_maz]]
    df = df.drop([tmp_taz, tmp_maz], axis=1)

    return df

In [13]:
### Adding trips origin and destination TAZs and MAZs
for end_name, end_code in (('ORIG', 'O'), ('DEST', 'D')):
    trips_df = add_taz_maz(trips_df, end_name + '_PLACENO', [end_code + 'TAZ', end_code + 'MAZ'])

### Adding tours origin and destination TAZs and MAZs
for end_name, end_code in (('ORIG', 'O'), ('DEST', 'D')):
    tours_df = add_taz_maz(tours_df, end_name + '_PLACENO', [end_code + 'TAZ', end_code + 'MAZ'])

### Adding tours outbound stop TAZs and MAZs, then inbound stop TAZs and MAZs
for direction in ('OSTOP', 'ISTOP'):
    for stop_no in range(1, 5):
        stop_prefix = direction + '_' + str(stop_no)
        tours_df = add_taz_maz(tours_df,
                               stop_prefix + '_PLACENO',
                               [stop_prefix + '_TAZ', stop_prefix + '_MAZ'])

## Processing MAZ layer for coast trips

In [14]:
### Extracting latitude and longitude of each MAZ centroid
gdf = maz_gdf.copy()
gdf = gdf.to_crs(4326)

# Centroids computed on latitude/longitude coordinates are not true planar
# centroids (geopandas warns about this). When the source layer is in a
# projected CRS, take the centroid there and reproject only the resulting
# points; otherwise fall back to the centroid of the reprojected polygons.
if maz_gdf.crs is not None and maz_gdf.crs.is_projected:
    maz_centroids = maz_gdf.geometry.centroid.to_crs(4326)
else:
    maz_centroids = gdf.geometry.centroid

# x = longitude, y = latitude (EPSG:4326); rows align with maz_gdf by index.
gdf['x'] = maz_centroids.x
gdf['y'] = maz_centroids.y

# gdf.head()


  gdf['x'] = gdf.geometry.centroid.apply(lambda x: x.x)

  gdf['y'] = gdf.geometry.centroid.apply(lambda x: x.y)


In [15]:
# ### Run to save the centroids to a shp file
# gdf = gdf.drop('geometry', axis=1)
# gdf = gpd.GeoDataFrame(gdf, geometry=gpd.points_from_xy(gdf.x, gdf.y, crs="EPSG:4326"))
# gdf.to_file(maz_shp_path+"MGRA13_Centroids.shp")

In [16]:
### Attach centroid latitude and longitude to the MAZ layer
maz_gdf['longitude'] = gdf['x']
maz_gdf['latitude'] = gdf['y']
maz_gdf = maz_gdf.rename(columns={'MGRA': 'MAZ'})

### Removing MAZs far from coast
# A MAZ is kept when it lies west of the last longitude outright, or west of
# one of the (longitude, latitude) corner points while also south of it.
coast_corners = [(-117.112640, 32.690036),
                 (-117.239663, 33.01492),
                 (-117.276375, 33.138442)]

near_coast = maz_gdf.longitude <= -117.32254
for corner_lon, corner_lat in coast_corners:
    near_coast |= (maz_gdf.longitude <= corner_lon) & (maz_gdf.latitude <= corner_lat)

maz_gdf2 = maz_gdf[near_coast]
# maz_gdf2
# maz_gdf2

## Processing external stations shp for external trips

In [17]:
### Extracting latitude and longitude of each external station
# Reproject to EPSG:4326 so that x is longitude and y is latitude.
gdf1 = ext_gdf.to_crs(4326)

gdf1['x'] = gdf1.geometry.apply(lambda station_pt: station_pt.x)
gdf1['y'] = gdf1.geometry.apply(lambda station_pt: station_pt.y)

# gdf1.head()

In [18]:
### Attach latitude and longitude to the external stations layer
ext_gdf['longitude'] = gdf1['x']
ext_gdf['latitude'] = gdf1['y']

# The station 'ID' column is used as the TAZ number from here on.
ext_gdf = ext_gdf.rename(columns={'ID': 'TAZ'})
# ext_gdf

## Defining functions used to identify and assign nearest MAZ/TAZ

In [19]:
def get_dist(a,b,c,d):
    """Haversine (great-circle) distance from one point to one or many points.

    Parameters
    ----------
    a, b : latitude and longitude of the reference point, in degrees.
    c, d : latitude(s) and longitude(s) of the target point(s), in degrees.
        May be scalars, arrays or pandas Series; inputs are broadcast together.

    Returns
    -------
    Distance(s) in the unit of the sphere radius used (3958.8, the Earth's
    mean radius in miles). The result has the broadcast shape of the inputs;
    a Series in gives a Series out. NaN inputs give NaN distances.
    """
    earth_radius = 3958.8

    # Broadcasting handles the scalar-vs-array case, so no explicit repeat of
    # the reference point is needed (and scalar targets work too).
    lat1 = np.deg2rad(a)
    lon1 = np.deg2rad(b)
    lat2 = np.deg2rad(c)
    lon2 = np.deg2rad(d)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    hav = (np.sin(dlat/2))**2 + np.cos(lat1) * np.cos(lat2) * (np.sin(dlon/2))**2
    # Rounding can push the haversine term marginally above 1 for near-antipodal
    # points, which would make sqrt(1 - hav) NaN.
    hav = np.minimum(hav, 1.0)

    central_angle = 2 * np.arctan2(np.sqrt(hav), np.sqrt(1.0 - hav))
    return earth_radius * central_angle

In [20]:
def closest_maz(pt_lat, pt_lon, lat_col, lon_col, gdf):
    """Return the 'MAZ' value of the row in `gdf` nearest to a point.

    Parameters
    ----------
    pt_lat, pt_lon : latitude and longitude of the query point, in degrees.
    lat_col, lon_col : names of the latitude / longitude columns in `gdf`.
    gdf : table with those two columns and a 'MAZ' column. It is not modified.

    Returns
    -------
    The 'MAZ' of the first row attaining the minimum distance (NaN distances
    are ignored).

    Raises
    ------
    ValueError if `gdf` is empty or no distance can be computed (for example
    when the query coordinates are NaN).
    """
    # Work on a plain distance array instead of copying the whole table on
    # every call; this function is invoked once per record.
    distances = np.asarray(get_dist(pt_lat, pt_lon, gdf[lat_col], gdf[lon_col]), dtype=float)

    if distances.size == 0 or np.isnan(distances).all():
        raise ValueError("closest_maz: no valid distance for point (%s, %s)" % (pt_lat, pt_lon))

    nearest_pos = int(np.nanargmin(distances))
    return gdf['MAZ'].iloc[nearest_pos]

In [21]:
def closest_taz(pt_lat, pt_lon, lat_col, lon_col, gdf):
    """Return the 'TAZ' value of the row in `gdf` nearest to a point.

    Parameters
    ----------
    pt_lat, pt_lon : latitude and longitude of the query point, in degrees.
    lat_col, lon_col : names of the latitude / longitude columns in `gdf`.
    gdf : table with those two columns and a 'TAZ' column. It is not modified.

    Returns
    -------
    The 'TAZ' of the first row attaining the minimum distance (NaN distances
    are ignored).

    Raises
    ------
    ValueError if `gdf` is empty or no distance can be computed (for example
    when the query coordinates are NaN).
    """
    # Work on a plain distance array instead of copying the whole table on
    # every call; this function is invoked once per record.
    distances = np.asarray(get_dist(pt_lat, pt_lon, gdf[lat_col], gdf[lon_col]), dtype=float)

    if distances.size == 0 or np.isnan(distances).all():
        raise ValueError("closest_taz: no valid distance for point (%s, %s)" % (pt_lat, pt_lon))

    nearest_pos = int(np.nanargmin(distances))
    return gdf['TAZ'].iloc[nearest_pos]

In [22]:
def assign_coast_maz(columns, df):
    """Fill missing MAZs for locations inside the coastal bounding box.

    Parameters
    ----------
    columns : three column names of `df`, in order
        [MAZ column, x/longitude column, y/latitude column].
    df : table to update. It is modified in place and also returned.

    Returns
    -------
    `df`, where every row that had a missing MAZ and coordinates inside the
    box (-117.389 <= x <= -117.136, 32.579 <= y <= 33.195) now carries the MAZ
    of the nearest centroid in the module-level `maz_gdf2`. All other rows are
    untouched. If no row qualifies, `df` is returned unchanged.
    """
    maz_col, x_col, y_col = columns

    missing_maz = df[maz_col].isna()
    in_coast_box = ((df[x_col] >= -117.389) &
                    (df[x_col] <= -117.136) &
                    (df[y_col] >= 32.579) &
                    (df[y_col] <= 33.195))

    targets = df.loc[missing_maz & in_coast_box, [x_col, y_col]]

    # A row-wise apply on an empty selection returns a DataFrame rather than a
    # column, which made the original assignment fail; nothing to do here.
    if targets.empty:
        return df

    nearest = pd.Series(
        [closest_maz(pt_lat=lat,
                     pt_lon=lon,
                     lat_col='latitude',
                     lon_col='longitude',
                     gdf=maz_gdf2)
         for lon, lat in zip(targets[x_col], targets[y_col])],
        index=targets.index)

    df.loc[targets.index, maz_col] = nearest

    return df

In [23]:
def assign_ext_taz(columns, df):
    """Fill missing TAZs with the nearest external station.

    Parameters
    ----------
    columns : three column names of `df`, in order
        [TAZ column, x/longitude column, y/latitude column].
    df : table to update. It is modified in place and also returned.

    Returns
    -------
    `df`, where every row with a missing TAZ and known coordinates now carries
    the TAZ of the nearest station in the module-level `ext_gdf`. Rows without
    coordinates are left as they are. If no row qualifies, `df` is returned
    unchanged.
    """
    taz_col, x_col, y_col = columns

    # Rows lacking either coordinate cannot be located, so they are skipped for
    # every column set (not only for the tour/stop columns).
    needs_taz = df[taz_col].isna() & df[x_col].notna() & df[y_col].notna()
    targets = df.loc[needs_taz, [x_col, y_col]]

    # A row-wise apply on an empty selection returns a DataFrame rather than a
    # column, which made the original assignment fail; nothing to do here.
    if targets.empty:
        return df

    nearest = pd.Series(
        [closest_taz(pt_lat=lat,
                     pt_lon=lon,
                     lat_col='latitude',
                     lon_col='longitude',
                     gdf=ext_gdf)
         for lon, lat in zip(targets[x_col], targets[y_col])],
        index=targets.index)

    df.loc[targets.index, taz_col] = nearest

    return df

In [24]:
def update_taz_from_maz(df, maz_col, taz_col):
    """Overwrite `taz_col` with the TAZ that the MAZ layer lists for `maz_col`.

    Looks each value of `df[maz_col]` up in the module-level `maz_gdf`
    ('MAZ' -> 'TAZ') with a left join, so rows whose MAZ is missing or unknown
    end up with NaN in `taz_col`. Returns a new DataFrame.
    """
    maz_to_taz = maz_gdf[['MAZ', 'TAZ']]
    joined = df.merge(maz_to_taz, how='left', left_on=maz_col, right_on='MAZ')
    joined[taz_col] = joined['TAZ']
    # Remove the helper columns brought in by the join.
    return joined.drop(columns=['MAZ', 'TAZ'])

## Processing Trips to update coast MAZs and TAZs

In [25]:
### Update MAZs for coast trip ends
trips_df1 = trips_df.copy()
for end_code in ('O', 'D'):
    trips_df1 = assign_coast_maz(columns=[end_code + 'MAZ', end_code + '_XCORD', end_code + '_YCORD'],
                                 df=trips_df1)

### Update TAZs from assigned MAZs
for end_code in ('O', 'D'):
    trips_df1 = update_taz_from_maz(trips_df1, end_code + 'MAZ', end_code + 'TAZ')

In [26]:
print("Original Trips File Size:", trips_df.shape)
print("Modified Trips File Size:", trips_df1.shape)

Original Trips File Size: (141136, 127)
Modified Trips File Size: (141136, 127)


In [27]:
# trips_df1.to_csv(trips_path+"trips_debug_coast.csv", index=False)

## Processing Tours to update coast TAZs

In [28]:
### Update MAZs for coast tour ends
tours_df1 = tours_df.copy()
for end_code, coord_prefix in (('O', 'ORIG'), ('D', 'DEST')):
    tours_df1 = assign_coast_maz(columns=[end_code + 'MAZ', coord_prefix + '_X', coord_prefix + '_Y'],
                                 df=tours_df1)

### Update TAZs from assigned MAZs
for end_code in ('O', 'D'):
    tours_df1 = update_taz_from_maz(tours_df1, end_code + 'MAZ', end_code + 'TAZ')

In [29]:
print("Original Tours File Size:", tours_df.shape)
print("Modified Tours File Size:", tours_df1.shape)

Original Tours File Size: (52883, 267)
Modified Tours File Size: (52883, 267)


In [30]:
### Update zones for outbound and inbound coast stops

stop_labels = [direction + '_' + stop_no
               for direction in ['OSTOP', 'ISTOP']
               for stop_no in ['1', '2', '3', '4']]

for stop_lbl in stop_labels:
    ### Skipping Outbound Stop-3 because there are no coast stops here
    ### if you run it in any case, it will cause error
    if stop_lbl == 'OSTOP_3':
        continue

    ### Update MAZs for coast stops
    stop_columns = [stop_lbl + '_MAZ', stop_lbl + '_X', stop_lbl + '_Y']
    tours_df1 = assign_coast_maz(columns=stop_columns, df=tours_df1)

    ### Update TAZs from assigned MAZs
    tours_df1 = update_taz_from_maz(tours_df1, stop_lbl + '_MAZ', stop_lbl + '_TAZ')

In [31]:
print("Original Tours File Size:", tours_df.shape)
print("Modified Tours File Size:", tours_df1.shape)

Original Tours File Size: (52883, 267)
Modified Tours File Size: (52883, 267)


In [32]:
# tours_df1.to_csv(tours_path+"tours_debug_coast.csv", index=False)

## Processing Trips to update external TAZs

In [33]:
# Trip ends still lacking a TAZ get the nearest external station, origin first.
for end_code in ('O', 'D'):
    trips_df1 = assign_ext_taz(columns=[end_code + 'TAZ', end_code + '_XCORD', end_code + '_YCORD'],
                               df=trips_df1)

In [34]:
print("Original Trips File Size:", trips_df.shape)
print("Modified Trips File Size:", trips_df1.shape)

Original Trips File Size: (141136, 127)
Modified Trips File Size: (141136, 127)


In [35]:
# trips_df1.to_csv(trips_path+"trips_debug_external.csv", index=False)

## Processing Tours to update external TAZs

In [36]:
# Tour ends still lacking a TAZ get the nearest external station, origin first.
for taz_col, coord_prefix in (('OTAZ', 'ORIG'), ('DTAZ', 'DEST')):
    tours_df1 = assign_ext_taz(columns=[taz_col, coord_prefix + '_X', coord_prefix + '_Y'],
                               df=tours_df1)

In [37]:
print("Original Tours File Size:", tours_df.shape)
print("Modified Tours File Size:", tours_df1.shape)

Original Tours File Size: (52883, 267)
Modified Tours File Size: (52883, 267)


In [38]:
### Update TAZs for outbound and inbound external stops

stop_labels = [direction + '_' + stop_no
               for direction in ['OSTOP', 'ISTOP']
               for stop_no in ['1', '2', '3', '4']]

for stop_lbl in stop_labels:
    ### Update TAZs for external stops
    tours_df1 = assign_ext_taz(columns=[stop_lbl + '_TAZ', stop_lbl + '_X', stop_lbl + '_Y'],
                               df=tours_df1)

In [39]:
print("Original Tours File Size:", tours_df.shape)
print("Modified Tours File Size:", tours_df1.shape)

Original Tours File Size: (52883, 267)
Modified Tours File Size: (52883, 267)


In [40]:
# tours_df1.to_csv(tours_path+"tours_debug_external.csv", index=False)

## Flag external trip records

In [41]:
# TAZ numbers 1 through 12 are treated as external stations.
ext_tazs = list(range(1, 13))
ext_tazs

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

In [42]:
# 1 when the trip end's TAZ is an external station, else 0.
for end_code in ('O', 'D'):
    trips_df1[end_code + 'TAZ_ext'] = trips_df1[end_code + 'TAZ'].isin(ext_tazs).astype('int64')

# Number of external ends per trip: 0, 1 or 2.
trips_df1['external'] = trips_df1['OTAZ_ext'] + trips_df1['DTAZ_ext']

In [43]:
trips_df1['OTAZ_ext'].value_counts()

0    135976
1      5160
Name: OTAZ_ext, dtype: int64

In [44]:
trips_df1['DTAZ_ext'].value_counts()

0    135884
1      5252
Name: DTAZ_ext, dtype: int64

In [45]:
trips_df1['external'].value_counts()

0    135163
2      4439
1      1534
Name: external, dtype: int64

## Code External-Type variable for each tour record 

In [46]:
### Coding external origins
tours_df1['orig_ext'] = tours_df1['OTAZ'].isin(ext_tazs).astype('int64')

### Coding external destinations
tours_df1['dest_ext'] = tours_df1['DTAZ'].isin(ext_tazs).astype('int64')

### Coding external outbound stops (1 if any of the four stops is external)
outbound_taz_cols = ['OSTOP_%d_TAZ' % stop_no for stop_no in range(1, 5)]
tours_df1['outbound_ext'] = tours_df1[outbound_taz_cols].isin(ext_tazs).any(axis=1).astype('int64')

### Coding external inbound stops (1 if any of the four stops is external)
inbound_taz_cols = ['ISTOP_%d_TAZ' % stop_no for stop_no in range(1, 5)]
tours_df1['inbound_ext'] = tours_df1[inbound_taz_cols].isin(ext_tazs).any(axis=1).astype('int64')

In [47]:
tours_df1['orig_ext'].value_counts()

0    52096
1      787
Name: orig_ext, dtype: int64

In [48]:
tours_df1['dest_ext'].value_counts()

0    51257
1     1626
Name: dest_ext, dtype: int64

In [49]:
tours_df1['outbound_ext'].value_counts()

0    52206
1      677
Name: outbound_ext, dtype: int64

In [50]:
tours_df1['inbound_ext'].value_counts()

0    52367
1      516
Name: inbound_ext, dtype: int64

In [51]:
## Three external type tours: II, II-Ext, EE
### II - Fully internal tour
### II-Ext - Starts and ends internally but has at least one external stop
### EE - Fully external tour

# Start from an object-dtype column of NaN: writing strings into a float NaN
# column is an incompatible-dtype assignment that recent pandas deprecates.
tours_df1['external_type'] = pd.Series(np.nan, index=tours_df1.index, dtype=object)

# Internal origin: II by default ...
tours_df1.loc[(tours_df1['orig_ext']==0), 'external_type'] = 'II'
# ... upgraded to II-Ext when the destination or any stop is external.
tours_df1.loc[(tours_df1['orig_ext']==0)&
              ((tours_df1['dest_ext']==1)|
              (tours_df1['outbound_ext']==1)|
              (tours_df1['inbound_ext']==1)), 'external_type'] = 'II-Ext'
# External origin: EE, regardless of the other flags.
tours_df1.loc[(tours_df1['orig_ext']==1), 'external_type'] = 'EE'


In [52]:
tours_df1['external_type'].value_counts()

II        51079
II-Ext     1017
EE          787
Name: external_type, dtype: int64

## Identify II-Ext Trips Traveling more than 360 miles

In [53]:
def get_dist2(a,b,c,d):
    """Haversine distance between point (a, b) and point (c, d).

    a, c are latitudes and b, d are longitudes, all in degrees. The result is
    in the unit of the sphere radius used (3958.8, the Earth's mean radius in
    miles). Inputs are passed straight to numpy, so scalars and arrays both work.
    """
    earth_radius = 3958.8

    phi1, lam1, phi2, lam2 = (np.deg2rad(angle) for angle in (a, b, c, d))

    delta_lam = lam2 - lam1
    delta_phi = phi2 - phi1

    # Haversine of the central angle between the two points.
    hav = (np.sin(delta_phi/2))**2 + np.cos(phi1) * np.cos(phi2) * (np.sin(delta_lam/2))**2
    central_angle = 2 * np.arctan2(np.sqrt(hav), np.sqrt(1-hav))

    return earth_radius * central_angle

In [54]:
def ie_distance(taz_col, dist_col, XCORD, YCORD, df_orig, gdf=ext_gdf):
    """Distance from a tour location to the external station assigned to it.

    Only rows with external_type == 'II-Ext' are processed. For those whose
    `taz_col` matches a station in `gdf` with TAZ below 13, `dist_col` is set
    to the haversine distance (via get_dist2) between the location's
    coordinates and the station's coordinates.

    Parameters
    ----------
    taz_col : column of `df_orig` holding the TAZ to look up in `gdf`.
    dist_col : name of the distance column to write.
    XCORD, YCORD : columns of `df_orig` holding the location's longitude and latitude.
    df_orig : tour table; it is modified in place and returned.
    gdf : station table with 'TAZ', 'longitude' and 'latitude' columns.

    Returns
    -------
    `df_orig` with `dist_col` filled for the matched II-Ext rows (NaN elsewhere
    when the column is new).
    """
    is_ii_ext = df_orig.external_type == "II-Ext"
    ii_ext = df_orig[is_ii_ext]

    merged = pd.merge(ii_ext, gdf[['TAZ', 'longitude', 'latitude']],
                      left_on=taz_col, right_on='TAZ', how='left')
    # The merge renumbers rows; restore the original labels so the result can
    # be written back by index.
    merged.index = ii_ext.index

    if dist_col not in merged.columns:
        merged[dist_col] = np.nan

    # Unmatched rows have TAZ = NaN after the left join, so they fail this test.
    has_station = merged.TAZ < 13

    # Guarded and vectorised: get_dist2 is pure numpy arithmetic, so whole
    # columns can be passed at once, and an empty selection is simply skipped.
    if has_station.any():
        matched = merged[has_station]
        merged.loc[has_station, dist_col] = get_dist2(a=matched[YCORD],
                                                      b=matched[XCORD],
                                                      c=matched['latitude'],
                                                      d=matched['longitude'])

    df_orig.loc[is_ii_ext, dist_col] = merged[dist_col]

    return df_orig

In [55]:
# (TAZ column, column-name prefix) for the primary destination and every stop,
# in processing order: destination, outbound stops 1-4, inbound stops 1-4.
distance_jobs = [('DTAZ', 'DEST')]
for direction in ('OSTOP', 'ISTOP'):
    for stop_no in range(1, 5):
        stop_prefix = direction + '_' + str(stop_no)
        distance_jobs.append((stop_prefix + '_TAZ', stop_prefix))

for taz_col, prefix in distance_jobs:
    tours_df1 = ie_distance(taz_col, prefix + '_Dist', prefix + '_X', prefix + '_Y',
                            df_orig=tours_df1.copy(), gdf=ext_gdf)

In [56]:
# Flag tours where the destination or any stop lies more than 360 (distance
# units of get_dist2) from its external station; missing distances never qualify.
dist_cols = (['DEST_Dist']
             + ['OSTOP_%d_Dist' % stop_no for stop_no in range(1, 5)]
             + ['ISTOP_%d_Dist' % stop_no for stop_no in range(1, 5)])

tours_df1['IE_open_jaw'] = (tours_df1[dist_cols] > 360).any(axis=1).astype('int64')

# Flagged tours are reclassified as IE.
tours_df1.loc[tours_df1.IE_open_jaw==1, 'external_type'] = 'IE'

In [57]:
tours_df1['external_type'].value_counts()

II        51079
II-Ext      825
EE          787
IE          192
Name: external_type, dtype: int64

## II-Ext Tours whose Primary Destination is Internal

In [58]:
# 0 = not an II-Ext tour, 1 = II-Ext tour, 2 = II-Ext tour whose DTAZ is above 12.
is_ii_ext = tours_df1.external_type == 'II-Ext'
dest_above_12 = is_ii_ext & (tours_df1.DTAZ > 12)
tours_df1['II-Ext_internal_dest'] = is_ii_ext.astype('int64') + dest_above_12.astype('int64')

In [59]:
tours_df1['II-Ext_internal_dest'].value_counts()

0    52058
1      754
2       71
Name: II-Ext_internal_dest, dtype: int64

## Reset Primary Destination of II-Ext tours with internal destination

In [60]:
def most_frequent(List):
    """Return the most common non-zero value in `List`.

    Zeros are ignored. Ties are resolved in favour of the value that appears
    first in the list. If the list holds no non-zero value, its first element
    is returned (so an all-zero list yields 0).

    Raises IndexError when `List` is empty.
    """
    # Single pass tally instead of calling list.count() for every element.
    # Dicts keep insertion order, so keys are in order of first appearance.
    tally = {}
    for item in List:
        if item != 0:
            tally[item] = tally.get(item, 0) + 1

    if not tally:
        return List[0]

    # max() returns the first key reaching the highest count, which preserves
    # the first-appearance tie-break.
    return max(tally, key=tally.get)

In [61]:
stop_taz_cols = ['DTAZ',
                 'OSTOP_1_TAZ', 'OSTOP_2_TAZ', 'OSTOP_3_TAZ', 'OSTOP_4_TAZ',
                 'ISTOP_1_TAZ', 'ISTOP_2_TAZ', 'ISTOP_3_TAZ', 'ISTOP_4_TAZ']
# Stop columns only (everything after DTAZ).
only_stop_cols = stop_taz_cols[1:]

temp_df = tours_df1.loc[tours_df1['II-Ext_internal_dest']==2, only_stop_cols]

# Keep only TAZ values of 12 or below; larger and missing values become 0 so
# that most_frequent() ignores them.
temp_df = temp_df.mask(temp_df > 12, 0).fillna(0)

temp_df['most_frequent'] = 0
for tour_idx in temp_df.index:
    stop_values = temp_df[only_stop_cols].loc[tour_idx].to_list()
    temp_df.loc[tour_idx, 'most_frequent'] = most_frequent(stop_values)

# temp_df

# temp_df

In [62]:
# Replace the primary destination TAZ of the flagged tours (code 2) with the
# 'most_frequent' value computed above; values are matched by row index.
tours_df1.loc[tours_df1['II-Ext_internal_dest']==2, 'DTAZ'] = temp_df['most_frequent']

## Remove External Stops in Outbound and Inbound Direction

In [63]:
### Processing outbound stop TAZs
### Remove external TAZs
# Work on an explicit copy: assigning into a filtered selection of tours_df1
# triggers SettingWithCopyWarning and leaves it ambiguous which frame changes.
df1 = tours_df1[(tours_df1['external_type']=='II-Ext')&(tours_df1['outbound_ext']==1)].copy()

outstop_taz_cols = ['OSTOP_1_TAZ', 'OSTOP_2_TAZ', 'OSTOP_3_TAZ', 'OSTOP_4_TAZ']

# When stop i is external (TAZ <= 12), blank it and every later outbound stop.
for i in range(1,5):
    edit_stop_cols = outstop_taz_cols[i-1:]

    df1.loc[df1['OSTOP_'+str(i)+'_TAZ']<=12, edit_stop_cols] = np.nan

# df1.loc[:, outstop_taz_cols]

# df1.loc[:, outstop_taz_cols]

In [64]:
### Merge the changes made above with tours dataframe
tours_df1.loc[df1.index, outstop_taz_cols] = df1.loc[:, outstop_taz_cols]

In [65]:
### Processing inbound stop TAZs
### Remove external TAZs and move internal stops in sequence
# Selection: II-Ext tours flagged as having at least one external inbound stop.
# NOTE(review): df2 is a filtered selection of tours_df1 and is assigned into
# below; pandas may emit SettingWithCopyWarning here.
df2 = tours_df1[(tours_df1['external_type']=='II-Ext')&(tours_df1['inbound_ext']==1)]

instop_taz_cols = ['ISTOP_1_TAZ', 'ISTOP_2_TAZ', 'ISTOP_3_TAZ', 'ISTOP_4_TAZ']

# Walk the inbound stops from the last (4) to the first (1).
for i in range(4,0,-1):
    # Stop i and every earlier inbound stop: ISTOP_i, ISTOP_(i-1), ..., ISTOP_1.
    edit_stop_cols = ['ISTOP_'+str(j)+'_TAZ' for j in range(i,0,-1)]

    # When stop i is external (TAZ <= 12), blank it together with all earlier stops.
    df2.loc[df2['ISTOP_'+str(i)+'_TAZ']<=12, edit_stop_cols] = np.nan

    if i!=4:
        # Rows where stop i is now empty but stop i+1 still holds a TAZ: the
        # remaining later stops are shifted down to start at ISTOP_1.
        df3 = df2[df2['ISTOP_'+str(i)+'_TAZ'].isna()&df2['ISTOP_'+str(i+1)+'_TAZ'].notna()].copy()

        if i==3:
            # Only stop 4 remains: it becomes stop 1.
            df3[['ISTOP_1_TAZ']] = df3[['ISTOP_4_TAZ']]
            df3[['ISTOP_4_TAZ']] = np.nan
        elif i==2:
            # Stops 3 and 4 become stops 1 and 2.
            df3[['ISTOP_1_TAZ', 'ISTOP_2_TAZ']] = df3[['ISTOP_3_TAZ', 'ISTOP_4_TAZ']]
            df3[['ISTOP_3_TAZ', 'ISTOP_4_TAZ']] = np.nan
        elif i==1:
            # Stops 2, 3 and 4 each move down by one position.
            df3[['ISTOP_1_TAZ', 'ISTOP_2_TAZ', 'ISTOP_3_TAZ']] = df3[['ISTOP_2_TAZ', 'ISTOP_3_TAZ', 'ISTOP_4_TAZ']]
            df3['ISTOP_4_TAZ'] = np.nan

        # Write the shifted rows back into the working selection.
        df2.loc[df3.index, instop_taz_cols] = df3.loc[:, instop_taz_cols]

# df2.loc[:, instop_taz_cols]

In [66]:
### Merge the changes made above with tours dataframe
tours_df1.loc[df2.index, instop_taz_cols] = df2.loc[:, instop_taz_cols]

In [67]:
### Coding new external outbound stops (1 if any of the four stops is external)
outbound_taz_cols = ['OSTOP_%d_TAZ' % stop_no for stop_no in range(1, 5)]
tours_df1['outbound_ext2'] = tours_df1[outbound_taz_cols].isin(ext_tazs).any(axis=1).astype('int64')

### Coding new external inbound stops (1 if any of the four stops is external)
inbound_taz_cols = ['ISTOP_%d_TAZ' % stop_no for stop_no in range(1, 5)]
tours_df1['inbound_ext2'] = tours_df1[inbound_taz_cols].isin(ext_tazs).any(axis=1).astype('int64')

In [68]:
### Re-counting outbound and inbound stops
# Start from the original counts; only II-Ext tours are re-counted.
tours_df1[['outbound_stops', 'inbound_stops']] = tours_df1[['OUTBOUND_STOPS', 'INBOUND_STOPS']]

is_ii_ext = tours_df1['external_type']=='II-Ext'

for count_col, direction, ext_flag in (('outbound_stops', 'OSTOP', 'outbound_ext'),
                                       ('inbound_stops', 'ISTOP', 'inbound_ext')):
    stop_cols = [direction + '_' + str(stop_no) + '_TAZ' for stop_no in range(1, 5)]

    # The count is the highest stop number that still carries a TAZ.
    for stop_no, stop_col in enumerate(stop_cols, start=1):
        tours_df1.loc[tours_df1[stop_col].notna() & is_ii_ext, count_col] = stop_no

    # Tours that had an external stop in this direction and now have no stop
    # left must count 0; without this they kept their original stop count.
    all_removed = (is_ii_ext &
                   (tours_df1[ext_flag] == 1) &
                   tours_df1[stop_cols].isna().all(axis=1))
    tours_df1.loc[all_removed, count_col] = 0

## Add Expansion Factors from Persons File

In [69]:
# Person key: household id followed by a two-digit person number.
# NOTE(review): assumes PER_ID is below 100 so that keys stay unique - confirm.
trips_df1['personid'] = trips_df1['HH_ID']*100+trips_df1['PER_ID']
tours_df1['personid'] = tours_df1['HH_ID']*100+tours_df1['PER_ID']

# Left join keeps every trip/tour; records without a person match get NaN weight.
# NOTE(review): later cells read the weight as 'finalweight_y', which implies the
# trip/tour tables already carry a 'finalweight' column and pandas suffixes the
# merged one with '_y' - confirm. Re-running this cell would add further columns.
trips_df1 = pd.merge(trips_df1, per_df[['personid', 'finalweight']], on='personid', how='left')
tours_df1 = pd.merge(tours_df1, per_df[['personid', 'finalweight']], on='personid', how='left')

## External Type and Purpose Tables

In [70]:
tour_purpose_name = {     
    0: 'Home',
    1: 'Work',
    2: 'University',
    3: 'School',
    4: 'Escort',
    5: 'Shopping',
    6: 'Maintenance',
    7: 'Eat Out',
    8: 'Social/Visit',
    9: 'Discretionary',
    10: 'Work-Related',
    11: 'Loop',
    12: 'Change Mode',
    13: 'Other'
}
tours_df1['tour_purpose'] = tours_df1['TOURPURP'].map(tour_purpose_name)

# tours_df1['tour_purpose'].value_counts().rename_axis('Purpose').reset_index(name='Tour Counts')

In [71]:
tours_exttype = pd.pivot_table(tours_df1, 
                               index='external_type', 
                               values='finalweight_y', 
                               aggfunc='count', 
                               margins=True, margins_name='Total').rename({'finalweight_y': 'Counts'}, axis=1)
tours_exttype

Unnamed: 0_level_0,Counts
external_type,Unnamed: 1_level_1
EE,787
IE,192
II,51079
II-Ext,825
Total,52883


In [72]:
# Weighted tour totals by external type, rounded to whole numbers, with a
# 'Total' margin row. The string 'sum' is used because passing np.sum as
# aggfunc is deprecated in recent pandas.
tours_exttype_expd = pd.pivot_table(tours_df1,
                                    index='external_type',
                                    values='finalweight_y',
                                    aggfunc='sum',
                                    margins=True,
                                    margins_name='Total').round().astype(int).rename({'finalweight_y': 'Weighted Counts'}, axis=1)
tours_exttype_expd

Unnamed: 0_level_0,Weighted Counts
external_type,Unnamed: 1_level_1
EE,112274
IE,25767
II,11231709
II-Ext,177411
Total,11547161


In [73]:
tours_purp_exttype = pd.crosstab(tours_df1['tour_purpose'].fillna('Missing'), 
                                 tours_df1['external_type'].fillna('Missing'), 
                                 margins=True, margins_name='Total').round()
tours_purp_exttype

external_type,EE,IE,II,II-Ext,Total
tour_purpose,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Change Mode,18,22,169,10,219
Discretionary,145,46,5511,176,5878
Eat Out,103,14,3373,49,3539
Escort,23,2,4058,34,4117
Loop,105,0,6163,8,6276
Maintenance,21,1,3689,49,3760
Missing,57,0,3303,10,3370
Other,79,40,1417,89,1625
School,0,0,3312,2,3314
Shopping,55,10,4425,42,4532


In [74]:
# Weighted tour totals by purpose (rows) and external type (columns); missing
# categories are labelled 'Missing' and empty cells shown as 0. The string
# 'sum' is used because passing np.sum as aggfunc is deprecated in recent pandas.
tours_purp_exttype_expd = pd.crosstab(tours_df1['tour_purpose'].fillna('Missing'),
                                      tours_df1['external_type'].fillna('Missing'),
                                      values=tours_df1['finalweight_y'],
                                      aggfunc='sum',
                                      margins = True, margins_name='Total').round().fillna(0).astype(int)
tours_purp_exttype_expd

external_type,EE,IE,II,II-Ext,Total
tour_purpose,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Change Mode,2465,3358,34926,497,41245
Discretionary,13728,2535,1100208,23623,1140095
Eat Out,15463,3517,681606,9910,710496
Escort,1814,355,1231656,12302,1246128
Loop,17763,0,1197468,479,1215709
Maintenance,2266,380,705096,11099,718841
Missing,5751,0,651850,2343,659945
Other,12341,4920,287679,19119,324060
School,0,0,1049044,85,1049129
Shopping,5455,1650,905148,8303,920556


## External Station and Purpose Tables

In [75]:
external_taz_dict={
1: "San Ysidro",
2: "Otay Mesa",
3: "East Otay Mesa",
4: "Tecate",
5: "Jacumba",
6: "I-8",
7: "SR78",
8: "SR79",
9: "Pala Road",
10: "I-15",
11: "Foothill Tollway",
12: "I-5"
}

tours_ext_df = tours_df1.loc[tours_df1['external_type']=='II-Ext'].copy()
tours_ext_df['external_station'] = tours_ext_df['DTAZ'].map(external_taz_dict)

In [76]:
tours_extstat = pd.pivot_table(tours_ext_df, 
                               index='external_station', 
                               values='finalweight_y', 
                               aggfunc='count', 
                               margins=True, margins_name='Total').rename({'finalweight_y': 'Counts'}, axis=1)
tours_extstat

Unnamed: 0_level_0,Counts
external_station,Unnamed: 1_level_1
East Otay Mesa,11
Foothill Tollway,472
I-15,115
I-5,19
I-8,17
Jacumba,1
Otay Mesa,12
Pala Road,34
SR78,29
SR79,70


In [77]:
# Weighted II-Ext tour totals by external station, rounded to whole numbers,
# with a 'Total' margin row. The string 'sum' is used because passing np.sum
# as aggfunc is deprecated in recent pandas.
tours_extstat_expd = pd.pivot_table(tours_ext_df,
                                    index='external_station',
                                    values='finalweight_y',
                                    aggfunc='sum',
                                    margins=True,
                                    margins_name='Total').round().astype(int).rename({'finalweight_y': 'Weighted Counts'}, axis=1)
tours_extstat_expd

Unnamed: 0_level_0,Weighted Counts
external_station,Unnamed: 1_level_1
East Otay Mesa,7886
Foothill Tollway,94738
I-15,27796
I-5,5044
I-8,1704
Jacumba,65
Otay Mesa,759
Pala Road,12806
SR78,5334
SR79,9350


In [78]:
tours_extstat_purp = pd.crosstab(tours_ext_df['tour_purpose'].fillna('Missing'), 
                                 tours_ext_df['external_station'].fillna('Missing'), 
                                 margins=True, margins_name='Total').round()
tours_extstat_purp

external_station,East Otay Mesa,Foothill Tollway,I-15,I-5,I-8,Jacumba,Otay Mesa,Pala Road,SR78,SR79,San Ysidro,Tecate,Total
tour_purpose,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Change Mode,0,6,0,0,0,0,1,0,2,0,1,0,10
Discretionary,0,95,5,4,7,1,4,6,5,40,6,3,176
Eat Out,0,32,4,0,3,0,0,3,3,2,1,1,49
Escort,0,21,6,0,0,0,1,4,0,0,2,0,34
Loop,0,1,3,0,0,0,0,0,0,0,4,0,8
Maintenance,0,25,9,1,0,0,0,2,2,1,8,1,49
Missing,2,2,3,0,0,0,1,0,0,2,0,0,10
Other,0,39,13,3,1,0,1,6,12,11,3,0,89
School,0,0,2,0,0,0,0,0,0,0,0,0,2
Shopping,0,20,15,1,0,0,0,2,0,1,2,1,42


In [79]:
# Weighted II-Ext tour totals by purpose (rows) and external station (columns);
# missing categories are labelled 'Missing' and empty cells shown as 0. The
# string 'sum' is used because passing np.sum as aggfunc is deprecated in
# recent pandas.
tours_extstat_purp_expd = pd.crosstab(tours_ext_df['tour_purpose'].fillna('Missing'),
                                      tours_ext_df['external_station'].fillna('Missing'),
                                      values=tours_ext_df['finalweight_y'],
                                      aggfunc='sum',
                                      margins=True, margins_name='Total').fillna(0).round().astype(int)
tours_extstat_purp_expd

external_station,East Otay Mesa,Foothill Tollway,I-15,I-5,I-8,Jacumba,Otay Mesa,Pala Road,SR78,SR79,San Ysidro,Tecate,Total
tour_purpose,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Change Mode,0,217,0,0,0,0,210,0,39,0,31,0,497
Discretionary,0,14287,1012,599,1006,65,144,1722,317,3954,323,195,23623
Eat Out,0,6075,1275,0,104,0,0,309,627,171,54,1295,9910
Escort,0,5355,1676,0,0,0,65,2878,0,0,2329,0,12302
Loop,0,65,196,0,0,0,0,0,0,0,217,0,479
Maintenance,0,5404,1909,980,0,0,0,130,39,65,1907,666,11099
Missing,387,100,1661,0,0,0,65,0,0,131,0,0,2343
Other,0,8419,2720,59,20,0,65,1759,2047,1657,2374,0,19119
School,0,0,85,0,0,0,0,0,0,0,0,0,85
Shopping,0,3297,2166,1277,0,0,0,117,0,20,1104,321,8303


## Save External Activity Summaries

In [80]:
### Opening Excel Workbook for summaries
writer = pd.ExcelWriter(tours_path+'external_activity_summaries.xlsx', engine = 'xlsxwriter')
workbook = writer.book

In [81]:
### Writing external activity summary tables
sheet_name = 'External Activity'
worksheet = workbook.add_worksheet(sheet_name)
writer.sheets[sheet_name] = worksheet

# Each entry: (title, unweighted table, weighted table). The pair is written
# side by side, with the weighted table three columns right of the unweighted one.
summary_pairs = [
    ('External-Type Tour Counts', tours_exttype, tours_exttype_expd),
    ('External-Type vs Tour Purpose Counts', tours_purp_exttype, tours_purp_exttype_expd),
    ('External Station Tour Counts', tours_extstat, tours_extstat_expd),
    ('External Station vs Tour Purpose Counts', tours_extstat_purp, tours_extstat_purp_expd),
]

start = 2
previous_weighted = None
for title, counts_tbl, weighted_tbl in summary_pairs:
    # Leave a gap below the previous pair before starting the next one.
    if previous_weighted is not None:
        start += previous_weighted.shape[0] + 5

    # Titles sit two rows above the tables.
    worksheet.write_string(start-2, 0, title)
    counts_tbl.to_excel(writer, sheet_name = sheet_name, startrow = start, startcol = 0)

    column = counts_tbl.shape[1] + 3
    worksheet.write_string(start-2, column, title + ' - Weighted')
    weighted_tbl.to_excel(writer, sheet_name = sheet_name, startrow = start, startcol = column)

    previous_weighted = weighted_tbl

writer.close()

## Save Trip and Tour Files

In [82]:
trips_df1.to_csv(trips_path+"trips_debug_external.csv", index=False)
tours_df1.to_csv(tours_path+"tours_debug_external.csv", index=False)

In [83]:
end_time = datetime.datetime.now()
print("Start Time:", start_time)
print("End Time:", end_time)
print("Run Time:", round(end_time.timestamp()-start_time.timestamp(), 3), "sec")

Start Time: 2022-09-07 16:02:12.427978
End Time: 2022-09-07 16:04:59.930280
Run Time: 167.502 sec
