# Load trip data and shapefiles

In [None]:
%run spatial_analysis_load.ipynb

# Group trips

#### Create groupby objects

In [None]:
# Group all trips by their 'fips' value; later cells reuse this for per-fips
# aggregates (unique trip / id counts, mean trip distance).
# NOTE: the groupby keeps a reference to the DataFrame object dfnow is bound to here,
# so cells that later rebind dfnow (dfnow = dfnow.merge(...)) do not change which
# frame this object groups.
fips_grouped = dfnow.groupby('fips')

In [None]:
# Per-fips groupbys restricted to trips with is_hov == 0 (SOV) and is_hov == 1 (HOV).
hov_flag = dfnow['is_hov']
fips_sov_grouped = dfnow.loc[hov_flag.eq(0)].groupby('fips')
fips_hov_grouped = dfnow.loc[hov_flag.eq(1)].groupby('fips')

#### Compute groups (toll intervals, time of use, user frequency, length of HOT trip)

<i>For grouping trips by toll</i>

In [None]:
def toll_group_fxn(row):
    """Return the toll-interval label for one trip row.

    Reads ``row['toll']`` and returns one of "0-1", ">1-2", ">2-4", ">4-6",
    ">6-8" or ">8-10" (upper bounds inclusive). Anything matching none of
    those intervals -- a toll above 10, or a value such as NaN for which
    every comparison is false -- is labelled ">10?".
    """
    toll = row['toll']
    # (inclusive upper bound, label) in ascending order; the first match wins.
    upper_bounds = (
        (1, "0-1"),
        (2, ">1-2"),
        (4, ">2-4"),
        (6, ">4-6"),
        (8, ">6-8"),
        (10, ">8-10"),
    )
    for upper, label in upper_bounds:
        if toll <= upper:
            return label
    return ">10?"

# Label every trip with its toll interval (row-wise apply of toll_group_fxn).
dfnow['toll_group'] = dfnow.apply(toll_group_fxn, axis=1)

<i>For grouping trips by week day, month, time of day</i>

In [None]:
# entry_time is parsed with unit='s', i.e. as a count of seconds since the Unix epoch.
dfnow['entry_time'] = pd.to_datetime(dfnow['entry_time'],unit='s')
#dfnow['entry_day'] = dfnow['entry_time'].apply(lambda t: t.weekday())
#dfnow['entry_mon'] = dfnow['entry_time'].apply(lambda t: t.month)
# Hour of day (0-23) through the vectorised .dt accessor rather than a per-row lambda.
dfnow['entry_hr'] = dfnow['entry_time'].dt.hour

In [None]:
# morning peak = 5-10am, evening peak = 3-8pm
def peak_group_fxn(row):
    """Return the peak-period / direction label for one trip row.

    ``row['entry_hr']`` in 5..9 is the morning peak ("m") and 15..19 the
    evening peak ("e"). ``row['entry_plaza']`` < 13 gives the "NB" prefix and
    >= 13 the "SB" prefix. Rows outside both peaks, or whose plaza satisfies
    neither comparison (e.g. NaN), are labelled "offpk".
    """
    hour = row['entry_hr']
    if 5 <= hour <= 9:
        period = "m"
    elif 15 <= hour <= 19:
        period = "e"
    else:
        return "offpk"

    plaza = row['entry_plaza']
    if plaza < 13:
        prefix = "NB"
    elif plaza >= 13:
        prefix = "SB"
    else:
        # Neither comparison holds (e.g. NaN plaza).
        return "offpk"

    labels = {
        ("NB", "m"): "NB_m_pk",
        ("SB", "m"): "SB_m_pk",
        ("NB", "e"): "NB_e_pk",
        ("SB", "e"): "SB_e_pk",
    }
    return labels[(prefix, period)]

# Label every trip with its peak period / direction (row-wise apply of peak_group_fxn).
dfnow['peak_group'] = dfnow.apply(peak_group_fxn, axis=1)

<i>For grouping trips by user frequency</i>

In [None]:
# One (id, fips) pair per id: the fips found on the first dfnow row in which each id appears.
fips_id_lookup = dfnow[['id','fips']].drop_duplicates(subset='id', keep='first')
#fips_id_lookup = pd.read_sql_query("select fips, id from census;", db)
#print(len(fips_id_lookup))
#fips_id_lookup.head()

In [None]:
# Number of trips per id: one row per distinct id, most frequent first.
# Built from Series.value_counts directly rather than DataFrame.agg({'id': 'value_counts'}).
# The agg form leaves the ids in the index and then adds an 'id' column as well; on
# pandas versions that name the value_counts index 'id' this makes the later
# merge(on='id') ambiguous (NOTE(review): confirm against the pandas version in use).
# Here the ids live only in the 'id' column and the index is a plain RangeIndex.
trips_per_id = dfnow['id'].value_counts()
num_trips_by_id = pd.DataFrame({
    'num_trips_by_id': trips_per_id.to_numpy(),
    'id': trips_per_id.index.to_numpy(),
})

In [None]:
# Attach the looked-up fips to every id's trip count; ids without a lookup row keep NaN fips.
num_trips_by_id_fips = num_trips_by_id.merge(fips_id_lookup, on='id', how='left')

In [None]:
def freq_group_fxn(row):
    """Return the usage-frequency label for one id row.

    Reads ``row['num_trips_by_id']`` and returns "1", "2-20", "21-70",
    "71-200", "201-400" or ">401". The ">401" label is returned for every
    count above 400 (so it includes 401 itself). Values that fit no interval
    (0, negatives, values strictly between 1 and 2, NaN) are labelled "0?".
    """
    n_trips = row['num_trips_by_id']
    if n_trips == 1:
        return "1"
    if 2 <= n_trips <= 20:
        return "2-20"
    if 20 < n_trips <= 70:
        return "21-70"
    if 70 < n_trips <= 200:
        return "71-200"
    if 200 < n_trips <= 400:
        return "201-400"
    if n_trips > 400:
        return ">401"
    return "0?"

In [None]:
# Label every id with its usage-frequency group (row-wise apply of freq_group_fxn).
num_trips_by_id_fips['freq_group'] = num_trips_by_id_fips.apply(freq_group_fxn, axis=1)

In [None]:
# - check how many id's have an associated fips
# Histograms of trips-per-id for ids with between 1 and 299 trips:
# all ids, ids WITH a fips, and ids WITHOUT a fips.
# (Previously the *_wfips / *_wofips names were attached to the opposite subsets;
# the panel order -- all, with fips, without fips -- is unchanged.)
trip_counts = num_trips_by_id_fips['num_trips_by_id']
in_plot_range = (trip_counts > 0) & (trip_counts < 300)
has_fips = num_trips_by_id_fips['fips'].notna()

all_trips = trip_counts[in_plot_range]
all_trips_wfips = trip_counts[in_plot_range & has_fips]
all_trips_wofips = trip_counts[in_plot_range & ~has_fips]

fig,axs=plt.subplots(1,3, figsize=(15,3))
panels = [
    (all_trips, 'all ids'),
    (all_trips_wfips, 'ids with fips'),
    (all_trips_wofips, 'ids without fips'),
]
for ax, (counts, title) in zip(axs, panels):
    ax.hist(counts)
    ax.set_title(title)
    ax.set_xlabel('trips per id')

In [None]:
# Share of trips (rows of dfnow) that carry no fips.
n_trips_total = len(dfnow)
n_trips_no_fips = dfnow['fips'].isna().sum()
print('# of trips w/o fips: ' + str(n_trips_no_fips))
print('total # of trips: ' + str(n_trips_total))
print('frac of trips w/o fips: ' + str(n_trips_no_fips/n_trips_total))

In [None]:
# Share of ids (rows of num_trips_by_id_fips) that carry no fips.
n_ids_total = len(num_trips_by_id_fips)
n_ids_no_fips = num_trips_by_id_fips['fips'].isna().sum()
print('# of ids w/o fips: ' + str(n_ids_no_fips))
print('total # of ids: ' + str(n_ids_total))
print('frac of ids w/o fips: ' + str(n_ids_no_fips/n_ids_total))

In [None]:
# Preview: trip count, id, fips and freq_group for the first few ids.
num_trips_by_id_fips.head()

<i>For grouping trips by HOT lane trip length</i>

In [None]:
def dist_group_fxn(row):
    """Return the trip-length label for one trip row.

    Reads ``row['Dist btwn entry & exit loop']`` and returns "0-4", ">4-6",
    ">6-8", ">8-10", ">10-12" or ">12" (upper bounds inclusive). Values for
    which no comparison holds (e.g. NaN) are labelled "?".
    """
    dist = row['Dist btwn entry & exit loop']
    # (inclusive upper bound, label) in ascending order; the first match wins.
    bounded = (
        (4, "0-4"),
        (6, ">4-6"),
        (8, ">6-8"),
        (10, ">8-10"),
        (12, ">10-12"),
    )
    for upper, label in bounded:
        if dist <= upper:
            return label
    if dist > 12:
        return ">12"
    return "?"

# Label every trip with its trip-length interval (row-wise apply of dist_group_fxn).
dfnow['dist_group'] = dfnow.apply(dist_group_fxn, axis=1)

<i>For grouping trips by income</i>

In [None]:
# Left-join the mean SOV toll per fips onto cbgs_acs, then count how many block groups lack data.
avg_sov_toll_by_fips = fips_sov_grouped.agg({'toll': 'mean'}) # avg toll over sov per bg
cbgs_joinnow = cbgs_acs.merge(avg_sov_toll_by_fips, on='fips', how='left')
toll_missing = cbgs_joinnow['toll'].isnull()
inc_missing = cbgs_joinnow['med_inc'].isnull()
print('# of bgs: ' + str(len(cbgs_joinnow)))
print('# of null tolls: ' + str(toll_missing.sum()))
print('# of null tolls or med incs: ' + str((inc_missing | toll_missing).sum()))

In [None]:
# Keep only block groups that have both a median income and a mean SOV toll.
# .copy() makes the result an independent frame, so the column assignment made on
# cbgs_joinnow further down does not raise pandas' SettingWithCopyWarning.
filternow = cbgs_joinnow['med_inc'].notnull() & cbgs_joinnow['toll'].notnull()
cbgs_joinnow = cbgs_joinnow[filternow].copy()

In [None]:
# Attach each trip's block-group median income (NaN where the fips is not in cbgs_joinnow).
# Any med_inc column left by an earlier run of this cell is dropped first, so re-running
# the cell does not produce med_inc_x / med_inc_y.
dfnow = dfnow.drop(columns=['med_inc'], errors='ignore').merge(
    cbgs_joinnow[['med_inc','fips']], on='fips', how='left')

In [None]:
# - Compare income distribs from all acs, cbgs join (each fips wt = 1), all trips bgs (each fips wt = # of trips from that fips) 
all_acs_bgs_med_inc = cbgs_acs['med_inc']
cbgs_join_bgs_med_inc = cbgs_joinnow['med_inc']
all_trips_bgs_med_inc = dfnow['med_inc']

# Median of each distribution, then one 30-bin histogram per distribution.
income_sets = [
    ('all acs bgs med inc: ', all_acs_bgs_med_inc),
    ('cbgs join bgs med inc: ', cbgs_join_bgs_med_inc),
    ('all trips bgs med inc: ', all_trips_bgs_med_inc),
]
for label, med_inc in income_sets:
    print(label + str(med_inc.median()))

fig,axs=plt.subplots(1,3, figsize=(15,3))
for ax, (_, med_inc) in zip(axs, income_sets):
    ax.hist(med_inc, bins=30)

In [None]:
def histedges_equalN(x, nbin):
    """Return ``nbin + 1`` bin edges that split ``x`` into roughly equal-count bins.

    The edges are found by linearly interpolating the sorted values at
    ``nbin + 1`` evenly spaced positions between 0 and the number of values.

    NaN entries are ignored. Without this, NaNs sort to the end of the array
    and both shift the interior edges and turn the last edge into NaN.

    Parameters
    ----------
    x : array-like of numbers (list, ndarray, pandas Series)
    nbin : int
        Number of bins.

    Returns
    -------
    numpy.ndarray of length ``nbin + 1``.

    Raises
    ------
    ValueError
        If ``x`` contains no non-NaN value.
    """
    values = np.asarray(x, dtype=float)
    values = np.sort(values[~np.isnan(values)])
    npt = len(values)
    if npt == 0:
        raise ValueError("histedges_equalN needs at least one non-NaN value")
    return np.interp(np.linspace(0, npt, nbin + 1),
                     np.arange(npt),
                     values)

In [None]:
# Tercile (3 equal-count bins) edges of med_inc for each of the three income distributions.
edge_sources = (
    ('all acs bgs inc bin edges: ', cbgs_acs),
    ('cbgs join bgs inc bin edges: ', cbgs_joinnow),
    ('all trips bgs inc bin edges: ', dfnow),
)
for label, frame in edge_sources:
    print(label + str(histedges_equalN(frame['med_inc'],3)))

In [None]:
def inc3_group_fxn(row):
    """Return the three-level income label for one row.

    Reads ``row['med_inc']``: "low" up to and including 65000, "med" above
    65000 up to and including 95000, "high" above 95000. Values for which no
    comparison holds (e.g. NaN) are labelled "?".
    """
    med_inc = row['med_inc']
    if med_inc <= 65000:
        return "low"
    if med_inc > 95000:
        return "high"
    if med_inc > 65000:
        # Not above 95000 (checked just above), so this is the middle band.
        return "med"
    return "?"
#dfnow['inc3_group'] = dfnow.apply(inc3_group_fxn, axis=1)

In [None]:
# Classify each block group's income, then copy that label onto every trip via fips.
cbgs_joinnow['inc3_group'] = cbgs_joinnow.apply(inc3_group_fxn, axis=1)
# Drop any inc3_group left on dfnow by an earlier run of this cell so that re-running
# does not produce inc3_group_x / inc3_group_y.
dfnow = dfnow.drop(columns=['inc3_group'], errors='ignore').merge(
    cbgs_joinnow[['inc3_group','fips']], on='fips', how='left')

# Plot relationships

## Income x avg fare paid x forward/reverse commute

In [None]:
# Group trips by (fips, peak_group) so trips can be counted per block group and peak label.
fips_peak_grouped = dfnow.groupby(['fips','peak_group'])
#peak_grouped = dfnow.groupby(['peak_group'])
#fips_peak_grouped.count()

In [None]:
# - Define reverse dir trips
# Unique trip counts per fips for the two reverse-direction labels (NB_m_pk, SB_e_pk).
# The merge is an inner join, so only fips present under both labels are kept.
trips_per_fips_peak = fips_peak_grouped.agg({'trip_id': 'nunique'})
colnow1 = trips_per_fips_peak.xs('NB_m_pk', level='peak_group').rename(
    columns={'trip_id': 'num_NB_m_pk_trips'})
colnow2 = trips_per_fips_peak.xs('SB_e_pk', level='peak_group').rename(
    columns={'trip_id': 'num_SB_e_pk_trips'})
colsnow = colnow1.merge(colnow2, on='fips')

In [None]:
# - Define forward dir trips
# Unique trip counts per fips for the two forward-direction labels (NB_e_pk, SB_m_pk).
trips_per_fips_peak = fips_peak_grouped.agg({'trip_id': 'nunique'})
colnow1 = trips_per_fips_peak.xs('NB_e_pk', level='peak_group').rename(
    columns={'trip_id': 'num_NB_e_pk_trips'})
colnow2 = trips_per_fips_peak.xs('SB_m_pk', level='peak_group').rename(
    columns={'trip_id': 'num_SB_m_pk_trips'})

In [None]:
# - Combine all defns together + calc tot trips
# tot_trips = unique trip_ids per fips over all trips (peak and off-peak).
# All joins are inner joins on fips.
tot_trips_by_fips = fips_grouped.agg({'trip_id': 'nunique'}).rename(
    columns={'trip_id': 'tot_trips'})
colsnow = colsnow.merge(colnow1, on='fips')
colsnow = colsnow.merge(colnow2, on='fips')
colsnow = colsnow.merge(tot_trips_by_fips, on='fips')

In [None]:
# - Calc tot trips/household and frac forward/reverse dir trips for each bg
households_by_fips = acs[['households','fips']]
colsnow = colsnow.merge(households_by_fips, on='fips', how='left')
tot_trips = colsnow['tot_trips']
colsnow['tot_trips_ph'] = tot_trips.rdiv(colsnow['households']).rpow(1).mul(0).add(tot_trips/colsnow['households']) if False else tot_trips/colsnow['households']
# fd = forward commute direction (NB evening peak + SB morning peak)
fd_trips = colsnow['num_NB_e_pk_trips'].add(colsnow['num_SB_m_pk_trips'])
colsnow['frac_fd_trips'] = fd_trips.div(tot_trips)
# rd = reverse commute direction (NB morning peak + SB evening peak)
rd_trips = colsnow['num_NB_m_pk_trips'].add(colsnow['num_SB_e_pk_trips'])
colsnow['frac_rd_trips'] = rd_trips.div(tot_trips)

In [None]:
# - Look at the distrib of frac forward/reverse dir trips among all bgs
fig,axes=plt.subplots(1,2,figsize=(7,3))
for ax, col in zip(axes, ['frac_fd_trips', 'frac_rd_trips']):
    ax.hist(colsnow[col])
    ax.set_title(col)

In [None]:
# - Look at the distrib of tot trips among all bgs
# One 30-bin histogram per lower cut-off on tot_trips.
fig,axes=plt.subplots(1,2,figsize=(7,3))
tot_trips = colsnow['tot_trips']
for ax, min_trips in zip(axes, [100, 1000]):
    ax.hist(tot_trips[tot_trips > min_trips], bins=30)
    ax.set_title('tot_trips, >' + str(min_trips))

In [None]:
# - Join frac_rd_trips, frac_fd_trips to cbgs + calc centroids
cbgs_dfnow = cbgs.merge(colsnow, on='fips')
#cbgs_dfnow.dropna(subset = ['geometry'], inplace=True)
#cbgs_dfnow['geometry'].geom_type.unique()
# Compute the centroids once and read both coordinates from the same result.
bg_centroids = cbgs_dfnow.centroid
cbgs_dfnow['lon'] = bg_centroids.x
cbgs_dfnow['lat'] = bg_centroids.y

In [None]:
# - Plot maps of frac forward/reverse dir trips by bgs + above/below E-W line --> defining reverse/forward commute block groups
# Top row: all block groups, colour scale 0-1.
# Bottom row: only block groups with centroid lat > 47.6 (forward) / lat < 47.7 (reverse),
# colour scale 0-0.5.
MAP_XLIM = [-122.45, -121.85]
MAP_YLIM = [47.25, 48.22]
COMMUTE_FRAC_FIG_PATH = '/opt/dssg-hot/notebooks/shirley/figs/fips_frac_forward_reverse_commute_direction_trips.pdf'


def _plot_commute_frac_panel(ax, bgs, column, title, vmin, vmax):
    """Draw one choropleth panel of `column` for `bgs`, with the reference layers on top.

    Layers are drawn in this order: block groups, majlks, majrds, majtpsnb, majcps,
    then one text label per majcps point taken from its NAME attribute.
    """
    ax.set_ylim(MAP_YLIM)
    ax.set_xlim(MAP_XLIM)
    ax.set_title(title)
    bgs.plot(column=column, ax=ax, legend=True, vmin=vmin, vmax=vmax)
    majlks.plot(ax=ax, color='paleturquoise', linewidth=3)
    majrds.plot(ax=ax, color='gray', linewidth=3)
    majtpsnb.plot(ax=ax, color='magenta', markersize=100)
    majcps.plot(ax=ax, color='white')
    for x, y, label in zip(majcps.geometry.x, majcps.geometry.y, majcps.NAME):
        ax.annotate(label, xy=(x, y), xytext=(3, 3), textcoords='offset points', color='white')


fig,axes = plt.subplots(2, 2, figsize = (16,20))

fd_title = 'Frac forward commute direction trips'
rd_title = 'Frac reverse commute direction trips'
# (axis, block groups to draw, column, title, vmin, vmax)
panels = [
    (axes[0][0], cbgs_dfnow, 'frac_fd_trips', fd_title, 0, 1),
    (axes[0][1], cbgs_dfnow, 'frac_rd_trips', rd_title, 0, 1),
    (axes[1][0], cbgs_dfnow[cbgs_dfnow['lat']>47.6], 'frac_fd_trips', fd_title, 0, 0.5),
    (axes[1][1], cbgs_dfnow[cbgs_dfnow['lat']<47.7], 'frac_rd_trips', rd_title, 0, 0.5),
]
for panel_ax, panel_bgs, panel_col, panel_title, panel_vmin, panel_vmax in panels:
    _plot_commute_frac_panel(panel_ax, panel_bgs, panel_col, panel_title, panel_vmin, panel_vmax)

fig.savefig(COMMUTE_FRAC_FIG_PATH, bbox_inches='tight', pad_inches = 0)

In [None]:
# - Define majority forward/reverse dir bgs
# --> majority forward dir (fd) bg must:
# 1.) have >50% of trips be during NB evening peak or SB morning peak (frac_fd_trips),
# 2.) have its centroid above 47.6 deg N
# --> majority reverse dir (rd) bg must:
# 1.) have >50% of trips be during NB morning peak or SB evening peak (frac_rd_trips),
# 2.) have its centroid below 47.7 deg N
def maj_commute_dir_fxn(row):
    """Classify one block-group row as 'fd', 'rd' or 'nd'.

    'fd' when ``row['frac_fd_trips']`` > 0.5 and ``row['lat']`` > 47.6;
    otherwise 'rd' when ``row['frac_rd_trips']`` > 0.5 and ``row['lat']`` < 47.7;
    otherwise 'nd'. The forward test is checked first, so a row satisfying
    both is 'fd'.
    """
    if row['frac_fd_trips'] > 0.5 and row['lat'] > 47.6:
        return 'fd'
    if row['frac_rd_trips'] > 0.5 and row['lat'] < 47.7:
        return 'rd'
    return 'nd'

# Label every block group with its majority commute direction (row-wise apply of maj_commute_dir_fxn).
cbgs_dfnow['maj_commute_dir'] = cbgs_dfnow.apply(maj_commute_dir_fxn, axis=1)

In [None]:
# - Get all forward/reverse commuting trips within the forward commuting bgs
# --> forward commute trip must:
# 1.) be during NB evening peak or SB morning peak,
# 2.) belong to an id that has >20 trips/id, and
# 3.) be performed by a user from a majority forward dir trip bg
# --> reverse commute trip must:
# 1.) be during NB morning peak or SB evening peak,
# 2.) belong to an id that has >20 trips/id, and
# 3.) be performed by a user from a majority reverse dir trip bg

# Left merges keep every trip in dfnow (the default inner merge silently dropped trips
# whose id / fips had no match, shrinking dfnow for all later cells). Unmatched trips
# get NaN labels and are excluded by the filters below. Columns left by an earlier run
# of this cell are dropped first so that re-running does not create *_x / *_y columns.
dfnow = dfnow.drop(columns=['freq_group', 'maj_commute_dir'], errors='ignore')
dfnow = dfnow.merge(num_trips_by_id_fips[['freq_group','id']], on='id', how='left')
dfnow = dfnow.merge(cbgs_dfnow[['maj_commute_dir','fips']], on='fips', how='left')

pk_fc_filternow = dfnow['peak_group'].isin(['NB_e_pk', 'SB_m_pk'])
pk_rc_filternow = dfnow['peak_group'].isin(['NB_m_pk', 'SB_e_pk'])

# All freq groups that correspond to ids with >20 trips.
freq_filternow = dfnow['freq_group'].isin(['21-70', '71-200', '201-400', '>401'])

dir_fc_filternow = (dfnow['maj_commute_dir']=='fd')
dir_rc_filternow = (dfnow['maj_commute_dir']=='rd')

# Pair each peak direction with its own bg type, as the definitions above require:
# forward-peak trips from 'fd' bgs, or reverse-peak trips from 'rd' bgs.
# (The earlier form (fc | rc peak) & (fd | rd bg) also admitted the cross combinations.)
fc_rc_trips = dfnow[freq_filternow & (
    (pk_fc_filternow & dir_fc_filternow) | (pk_rc_filternow & dir_rc_filternow))]

In [None]:
# Number of trips that passed the forward/reverse commute filters.
len(fc_rc_trips)

In [None]:
# Group the selected commute trips by income level and majority commute direction.
# NOTE(review): `test` is not read by any later cell visible in this part of the notebook.
test=fc_rc_trips.groupby(['inc3_group','maj_commute_dir'])

In [None]:
# Toll distribution per (income level, majority commute direction) group.
# by='' named no column, so the call failed; the grouping keys used in the previous
# cell are used here.
fc_rc_trips.boxplot(column='toll', by=['inc3_group','maj_commute_dir']);

In [None]:
#fc_rc_trips.merge(fips_sov_grouped.agg({'toll': 'mean'}), on='fips')

In [None]:
# Start the per-block-group table: every cbgs_acs row plus the mean trip distance of its trips.
avg_dist_by_fips = fips_grouped.agg({'Dist btwn entry & exit loop': 'mean'})
cbgs_dfnow = cbgs_acs.merge(avg_dist_by_fips, on='fips', how='left')

In [None]:
# Add the mean toll per fips, computed over SOV trips only.
avg_sov_toll_by_fips = fips_sov_grouped.agg({'toll': 'mean'}) # avg toll over sov
cbgs_dfnow = cbgs_dfnow.merge(avg_sov_toll_by_fips, on='fips', how='left')

In [None]:
# Give the two per-fips means short column names.
avg_col_names = {'Dist btwn entry & exit loop':'avg_dist','toll':'avg_toll'}
cbgs_dfnow = cbgs_dfnow.rename(columns=avg_col_names)

In [None]:
# Add the number of unique trip_ids per fips as 'num_trips'.
num_trips_by_fips = fips_grouped['trip_id'].nunique().rename('num_trips')
cbgs_dfnow = cbgs_dfnow.merge(num_trips_by_fips, on='fips', how='left')

In [None]:
# Trips per capita (pc) and per household (ph).
trips_per_bg = cbgs_dfnow['num_trips']
cbgs_dfnow['trips_pc'] = trips_per_bg.div(cbgs_dfnow['population'])
cbgs_dfnow['trips_ph'] = trips_per_bg.div(cbgs_dfnow['households'])

In [None]:
# Add the number of unique ids per fips as 'num_ids', then ids per capita / per household.
num_ids_by_fips = fips_grouped['id'].nunique().rename('num_ids')
cbgs_dfnow = cbgs_dfnow.merge(num_ids_by_fips, on='fips', how='left')
ids_per_bg = cbgs_dfnow['num_ids']
cbgs_dfnow['ids_pc'] = ids_per_bg.div(cbgs_dfnow['population'])
cbgs_dfnow['ids_ph'] = ids_per_bg.div(cbgs_dfnow['households'])

In [None]:
# Add the mean number of trips per id within each fips as 'avg_num_trips_by_id'.
avg_trips_per_id_by_fips = num_trips_by_id_fips.groupby('fips').agg(
    {'num_trips_by_id':'mean'}).rename(
    columns={'num_trips_by_id':'avg_num_trips_by_id'})
cbgs_dfnow = cbgs_dfnow.merge(avg_trips_per_id_by_fips, on='fips', how='left')

In [None]:
# Mean trips per id, divided by the block group's household count.
cbgs_dfnow['avg_num_trips_by_id_per_hh'] = cbgs_dfnow['avg_num_trips_by_id'].div(
    cbgs_dfnow['households'])


In [None]:
# Percentage of each block group's trips that fall under each is_hov value.
fips_shov_grouped = dfnow.groupby(['fips','is_hov'])
trips_by_fips_hov = fips_shov_grouped['trip_id'].nunique()   # index: (fips, is_hov)
trips_by_fips = fips_grouped['trip_id'].nunique()            # index: fips
# level='fips' belongs to div(): it broadcasts the per-fips totals across the
# (fips, is_hov) index. It had been passed to groupby.agg() by a misplaced parenthesis.
perc_trips_within_bgnow = trips_by_fips_hov.div(trips_by_fips, level='fips')*100
# One column per is_hov value, fips back as an ordinary column for the merge.
perc_trips_within_bgnow = perc_trips_within_bgnow.unstack(level='is_hov').reset_index()
cbgs_dfnow = cbgs_dfnow.merge(perc_trips_within_bgnow, on='fips', how='left')

In [None]:
# The unstacked is_hov values (0, 1) became column labels; give them descriptive names.
perc_col_names = {0:'perc_sov_trips',1:'perc_hov_trips'}
cbgs_dfnow = cbgs_dfnow.rename(columns=perc_col_names)

In [None]:
# Add the per-fips peak trip counts and forward/reverse fractions computed in colsnow.
peak_cols = ['num_NB_m_pk_trips','num_SB_e_pk_trips',
             'num_NB_e_pk_trips','num_SB_m_pk_trips',
             'frac_fd_trips','frac_rd_trips']
cbgs_dfnow = cbgs_dfnow.merge(colsnow[['fips'] + peak_cols], on='fips', how='left')

In [None]:
# Rows without a geometry have no centroid, so drop them before computing lon/lat.
cbgs_dfnow.dropna(subset = ['geometry'], inplace=True)
# Compute the centroids once and read both coordinates from the same result.
bg_centroids = cbgs_dfnow.centroid
cbgs_dfnow['lon'] = bg_centroids.x
cbgs_dfnow['lat'] = bg_centroids.y

In [None]:
# Non-spatial copy of the block-group table: the geometry column is dropped and the
# result is wrapped in a plain pandas DataFrame.
cbgs_dfnow_pd = pd.DataFrame(cbgs_dfnow.drop(columns='geometry'))

In [None]:
# Write the per-block-group variables to cbgs_fip_vars.csv (relative path, so it lands
# in the kernel's current working directory).
cbgs_dfnow_pd.to_csv(r'cbgs_fip_vars.csv')

In [None]:
# Preview the first two rows of the exported table.
cbgs_dfnow_pd.head(2)

#### Avg SOV toll vs. avg HOT trip length

In [None]:
# One point per block group: mean trip distance (all trips) vs mean toll (SOV trips).
# Inner joins: only block groups that have both values are plotted.
cbgs_joinnow = cbgs.merge(fips_grouped.agg(
    {'Dist btwn entry & exit loop': 'mean'}), on='fips') # avg trip length
cbgs_joinnow = cbgs_joinnow.merge(fips_sov_grouped.agg(
    {'toll': 'mean'}), on='fips') # avg toll over sov
fig, ax = plt.subplots()
ax.plot(cbgs_joinnow['Dist btwn entry & exit loop'],cbgs_joinnow['toll'],'.')
ax.set_xlabel("mean 'Dist btwn entry & exit loop' per block group")
ax.set_ylabel('mean SOV toll per block group');

#### Avg SOV toll vs. med inc

In [None]:
# One point per block group: median income vs mean toll (SOV trips).
cbgs_joinnow = cbgs_acs.merge(fips_sov_grouped.agg(
    {'toll': 'mean'}), on='fips') # avg toll over sov
fig, ax = plt.subplots()
ax.plot(cbgs_joinnow['med_inc'],cbgs_joinnow['toll'],'.')
ax.set_xlabel('med_inc of block group')
ax.set_ylabel('mean SOV toll per block group');

#### Avg length trip vs. med inc

In [None]:
# One point per block group: median income vs mean trip distance (all trips).
cbgs_joinnow = cbgs_acs.merge(fips_grouped.agg(
    {'Dist btwn entry & exit loop': 'mean'}), on='fips') # avg trip length
fig, ax = plt.subplots()
ax.plot(cbgs_joinnow['med_inc'],cbgs_joinnow['Dist btwn entry & exit loop'],'.')
ax.set_xlabel('med_inc of block group')
ax.set_ylabel("mean 'Dist btwn entry & exit loop' per block group");

# Spatial clustering