# Log of changes from original calculation

* Urban/rural split is now based on MSASIZE instead of URBRUR variable from NHTS. Now "rural" is outside of any MSA, instead of just not in an urbanized region.
* Added an option to include demand from transit trips in addition to private vehicles and taxis.
* Now breaking down into three different times of week: Sa/Su, M/F, and Tu/W/Th.

# Important things to note:
* This provides the *trip* demand, where a trip is point-to-point travel for one or more members of the same household. So if a family of six all takes the bus, that's *one* trip. This has implications for our assumptions about the sharing factor.
* Trips >=300 mi. are thrown out
* This produces *annual* trip demand values. So for the TU/WE/TH midweek time period, this is all trips on all Tuesdays, Wednesdays, and Thursdays for the whole year. Need to multiply by 7/3./365. to get a daily value.

# To do
* Seasonal variation

In [192]:
import pandas as pd
import numpy as np

import matplotlib.gridspec as gridspec
%matplotlib notebook

## Set up

In [193]:
#Point to a directory containing the NHTS trippub.csv dataset
data_dir = 'C:\\Users\\bgerke.DOMINO0\\Desktop\\NHTS\\'

#Should we include demand from trips on mass transit (public transit, school buses, private shuttles, etc.)?
include_transit = True



In [194]:
#Read in main dataset
trippub_all = pd.read_csv(data_dir+'trippub.csv')
trippub_all.head()
#Trips

Unnamed: 0,HOUSEID,PERSONID,TDTRPNUM,STRTTIME,ENDTIME,TRVLCMIN,TRPMILES,TRPTRANS,TRPACCMP,TRPHHACC,...,HTRESDN,SMPLSRCE,R_AGE,EDUC,R_SEX,PRMACT,PROXY,WORKER,DRIVER,WTTRDFIN
0,30000007,1,1,1000,1015,15,5.244,3,0,0,...,750,2,67,3,2,6,1,2,1,75441.905796
1,30000007,1,2,1510,1530,20,5.149,3,0,0,...,750,2,67,3,2,6,1,2,1,75441.905796
2,30000007,2,1,700,900,120,84.004,6,0,0,...,750,2,66,3,1,1,2,1,1,71932.645806
3,30000007,2,2,1800,2030,150,81.628,6,0,0,...,750,2,66,3,1,1,2,1,1,71932.645806
4,30000007,3,1,845,900,15,2.25,3,0,0,...,750,2,28,2,2,5,2,2,1,80122.686739


In [195]:
trippub_all.columns.values

array(['HOUSEID', 'PERSONID', 'TDTRPNUM', 'STRTTIME', 'ENDTIME',
       'TRVLCMIN', 'TRPMILES', 'TRPTRANS', 'TRPACCMP', 'TRPHHACC',
       'VEHID', 'TRWAITTM', 'NUMTRANS', 'TRACCTM', 'DROP_PRK', 'TREGRTM',
       'WHODROVE', 'WHYFROM', 'LOOP_TRIP', 'TRPHHVEH', 'HHMEMDRV',
       'HH_ONTD', 'NONHHCNT', 'NUMONTRP', 'PSGR_FLG', 'PUBTRANS',
       'TRIPPURP', 'DWELTIME', 'TDWKND', 'VMT_MILE', 'DRVR_FLG',
       'WHYTRP1S', 'WHYTRP90', 'ONTD_P1', 'ONTD_P2', 'ONTD_P3', 'ONTD_P4',
       'ONTD_P5', 'ONTD_P6', 'ONTD_P7', 'ONTD_P8', 'ONTD_P9', 'ONTD_P10',
       'ONTD_P11', 'ONTD_P12', 'ONTD_P13', 'TDCASEID', 'TRACC_WLK',
       'TRACC_POV', 'TRACC_BUS', 'TRACC_CRL', 'TRACC_SUB', 'TRACC_OTH',
       'TREGR_WLK', 'TREGR_POV', 'TREGR_BUS', 'TREGR_CRL', 'TREGR_SUB',
       'TREGR_OTH', 'WHYTO', 'TRAVDAY', 'HOMEOWN', 'HHSIZE', 'HHVEHCNT',
       'HHFAMINC', 'DRVRCNT', 'HHSTATE', 'HHSTFIPS', 'NUMADLT',
       'WRKCOUNT', 'TDAYDATE', 'HHRESP', 'LIF_CYC', 'MSACAT', 'MSASIZE',
       'RAIL', 'URBAN', '

In [196]:
#Personal motor vehicle codes from code book
#This is car, suv, van, pickup truck,  motorcycle, RV, rental car
#LEAVES OUT 17-taxis/TNCs, because (I believe) we cannot weight these correctly since must weight by driver. 

pmvcodes = [3,4,5,6,8,9,18]
selection = (trippub_all['TRPTRANS'].isin(pmvcodes)) & (trippub_all['DRVR_FLG']==1)


    
trippub = trippub_all.loc[selection]
print(trippub['TRPTRANS'].count())
print(trippub['WTTRDFIN'].sum())


611342
220429661377.4252


In [197]:
taxitrips = trippub_all.loc[trippub_all['TRPTRANS']==17].groupby(
    ['HOUSEID','TDAYDATE','STRTTIME'], as_index=False).first()
#The above cuts out duplicate trips, where two people in the same household reported the same taxi trip. Serves
#a similar purpose to restricting by driver flag
trippub = trippub.append(taxitrips)
trippub.reset_index(drop=True, inplace=True)
print(taxitrips['TRPTRANS'].count())
print(taxitrips['WTTRDFIN'].sum())
#Surprisingly few taxi trips...

2394
1615969202.3435988


In [198]:
if include_transit:
    print('Including transit trips in total.')
    #Transit vehicle codes from code book are 10-16 
    trnstcodes = list(range(10,17))
    trnsttrips = trippub_all.loc[trippub_all['TRPTRANS'].isin(trnstcodes)].groupby(
        ['HOUSEID','TDAYDATE','STRTTIME'], as_index=False).first()
    trippub = trippub.append(trnsttrips)
    trippub.reset_index(drop=True, inplace=True)
    print(trnsttrips['TRPTRANS'].count())
    print(trnsttrips['WTTRDFIN'].sum())
else:
    print('Excluding transit trips from total')

Including transit trips in total.
21890
15763885231.569706


In [199]:
#Total raw and weighted trip numbers
print(trippub['TRPTRANS'].count())
print(trippub['WTTRDFIN'].sum())

635626
237809515811.33847


In [200]:
#Print avg mileages by trip type
print(trippub['TRPMILES'].mul(trippub['WTTRDFIN']).sum()/trippub['WTTRDFIN'].sum())
print(taxitrips['TRPMILES'].mul(taxitrips['WTTRDFIN']).sum()/taxitrips['WTTRDFIN'].sum())
if include_transit: 
    print(trnsttrips['TRPMILES'].mul(trnsttrips['WTTRDFIN']).sum()/trnsttrips['WTTRDFIN'].sum())

9.541925770577617
8.222858752112181
9.524062680108994


In [201]:
trippub.tail()

Unnamed: 0,CDIVMSAR,CENSUS_D,CENSUS_R,DRIVER,DROP_PRK,DRVRCNT,DRVR_FLG,DWELTIME,EDUC,ENDTIME,...,VEHID,VMT_MILE,WHODROVE,WHYFROM,WHYTO,WHYTRP1S,WHYTRP90,WORKER,WRKCOUNT,WTTRDFIN
635621,33,3,2,-1,-1,2,-1,420,-1,730,...,-1,-1.0,-1,1,8,20,5,-1,2,87985.85
635622,33,3,2,-1,-1,2,-1,-9,-1,1500,...,-1,-1.0,-1,8,1,1,5,-1,2,87985.85
635623,21,2,1,1,2,3,-1,505,4,935,...,-1,-1.0,-1,1,3,10,1,1,3,1314464.0
635624,21,2,1,1,-1,3,-1,-9,4,1955,...,-1,-1.0,-1,3,1,1,1,1,3,1314464.0
635625,53,5,3,-1,-1,2,-1,-9,1,1545,...,-1,-1.0,-1,8,1,1,5,-1,2,159615.3


In [202]:
#Trim out long road trips
trippub = trippub.loc[trippub['TRPMILES'] < 300]
print(len(trippub))
print(trippub['WTTRDFIN'].sum())

print(trippub['TRPMILES'].mul(trippub['WTTRDFIN']).sum()/trippub['WTTRDFIN'].sum())

635052
237581417926.02374
8.910031495709765


In [203]:
#Create Census Division/ Large State category with urban/rural split.
#Also Census region urban/rural split
cdiv = {1:'NENG', 2:'MAT', 3:'ENC', 4:'WNC', 5:'SAT', 6:'ESC', 7:'WSC', 8:'MTN', 9:'PAC'}
creg = {1:'NEAST', 2:'MIDW', 3:'SOUTH', 4:'WEST'}

for k in cdiv.keys():
    trippub.loc[trippub['CENSUS_D']==k, 'CDIVLS'] = cdiv[k]
for k in creg.keys():
    trippub.loc[trippub['CENSUS_R']==k, 'REGION'] = creg[k]

lgst = ['CA', 'NY','FL','TX']

for s in lgst:
    div = cdiv[trippub.loc[trippub['HHSTATE']==s, 'CENSUS_D'].unique()[0]]
    #print reg
    trippub.loc[(trippub['HHSTATE']==s), 'CDIVLS'] = div+'-'+s
    trippub.loc[(trippub['CDIVLS']==div) & (trippub['HHSTATE']!=s), 'CDIVLS'] = div+'-NL' 
    

#Turn urban/rural codes into strings    
trippub['URBRURS'] = 'RUR'
#trippub.loc[trippub['URBRUR'] == 1, 'URBRURS'] = 'URB'
#Instead of the above, divide urban vs rural according to metropolitan statistical area size: all MSAs are urban
trippub.loc[trippub['MSASIZE']<6,'URBRURS'] = 'URB'


print trippub['CDIVLS'].unique()
print trippub['REGION'].unique()
print trippub['URBRURS'].unique()

['SAT-NL' 'ENC' 'MAT-NY' 'MAT-NL' 'PAC-CA' 'WSC-TX' 'PAC-NL' 'ESC' 'MTN'
 'WNC' 'NENG' 'SAT-FL' 'WSC-NL']
['SOUTH' 'MIDW' 'NEAST' 'WEST']
['URB' 'RUR']


In [204]:
#Code different times of week.
wktime = {'SA/SU':[1,7], 'MO/FR':[2,6], 'TU/WE/TH': [3,4,5]}
trippub['WKTIME'] = ''
for k in wktime.keys():
    trippub.loc[trippub['TRAVDAY'].isin(wktime[k]), 'WKTIME'] = k
print trippub['WKTIME'].unique()

['MO/FR' 'TU/WE/TH' 'SA/SU']


In [205]:
#Set mileage bin edges
mibins=[0,2,5,10,20,30,50,100,300]
mibin_labels=pd.Series(mibins[:-1]).astype(str).str.cat(pd.Series(mibins[1:]).astype(str), sep='-')

In [206]:
#Categorize trips by mileage

trippub['MILEBIN'] = pd.cut(trippub['TRPMILES'], mibins, labels=mibin_labels)


In [207]:
#Compute distance histograms (and average distances) by CDLS

dist_hists = trippub.groupby(['CDIVLS', 'URBRURS',
                              'WKTIME','MILEBIN']).agg({'WTTRDFIN':[len, np.sum], 
                                                               'TRPMILES':np.mean,
                                                               'REGION':'first'}
                                                       ).rename(columns=
                                                                {'mean':'AVGDIST',
                                                                 'first':'REGION', 
                                                                 'len':'COUNTSRAW',
                                                                 'sum':'COUNTSWTD'}, level=1)

dist_hists.columns = dist_hists.columns.droplevel(0)
dist_hists

#NOTE: COUNTSWTD here represents the total ANNNUAL number of trips in each bin.

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,AVGDIST,REGION,COUNTSRAW,COUNTSWTD
CDIVLS,URBRURS,WKTIME,MILEBIN,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ENC,RUR,MO/FR,0-2,0.977429,MIDW,2053.0,7.365273e+08
ENC,RUR,MO/FR,2-5,3.231085,MIDW,1298.0,3.772505e+08
ENC,RUR,MO/FR,5-10,7.281436,MIDW,1002.0,2.685436e+08
ENC,RUR,MO/FR,10-20,14.354197,MIDW,879.0,2.740703e+08
ENC,RUR,MO/FR,20-30,24.290520,MIDW,369.0,1.134880e+08
ENC,RUR,MO/FR,30-50,38.296500,MIDW,294.0,7.924429e+07
ENC,RUR,MO/FR,50-100,65.612557,MIDW,149.0,3.309224e+07
ENC,RUR,MO/FR,100-300,163.518387,MIDW,75.0,1.288694e+07
ENC,RUR,SA/SU,0-2,0.990881,MIDW,904.0,5.914655e+08
ENC,RUR,SA/SU,2-5,3.264059,MIDW,522.0,3.314264e+08


In [208]:
#Check avg mileages
print(dist_hists['AVGDIST'].mul(dist_hists['COUNTSWTD']).sum()/dist_hists['COUNTSWTD'].sum())

8.907404971826649


In [209]:
trippub['STRTHOUR'] = pd.cut(trippub['STRTTIME'], np.arange(25)*100, labels=np.arange(24))

In [210]:
#Compute hourly trip volume profiles by region and urb/rural

hourly_profiles = trippub.groupby(['REGION', 
                                   'URBRURS', 
                                   'WKTIME', 
                                   'MILEBIN',
                                   'STRTHOUR'])['WTTRDFIN'].agg([len, 
                                                                 np.sum]).rename(columns=
                                                                                 {'len':'COUNTSRAW',
                                                                                  'sum':'COUNTSWTD'})
#NOTE: COUNTSWTD here represents the total ANNNUAL number of trips in each bin.

In [211]:
hourly_profiles

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,COUNTSRAW,COUNTSWTD
REGION,URBRURS,WKTIME,MILEBIN,STRTHOUR,Unnamed: 5_level_1,Unnamed: 6_level_1
MIDW,RUR,MO/FR,0-2,0,2.0,2.815297e+06
MIDW,RUR,MO/FR,0-2,1,1.0,3.220029e+05
MIDW,RUR,MO/FR,0-2,2,1.0,3.220029e+05
MIDW,RUR,MO/FR,0-2,3,1.0,4.899141e+06
MIDW,RUR,MO/FR,0-2,4,16.0,2.741188e+06
MIDW,RUR,MO/FR,0-2,5,35.0,1.733291e+07
MIDW,RUR,MO/FR,0-2,6,74.0,3.852619e+07
MIDW,RUR,MO/FR,0-2,7,169.0,1.068498e+08
MIDW,RUR,MO/FR,0-2,8,151.0,9.021114e+07
MIDW,RUR,MO/FR,0-2,9,186.0,1.055746e+08


In [212]:
hourly_profiles['TRIPPCT'] = 0
for row in [(a,b,c,d) 
            for a in hourly_profiles.index.levels[0].unique() 
            for b in hourly_profiles.index.levels[1].unique() 
            for c in hourly_profiles.index.levels[2].unique() 
            for d in hourly_profiles.index.levels[3].unique()]:
    pct=hourly_profiles.loc[row,'COUNTSWTD']/(hourly_profiles.loc[row,'COUNTSWTD'].sum(skipna=True)+1.e-10)
    for h in pct.index.values: 
        hourly_profiles.loc[row+(h,), 'TRIPPCT'] = pct[h]

In [213]:
hourly_profiles.unstack('STRTHOUR', fill_value=0)['TRIPPCT'].T

REGION,MIDW,MIDW,MIDW,MIDW,MIDW,MIDW,MIDW,MIDW,MIDW,MIDW,...,WEST,WEST,WEST,WEST,WEST,WEST,WEST,WEST,WEST,WEST
URBRURS,RUR,RUR,RUR,RUR,RUR,RUR,RUR,RUR,RUR,RUR,...,URB,URB,URB,URB,URB,URB,URB,URB,URB,URB
WKTIME,MO/FR,MO/FR,MO/FR,MO/FR,MO/FR,MO/FR,MO/FR,MO/FR,SA/SU,SA/SU,...,SA/SU,SA/SU,TU/WE/TH,TU/WE/TH,TU/WE/TH,TU/WE/TH,TU/WE/TH,TU/WE/TH,TU/WE/TH,TU/WE/TH
MILEBIN,0-2,2-5,5-10,10-20,20-30,30-50,50-100,100-300,0-2,2-5,...,50-100,100-300,0-2,2-5,5-10,10-20,20-30,30-50,50-100,100-300
STRTHOUR,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
0,0.001968,0.000942,0.004287,0.009805,0.023352,0.000112,0.06721,0.0,0.000559,0.005031,...,0.008078,0.00637,0.000427,0.001371,0.001316,0.001945,0.001857,0.006958,0.000613,0.0
1,0.000225,0.00161,0.000276,0.000164,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00394,0.0,0.000417,0.001225,0.002555,0.002701,0.002078,0.002581,0.006676,0.002026
2,0.000225,0.0,0.000407,0.000394,0.0,0.0,0.0,0.0,0.001205,0.0,...,0.0,0.0,0.000212,0.000623,0.000522,0.000276,0.003782,0.0,0.003193,0.0
3,0.003424,0.000173,0.000217,0.0,0.000626,0.000291,0.00122,0.0,0.003252,0.0,...,0.0,0.001894,0.000422,0.000747,0.0007,0.00065,0.000577,0.001937,0.001612,0.0
4,0.001916,0.011317,0.0285,0.005154,0.023082,0.017189,0.023917,0.013611,0.009119,0.008179,...,0.024303,0.011887,0.004666,0.002121,0.01017,0.015894,0.013808,0.039915,0.053045,0.057278
5,0.012115,0.010799,0.005468,0.032767,0.022143,0.035281,0.005015,0.1851,0.010472,0.010602,...,0.017214,0.038426,0.010764,0.017212,0.026505,0.034193,0.084883,0.061876,0.075502,0.024218
6,0.026929,0.042229,0.048411,0.075674,0.111509,0.201178,0.083333,0.029019,0.017996,0.030094,...,0.054573,0.035242,0.025711,0.034267,0.056317,0.076133,0.082862,0.081699,0.07276,0.084606
7,0.074685,0.097363,0.094418,0.103394,0.105384,0.069065,0.006231,0.059962,0.034459,0.025058,...,0.021396,0.024849,0.087838,0.090914,0.097012,0.106979,0.086659,0.068607,0.07931,0.049711
8,0.063055,0.042089,0.051273,0.043895,0.024613,0.051323,0.049672,0.104679,0.050591,0.067775,...,0.042858,0.103351,0.075453,0.075626,0.072526,0.065406,0.067937,0.068784,0.054514,0.038475
9,0.073793,0.059159,0.060742,0.031881,0.037657,0.072721,0.05315,0.014323,0.094035,0.071187,...,0.093366,0.075207,0.054609,0.055554,0.047544,0.044528,0.033163,0.039359,0.045671,0.082481


In [214]:
hourly_profiles_agg = trippub.groupby(['URBRURS', 
                                   'WKTIME', 
                                   'MILEBIN',
                                   'STRTHOUR'])['WTTRDFIN'].agg([len, 
                                                                 np.sum]).rename(columns=
                                                                                 {'len':'COUNTSRAW',
                                                                                  'sum':'COUNTSWTD'})

In [215]:
hourly_profiles_agg['TRIPPCT'] = 0
for row in [(a,b,c) 
            for a in hourly_profiles_agg.index.levels[0].unique() 
            for b in hourly_profiles_agg.index.levels[1].unique() 
            for c in hourly_profiles_agg.index.levels[2].unique()]:
    pct=hourly_profiles_agg.loc[row,'COUNTSWTD']/(hourly_profiles_agg.loc[row,'COUNTSWTD'].sum(skipna=True)+1.e-10)
    for h in pct.index.values: 
        hourly_profiles_agg.loc[row+(h,), 'TRIPPCT'] = pct[h]

In [216]:
hourly_profiles_agg

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,COUNTSRAW,COUNTSWTD,TRIPPCT
URBRURS,WKTIME,MILEBIN,STRTHOUR,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RUR,MO/FR,0-2,0,6.0,3.686144e+06,0.000966
RUR,MO/FR,0-2,1,8.0,9.804437e+06,0.002571
RUR,MO/FR,0-2,2,1.0,3.220029e+05,0.000084
RUR,MO/FR,0-2,3,1.0,4.899141e+06,0.001285
RUR,MO/FR,0-2,4,29.0,3.587461e+06,0.000941
RUR,MO/FR,0-2,5,88.0,3.292373e+07,0.008632
RUR,MO/FR,0-2,6,232.0,9.493033e+07,0.024890
RUR,MO/FR,0-2,7,680.0,2.988308e+08,0.078351
RUR,MO/FR,0-2,8,546.0,2.453281e+08,0.064323
RUR,MO/FR,0-2,9,709.0,2.261331e+08,0.059290


In [217]:
def plot_dists_by_region(plotdata, regions=['NEAST','SOUTH','MIDW','WEST'], urbrur='URB', wktime='WEEKDAY',
                         alldata = None,
                         colors=None):
    fig = plt.figure()
    gs = gridspec.GridSpec(nrows=4, ncols=2, bottom=0.2)

    for i in range(len(mibins[:-1])):
        fig.add_subplot(gs[i/2, i%2])

    axs = fig.axes
    
    if not colors:
        colors=['forestgreen','red','dodgerblue','purple']

    #plotdata = dists.unstack('STRTHOUR', fill_value=0)['TRIPPCT'].T
    for i, reg in enumerate(regions):
        sel=(reg,urbrur,wktime)
        print sel
        #plotdata = dists.loc[sel].unstack('STRTHOUR', fill_value=0)['TRIPPCT'].T
        #plotdata.index = np.arange(24)
        plotdata[sel].plot(subplots=True, legend=None, color=colors[i], ax=axs, linewidth=1)

    if alldata is not None:
        alldata[(urbrur,wktime)].plot(subplots=True, legend=None,color='k', ax=axs, linewidth=2)
    for i, ax in enumerate(axs):
        col = plotdata[sel].columns[i]
        miles = col.split('-')
        ax.annotate(str(miles[0])+'-'+str(miles[1])+' mi.', [0.03,0.8], xycoords='axes fraction')
        if i > 5:
            ax.set_xlabel('Hour of day')
            
        if i % 2 == 0:
            ax.set_ylabel('Annual trips')

    plt.legend(ax.lines, regions,ncol=4, loc=2, bbox_to_anchor=(0.15,0.1), bbox_transform=fig.transFigure)
    
    if urbrur == 'URB': 
        urname='URBAN' 
    else: 
        urname='RURAL'
    fig.suptitle(urname+' '+wktime)
    
plt.rcParams['figure.figsize'] = [8, 8]    
plotdata = hourly_profiles.unstack('STRTHOUR', fill_value=0)['TRIPPCT'].T
alldata = hourly_profiles_agg.unstack('STRTHOUR', fill_value=0)['TRIPPCT'].T
plot_dists_by_region(plotdata, urbrur='URB', wktime='TU/WE/TH', alldata=alldata)

<IPython.core.display.Javascript object>

('NEAST', 'URB', 'TU/WE/TH')
('SOUTH', 'URB', 'TU/WE/TH')
('MIDW', 'URB', 'TU/WE/TH')
('WEST', 'URB', 'TU/WE/TH')


In [218]:
dist_hour_hists = dist_hists.copy()
dist_hour_hists.rename(columns={'COUNTSRAW':'NRAW', 'COUNTSWTD':'NWTD'},inplace=True)
dist_hour_hists

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,AVGDIST,REGION,NRAW,NWTD
CDIVLS,URBRURS,WKTIME,MILEBIN,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ENC,RUR,MO/FR,0-2,0.977429,MIDW,2053.0,7.365273e+08
ENC,RUR,MO/FR,2-5,3.231085,MIDW,1298.0,3.772505e+08
ENC,RUR,MO/FR,5-10,7.281436,MIDW,1002.0,2.685436e+08
ENC,RUR,MO/FR,10-20,14.354197,MIDW,879.0,2.740703e+08
ENC,RUR,MO/FR,20-30,24.290520,MIDW,369.0,1.134880e+08
ENC,RUR,MO/FR,30-50,38.296500,MIDW,294.0,7.924429e+07
ENC,RUR,MO/FR,50-100,65.612557,MIDW,149.0,3.309224e+07
ENC,RUR,MO/FR,100-300,163.518387,MIDW,75.0,1.288694e+07
ENC,RUR,SA/SU,0-2,0.990881,MIDW,904.0,5.914655e+08
ENC,RUR,SA/SU,2-5,3.264059,MIDW,522.0,3.314264e+08


In [219]:
hcols=[]
for h in range(24):
    col='NWTD_'+format(h,'02d')
    hcols.append(col)
    dist_hour_hists[h]=0
    
dist_hour_hists

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,AVGDIST,REGION,NRAW,NWTD,0,1,2,3,4,5,...,14,15,16,17,18,19,20,21,22,23
CDIVLS,URBRURS,WKTIME,MILEBIN,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
ENC,RUR,MO/FR,0-2,0.977429,MIDW,2053.0,7.365273e+08,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENC,RUR,MO/FR,2-5,3.231085,MIDW,1298.0,3.772505e+08,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENC,RUR,MO/FR,5-10,7.281436,MIDW,1002.0,2.685436e+08,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENC,RUR,MO/FR,10-20,14.354197,MIDW,879.0,2.740703e+08,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENC,RUR,MO/FR,20-30,24.290520,MIDW,369.0,1.134880e+08,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENC,RUR,MO/FR,30-50,38.296500,MIDW,294.0,7.924429e+07,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENC,RUR,MO/FR,50-100,65.612557,MIDW,149.0,3.309224e+07,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENC,RUR,MO/FR,100-300,163.518387,MIDW,75.0,1.288694e+07,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENC,RUR,SA/SU,0-2,0.990881,MIDW,904.0,5.914655e+08,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENC,RUR,SA/SU,2-5,3.264059,MIDW,522.0,3.314264e+08,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [220]:
for reg in dist_hists.index.levels[0]:
    print reg
    dist_hour_hists.loc[reg,range(24)] = \
        hourly_profiles_agg['TRIPPCT'].unstack('STRTHOUR').T.mul(\
                                            dist_hists.loc[reg,'COUNTSWTD']).fillna(0.).T.loc[dist_hists.loc[reg].index].values
        #The final reindexing by dist_hists.loc[reg].index is essential to get the rows in the right order!

dist_hour_hists
#hourly_profiles_agg['TRIPPCT'].unstack('STRTHOUR').T.mul(dist_hists.loc[reg,'COUNTSWTD']).fillna(0.).T

ENC
ESC
MAT-NL
MAT-NY
MTN
NENG
PAC-CA
PAC-NL
SAT-FL
SAT-NL
WNC
WSC-NL
WSC-TX


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,AVGDIST,REGION,NRAW,NWTD,0,1,2,3,4,5,...,14,15,16,17,18,19,20,21,22,23
CDIVLS,URBRURS,WKTIME,MILEBIN,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
ENC,RUR,MO/FR,0-2,0.977429,MIDW,2053.0,7.365273e+08,7.118388e+05,1.893355e+06,6.218262e+04,9.460831e+05,6.927819e+05,6.357968e+06,...,5.432261e+07,7.202412e+07,5.930942e+07,4.856075e+07,3.495794e+07,1.747881e+07,1.436761e+07,1.139699e+07,5.313715e+06,4.097542e+06
ENC,RUR,MO/FR,2-5,3.231085,MIDW,1298.0,3.772505e+08,7.322380e+05,2.489112e+05,0.000000e+00,5.181101e+04,2.181472e+06,4.608214e+06,...,3.213945e+07,3.905788e+07,2.668777e+07,2.706394e+07,1.755234e+07,1.278226e+07,9.880033e+06,5.552027e+06,5.339637e+06,1.127161e+06
ENC,RUR,MO/FR,5-10,7.281436,MIDW,1002.0,2.685436e+08,1.496770e+06,1.397469e+05,8.671463e+04,1.547319e+05,2.979315e+06,3.396590e+06,...,2.158101e+07,2.104960e+07,2.408824e+07,2.092796e+07,1.413438e+07,7.861311e+06,3.880433e+06,2.145437e+06,3.815172e+06,1.918710e+06
ENC,RUR,MO/FR,10-20,14.354197,MIDW,879.0,2.740703e+08,1.590864e+06,6.842560e+04,8.579867e+04,1.407593e+04,1.671303e+06,7.927602e+06,...,1.917182e+07,2.655533e+07,3.334682e+07,2.051089e+07,8.828664e+06,4.680423e+06,4.708502e+06,9.573806e+06,2.798040e+06,3.579550e+06
ENC,RUR,MO/FR,20-30,24.290520,MIDW,369.0,1.134880e+08,1.169090e+06,6.415990e+04,2.667856e+04,2.210892e+05,1.552895e+06,2.566231e+06,...,5.430065e+06,6.935118e+06,1.177731e+07,1.130890e+07,5.093538e+06,2.524112e+06,1.908660e+06,2.085145e+06,1.550561e+06,1.652065e+06
ENC,RUR,MO/FR,30-50,38.296500,MIDW,294.0,7.924429e+07,9.267111e+04,1.706350e+05,1.388824e+04,1.669152e+04,1.310885e+06,2.744821e+06,...,3.131776e+06,6.845368e+06,8.999008e+06,5.264953e+06,3.134456e+06,1.922564e+06,1.840017e+06,9.747078e+05,6.365631e+05,2.307092e+05
ENC,RUR,MO/FR,50-100,65.612557,MIDW,149.0,3.309224e+07,6.963952e+05,6.024538e+04,1.388401e+04,1.464938e+04,6.857561e+05,7.011256e+05,...,2.224961e+06,3.548177e+06,2.904726e+06,1.478684e+06,9.129873e+05,1.329555e+06,4.912695e+04,1.616612e+05,2.003012e+05,5.958254e+04
ENC,RUR,MO/FR,100-300,163.518387,MIDW,75.0,1.288694e+07,7.363645e+04,0.000000e+00,0.000000e+00,8.099001e+04,1.174687e+05,6.076780e+05,...,1.720467e+06,6.188757e+05,1.340219e+05,1.399204e+06,2.084837e+05,3.028815e+04,8.583925e+04,3.680482e+04,0.000000e+00,0.000000e+00
ENC,RUR,SA/SU,0-2,0.990881,MIDW,904.0,5.914655e+08,1.569396e+06,5.796793e+04,4.563162e+05,4.788266e+06,3.044293e+06,6.471778e+06,...,5.156601e+07,4.434974e+07,3.476513e+07,3.000566e+07,2.724543e+07,1.861133e+07,1.011332e+07,1.074417e+07,7.907654e+06,3.448187e+06
ENC,RUR,SA/SU,2-5,3.264059,MIDW,522.0,3.314264e+08,9.562455e+05,1.508814e+05,0.000000e+00,0.000000e+00,1.955736e+06,1.485769e+06,...,2.429265e+07,2.404732e+07,2.252215e+07,2.146486e+07,1.950209e+07,9.919507e+06,5.447646e+06,7.106126e+06,3.825010e+06,2.806811e+06


In [221]:
hourly_profiles_agg['TRIPPCT'].unstack('STRTHOUR').T.mul(dist_hists.loc[reg,'COUNTSWTD']).fillna(0.).T.loc[dist_hour_hists.loc[reg].index]

Unnamed: 0_level_0,Unnamed: 1_level_0,STRTHOUR,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
URBRURS,WKTIME,MILEBIN,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
RUR,MO/FR,0-2,229905.2,611503.8,20083.35,305560.0,223750.3,2053456.0,5920814.0,18638110.0,15301140.0,14103950.0,...,17544770.0,23261900.0,19155380.0,15683840.0,11290500.0,5645197.0,4640361.0,3680929.0,1716190.0,1323398.0
RUR,MO/FR,2-5,279655.8,95063.99,0.0,19787.62,833146.3,1759966.0,6400198.0,16763500.0,6423790.0,8428140.0,...,12274680.0,14916960.0,10192570.0,10336240.0,6703577.0,4881793.0,3773375.0,2120426.0,2039310.0,430484.5
RUR,MO/FR,5-10,400720.0,37413.48,23215.52,41425.31,797631.7,909345.9,4834290.0,7757952.0,3840130.0,4489716.0,...,5777735.0,5635466.0,6448981.0,5602900.0,3784100.0,2104655.0,1038882.0,574383.1,1021410.0,513683.1
RUR,MO/FR,10-20,354678.5,15255.29,19128.57,3138.189,372612.2,1767436.0,3882346.0,5541945.0,2793676.0,2959141.0,...,4274303.0,5920434.0,7434579.0,4572846.0,1968326.0,1043487.0,1049747.0,2134453.0,623815.0,798050.6
RUR,MO/FR,20-30,264899.7,14537.75,6044.997,50095.78,351864.7,581472.7,2177757.0,2818514.0,677095.3,958942.1,...,1230378.0,1571403.0,2668576.0,2562443.0,1154126.0,571929.2,432476.3,472465.3,351335.9,374335.2
RUR,MO/FR,30-50,33695.14,62042.76,5049.754,6069.024,476636.8,998015.0,4085103.0,1504854.0,1011417.0,1863159.0,...,1138711.0,2488971.0,3272032.0,1914333.0,1139686.0,699042.8,669028.7,354403.0,231453.9,83885.7
RUR,MO/FR,50-100,495217.7,42841.44,9873.138,10417.4,487652.0,498581.5,1204747.0,836763.5,1904253.0,1733572.0,...,1582205.0,2523165.0,2065597.0,1051516.0,649239.7,945467.3,34934.95,114959.8,142437.3,42370.09
RUR,MO/FR,100-300,47291.44,0.0,0.0,52014.11,75441.74,390268.2,447517.3,578233.0,586414.2,1048459.0,...,1104933.0,397459.7,86072.7,898608.7,133894.2,19451.92,55128.43,23637.11,0.0,0.0
RUR,SA/SU,0-2,433495.6,16011.79,126042.8,1322606.0,840888.9,1787622.0,2004889.0,5569748.0,9485593.0,14399650.0,...,14243470.0,12250200.0,9602759.0,8288107.0,7525682.0,5140786.0,2793483.0,2967734.0,2184237.0,952451.7
RUR,SA/SU,2-5,231456.9,36520.47,0.0,0.0,473381.0,359626.6,2715058.0,2489893.0,5461080.0,6397814.0,...,5879976.0,5820593.0,5451430.0,5195516.0,4720432.0,2400992.0,1318589.0,1720020.0,925834.2,679381.6


In [222]:
#double-check avg mileage again
dist_hour_hists[range(24)].mul(dist_hour_hists['AVGDIST'], axis=0).sum(axis=1).sum()/dist_hour_hists[range(24)].sum(axis=1).sum()


8.907404971826649

In [223]:
dist_hour_hists[range(24)].sum(axis=1)

CDIVLS  URBRURS  WKTIME    MILEBIN
ENC     RUR      MO/FR     0-2        7.365273e+08
                           2-5        3.772505e+08
                           5-10       2.685436e+08
                           10-20      2.740703e+08
                           20-30      1.134880e+08
                           30-50      7.924429e+07
                           50-100     3.309224e+07
                           100-300    1.288694e+07
                 SA/SU     0-2        5.914655e+08
                           2-5        3.314264e+08
                           5-10       2.181214e+08
                           10-20      2.276380e+08
                           20-30      9.867586e+07
                           30-50      7.459379e+07
                           50-100     4.167240e+07
                           100-300    1.720882e+07
                 TU/WE/TH  0-2        8.716086e+08
                           2-5        5.865779e+08
                           5-10       4.749664e

In [224]:
regions = dist_hour_hists.index.levels[0]
len(regions)
colors=['forestgreen','limegreen','gray', 'orange', 'goldenrod','darkblue','dodgerblue','magenta','rebeccapurple','plum',
       'red','firebrick','darksalmon']
plot_dists_by_region(dist_hour_hists[range(24)].T,regions=regions,colors=colors, urbrur='URB',wktime='TU/WE/TH')

<IPython.core.display.Javascript object>

('ENC', 'URB', 'TU/WE/TH')
('ESC', 'URB', 'TU/WE/TH')
('MAT-NL', 'URB', 'TU/WE/TH')
('MAT-NY', 'URB', 'TU/WE/TH')
('MTN', 'URB', 'TU/WE/TH')
('NENG', 'URB', 'TU/WE/TH')
('PAC-CA', 'URB', 'TU/WE/TH')
('PAC-NL', 'URB', 'TU/WE/TH')
('SAT-FL', 'URB', 'TU/WE/TH')
('SAT-NL', 'URB', 'TU/WE/TH')
('WNC', 'URB', 'TU/WE/TH')
('WSC-NL', 'URB', 'TU/WE/TH')
('WSC-TX', 'URB', 'TU/WE/TH')


In [225]:
if include_transit:
    tag = 'with_transit'
else:
    tag='no_transit'
hourly_profiles_agg.to_csv(data_dir+'\\binned_dists\\hourly_profiles_urb_rur_'+tag+'.csv')
dist_hists.drop('REGION', axis=1).to_csv(data_dir+'\\binned_dists\\dist_hists_by_region_'+tag+'.csv') #drop region to avoid confusion

In [226]:
hourly_profiles_agg

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,COUNTSRAW,COUNTSWTD,TRIPPCT
URBRURS,WKTIME,MILEBIN,STRTHOUR,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RUR,MO/FR,0-2,0,6.0,3.686144e+06,0.000966
RUR,MO/FR,0-2,1,8.0,9.804437e+06,0.002571
RUR,MO/FR,0-2,2,1.0,3.220029e+05,0.000084
RUR,MO/FR,0-2,3,1.0,4.899141e+06,0.001285
RUR,MO/FR,0-2,4,29.0,3.587461e+06,0.000941
RUR,MO/FR,0-2,5,88.0,3.292373e+07,0.008632
RUR,MO/FR,0-2,6,232.0,9.493033e+07,0.024890
RUR,MO/FR,0-2,7,680.0,2.988308e+08,0.078351
RUR,MO/FR,0-2,8,546.0,2.453281e+08,0.064323
RUR,MO/FR,0-2,9,709.0,2.261331e+08,0.059290


## Compute seasonal variation

In [227]:
trippub['MONTH'] = trippub['TDAYDATE'].astype(str).str.slice(-2).astype(int)
trippub['SEASON'] = 'DEC-FEB'
trippub.loc[trippub['MONTH'].isin([3,4,5]), 'SEASON'] = 'MAR-MAY'
trippub.loc[trippub['MONTH'].isin([6,7,8]), 'SEASON'] = 'JUN-AUG'
trippub.loc[trippub['MONTH'].isin([9,10,11]), 'SEASON'] = 'SEP-NOV'

#specify number of days in season and in weektime
trippub.loc[trippub['SEASON']=='DEC-FEB', 'SEASDAYS'] = 31+31+28
trippub.loc[trippub['SEASON']=='MAR-MAY', 'SEASDAYS'] = 31+30+31
trippub.loc[trippub['SEASON']=='JUN-AUG', 'SEASDAYS'] = 30+31+31
trippub.loc[trippub['SEASON']=='SEP-NOV', 'SEASDAYS'] = 31+30+31
trippub['WKTIMEDAYS'] = 2
trippub.loc[trippub['WKTIME']=='TU/WE/TH', 'WKTIMEDAYS'] = 3

trippub.groupby('SEASON')['WTTRDFIN'].sum()

SEASON
DEC-FEB    5.555627e+10
JUN-AUG    6.098954e+10
MAR-MAY    6.319647e+10
SEP-NOV    5.783914e+10
Name: WTTRDFIN, dtype: float64

In [228]:
#Now plot seasonal variation in trip profiles
hourly_profiles_season = trippub.groupby(['SEASON', 
                                   'URBRURS', 
                                   'WKTIME', 
                                   'MILEBIN',
                                   'STRTHOUR'])['WTTRDFIN'].agg([len, 
                                                                 np.sum]).rename(columns=
                                                                                 {'len':'COUNTSRAW',
                                                                                  'sum':'COUNTSWTD'})
hourly_profiles_season['TRIPPCT'] = 0
for row in [(a,b,c,d) 
            for a in hourly_profiles_season.index.levels[0].unique() 
            for b in hourly_profiles_season.index.levels[1].unique() 
            for c in hourly_profiles_season.index.levels[2].unique() 
            for d in hourly_profiles_season.index.levels[3].unique()]:
    pct=hourly_profiles_season.loc[row,'COUNTSWTD']/(hourly_profiles_season.loc[row,'COUNTSWTD'].sum(skipna=True)+1.e-10)
    for h in pct.index.values: 
        hourly_profiles_season.loc[row+(h,), 'TRIPPCT'] = pct[h]
plotdata = hourly_profiles_season.unstack('STRTHOUR', fill_value=0)['TRIPPCT'].T
alldata = hourly_profiles_agg.unstack('STRTHOUR', fill_value=0)['TRIPPCT'].T


In [229]:
plot_dists_by_region(plotdata, regions=['DEC-FEB','MAR-MAY','JUN-AUG','SEP-NOV'], urbrur='RUR', wktime='TU/WE/TH', alldata=alldata)

<IPython.core.display.Javascript object>

('DEC-FEB', 'RUR', 'TU/WE/TH')
('MAR-MAY', 'RUR', 'TU/WE/TH')
('JUN-AUG', 'RUR', 'TU/WE/TH')
('SEP-NOV', 'RUR', 'TU/WE/TH')


In [230]:
#Compute total trips per day in mileage bins by season
daily_trips_season = trippub.groupby(['SEASON',#'CDIVLS', 
                                   'URBRURS', 
                                   #'WKTIME', 
                                   'MILEBIN'])['WTTRDFIN'].agg([len, 
                                                                 np.sum]).rename(columns=
                                                                                 {'len':'COUNTSRAW',
                                                                                  'sum':'COUNTSWTD'})
days = trippub.groupby(['SEASON',#'CDIVLS', 
                                   'URBRURS', 
                                   #'WKTIME', 
                                   'MILEBIN'])['SEASDAYS','WKTIMEDAYS'].first()
daily_trips_season['DAILYTRIPS'] = daily_trips_season['COUNTSWTD'] * 1/days['SEASDAYS']#7/days['WKTIMEDAYS']/days['SEASDAYS']
daily_trips_season

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,COUNTSRAW,COUNTSWTD,DAILYTRIPS
SEASON,URBRURS,MILEBIN,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DEC-FEB,RUR,0-2,8349.0,2.718310e+09,3.020344e+07
DEC-FEB,RUR,2-5,5465.0,1.737677e+09,1.930752e+07
DEC-FEB,RUR,5-10,4011.0,1.230154e+09,1.366838e+07
DEC-FEB,RUR,10-20,3101.0,1.046188e+09,1.162431e+07
DEC-FEB,RUR,20-30,1236.0,4.373098e+08,4.858997e+06
DEC-FEB,RUR,30-50,1025.0,3.687831e+08,4.097590e+06
DEC-FEB,RUR,50-100,601.0,2.392821e+08,2.658691e+06
DEC-FEB,RUR,100-300,217.0,6.579422e+07,7.310469e+05
DEC-FEB,URB,0-2,39679.0,1.323470e+10,1.470522e+08
DEC-FEB,URB,2-5,41599.0,1.422500e+10,1.580555e+08


In [231]:
#Also compute daily average trips over the whole year
daily_trips_year = trippub.groupby([#'CDIVLS',
                                    'URBRURS', 
                                   #'WKTIME', 
                                   'MILEBIN'])['WTTRDFIN'].agg([len, 
                                                                 np.sum]).rename(columns=
                                                                                 {'len':'COUNTSRAW',
                                                                                  'sum':'COUNTSWTD'})
#days = trippub.groupby([#'CDIVLS',
#                        'URBRURS', 'WKTIME', 
#                                   'MILEBIN'])['SEASDAYS', 'WKTIMEDAYS'].first()
daily_trips_year['DAILYTRIPS'] = daily_trips_year['COUNTSWTD'] * 1/365#7/days['WKTIMEDAYS']/365
daily_trips_year

Unnamed: 0_level_0,Unnamed: 1_level_0,COUNTSRAW,COUNTSWTD,DAILYTRIPS
URBRURS,MILEBIN,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RUR,0-2,31673.0,11638800000.0,31887140.0
RUR,2-5,21647.0,7527446000.0,20623140.0
RUR,5-10,15817.0,5370307000.0,14713170.0
RUR,10-20,12533.0,4556484000.0,12483520.0
RUR,20-30,4840.0,1820890000.0,4988739.0
RUR,30-50,4000.0,1425374000.0,3905133.0
RUR,50-100,2345.0,884968600.0,2424572.0
RUR,100-300,928.0,332981200.0,912277.3
URB,0-2,149651.0,55336640000.0,151607200.0
URB,2-5,158398.0,59855860000.0,163988700.0


In [232]:
#Divide to get multipliers
daily_trips_season['TRIPSCALESEASON'] = 1
for s in trippub['SEASON'].unique():
    daily_trips_season.loc[s, 'TRIPSCALESEASON'] = (
        daily_trips_season.loc[s,'DAILYTRIPS'].div(daily_trips_year['DAILYTRIPS']).loc[daily_trips_season.loc[s].index]).values
    
daily_trips_season

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,COUNTSRAW,COUNTSWTD,DAILYTRIPS,TRIPSCALESEASON
SEASON,URBRURS,MILEBIN,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
DEC-FEB,RUR,0-2,8349.0,2.718310e+09,3.020344e+07,0.947198
DEC-FEB,RUR,2-5,5465.0,1.737677e+09,1.930752e+07,0.936207
DEC-FEB,RUR,5-10,4011.0,1.230154e+09,1.366838e+07,0.928989
DEC-FEB,RUR,10-20,3101.0,1.046188e+09,1.162431e+07,0.931173
DEC-FEB,RUR,20-30,1236.0,4.373098e+08,4.858997e+06,0.973993
DEC-FEB,RUR,30-50,1025.0,3.687831e+08,4.097590e+06,1.049283
DEC-FEB,RUR,50-100,601.0,2.392821e+08,2.658691e+06,1.096561
DEC-FEB,RUR,100-300,217.0,6.579422e+07,7.310469e+05,0.801343
DEC-FEB,URB,0-2,39679.0,1.323470e+10,1.470522e+08,0.969955
DEC-FEB,URB,2-5,41599.0,1.422500e+10,1.580555e+08,0.963820


In [233]:
daily_trips_season.reorder_levels(['URBRURS',#'WKTIME',
                                   'MILEBIN','SEASON']).loc['RUR',#'TU/WE/TH', 
                                                            '10-20']['TRIPSCALESEASON'].plot.bar()

  This is separate from the ipykernel package so we can avoid doing imports until


<matplotlib.axes._subplots.AxesSubplot at 0x38435160>

In [234]:

#Mapping to use for sorting bars by season below
seasons = ['DEC-FEB','MAR-MAY','JUN-AUG','SEP-NOV']
mapping = {seas:i for i,seas in enumerate(seasons)}

plotdata = daily_trips_season.reorder_levels(['URBRURS',#'WKTIME',
                                              'MILEBIN','SEASON'])
ur = 'RUR'
#wt = 'TU/WE/TH'
mibins = trippub['MILEBIN'].unique().sort_values()

fig = plt.figure()
gs = gridspec.GridSpec(nrows=4, ncols=2, bottom=0.2)

for i in range(len(mibins[:-1])):
    fig.add_subplot(gs[i/2, i%2])

axs = fig.axes   
#if not colors:
colors=['forestgreen','red','dodgerblue','purple']

#plotdata.loc[ur,'0-2']
for i, milebin in enumerate(mibins):
    if type(milebin)!=type(''):
        continue
    ax=axs[i]
    pd = plotdata.loc[ur,#wt,
                      milebin]
    pd.loc[pd.index[np.argsort(pd.index.map(mapping))],'TRIPSCALESEASON'].plot.bar(ax=ax, color=colors)#subplots=True, ax=axs)
    
    miles = milebin.split('-')
    ax.annotate(str(miles[0])+'-'+str(miles[1])+' mi.', [0.03,0.8], xycoords='axes fraction')
    ax.set_ylim([0.6,1.4])
    if i <= 5:
        ax.set_xlabel('')
        ax.set_xticklabels('')
            
    if i % 2 == 0:
        ax.set_ylabel('Relative trips')

<IPython.core.display.Javascript object>



In [235]:
daily_trips_season.to_csv(data_dir+'\\binned_dists\\seasonal_scaling_'+tag+'.csv')