###### Tour formation code - extract home based tours and work based sub tours from trip file
###### Create following (major) additional variables for both types of tours
###### 1. tour id
###### 2. number of trips that constitute the tour
###### 3. concatenated trip ids that constitute the tour
###### 4. tour purpose
###### 5. primary activity dwell time
###### 6. stop count by purpose
###### 7. tour occupancy type (fully joint or partially joint or individual)
###### 8. tour occupant list (list of the member ids)
###### 9. member list and mandatory tour id of the escort for each 'half tour' of home based school tours


###### Originally created by Annesha Enam, 2016
###### Modified by Jingyue Zhang for California Household Travel Survey, 2017

In [None]:
# Read file from pickle
from pandas import *
import numpy as np
from collections import Counter
import csv
import matplotlib.pyplot as plt
from savReaderWriter import * 
from pickle import *

#### survey_trip1

In [None]:
# Load the trip table and put it in person/trip order.
# NOTE(review): the string below says "text file", but the data is loaded with
# read_pickle from a hard-coded absolute path. Unpickling can execute arbitrary
# code, so only load pickle files from a trusted source.
'''Read the data from text file and convert in into a dataframe'''
myTrip=read_pickle('C:/Users/jiz13007/Documents/CAMMSE/California House Travel Survey/code/pickle data/Full Survey Trip/survey_trip1.pkl')
print myTrip.head()
print myTrip.shape
# Sort the trip file by household (sampno), person (perno) and trip number so
# that the per-person groups used below see their trips in trip order
myTrip.sort_values(['sampno','perno','tripno'], ascending=[True, True, True], inplace=True)
# Rebuild a 0..n-1 index after sorting
myTrip.reset_index(drop=True,inplace = True)

In [None]:
# 01. IDENTIFY HOME BASED TOURS

def extractTour(group, origin_col=None, dest_col=None, anchor=None):  # Grouped by sampno, perno
    '''Label the closed tours found in one person's trips.

    A tour starts at the first remaining trip whose origin purpose equals
    ``anchor`` and ends at the first remaining trip whose destination purpose
    equals ``anchor``.  Tours are numbered 1, 2, ... in trip order; trips that
    are not part of a tour keep Tour_ID 0.  A single trip that both starts and
    ends at the anchor is skipped and does not count as a tour.

    Parameters
    ----------
    group : DataFrame holding the trips of one person, in trip order.
    origin_col, dest_col : names of the origin / destination purpose columns.
        Default to the module-level ``o_pur`` / ``d_pur``.
    anchor : purpose code that opens and closes a tour.  Defaults to the
        module-level ``what_to_extract``.

    Returns
    -------
    A copy of ``group`` with four extra columns (same value on every row
    except Tour_ID):
        Flag1   - 1 if the first anchor origin is not on the first trip
        Flag2   - 1 if an opened tour never returns to the anchor
        Flag3   - 1 if no trip starts at the anchor at all
        Tour_ID - person-specific tour number, 0 when not in a tour
    '''
    # Fall back to the notebook-level settings so that
    # groupby(...).apply(extractTour) keeps working without extra arguments.
    if origin_col is None:
        origin_col = o_pur
    if dest_col is None:
        dest_col = d_pur
    if anchor is None:
        anchor = what_to_extract

    o_purpose = group[origin_col].tolist()  # list of origin purpose
    d_purpose = group[dest_col].tolist()    # list of destination purpose
    n_trips = len(o_purpose)

    tour = 0
    tour_id = [0] * n_trips  # start with zero tours
    flag1 = flag2 = flag3 = 0
    first_pass = True
    end = 0

    while end < n_trips:
        # Catch origin: only a missing anchor (ValueError) ends the search;
        # any other error is a real problem and must surface.
        try:
            start = o_purpose.index(anchor)
        except ValueError:
            if tour == 0:
                flag3 = 1  # No trip starts from the anchor
            break

        if start > 0:
            # Blank out everything before the tour start so that the
            # destination search below cannot match an earlier trip.
            o_purpose[:start] = [-999] * start
            d_purpose[:start] = [-999] * start
            # Only the very first search tells us whether the person's day
            # starts away from the anchor; later searches always have start > 0
            # because earlier trips have been blanked out.
            if first_pass:
                flag1 = 1
        first_pass = False

        # Catch destination
        try:
            end = d_purpose.index(anchor)
        except ValueError:
            flag2 = 1  # Return trip not found
            break

        span = end - start + 1
        if end > start:  # This check is done to avoid 1 leg tours
            tour += 1
            tour_id[start:end + 1] = [tour] * span

        # Consume the trips just examined
        o_purpose[start:end + 1] = [-999] * span
        d_purpose[start:end + 1] = [-999] * span

    # Add new columns on a copy so the caller's frame is never modified
    out = group.copy()
    out['Flag1'] = flag1
    out['Flag2'] = flag2
    out['Flag3'] = flag3
    out['Tour_ID'] = tour_id
    return out

print('Done1')

In [None]:
 # H1. Call to the extractTour function - purpose is to identify home based tours
# extractTour reads the three names below as module-level settings; they are
# deleted again at the end of this cell.
o_pur = 'o_pur'
d_pur = 'd_pur'
what_to_extract = -9  # This code is for home based tour extraction
    
# One call per person (sampno, perno); group_keys=False keeps the original row index
result = myTrip.groupby(['sampno','perno'],group_keys=False).apply(extractTour)  # Call to the function
# Copy the new columns back onto the trip table (aligned on the row index)
myTrip['Tour_ID'] = result.Tour_ID
myTrip['Flag1'] = result.Flag1  # First trip does not start at home
myTrip['Flag2'] = result.Flag2  # Return home trip not found
myTrip['Flag3'] = result.Flag3  # No trip starts at home
# Flags are stored per trip, so these counts are numbers of trips, not persons
print myTrip['Flag1'].value_counts()
print myTrip['Flag2'].value_counts()
print myTrip['Flag3'].value_counts()
del result, o_pur, d_pur, what_to_extract
print "DONE!!! Home based tour extraction"
    

In [None]:
# Show a few example trips for each of the three home based diagnostic flags
print myTrip.loc[myTrip['Flag1']==1,('sampno','perno','tripno','o_pur','d_pur')].head()
print myTrip.loc[myTrip['Flag2']==1,('sampno','perno','tripno','o_pur','d_pur')].head(10)
print myTrip.loc[myTrip['Flag3']==1,('sampno','perno','tripno','o_pur','d_pur')].head()

In [None]:
# Frequency tables (trip level) of the flags and of the tour ids, ordered by value
print myTrip['Flag1'].value_counts().sort_index()
print myTrip['Flag2'].value_counts().sort_index()
print myTrip['Flag3'].value_counts().sort_index()
print myTrip['Tour_ID'].value_counts().sort_index()

In [None]:
# 02. IDENTIFY WORK BASED SUB-TOURS
def extractTour2(group, origin_col=None, dest_col=None, anchor=None,
                 hb_tour_col=None, hb_id_col=None, update_col=None):  # Grouped by sampno, perno, Tour_ID
    '''Label work based sub-tours inside one home based tour.

    A sub-tour starts at the first remaining trip whose origin purpose equals
    ``anchor`` and ends at the first remaining trip whose destination purpose
    equals ``anchor``.  Sub-tours are numbered 21, 22, ... within the group
    (the counter plus 20); other trips keep 0.  Nothing is searched when the
    group's home based tour id is not positive.

    Parameters
    ----------
    group : DataFrame holding the trips of one home based tour, in trip order.
    origin_col, dest_col : origin / destination purpose columns
        (default: module-level ``o_pur`` / ``d_pur``).
    anchor : purpose code that opens and closes a sub-tour
        (default: module-level ``what_to_extract``).
    hb_tour_col : column with the person-level home based tour number
        (default: module-level ``hbTourId``).
    hb_id_col : column with the home based tour identifier to copy onto the
        sub-tour trips (default: module-level ``hb_tour_id``).
    update_col : name of the output column receiving that identifier
        (default: module-level ``what_var_update``).

    Returns
    -------
    A copy of ``group`` with the extra columns
        Flag4      - 1 if a sub-tour was opened but never returned to the anchor
        Flag5      - 1 if no trip starts at the anchor and nothing was extracted
        WB_Tour_ID - sub-tour number (21, 22, ...) or 0
        update_col - parent home based tour identifier, -99 outside sub-tours
    '''
    # Fall back to the notebook-level settings so that
    # groupby(...).apply(extractTour2) keeps working without extra arguments.
    if origin_col is None:
        origin_col = o_pur
    if dest_col is None:
        dest_col = d_pur
    if anchor is None:
        anchor = what_to_extract
    if hb_tour_col is None:
        hb_tour_col = hbTourId
    if hb_id_col is None:
        hb_id_col = hb_tour_id
    if update_col is None:
        update_col = what_var_update

    o_purpose = group[origin_col].tolist()  # list of origin purpose
    d_purpose = group[dest_col].tolist()    # list of destination purpose
    hb_tour = group[hb_tour_col].tolist()[0]  # same value on every row of the group
    hbid = group[hb_id_col].tolist()[0]       # same value on every row of the group
    n_trips = len(o_purpose)

    tour = 0
    tour_id = [0] * n_trips        # start with zero tours
    parent_ids = [-99] * n_trips   # -99 everywhere until a sub-tour is found
    flag4 = flag5 = 0
    end = 0

    if hb_tour > 0:  # Do this only if this is a part of valid home-based tour
        while end < n_trips:
            # Catch origin: only a missing anchor (ValueError) ends the search
            try:
                start = o_purpose.index(anchor)
            except ValueError:
                if tour == 0:
                    flag5 = 1  # No trip starts from the anchor
                break

            if start > 0:
                # Blank out earlier trips so the destination search below
                # cannot match a trip before the sub-tour start.
                o_purpose[:start] = [-999] * start
                d_purpose[:start] = [-999] * start

            # Catch destination
            try:
                end = d_purpose.index(anchor)
            except ValueError:
                flag4 = 1  # Return trip to the anchor not found
                break

            span = end - start + 1
            if end > start:  # This check is done to avoid 1 leg tours
                tour += 1
                tour_id[start:end + 1] = [tour + 20] * span
                parent_ids[start:end + 1] = [hbid] * span

            # Consume the trips just examined
            o_purpose[start:end + 1] = [-999] * span
            d_purpose[start:end + 1] = [-999] * span

    # Add new columns on a copy so the caller's frame is never modified
    out = group.copy()
    out['Flag4'] = flag4
    out['Flag5'] = flag5
    out['WB_Tour_ID'] = tour_id
    out[update_col] = parent_ids
    return out

print('Done2')

In [None]:
# H3. Create home based tour id - concatenate personid with the Tour_ID
# The id is a string: int(perid) + '20' + int(Tour_ID); trips outside any tour
# (Tour_ID 0) therefore end in '200'.
myTrip['home_based_tourid'] = [(str(int(a))+'20'+str(int(b))) for a,b in zip(myTrip.perid.tolist(),myTrip.Tour_ID.tolist())]
# NOTE(review): 'trpid' is not used anywhere else in the visible code (other
# cells use 'tripno') - confirm the column exists in myTrip.
print myTrip[['sampno','perno','trpid','Tour_ID','home_based_tourid']].head()

In [None]:
# W1. Call to the extractTour2 function - purpose is to identify work based sub-tours within home based tours 
# extractTour2 reads the six names below as module-level settings; they are
# deleted again at the end of this cell.
o_pur = 'o_pur'
d_pur = 'd_pur'
hbTourId = 'Tour_ID'   # This is the unique id for each individual
what_to_extract = 2  # This code is for work based sub-tour extraction
what_var_update = 'which_home_based_tour' # This variable will be updated for all work_based sub tours
hb_tour_id = 'home_based_tourid'          # what_var_update will be updated with home_based_tourid
    
# One call per home based tour of each person; trips with Tour_ID 0 form
# their own group, in which extractTour2 extracts nothing.
result = myTrip.groupby(['sampno','perno','Tour_ID'],group_keys=False).apply(extractTour2)  # Call to the function
# Copy the new columns back onto the trip table (aligned on the row index)
myTrip['WB_Tour_ID'] = result.WB_Tour_ID
myTrip['Flag4'] = result.Flag4  
myTrip['Flag5'] = result.Flag5
myTrip[what_var_update] = result[what_var_update]  # Home based tour id 
del result, o_pur, d_pur, what_to_extract, hbTourId, what_var_update, hb_tour_id
print "DONE!!! Work based tour extraction" 

In [None]:
# Frequency tables (trip level) of the sub-tour ids and the two work based flags
print myTrip['WB_Tour_ID'].value_counts().sort_index()
print myTrip['Flag4'].value_counts().sort_index()
print myTrip['Flag5'].value_counts().sort_index()

In [None]:
# Show example trips that belong to a work based sub-tour, then examples for each flag
print myTrip.loc[myTrip['WB_Tour_ID']!=0,('sampno','perno','home_based_tourid','WB_Tour_ID','which_home_based_tour','o_pur','d_pur')].head(10)
print myTrip.loc[myTrip['Flag4']==1,('sampno','perno','tripno','o_pur','d_pur')].head()
print myTrip.loc[myTrip['Flag5']==1,('sampno','perno','tripno','o_pur','d_pur')].head()

In [None]:
# 03 Modify work based tour id

def modWorkId(group, wb_col=None, hb_col=None, update_col=None):  # grouped by sampno and perno
    '''Renumber work based sub-tour ids so they are unique per person.

    The incoming sub-tour ids restart within every home based tour.  Home based
    tours are visited in increasing id order: the first tour that contains
    sub-tours keeps its ids, and every later tour is shifted so that its
    smallest id continues right after the largest id handed out so far.
    Rows without a sub-tour (id 0) stay 0.

    Parameters
    ----------
    group : DataFrame holding the trips of one person.
    wb_col : column with the per-home-based-tour sub-tour ids
        (default: module-level ``wid``).
    hb_col : column with the home based tour number
        (default: module-level ``hid``).
    update_col : name of the output column
        (default: module-level ``var_to_update``).

    Returns
    -------
    A copy of ``group`` with ``update_col`` added.

    Side effects
    ------------
    Adds 1 to the module-level counter ``count_wb`` (treated as 0 if it does
    not exist) for every group that has at least one sub-tour, and prints the
    old and new id lists whenever ids were moved upwards.
    '''
    global count_wb

    # Fall back to the notebook-level settings so that
    # groupby(...).apply(modWorkId) keeps working without extra arguments.
    if wb_col is None:
        wb_col = wid
    if hb_col is None:
        hb_col = hid
    if update_col is None:
        update_col = var_to_update

    wid_list = group[wb_col].tolist()
    hbid = group[hb_col].tolist()

    # Ids are written by row position, so trips outside any tour (home based
    # id 0) may sit before, between or after tours without shifting anything.
    wb_list_new = [0] * len(wid_list)

    if sum(wid_list) > 0:
        count_wb = globals().get('count_wb', 0) + 1
        max_wid = 0  # largest id handed out so far; 0 = none yet
        for tour in range(1, int(max(hbid)) + 1):
            rows = [k for k, h in enumerate(hbid)
                    if h == tour and wid_list[k] != 0]
            if not rows:
                continue  # no work based sub tours, nothing to update
            old_ids = [wid_list[k] for k in rows]
            # The smallest id of this tour must land on max_wid + 1.
            shift = 0 if max_wid == 0 else (max_wid - min(old_ids)) + 1
            for k in rows:
                wb_list_new[k] = wid_list[k] + shift
            max_wid = max(old_ids) + shift

        # Audit trail: show persons whose ids were actually moved upwards
        if max(wb_list_new) > max(wid_list):
            print('%s %s %s' % (hbid, wid_list, wb_list_new))

    out = group.copy()
    out[update_col] = wb_list_new
    return out

print('DONE 3')

In [None]:
# W2. Next step is to correct the generated work based tour id, so that the id is unique for person and not for home based tour
# modWorkId reads wid / hid / var_to_update as module-level settings and
# increments count_wb once per call on a group that has sub-tours.
wid = 'WB_Tour_ID'
hid = 'Tour_ID'
var_to_update = 'WB_Tour_ID_New'

count_wb = 0
result = myTrip.groupby(['sampno','perno'], group_keys = False).apply(modWorkId)
myTrip[var_to_update] = result[var_to_update]
print 'DONE!!! updated work based ID'
# NOTE(review): some pandas versions call the applied function an extra time
# on the first group, which would inflate this counter - confirm if it matters.
print 'how many pesons have work based sub tours: ', count_wb
del result, wid, hid, var_to_update, count_wb


# W3.  Create work based tour id - concatenate personid with the Tour_ID
# The id is a string: int(perid) + '2' + int(WB_Tour_ID_New); trips outside
# any sub-tour (WB_Tour_ID_New 0) therefore end in '20'.
myTrip['work_based_tourid'] = [(str(int(a))+'2'+str(int(b))) for a,b in zip(myTrip.perid.tolist(),myTrip.WB_Tour_ID_New.tolist())]
    

In [None]:
# Show example trips whose person-level sub-tour id is above 21, i.e. the
# second or later sub-tour of a person
print myTrip.loc[myTrip['WB_Tour_ID_New']>21,('sampno','perno','home_based_tourid','WB_Tour_ID','WB_Tour_ID_New','work_based_tourid','which_home_based_tour','o_pur','d_pur')].head(10)

In [None]:
# 04. Modify the home based tour id, so that the trips that are part of the wb sub-tour are taken out

def modHbTrID(group, tour_col=None, sub_tour_col=None, update_col=None):  # grouped by sampno, perno and Tour_ID
    '''Blank the home based tour id on trips that belong to a work based sub-tour.

    Parameters
    ----------
    group : DataFrame of trips.
    tour_col : column with the home based tour number
        (default: module-level ``which_tour``).
    sub_tour_col : column with the work based sub-tour id, 0 meaning "not in
        a sub-tour" (default: module-level ``sub_tour``).
    update_col : name of the output column
        (default: module-level ``var_to_update``).

    Returns
    -------
    A copy of ``group`` with ``update_col`` added: the home based tour number
    where the sub-tour id is 0, and 0 everywhere else.
    '''
    # Fall back to the notebook-level settings so that
    # groupby(...).apply(modHbTrID) keeps working without extra arguments.
    if tour_col is None:
        tour_col = which_tour
    if sub_tour_col is None:
        sub_tour_col = sub_tour
    if update_col is None:
        update_col = var_to_update

    home_ids = group[tour_col].tolist()     # this is the home based tourid
    sub_ids = group[sub_tour_col].tolist()  # this is the work based sub tourid

    out = group.copy()
    out[update_col] = [home if sub == 0 else 0
                       for home, sub in zip(home_ids, sub_ids)]
    return out

print('DONE 4')

In [None]:
# H2. Modify the home based tour id based on work-based tours, this id is the final for home based tours
# modHbTrID reads the three names below as module-level settings; they are
# deleted again at the end of this cell.
which_tour = 'Tour_ID'
sub_tour = 'WB_Tour_ID_New'
var_to_update = 'Tour_ID_New'
    
result = myTrip.groupby(['sampno','perno',which_tour],group_keys=False).apply(modHbTrID)  # Call to the function
# Copy the new column back onto the trip table (aligned on the row index)
myTrip[var_to_update] = result[var_to_update]
del result, which_tour, sub_tour, var_to_update
print "DONE!!! modify home based tour id"

In [None]:
# Show example trips whose home based tour id was changed by the step above
print myTrip.loc[myTrip['Tour_ID_New']!=myTrip['Tour_ID'],('sampno','perno','home_based_tourid','Tour_ID','Tour_ID_New','WB_Tour_ID_New','work_based_tourid','which_home_based_tour','o_pur','d_pur')].head(10)

In [None]:
# 05. Modify the home based tour id, so that the trips that are part of the wb sub-tour are taken out
# This id is created for the creation of the home based tour purpose related variables
# The last trip of any work based sub tour is kept as a trip of the home based tour

def modHbTrID02(group, tour_col=None, sub_tour_col=None, update_col=None):  # grouped by sampno, perid and Tour_ID
    '''Blank the home based tour id on sub-tour trips, except each sub-tour's last trip.

    Works like modHbTrID, but the last trip of every run of equal sub-tour
    ids keeps its home based tour number.  A run is closed when the next row
    has sub-tour id 0 or a larger sub-tour id; a run that extends to the last
    row of the group is never closed, so all of its trips are blanked.

    Parameters
    ----------
    group : DataFrame holding the trips of one home based tour, in trip order.
    tour_col : column with the home based tour number
        (default: module-level ``which_tour``).
    sub_tour_col : column with the work based sub-tour id, 0 meaning "not in
        a sub-tour" (default: module-level ``sub_tour``).
    update_col : name of the output column
        (default: module-level ``var_to_update``).

    Returns
    -------
    A copy of ``group`` with ``update_col`` added.  Prints 'issue!!!' when a
    run would end on the very first row of the group; that row is left blanked.
    '''
    # Fall back to the notebook-level settings so that
    # groupby(...).apply(modHbTrID02) keeps working without extra arguments.
    if tour_col is None:
        tour_col = which_tour
    if sub_tour_col is None:
        sub_tour_col = sub_tour
    if update_col is None:
        update_col = var_to_update

    tid = group[tour_col].tolist()       # this is the home based tourid
    stid = group[sub_tour_col].tolist()  # this is the work based sub tourid
    released = list(stid)                # copy in which run-ending trips are set to 0

    # purpose is to identify the last trip of the work based sub tour
    current = 0  # sub-tour id of the run being followed; 0 = not inside a run
    for i, sub_id in enumerate(stid):
        if current == 0:
            current = sub_id  # possibly the first trip of a run
        elif sub_id == current:
            continue          # same work based tour continuing
        elif sub_id == 0 or sub_id > current:
            last = i - 1      # the previous row closed the run
            if last > 0:
                released[last] = 0
            else:
                print('issue!!!')
            current = sub_id

    out = group.copy()
    out[update_col] = [home if sub == 0 else 0
                       for home, sub in zip(tid, released)]
    return out

print('DONE 5')

In [None]:
# H3. Update home based tour id - concatenate personid with the Tour_ID_New 
# Same string format as home_based_tourid: int(perid) + '20' + int(Tour_ID_New)
myTrip['home_based_tourid_updated'] = [(str(int(a))+'20'+str(int(b))) for a,b in zip(myTrip.perid.tolist(),myTrip.Tour_ID_New.tolist())]
    
# H4. Modify the home based tour id based on work-based tours - for use in the creation of purpose,& not for anything else
# modHbTrID02 reads the three names below as module-level settings; they are
# deleted again at the end of this cell.
which_tour = 'Tour_ID'
sub_tour = 'WB_Tour_ID_New'
var_to_update = 'Tour_ID_New_purpose'
    
# NOTE(review): this is the only call that groups by 'perid' instead of
# 'perno' - presumably equivalent within a household; confirm.
result = myTrip.groupby(['sampno','perid',which_tour],group_keys=False).apply(modHbTrID02)  # Call to the function
myTrip[var_to_update] = result[var_to_update]
del result, which_tour, sub_tour, var_to_update
print "DONE!!! modify home based tour id for purpose creation"

In [None]:
# Show example trips whose purpose-specific home based tour id differs from
# the original tour id ('which_home_based_tour' was listed twice and is now
# selected once). A single parenthesised argument prints the same on Python 2 and 3.
print(myTrip.loc[myTrip['Tour_ID']!=myTrip['Tour_ID_New_purpose'],('sampno','perno','home_based_tourid','Tour_ID','Tour_ID_New','Tour_ID_New_purpose','WB_Tour_ID_New','work_based_tourid','which_home_based_tour','o_pur','d_pur')].head(10))

In [None]:
# 06.  Calculate the number of trips that constitute the tour
def calTrp(group, tour_col=None, update_col=None):  # Grouped by sampno, perno and Tour_ID_New or WB_Tour_ID_New
    '''Write the number of trips in the tour onto every trip of the tour.

    Parameters
    ----------
    group : DataFrame holding the trips that share one tour id.
    tour_col : column with the tour id; the first row's value is taken as the
        id of the whole group (default: module-level ``which_tour``).
    update_col : name of the output column
        (default: module-level ``which_tour_update``).

    Returns
    -------
    A copy of ``group`` with ``update_col`` added: the number of rows in the
    group when the tour id is positive, otherwise -99 (not a valid tour).
    '''
    # Fall back to the notebook-level settings so that
    # groupby(...).apply(calTrp) keeps working without extra arguments.
    if tour_col is None:
        tour_col = which_tour
    if update_col is None:
        update_col = which_tour_update

    tour_ids = group[tour_col].tolist()
    n_trips = len(tour_ids)
    tid = tour_ids[0]  # Whole group contains same value

    # Any id that is not positive is treated as "not part of a tour"
    trip_count = n_trips if tid > 0 else -99

    out = group.copy()
    out[update_col] = [trip_count] * n_trips
    return out

print('Done6')

In [None]:
# H5. Next step is to calculate the number of trips within the tour - home based tour
# calTrp reads which_tour / which_tour_update as module-level settings, so
# they are set before and deleted after each of the two runs below.
which_tour = 'Tour_ID_New'  # Identify which tour is treated, home_based or work_based
which_tour_update = 'HB_trip_count'
    
result = myTrip.groupby(['sampno','perno',which_tour],group_keys=False).apply(calTrp)  # Call to the function
myTrip[which_tour_update] = result[which_tour_update]
del result, which_tour, which_tour_update
print "DONE!!! Trip calculation home based"
    
# W4. Next step is to calculate the number of trips within the tour - work based tour
which_tour = 'WB_Tour_ID_New'  # Identify which tour is treated, home_based or work_based
which_tour_update = 'WB_trip_count'
    
result = myTrip.groupby(['sampno','perno',which_tour],group_keys=False).apply(calTrp)  # Call to the function
myTrip[which_tour_update] = result[which_tour_update]
del result, which_tour, which_tour_update
print "DONE!!! Trip calculation work based"

In [None]:
# Inspect the trip counts: first rows, the tours with the specific counts 29
# (home based) and 15 (work based), and the overall distributions.
# NOTE(review): 29 and 15 are hard-coded - presumably the largest counts seen
# in this data set; confirm before reusing on another file.
print myTrip[['sampno','perno','tripno','Tour_ID_New','HB_trip_count','o_pur','d_pur']].head(10)
print myTrip.loc[myTrip['HB_trip_count']==29,('sampno','perno','tripno','Tour_ID_New','HB_trip_count','o_pur','d_pur')]
print myTrip['HB_trip_count'].value_counts()
print myTrip.loc[myTrip['WB_Tour_ID_New']!=0,('sampno','perno','tripno','Tour_ID_New','WB_trip_count','o_pur','d_pur')].head(10)
print myTrip['WB_trip_count'].value_counts()
print myTrip.loc[myTrip['WB_trip_count']==15,('sampno','perno','tripno','Tour_ID_New','WB_trip_count','o_pur','d_pur')]

### Calculate the number of trips that constitute the tour without the trips made for changing mode

In [None]:
def calTrp1(group, tour_col=None, update_col=None,
            purpose_col='d_pur', mode_change_code=9):  # Grouped by sampno, perno and Tour_ID_New or WB_Tour_ID_New
    '''Write the number of change-mode trips in the tour onto every trip of the tour.

    A change-mode trip is a trip whose destination purpose equals
    ``mode_change_code``.

    Parameters
    ----------
    group : DataFrame holding the trips that share one tour id.
    tour_col : column with the tour id; the first row's value is taken as the
        id of the whole group (default: module-level ``which_tour``).
    update_col : name of the output column
        (default: module-level ``which_tour_update``).
    purpose_col : destination purpose column that is searched.
    mode_change_code : purpose code counted as "change mode".

    Returns
    -------
    A copy of ``group`` with ``update_col`` added: the count of change-mode
    trips when the tour id is positive, otherwise -99 (not a valid tour).
    '''
    # Fall back to the notebook-level settings so that
    # groupby(...).apply(calTrp1) keeps working without extra arguments.
    if tour_col is None:
        tour_col = which_tour
    if update_col is None:
        update_col = which_tour_update

    tour_ids = group[tour_col].tolist()
    n_trips = len(tour_ids)
    tid = tour_ids[0]  # Whole group contains same value

    # Any id that is not positive is treated as "not part of a tour"
    if tid > 0:
        mode_count = group[purpose_col].tolist().count(mode_change_code)
    else:
        mode_count = -99

    out = group.copy()
    out[update_col] = [mode_count] * n_trips
    return out

print('Done6.1')

In [None]:
# H5b. Next step is to count the change-mode trips (d_pur == 9) within the tour - home based tour
# calTrp1 reads which_tour / which_tour_update as module-level settings, so
# they are set before and deleted after each of the two runs below.
which_tour = 'Tour_ID_New'  # Identify which tour is treated, home_based or work_based
which_tour_update = 'HB_change_mode_trip_count'
    
result = myTrip.groupby(['sampno','perno',which_tour],group_keys=False).apply(calTrp1)  # Call to the function
myTrip[which_tour_update] = result[which_tour_update]
del result, which_tour, which_tour_update
# NOTE(review): the two progress messages are identical to those of the
# plain trip-count cell, so the log does not distinguish the two steps.
print "DONE!!! Trip calculation home based"
    
# W4b. Next step is to count the change-mode trips within the tour - work based tour
which_tour = 'WB_Tour_ID_New'  # Identify which tour is treated, home_based or work_based
which_tour_update = 'WB_change_mode_trip_count'
    
result = myTrip.groupby(['sampno','perno',which_tour],group_keys=False).apply(calTrp1)  # Call to the function
myTrip[which_tour_update] = result[which_tour_update]
del result, which_tour, which_tour_update
print "DONE!!! Trip calculation work based"

In [None]:
# Show example trips of tours that contain at least one change-mode trip
# (home based, then work based), each followed by the trip-count distribution
print myTrip.loc[myTrip['HB_change_mode_trip_count']>0,('sampno','perno','tripno','Tour_ID_New','HB_trip_count','HB_change_mode_trip_count','o_pur','d_pur')].head(10)
print myTrip['HB_trip_count'].value_counts()
print myTrip.loc[(myTrip['WB_Tour_ID_New']!=0)&(myTrip['WB_change_mode_trip_count']>0),('sampno','perno','tripno','Tour_ID_New','WB_trip_count','WB_change_mode_trip_count','o_pur','d_pur')].head(10)
print myTrip['WB_trip_count'].value_counts()

In [None]:
# Number of trip rows whose tour has exactly one trip left once change-mode
# trips are subtracted (home based, then work based).
# Rows outside a tour carry -99 in both columns, so their difference is 0
# and they are not counted.
print sum((myTrip['HB_trip_count']-myTrip['HB_change_mode_trip_count'])==1)
print sum((myTrip['WB_trip_count']-myTrip['WB_change_mode_trip_count'])==1)

# 07. Function to calculate activity dwell time
def dwlTme(group, arrival_col=None, departure_col=None, update_col=None):  # Grouped by sampno and perno
    '''Compute the dwell time that follows every trip of one person.

    For each row except the last, the dwell time is the next row's value in
    ``departure_col`` minus this row's value in ``arrival_col``.  The last row
    has no following row and gets -99.

    Parameters
    ----------
    group : DataFrame holding the trips of one person, in trip order.
    arrival_col : arrival time column (default: module-level ``arvl_time``).
    departure_col : departure time column (default: module-level ``dept_time``).
    update_col : name of the output column
        (default: module-level ``which_var_update``).

    Returns
    -------
    A copy of ``group`` with ``update_col`` added.
    '''
    # Fall back to the notebook-level settings so that
    # groupby(...).apply(dwlTme) keeps working without extra arguments.
    if arrival_col is None:
        arrival_col = arvl_time
    if departure_col is None:
        departure_col = dept_time
    if update_col is None:
        update_col = which_var_update

    arrivals = group[arrival_col].tolist()
    departures = group[departure_col].tolist()

    # Pair every arrival (all but the last) with the following departure
    # (all but the first).
    dwell = [next_dep - arr
             for arr, next_dep in zip(arrivals[:-1], departures[1:])]
    if arrivals:
        dwell.append(-99)  # appending -99 for the last dwell time

    out = group.copy()
    out[update_col] = dwell
    return out

print('Done7')

# A1. Calculate activity dwell time
# dwlTme reads the three names below as module-level settings; they are
# deleted again at the end of this step.
# (These statements were indented under the comment, which is an
# IndentationError at module level; they now start at column 0.)
arvl_time = 'act_arr_time'   # These two variables would be used for calculating dwell time
dept_time = 'act_dep_time'  # These two variables would be used for calculating dwell time
which_var_update = 'Dwell_Time'

# Group per person with the same keys as every other step in this file
# (this call used 'hhid' / 'personid', which no other cell references).
# NOTE(review): if 'Dwell_Time' is already present in the loaded data, this
# step overwrites it - confirm that is intended.
result = myTrip.groupby(['sampno','perno'],group_keys=False).apply(dwlTme)  # Call to the function
myTrip[which_var_update] = result[which_var_update]
del result, arvl_time, dept_time, which_var_update
print("DONE!!! Dwell time calculation")

In [None]:
 # A2. Next step is to recode d_purpose into broader categories
o_pur = 'o_pur'
d_pur = 'd_pur'

# Recoded origin and destination purposes into broad categories
# 99 - return home, 1 - school, 2 - work, 3 - meal, 4 - shop, 5 - personal business, 6 - social recreation, 7 - escorting, 8 - other, 9 - change mode
# Raw -> broad mapping applied below: -9 -> 99, 1 -> 1, 2 or 3 -> 2, 4 -> 3,
# 5 -> 4, 6 -> 5, 7 -> 6, 8 -> 7, 9 -> 9, 10 -> 8.
# Each term is a boolean column times its broad code, so a raw code that is
# not listed matches no term and is recoded to 0.
# NOTE(review): tourPur takes the minimum broad code as the tour purpose, so a
# 0 would outrank every real purpose - confirm no unlisted raw codes occur.

# Recode origin purposes into broad categories
myTrip['o_pur_broad'] = (myTrip[o_pur] == -9)*99 + (myTrip[o_pur] == 1)*1 + ((myTrip[o_pur] == 2)|(myTrip[o_pur] == 3))*2 + (myTrip[o_pur] == 4)*3  + (myTrip[o_pur] ==5 )*4 + (myTrip[o_pur] ==6 )*5 + (myTrip[o_pur] ==7 )*6 + (myTrip[o_pur] ==8 )*7 + (myTrip[o_pur] ==9 )*9 + (myTrip[o_pur] ==10)*8

# Recode destination purposes into broad categories
myTrip['d_pur_broad'] = (myTrip[d_pur] == -9)*99 + (myTrip[d_pur] == 1)*1 + ((myTrip[d_pur] == 2)|(myTrip[d_pur] == 3))*2 + (myTrip[d_pur] == 4)*3  + (myTrip[d_pur] ==5 )*4 + (myTrip[d_pur] ==6 )*5 + (myTrip[d_pur] ==7 )*6 + (myTrip[d_pur] ==8 )*7 + (myTrip[d_pur] ==9 )*9 + (myTrip[d_pur] ==10)*8 

# Compare the recoded distributions with the raw ones
print myTrip['o_pur_broad'].value_counts().sort_index()
print myTrip['o_pur'].value_counts().sort_index()
print myTrip['d_pur_broad'].value_counts().sort_index()
print myTrip['d_pur'].value_counts().sort_index()

In [None]:
# 08. Function to determine the tour purpose

def tourPur(group, purpose_col=None, dwell_col=None, tour_col=None,
            purpose_out=None, index_out=None, duration_out=None):  # Grouped by sampno, perno, tour id
    '''Identify the primary purpose of one tour from a purpose hierarchy.

    The last trip of the group is ignored (it is the return trip).  Among the
    remaining trips the smallest purpose code wins; if that code occurs more
    than once, the occurrence with the longest dwell time is the primary
    activity (the first one on a tie).

    Hierarchy used (smaller code = higher priority):
    1 - school, 2 - work, 3 - meal, 4 - shop, 5 - personal business,
    6 - social recreation, 7 - escorting, 8 - other, 9 - change mode,
    99 - return home

    Parameters
    ----------
    group : DataFrame holding the trips that share one tour id, in trip order.
    purpose_col : destination purpose column (default: module-level ``d_pur``).
    dwell_col : dwell time column (default: module-level ``dwl_tme``).
    tour_col : column with the tour id; the first row's value is taken as the
        id of the whole group (default: module-level ``which_tour``).
    purpose_out, index_out, duration_out : names of the three output columns
        (default: module-level ``which_var_update_01`` / ``_02`` / ``_03``).

    Returns
    -------
    A copy of ``group`` with three extra columns:
        purpose_out  - primary purpose code of the tour, same on every row
        index_out    - 1 on the trip leading to the primary activity, else 0
        duration_out - dwell time of the primary activity, same on every row
    All three are -99 when the tour id is not positive or when the group has
    no trip other than the return trip.
    '''
    # Fall back to the notebook-level settings so that
    # groupby(...).apply(tourPur) keeps working without extra arguments.
    if purpose_col is None:
        purpose_col = d_pur
    if dwell_col is None:
        dwell_col = dwl_tme
    if tour_col is None:
        tour_col = which_tour
    if purpose_out is None:
        purpose_out = which_var_update_01
    if index_out is None:
        index_out = which_var_update_02
    if duration_out is None:
        duration_out = which_var_update_03

    n_rows = len(group)                  # The length of the group
    tid = group[tour_col].tolist()[0]    # The tour id, full group contains same value

    # Taking all but the last one, last one should always be the return trip
    pur = group[purpose_col].tolist()[:-1]
    dwell = group[dwell_col].tolist()[:-1]

    # Defaults describe "not a valid tour"
    tour_pur = [-99] * n_rows
    tour_pur_index = [-99] * n_rows
    prime_act_dur = [-99] * n_rows

    if tid > 0 and pur:
        min_pur = min(pur)  # Purpose with the lowest value has the highest priority

        # Compare dwell times only among the trips carrying the winning
        # purpose, so an unrelated trip can never be picked, whatever the
        # dwell values are.  max() keeps the first candidate on a tie.
        candidates = [i for i, p in enumerate(pur) if p == min_pur]
        best = max(candidates, key=lambda i: dwell[i])

        tour_pur = [min_pur] * n_rows
        tour_pur_index = [0] * n_rows
        tour_pur_index[best] = 1
        prime_act_dur = [dwell[best]] * n_rows

    out = group.copy()
    out[purpose_out] = tour_pur
    out[index_out] = tour_pur_index
    out[duration_out] = prime_act_dur
    return out

print('Done8')

In [None]:
# H6. Next step is to identify primary tour purpose - home based tours
# tourPur reads the names below as module-level settings; they are deleted
# again at the end of this cell.
d_pur = 'd_pur_broad'   # This is the variable based on which the tour purpose would be determined
dwl_tme = 'Dwell_Time'   # Dwell time is used to break ties between activities with the same purpose
which_tour = 'Tour_ID_New_purpose'   # For which tour, this id is used only for identifying primary activity
which_var_update_01 = 'HB_tour_purpose'  # This will carry the purpose code
which_var_update_02 = 'HB_primary_activity_id'  # This will carry the 0/1 marker of the primary activity trip
which_var_update_03 = 'HB_primary_activity_dwell_time'  # This will carry the dwell time of the primary activity
    
# 'check' is declared global inside tourPur, but its only use there is commented out
check = 0
result = myTrip.groupby(['sampno','perno', which_tour],group_keys=False).apply(tourPur)  # Call to the function
myTrip[which_var_update_01] = result[which_var_update_01]
myTrip[which_var_update_02] = result[which_var_update_02]
myTrip[which_var_update_03] = result[which_var_update_03]
print "DONE!!! primary activity home based"
#print 'How many have multiple primary activity? ', check
del result, d_pur, dwl_tme, which_tour, which_var_update_01,which_var_update_02,which_var_update_03, check

In [None]:
# Show the timing columns next to the derived home based purpose columns for the first rows
print myTrip[['sampno','perno','tripno','act_arr_time','act_dep_time','Dwell_Time','act_dur','d_pur_broad','HB_tour_purpose','HB_primary_activity_id','HB_primary_activity_dwell_time']].head(10)

In [None]:
 # W6. Next step is to identify primary tour purpose - work based tours
# Same procedure as for the home based tours, grouped on the person-level
# work based sub-tour id instead.
d_pur = 'd_pur_broad'   # This is the variable based on which the tour purpose would be determined
dwl_tme = 'Dwell_Time'   # Dwell time is used to break ties between activities with the same purpose
which_tour = 'WB_Tour_ID_New'   # For which tour
which_var_update_01 = 'WB_tour_purpose'  # This will carry the purpose code
which_var_update_02 = 'WB_primary_activity_id'  # This will carry the 0/1 marker of the primary activity trip
which_var_update_03 = 'WB_primary_activity_dwell_time'  # This will carry the dwell time of the primary activity
    
# 'check' is declared global inside tourPur, but its only use there is commented out
check = 0
result = myTrip.groupby(['sampno','perno', which_tour],group_keys=False).apply(tourPur)  # Call to the function
myTrip[which_var_update_01] = result[which_var_update_01]
myTrip[which_var_update_02] = result[which_var_update_02]
myTrip[which_var_update_03] = result[which_var_update_03]
print "DONE!!! primary activity work based"
#print 'How many had to break tie from dwell time? ', check
del result, d_pur, dwl_tme, which_tour, which_var_update_01,which_var_update_02,which_var_update_03, check

In [None]:
# Distribution of the work based tour purpose (trip level). This is a bare
# expression: a notebook displays it as the cell result, a plain script run
# shows nothing.
myTrip['WB_tour_purpose'].value_counts().sort_index()

In [None]:
# 09. Calculate stop count by purpose

def stopCount(group):
    '''Count the additional (non-primary) stops of one tour, by purpose.

    Intended for groupby(...).apply over (household, person, tour) groups.
    The configuration is read from module-level globals that the calling cell
    must set before the call:

        which_tour   -- name of the tour-id column (constant within a group)
        d_pur        -- name of the destination-purpose column
        prime_act_id -- name of the column in which 1 marks the primary activity
        stop_type    -- list of output column names; the column at position i
                        receives the number of destinations with purpose i+1

    Tour id > 0 : every row of the group gets the same per-purpose counts,
                  the primary-activity trip being left out of the count.
    Tour id == 0: every stop column is filled with -99 (not a valid tour).
    Any other tour id leaves the group without the stop columns.

    Returns the group (modified in place) with the stop_type columns added.
    '''
    global d_pur, which_tour, stop_type, prime_act_id

    tour_ids = group[which_tour].tolist()
    sze = len(tour_ids)     # Length of the group
    tid = tour_ids[0]       # All rows of the group carry the same tour id

    prm_act_id = group[prime_act_id].tolist()
    pur = group[d_pur].tolist()

    # Mask the primary activity with -1 so that it is never counted as a stop
    add_pur = [-1 if flag == 1 else code for code, flag in zip(pur, prm_act_id)]

    if tid > 0:  # valid tour
        for pos, col in enumerate(stop_type):
            # The count is computed once and repeated for every row of the tour
            group.loc[:, col] = [add_pur.count(pos + 1)] * sze

    elif tid == 0:  # not a valid tour
        for col in stop_type:
            group.loc[:, col] = [-99] * sze

    return group

# Progress marker: the cell defining stopCount has run
print 'Done9'

In [None]:
# H7. Next step is to count the additional stops within the tour by purpose - home based
# stopCount reads stop_type, prime_act_id, d_pur and which_tour as globals, so they must be set first.
# Position i of stop_type receives the count of destinations whose purpose code is i+1.
stop_type = ['school_stop','work_stop','meal_stop','shop_stop','PerBus_stop','SocRec_stop','escort_stop','other_stop','ChaMod_stop']
tour_type = 'HB_'  # Prefix prepended to the stop names to build the myTrip column names
    
prime_act_id = 'HB_primary_activity_id'   # Column in which 1 marks the primary activity of the tour
d_pur = 'd_pur_broad'                     # This is the variable based on which the stop purposes would be determined
which_tour = 'Tour_ID_New'                 # Tour-id column: home based tours
    
# One call of stopCount per (household, person, home based tour)
result = myTrip.groupby(['sampno','perno', which_tour],group_keys=False).apply(stopCount)  # Call to the function

# Copy each stop column back to myTrip under its prefixed name (aligned on the row index)
for val in stop_type:
    var_name = tour_type+val # e.g. 'HB_school_stop'
    myTrip[var_name] = result[val]
        
# Remove the settings so that a later cell cannot silently reuse them
del stop_type, tour_type, d_pur, which_tour, prime_act_id, result
print "DONE!!! home based tour stop count by purpose"

In [None]:
print myTrip['HB_school_stop'].value_counts().sort_index()
print myTrip['HB_work_stop'].value_counts().sort_index()
print myTrip['HB_meal_stop'].value_counts().sort_index()
print myTrip['HB_shop_stop'].value_counts().sort_index()
print myTrip['HB_PerBus_stop'].value_counts().sort_index()
print myTrip['HB_SocRec_stop'].value_counts().sort_index()
print myTrip['HB_escort_stop'].value_counts().sort_index()
print myTrip['HB_other_stop'].value_counts().sort_index()
print myTrip['HB_ChaMod_stop'].value_counts().sort_index()

In [None]:
# Spot check: first 10 trips of tours that contain exactly two 'ChaMod' stops.
# NOTE(review): the rows are filtered on HB_ChaMod_stop but the column displayed is HB_other_stop - confirm this is intended.
# NOTE(review): the column selector is a tuple; newer pandas versions may treat a tuple as a single key - a list is the safer form.
print myTrip.loc[myTrip['HB_ChaMod_stop']==2,('sampno','perno','tripno','d_pur_broad','HB_primary_activity_id','HB_other_stop')].head(10)

In [None]:
# W6. Next step is to count the additional stops within the tour by purpose - work based
# (the label 'W6' is also used by the work based primary-purpose cell above)
# stopCount reads stop_type, prime_act_id, d_pur and which_tour as globals, so they must be set first.
stop_type = ['school_stop','work_stop','meal_stop','shop_stop','PerBus_stop','SocRec_stop','escort_stop','other_stop','ChaMod_stop']
tour_type = 'WB_'  # Prefix prepended to the stop names to build the myTrip column names
    
prime_act_id = 'WB_primary_activity_id'   # Column in which 1 marks the primary activity of the sub-tour
d_pur = 'd_pur_broad'                     # This is the variable based on which the stop purposes would be determined
which_tour = 'WB_Tour_ID_New'                 # Tour-id column: work based sub-tours
    
# One call of stopCount per (household, person, work based sub-tour)
result = myTrip.groupby(['sampno','perno', which_tour],group_keys=False).apply(stopCount)  # Call to the function

# Copy each stop column back to myTrip under its prefixed name (aligned on the row index)
for val in stop_type:
    var_name = tour_type+val # e.g. 'WB_school_stop'
    myTrip[var_name] = result[val]
        
# Remove the settings so that a later cell cannot silently reuse them
del stop_type, tour_type, d_pur, which_tour, prime_act_id, result
print "DONE!!! work based tour stop count by purpose"

In [None]:
# 09.1. Calculate stop count by purpose: work based sub-tour do not include the last return to work trip

def stopCount(group):
    '''Count the additional (non-primary) stops of one tour, by purpose,
    ignoring the destination of the last trip of the tour.

    NOTE: this definition replaces the stopCount defined earlier in the file;
    the only difference is that the last destination of the group is dropped
    before counting (for work based sub-tours: the return-to-work trip).

    Intended for groupby(...).apply over (household, person, tour) groups.
    The configuration is read from module-level globals that the calling cell
    must set before the call:

        which_tour   -- name of the tour-id column (constant within a group)
        d_pur        -- name of the destination-purpose column
        prime_act_id -- name of the column in which 1 marks the primary activity
        stop_type    -- list of output column names; the column at position i
                        receives the number of destinations with purpose i+1

    Tour id > 0 : every row of the group gets the same per-purpose counts,
                  the primary-activity trip being left out of the count.
    Tour id == 0: every stop column is filled with -99 (not a valid tour).
    Any other tour id leaves the group without the stop columns.

    Returns the group (modified in place) with the stop_type columns added.
    '''
    global d_pur, which_tour, stop_type, prime_act_id

    tour_ids = group[which_tour].tolist()
    sze = len(tour_ids)     # Length of the group
    tid = tour_ids[0]       # All rows of the group carry the same tour id

    prm_act_id = group[prime_act_id].tolist()
    pur = group[d_pur].tolist()[:-1]   # drop the destination of the last trip

    # zip stops at the shorter list, so the last primary-activity flag is ignored too.
    # The primary activity is masked with -1 so that it is never counted as a stop.
    add_pur = [-1 if flag == 1 else code for code, flag in zip(pur, prm_act_id)]

    if tid > 0:  # valid tour
        for pos, col in enumerate(stop_type):
            # The count is computed once and repeated for every row of the tour
            group.loc[:, col] = [add_pur.count(pos + 1)] * sze

    elif tid == 0:  # not a valid tour
        for col in stop_type:
            group.loc[:, col] = [-99] * sze

    return group

# Progress marker: the cell redefining stopCount (last trip excluded) has run
print 'Done9.1'

In [None]:
# W6. Next step is to count the additional stops within the tour by purpose - work based
# This run uses the stopCount redefined in step 09.1, which drops the destination of the last trip
# of each sub-tour; the results are stored under the 'WB_r_' prefix.
stop_type = ['school_stop','work_stop','meal_stop','shop_stop','PerBus_stop','SocRec_stop','escort_stop','other_stop','ChaMod_stop']
tour_type = 'WB_r_'  # Prefix prepended to the stop names to build the myTrip column names
    
prime_act_id = 'WB_primary_activity_id'   # Column in which 1 marks the primary activity of the sub-tour
d_pur = 'd_pur_broad'                     # This is the variable based on which the stop purposes would be determined
which_tour = 'WB_Tour_ID_New'                 # Tour-id column: work based sub-tours
    
# One call of stopCount per (household, person, work based sub-tour)
result = myTrip.groupby(['sampno','perno', which_tour],group_keys=False).apply(stopCount)  # Call to the function

# Copy each stop column back to myTrip under its prefixed name (aligned on the row index)
for val in stop_type:
    var_name = tour_type+val # e.g. 'WB_r_school_stop'
    myTrip[var_name] = result[val]
        
# Remove the settings so that a later cell cannot silently reuse them
del stop_type, tour_type, d_pur, which_tour, prime_act_id, result
# NOTE(review): this message is identical to the one printed by the plain 'WB_' run above
print "DONE!!! work based tour stop count by purpose"

In [None]:
# Spot check: first 10 trips of work based sub-tours with exactly two 'ChaMod' stops,
# showing the 'other' stop count with (WB_) and without (WB_r_) the last trip.
# NOTE(review): the column selector is a tuple; newer pandas versions may treat a tuple as a single key - a list is the safer form.
print myTrip.loc[myTrip['WB_ChaMod_stop']==2,('sampno','perno','tripno','WB_Tour_ID_New','WB_Tour_ID','o_pur_broad','d_pur_broad','WB_primary_activity_id','WB_other_stop','WB_r_other_stop')].head(10)

In [None]:
print sum(myTrip['WB_school_stop']!=myTrip['WB_r_school_stop'])
print sum(myTrip['WB_work_stop']!=myTrip['WB_r_work_stop'])
print sum(myTrip['WB_meal_stop']!=myTrip['WB_r_meal_stop'])
print sum(myTrip['WB_shop_stop']!=myTrip['WB_r_shop_stop'])
print sum(myTrip['WB_PerBus_stop']!=myTrip['WB_r_PerBus_stop'])
print sum(myTrip['WB_SocRec_stop']!=myTrip['WB_r_SocRec_stop'])
print sum(myTrip['WB_escort_stop']!=myTrip['WB_r_escort_stop'])
print sum(myTrip['WB_ChaMod_stop']!=myTrip['WB_r_ChaMod_stop'])

In [None]:
# 10. Concatenate the trip ids that constitute the tour
def concTrpId (group): # grouped by hhid, personid, Tour_ID or WB_Tour_ID
    '''Build the concatenated string of trip ids that make up one tour.

    Intended for groupby(...).apply over (household, person, tour) groups.
    The configuration is read from module-level globals set by the caller:

        which_tour      -- name of the tour-id column (constant within a group)
        trip_id         -- name of the column holding the trip ids
        char            -- separator placed between the trip ids
        what_var_update -- name of the output column

    For a valid tour (tour id > 0) every row receives the trip ids of the
    group, converted to strings and joined with char in row order.  For any
    other tour id every row receives -99.  (Previously a tour id that was
    neither 0 nor positive left the result undefined and raised an
    UnboundLocalError.)

    Returns the group (modified in place) with the output column added.
    '''
    global which_tour, trip_id, char, what_var_update

    tour_ids = group[which_tour].tolist()
    tid = tour_ids[0]        # Full group should contain the same value
    sze = len(tour_ids)      # This is the length of the group

    if tid > 0:
        joined = char.join([str(a) for a in group[trip_id].tolist()])
        var_list = [joined] * sze
    else:
        # tour id 0 (or any other non-positive value): not a valid tour
        var_list = [-99] * sze

    group.loc[:, what_var_update] = var_list

    return group

# Progress marker: the cell defining concTrpId has run
print 'Done10'

In [None]:
# H8. Next create a column by concatenating tripids - home based tour
# concTrpId reads which_tour, trip_id, char and what_var_update as globals, so they must be set first.
which_tour = 'Tour_ID_New'    # Tour-id column: home based tours
trip_id = 'trpid_string'      # Column holding the trip ids to concatenate
char = '-'                    # Separator between the trip ids

what_var_update = 'HB_tripids'   # Output column
result = myTrip.groupby(['sampno','perno',which_tour],group_keys=False).apply(concTrpId)
myTrip.loc[:,what_var_update] = result[what_var_update]   # aligned on the row index
del result
print "DONE!!! tripid for home based tour"
    
# W8. Next create a column by concatenating tripids - work based tour
# Same procedure with the work based sub-tour id; the settings are simply overwritten
which_tour = 'WB_Tour_ID_New'   # Tour-id column: work based sub-tours
trip_id = 'trpid_string'
char = '-'

what_var_update = 'WB_tripids'   # Output column
result = myTrip.groupby(['sampno','perno',which_tour],group_keys=False).apply(concTrpId)
myTrip.loc[:,what_var_update] = result[what_var_update]   # aligned on the row index
del result
print "DONE!!! tripid for work based tour"

In [None]:
# Spot check of the concatenated trip ids: the first 10 trips overall ...
print myTrip[['sampno','perno','tripno','Tour_ID_New','WB_Tour_ID_New','HB_tripids','WB_tripids']].head(10)
# ... and up to 15 trips whose work based sub-tour id equals 22
# NOTE(review): the column selector is a tuple; newer pandas versions may treat a tuple as a single key - a list is the safer form.
print myTrip.loc[(myTrip['WB_Tour_ID_New']==22),('sampno','perno','tripno','Tour_ID_New','WB_Tour_ID_New','HB_tripids','WB_tripids')].head(15)

In [None]:
# 11. Next step is to identify joint, partially joint tours and individual tours

def occupancy(group):  # grouped by hhid, personid and Tour_ID or WB_Tour_ID
    '''Classify one tour as fully joint, partially joint or individual and
    list the household members who took part in it.

    Intended for groupby(...).apply over (household, person, tour) groups.
    The configuration is read from module-level globals set by the caller:

        which_tour        -- name of the tour-id column (constant within a group)
        trv_agg           -- name of the column with the number of travellers per trip
        mem_list          -- member indicator columns; position i belongs to
                             person number i+1 and holds 1 when that person
                             was on the trip
        per_num           -- name of the person-number column
        hh_id             -- name of the household-id column
        char              -- separator used in the member list
        what_var_update   -- output column: occupancy code
        what_var_update_2 -- output column: member list

    Occupancy code (same value on every row of the tour):
        1   fully joint      -- every trip has 2 or more travellers
        2   partially joint  -- some but not all trips have 2 or more travellers
        3   individual       -- no trip has 2 or more travellers
        -99 tour id is 0 (not a valid tour)

    Member list: for joint / partially joint tours, the ids of the other
    members flagged on at least one trip, each built as
    str(int(household id)) + '0' + str(person number), in mem_list order and
    joined with char.  -99 for individual tours and for tour id 0.

    Returns the group (modified in place) with both output columns added.
    '''
    global trv_agg, which_tour, what_var_update, mem_list, hh_id, per_num, what_var_update_2, char

    tour_ids = group[which_tour].tolist()
    tid = tour_ids[0]                        # Full group should contain the same value
    sze = len(tour_ids)                      # This is the length of the group
    hid = group[hh_id].tolist()[0]           # Full group should have the same value

    trv_agg_list = group[trv_agg].tolist()   # Each trip in the tour can have a different value
    traveller = group[per_num].tolist()[0]   # Person making the tour; same value on every row

    # Number of trips of the tour made with at least one companion
    joint_trips = sum(1 for a in trv_agg_list if a >= 2)

    if tid == 0:
        occ_code = -99
        members = -99

    else:
        if joint_trips == sze:
            occ_code = 1   # fully joint
        elif joint_trips == 0:
            occ_code = 3   # individual
        else:
            occ_code = 2   # partially joint

        if joint_trips != 0:  # if not individual tour
            member_ids = []
            for member_no, col in enumerate(mem_list, 1):
                if member_no == traveller:
                    continue   # the traveller is not her own companion
                # A member is listed once, however many trips she joined
                if any(flag == 1 for flag in group[col].tolist()):
                    member_ids.append(str(int(hid)) + '0' + str(member_no))
            members = char.join(member_ids)
        else:
            members = -99

    group.loc[:, what_var_update] = [occ_code] * sze
    group.loc[:, what_var_update_2] = [members] * sze

    return group

# Progress marker: the cell defining occupancy has run
print 'Done11'

In [None]:
# H9. Next determine whether the tour is fully joint, partially joint or individual & create member list (based on hh member only) - home based tour
# occupancy reads all of the names below as globals, so they must be set first.
which_tour = 'Tour_ID_New'       # Tour-id column: home based tours
trv_agg = 'travelers_hh_cal'     # Travellers-per-trip column; a trip with a value >= 2 counts as joint
# Member indicator columns; position i belongs to person number i+1
mem_list = ['hhmember1','hhmember2','hhmember3','hhmember4','hhmember5','hhmember6','hhmember7','hhmember8']
per_num = 'perno'                # Person-number column
hh_id = 'sampno'                 # Household-id column
char = ','                       # Separator used in the member list

what_var_update = 'HB_occupancy'            # Output: 1 fully joint, 2 partially joint, 3 individual, -99 no tour
what_var_update_2 = 'HB_occupancy_member'   # Output: list of accompanying member ids, or -99
result = myTrip.groupby(['sampno','perno',which_tour],group_keys=False).apply(occupancy)

# Copy both output columns back to myTrip (aligned on the row index)
myTrip.loc[:,what_var_update] = result[what_var_update]
myTrip.loc[:,what_var_update_2] = result[what_var_update_2]
# trv_agg is not deleted here; it is assigned again just below
del result, which_tour, mem_list, per_num, hh_id, char, what_var_update, what_var_update_2
print "DONE!!! home based tour occupancy and member list"
    
# W9. Next determinne whether the tour is fully joint, partially joint or individual & create member list , hhmember - work based tour
# Same procedure with the work based sub-tour id and the 'WB_' output columns
which_tour = 'WB_Tour_ID_New'    # Tour-id column: work based sub-tours
trv_agg = 'travelers_hh_cal'
mem_list = ['hhmember1','hhmember2','hhmember3','hhmember4','hhmember5','hhmember6','hhmember7','hhmember8']
per_num = 'perno'
hh_id = 'sampno'
char = ','

what_var_update = 'WB_occupancy'
what_var_update_2 = 'WB_occupancy_member'
result = myTrip.groupby(['sampno','perno',which_tour],group_keys=False).apply(occupancy)
#myTrip.groupby(['hhid','personid',which_tour],group_keys=False).apply(occupancy)
myTrip.loc[:,what_var_update] = result[what_var_update]
myTrip.loc[:,what_var_update_2] = result[what_var_update_2]
# trv_agg stays defined after this cell (it is not part of the del)
del result, which_tour, mem_list, per_num, hh_id, char, what_var_update, what_var_update_2
print "DONE!!! work based tour occupancy and member list"

In [None]:
# Spot check: first 10 trips made by exactly three household travellers, with the home based
# occupancy code, the member list and the raw member indicators.
# NOTE(review): the column selector is a tuple; newer pandas versions may treat a tuple as a single key - a list is the safer form.
myTrip.loc[(myTrip['travelers_hh_cal']==3),('sampno','perno','tripno','per1','per2','per3','per4','per5','HB_occupancy','HB_occupancy_member','hhmember1','hhmember2','hhmember3','hhmember4','hhmember5','hhmember6','hhmember7','hhmember8')].head(10)

In [None]:
# Spot check: first 10 trips that belong to a work based sub-tour, with the work based occupancy columns.
# NOTE(review): the column selector is a tuple; newer pandas versions may treat a tuple as a single key - a list is the safer form.
myTrip.loc[(myTrip['WB_Tour_ID_New']>0),('sampno','perno','tripno','WB_Tour_ID_New','per1','per2','per3','per4','per5','WB_occupancy','WB_occupancy_member')].head(10)

In [None]:
# 12. Next step is to identify joint, partially joint tours and individual tours

def occupancy_total(group):  # grouped by hhid, personid and Tour_ID or WB_Tour_ID
    '''Classify one tour as fully joint, partially joint or individual.

    Same classification as occupancy, without the member list; the caller
    chooses the travellers column through trv_agg (the driver cell uses the
    count that includes non-household members).

    Intended for groupby(...).apply over (household, person, tour) groups.
    The configuration is read from module-level globals set by the caller:

        which_tour      -- name of the tour-id column (constant within a group)
        trv_agg         -- name of the column with the number of travellers per trip
        what_var_update -- name of the output column

    hh_id is still declared for compatibility with the calling cells but is
    no longer read (the household id was fetched and never used).

    Output code (same value on every row of the tour):
        1   fully joint      -- every trip has 2 or more travellers
        2   partially joint  -- some but not all trips have 2 or more travellers
        3   individual       -- no trip has 2 or more travellers
        -99 tour id is 0 (not a valid tour)

    Returns the group (modified in place) with the output column added.
    '''
    global trv_agg, which_tour, what_var_update, hh_id

    tour_ids = group[which_tour].tolist()
    tid = tour_ids[0]      # Full group should contain the same value
    sze = len(tour_ids)    # This is the length of the group

    # Number of trips of the tour made with at least one companion
    joint_trips = sum(1 for a in group[trv_agg].tolist() if a >= 2)

    if tid == 0:
        occ_code = -99
    elif joint_trips == sze:
        occ_code = 1   # fully joint
    elif joint_trips == 0:
        occ_code = 3   # individual
    else:
        occ_code = 2   # partially joint

    group.loc[:, what_var_update] = [occ_code] * sze

    return group

# Progress marker: the cell defining occupancy_total has run
print 'Done12'

In [None]:
# H10. Next determine whether the tour is fully joint, partially joint or individual & create member list (based on total) - home based tour
# occupancy_total only produces the occupancy code (no member list); it reads the names below as globals.
which_tour = 'Tour_ID_New'            # Tour-id column: home based tours
trv_agg = 'travelers_total_cal'       # Travellers-per-trip column; a trip with a value >= 2 counts as joint
hh_id ='sampno'   
what_var_update = 'HB_occupancy_total'   # Output: 1 fully joint, 2 partially joint, 3 individual, -99 no tour
result = myTrip.groupby(['sampno','perno',which_tour],group_keys=False).apply(occupancy_total)
myTrip.loc[:,what_var_update] = result[what_var_update]   # aligned on the row index
del result, which_tour, trv_agg, what_var_update, hh_id
print "DONE!!! home based tour occupancy for hh and nonhh members"
    
# W10. Next determinne whether the tour is fully joint, partially joint or individual & create member list , hh&nonhh - work based tour
# Same procedure with the work based sub-tour id and the 'WB_' output column
which_tour = 'WB_Tour_ID_New'         # Tour-id column: work based sub-tours
trv_agg = 'travelers_total_cal'
hh_id ='sampno'     
what_var_update = 'WB_occupancy_total'
result = myTrip.groupby(['sampno','perno',which_tour],group_keys=False).apply(occupancy_total)
#myTrip.groupby(['hhid','personid',which_tour],group_keys=False).apply(occupancy)
myTrip.loc[:,what_var_update] = result[what_var_update]   # aligned on the row index
del result, which_tour, trv_agg, what_var_update, hh_id
print "DONE!!! work based tour occupancy for hh and nonhh members"

In [None]:
# Spot check: first 10 trips made by exactly three household travellers, comparing the household-only
# occupancy code (HB_occupancy) with the one based on travelers_total_cal (HB_occupancy_total).
# NOTE(review): the column selector is a tuple; newer pandas versions may treat a tuple as a single key - a list is the safer form.
myTrip.loc[(myTrip['travelers_hh_cal']==3),('sampno','perno','tripno','per1','per2','per3','per4','per5','HB_occupancy','HB_occupancy_total','HB_occupancy_member','hhmember1','hhmember2','hhmember3','hhmember4','hhmember5','hhmember6','hhmember7','hhmember8')].head(10)

In [None]:
# Spot check: first 10 trips that belong to a work based sub-tour, with both work based occupancy codes.
# NOTE(review): the column selector is a tuple; newer pandas versions may treat a tuple as a single key - a list is the safer form.
myTrip.loc[(myTrip['WB_Tour_ID_New']>0),('sampno','perno','tripno','WB_Tour_ID_New','per1','per2','per3','per4','per5','WB_occupancy','WB_occupancy_total','WB_occupancy_member')].head(10)

In [None]:
# 13. Identify the escort member id and tourid for school tours
def schl_escrt2(group): # grouped by hhid, personid, Tour_ID
    '''Identify who escorted a student to and from the primary activity of a
    tour that contains a school stop, and on which of their own tours.

    Intended for groupby(...).apply over (household, person, tour) groups.
    The configuration is read from module-level globals set by the caller:

        which_tour  -- tour-id column (constant within a group)
        hid         -- household-id column
        per_num     -- person-number column
        per_id      -- person-id column (value reported for an escort)
        prm_act     -- column in which 1 marks the primary activity trip
        d_pur_b     -- purpose column used to detect a school stop (code 1)
        d_pur       -- purpose column read at the primary activity trip; the
                       escort columns are only filled when it equals 1
        trv_hh_lnk  -- number of household travellers on each trip
        hh_mem_lst  -- member indicator columns; position i belongs to person
                       number i+1
        mde, arv_time, dept_time -- mode, arrival and departure columns used
                       to match the student's trip with the escort's trip
        tour_id     -- column holding the tour id reported for an escort
        what_var_update_1 .. what_var_update_4 -- output columns: member ids
                       and tour ids for the trip to the primary activity (1, 2)
                       and for the trip leaving it (3, 4)
        count_accompany_1_to / count_accompany_1_from -- lists that receive
                       the household traveller count of every accompanied
                       to / from trip (a side effect used for diagnostics)
        myTrip      -- the full trip table, searched for the escorts' trips

    All four output columns default to -99.  They are only filled for a tour
    with a non-zero id that contains a school stop, and only for a half tour
    with 2 or more household travellers for which a matching escort trip is
    found.  A tour whose primary activity is its last trip has no "from" half
    tour and keeps -99 there (this case used to raise an IndexError).

    Raises ValueError when such a tour has no trip flagged as primary activity.

    Returns the group (modified in place) with the four output columns added.
    '''
    global d_pur_b, d_pur, arv_time, dept_time, per_num, trv_hh_lnk, hh_mem_lst, which_tour, tour_id, hid, mde, per_id, prm_act
    global what_var_update_1,what_var_update_2,what_var_update_3,what_var_update_4
    global count_accompany_1_to, count_accompany_1_from

    tour_ids = group[which_tour].tolist()
    sze = len(tour_ids)                    # This is size of the group
    tid = tour_ids[0]                      # Tour id, same value on every row
    hh = group[hid].tolist()[0]            # Household id of the student, same value on every row
    prm_act_lst = group[prm_act].tolist()  # Primary activity indicator of every trip

    pur_det = group[d_pur_b].tolist()      # Purpose used to detect a school stop
    pur_fine = group[d_pur].tolist()       # Purpose read at the primary activity trip

    person = group[per_num].tolist()[0]    # Person number of the student, same value on every row
    trv_hh = group[trv_hh_lnk].tolist()    # Household travellers on every trip

    # Student's trip attributes, matched against the escort's trips
    mde_chld = group[mde].tolist()
    arv_chld = group[arv_time].tolist()
    dept_chld = group[dept_time].tolist()

    out_cols = [what_var_update_1, what_var_update_2, what_var_update_3, what_var_update_4]
    values = dict((col, -99) for col in out_cols)   # -99 unless an escort is found

    if not (tid == 0 or pur_det.count(1) == 0):   # valid tour with at least one school stop
        to_index = prm_act_lst.index(1)   # Trip leading to the primary activity
        from_index = to_index + 1         # Trip leaving the primary activity
        get_type = pur_fine[to_index]     # Purpose of the primary activity

        half_tours = [
            (to_index, count_accompany_1_to, what_var_update_1, what_var_update_2),
            (from_index, count_accompany_1_from, what_var_update_3, what_var_update_4),
        ]
        for idx, accompanied, member_col, tour_col in half_tours:
            if idx >= sze:
                continue   # the primary activity is the last trip: no return half tour
            if not (trv_hh[idx] >= 2):
                continue   # the student travelled without another household member
            accompanied.append(trv_hh[idx])

            if get_type != 1:
                continue   # the result is only stored for purpose 1, so skip the search

            mem_list, tour_list = _find_escorts(group, idx, person, hh,
                                                mde_chld[idx], arv_chld[idx], dept_chld[idx])
            if mem_list is not None:
                values[member_col] = mem_list
                values[tour_col] = tour_list

    # Each column is written once with its final value (same value on every row)
    for col in out_cols:
        group.loc[:, col] = [values[col]] * sze

    return group


def _find_escorts(group, trip_index, person, hh, chld_mode, chld_arr, chld_dep):
    '''Find the household members who escorted the student on one trip.

    Every member flagged on the student's trip at position trip_index (other
    than the student) is looked up in the global trip table myTrip.  A trip
    of that member counts as an escort trip when its mode, arrival time and
    departure time equal the student's and the student is flagged on it.

    Returns (member ids, tour ids) as two ' , '-separated strings with one
    entry per matching trip, or (None, None) when nothing matches.
    '''
    escort_ids = []
    escort_tours = []

    for pos, mem_col in enumerate(hh_mem_lst):
        member_no = pos + 1
        flagged = group[mem_col].tolist()[trip_index]
        if flagged != 1 or member_no == person:
            continue   # member not on the trip, or the student herself

        # All trips of the accompanying member, ordered by trip id.
        # sort_values returns a new frame, so myTrip itself is never modified.
        mem_trip = myTrip[(myTrip[hid] == hh) & (myTrip[per_num] == member_no)]
        mem_trip = mem_trip.sort_values(['trpid'], ascending=[True])
        if mem_trip.shape[0] == 0:
            continue   # no trip reported for this member

        student_flag = mem_trip[hh_mem_lst[int(person) - 1]].tolist()   # student's indicator on the member's trips
        perid_prnt = mem_trip[per_id].tolist()[0]                        # member's person id, same on every row
        candidates = zip(mem_trip[mde].tolist(), mem_trip[arv_time].tolist(),
                         mem_trip[dept_time].tolist(), student_flag,
                         mem_trip[tour_id].tolist())

        for p_mode, p_arr, p_dep, p_flag, p_tour in candidates:
            if chld_mode == p_mode and chld_arr == p_arr and chld_dep == p_dep and p_flag == 1:
                escort_ids.append(str(perid_prnt))
                escort_tours.append(str(p_tour))

    if not escort_ids:
        return None, None
    return ' , '.join(escort_ids), ' , '.join(escort_tours)

# Progress marker: the cell defining schl_escrt2 has run
print 'Done13'

In [None]:
# H13. Next create the member list and mandatory tour id for school tours - home based tours
# schl_escrt2 reads every name set below as a global, so they must all be defined before the call.
d_pur_b = 'd_pur_broad'  # This is the broad d_purpose
d_pur = 'd_pur_broad'  # Read by schl_escrt2 as the 'finer' purpose; here it is the same broad purpose column

arv_time = 'trp_arr_time'       # This is the arrival time
dept_time = 'trp_dep_time'    # This is the departure time
mde = 'mode'            # This is the mode variable
prm_act = 'HB_primary_activity_id'            # Column in which 1 marks the primary activity trip

hid = 'sampno'                           # This is the household id 
per_num = 'perno'                 # This is the number of the person who is making the trip
per_id = 'perid'                 # This is the person id variable
trv_hh_lnk = 'travelers_hh_cal'         # Number of household travellers on the trip
# Member indicator columns; position i belongs to person number i+1
hh_mem_lst = ['hhmember1','hhmember2','hhmember3','hhmember4','hhmember5','hhmember6','hhmember7','hhmember8']

which_tour = 'Tour_ID_New'  # This is the home based tour id, unique for each individual
tour_id = 'home_based_tourid_updated'  # This is the home based tour id created by appending personid with Tour_ID


# Variables that need to be updated - school activity
what_var_update_1 = 'memberid_to_school_04'
what_var_update_2 = 'tourid_to_school_04'
what_var_update_3 = 'memberid_from_school_04'
what_var_update_4 = 'tourid_from_school_04'

# Variables that need to be updated - school related
#what_var_update_5 = 'memberid_to_school_05'
#what_var_update_6 = 'tourid_to_school_05'
#what_var_update_7 = 'memberid_from_school_05'
#what_var_update_8 = 'tourid_from_school_05'

# Call to the function
# check_to, check_from and count_1_school are only referenced by the commented-out diagnostics below
check_to = 0                       # how many have 0 trip for the parent
check_from = 0                       # how many have 0 trip for the parent
count_1_school = 0
# schl_escrt2 appends the household traveller count of every accompanied to / from trip to these lists
count_accompany_1_to = []
count_accompany_1_from = []
result = myTrip.groupby(['sampno','perno',which_tour],group_keys = False).apply(schl_escrt2)
    
#print 'How many had 1 leg: ', count_1_school  # 3120
print 'Counter on accompaniment to school: ', Counter(count_accompany_1_to)
print 'Counter on accompaniment from school: ', Counter(count_accompany_1_from)
#print 'How many have 0 trip for the parent, to school: ', check_to
#print 'How many have 0 trip for the parent, from school: ', check_from

# Copy the four output columns back to myTrip (aligned on the row index)
myTrip.loc[:,what_var_update_1] = result[what_var_update_1]
myTrip.loc[:,what_var_update_2] = result[what_var_update_2]
myTrip.loc[:,what_var_update_3] = result[what_var_update_3]
myTrip.loc[:,what_var_update_4] = result[what_var_update_4]
#myTrip.loc[:,what_var_update_5] = result[what_var_update_5]
#myTrip.loc[:,what_var_update_6] = result[what_var_update_6]
#myTrip.loc[:,what_var_update_7] = result[what_var_update_7]
#myTrip.loc[:,what_var_update_8] = result[what_var_update_8]

print "DONE!!! home based tour school escort"

# Only result is removed; the settings stay defined and are overwritten by the work based cell
del result

In [None]:
# Spot check: first rows with the escort member ids / tour ids for the trip to the primary activity
myTrip[['sampno','perno','tripno','Tour_ID_New','memberid_to_school_04','tourid_to_school_04']].head()

In [None]:
# W13. Next create the member list and mandatory tour id for school tours - work based tours
# schl_escrt2 reads every name set below as a global, so they must all be defined before the call.
d_pur_b = 'd_pur_broad'  # This is the broad d_purpose
d_pur = 'd_pur_broad'  # Read by schl_escrt2 as the 'finer' purpose; here it is the same broad purpose column

arv_time = 'trp_arr_time'       # This is the arrival time
dept_time = 'trp_dep_time'    # This is the departure time
mde = 'mode'            # This is the mode variable
prm_act = 'WB_primary_activity_id'            # This is the primary activity id

hid = 'sampno'                           # This is the household id 
per_num = 'perno'                 # This is the number of the person who is making the trip
per_id = 'perid'                 # This is the person id variable
trv_hh_lnk = 'travelers_hh_cal'         # Number of household travellers on the trip
# Member indicator columns; position i belongs to person number i+1
hh_mem_lst = ['hhmember1','hhmember2','hhmember3','hhmember4','hhmember5','hhmember6','hhmember7','hhmember8']

which_tour = 'WB_Tour_ID_New'  # This is the work based sub-tour id used for grouping
tour_id = 'work_based_tourid'  # Column whose value is reported as the escort's tour id


# Variables that need to be updated - school activity
what_var_update_1 = 'WB_memberid_to_school_04'
what_var_update_2 = 'WB_tourid_to_school_04'
what_var_update_3 = 'WB_memberid_from_school_04'
what_var_update_4 = 'WB_tourid_from_school_04'

# Variables that need to be updated - school related
#what_var_update_5 = 'WB_memberid_to_school_05'
#what_var_update_6 = 'WB_tourid_to_school_05'
#what_var_update_7 = 'WB_memberid_from_school_05'
#what_var_update_8 = 'WB_tourid_from_school_05'

# Call to the function
# check_to, check_from and count_1_school are only referenced by the commented-out diagnostics below
check_to = 0                       # how many have 0 trip for the parent
check_from = 0                       # how many have 0 trip for the parent
count_1_school = 0
# schl_escrt2 appends the household traveller count of every accompanied to / from trip to these lists
count_accompany_1_to = []
count_accompany_1_from = []
result = myTrip.groupby(['sampno','perno',which_tour],group_keys = False).apply(schl_escrt2)
    
#print 'How many had 1 leg: ', count_1_school  # 3120
print 'Counter on accompaniment to school: ', Counter(count_accompany_1_to)
print 'Counter on accompaniment from school: ', Counter(count_accompany_1_from)
#print 'How many have 0 trip for the parent, to school: ', check_to
#print 'How many have 0 trip for the parent, from school: ', check_from

# Copy the four output columns back to myTrip (aligned on the row index)
myTrip.loc[:,what_var_update_1] = result[what_var_update_1]
myTrip.loc[:,what_var_update_2] = result[what_var_update_2]
myTrip.loc[:,what_var_update_3] = result[what_var_update_3]
myTrip.loc[:,what_var_update_4] = result[what_var_update_4]
#myTrip.loc[:,what_var_update_5] = result[what_var_update_5]
#myTrip.loc[:,what_var_update_6] = result[what_var_update_6]
#myTrip.loc[:,what_var_update_7] = result[what_var_update_7]
#myTrip.loc[:,what_var_update_8] = result[what_var_update_8]

print "DONE!!! work based tour school escort"
    
print ":-)"

# Only result is removed; the settings stay defined
del result


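# Reminder (presumably): when running this notebook for another trip file, change 'trip1' in the file names used below.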

In [None]:
# Save the trip table, including the columns added above, so that later cells
# can reload it without re-running the tour formation.
# History note: compared with the previous version of the data, this output
# corrects the dwell time function and the work_based tour id.
# NOTE: DataFrame.to_pickle is a pandas method; the `import pickle` below is
# not used by this call.
import pickle
myTrip.to_pickle('C:/Users/jiz13007/Documents/CAMMSE/California House Travel Survey/code/pickle data/Trip Tour/survey_trip1_after_tour.pkl')

In [None]:
# Read file from pickle
from pandas import *
import numpy as np
from collections import Counter
import csv
import matplotlib.pyplot as plt
from savReaderWriter import * 

# Reload the trip table written by the save cell above (same file path).
# NOTE: unpickling executes whatever the file contains -- only load pickles
# produced by this notebook.
myTrip = read_pickle('C:/Users/jiz13007/Documents/CAMMSE/California House Travel Survey/code/pickle data/Trip Tour/survey_trip1_after_tour.pkl')
# Show the first rows as a quick check that the load worked.
myTrip.head()

In [None]:
# Write the tour file
# Take the tours
valid_hb_tours = myTrip[myTrip.Tour_ID_New>0]; print 'shape of the valid home based tours: ', valid_hb_tours.shape 
hb_all_row = valid_hb_tours[['sampno', 
'perid',
'Tour_ID_New',
'which_home_based_tour',                             
'home_based_tourid_updated',
'HB_trip_count',
'HB_change_mode_trip_count',
'HB_tour_purpose',
'HB_primary_activity_dwell_time',
'HB_school_stop',
'HB_work_stop',
'HB_meal_stop',
'HB_shop_stop',
'HB_PerBus_stop',
'HB_SocRec_stop',
'HB_escort_stop',
'HB_other_stop',
'HB_ChaMod_stop',  
'HB_tripids',
'HB_occupancy',
'HB_occupancy_member',
'HB_occupancy_total',
'memberid_to_school_04',
'tourid_to_school_04',
'memberid_from_school_04',
'tourid_from_school_04'
]]

# Get only the first row of each group of the lnkd trip group
hb_first = hb_all_row.groupby(['sampno','perid','Tour_ID_New']).first().reset_index()
#print 'See heads: ', lnkd_first.head()
print 'Shape of the dataframe: ', hb_first.shape

new_var_list = ['sampno',
'perid',
'tour_index',
'parent_home_based_tourid',
'tourid',
'tour_trip_count',
'tour_change_mode_trip_count',
'tour_purpose',
'tour_primary_activity_dwell_time',
'tour_stop_count_school',
'tour_stop_count_work',
'tour_stop_count_meal',
'tour_stop_count_shop',
'tour_stop_count_PerBus',
'tour_stop_count_SocRec',
'tour_stop_count_escort',
'tour_stop_count_other',
'tour_stop_count_ChaMod',
'tour_tripid',
'tour_occupancy_hh',
'tour_occupancy_hh_member',
'tour_occupancy_total',
'school_escort_member_personid_to_4',
'school_escort_member_tourid_to_4',
'school_escort_member_personid_from_4',
'school_escort_member_tourid_from_4'
]

hb_first.columns = new_var_list

####create work based tour file

valid_wb_tours = myTrip[myTrip.WB_Tour_ID_New>0]; print 'shape of the valid work based tours: ', valid_wb_tours.shape 
wb_all_row = valid_wb_tours[[
'sampno', 
'perid',
'WB_Tour_ID_New',
'which_home_based_tour',
'work_based_tourid',
'WB_trip_count',
'WB_change_mode_trip_count',
'WB_tour_purpose',
'WB_primary_activity_dwell_time',
'WB_r_school_stop',
'WB_r_work_stop',
'WB_r_meal_stop',
'WB_r_shop_stop',
'WB_r_PerBus_stop',
'WB_r_SocRec_stop',
'WB_r_escort_stop',
'WB_r_other_stop',
'WB_r_ChaMod_stop',
'WB_tripids',
'WB_occupancy',
'WB_occupancy_member',
'WB_occupancy_total',
'WB_memberid_to_school_04',
'WB_tourid_to_school_04',
'WB_memberid_from_school_04',
'WB_tourid_from_school_04'
]]

# Get only the first row of each group of the lnkd trip group
wb_first = wb_all_row.groupby(['sampno','perid','WB_Tour_ID_New']).first().reset_index()
#print 'See heads: ', lnkd_first.head()
print 'Shape of the dataframe: ', wb_first.shape

wb_first.columns = new_var_list

frames = [hb_first, wb_first]

# Full tour
full_tour = concat(frames, ignore_index = True);  print 'Final dataframe size: ', full_tour.shape

# Sort the dataframe
full_tour.sort_values(['tourid'], ascending=[True], inplace = True)

# Counter on tour_index
# print 'counter on tour index: ', full_tour.tour_index.value_counts().sort_index()  # no zero values
# Create tour id
full_tour['tour_type'] = (full_tour.tour_index<20)*1+(full_tour.tour_index>20)*2  # 1 stands for home based tour and 2 stands for work based sub tours

# Counter on tour_type
print 'counter on tour type: ', full_tour.tour_type.value_counts().sort_index()  # no zero values

#full_tour.to_csv(r'C:/Users/jiz13007/Documents/CAMMSE/California House Travel Survey/code/pickle data/Tour_file_trip1.csv', header=True, float_format = '%.f', index=None, sep=' ', mode='a')  
#full_tour.to_pickle('C:/Users/jiz13007/Documents/CAMMSE/California House Travel Survey/code/pickle data/Tour_file_trip1.pkl')
#del full_tour

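# Reminder (presumably): when running this notebook for another survey trip file, change 'trip1' in the output file names below.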

In [None]:
# Export the combined tour table as a space-delimited text file and as a pickle.
# float_format='%.f' writes every float with zero decimals.
# mode='w' overwrites the file on each run.  The previous mode='a' appended, so
# re-running this cell wrote a second header line and duplicated every tour in
# Tour_file_trip1.csv.
full_tour.to_csv(r'C:/Users/jiz13007/Documents/CAMMSE/California House Travel Survey/code/pickle data/Tour/Tour_file_trip1.csv', header=True, float_format = '%.f', index=None, sep=' ', mode='w')
full_tour.to_pickle('C:/Users/jiz13007/Documents/CAMMSE/California House Travel Survey/code/pickle data//Tour/Tour_file_trip1.pkl')
# The table is on disk now; drop the in-memory copy.
del full_tour

In [None]:
# Read file from pickle
from pandas import *
import numpy as np
from collections import Counter
import csv
import matplotlib.pyplot as plt
from savReaderWriter import * 

# Reload the tour file pickle (Tour_file_trip1.pkl).
# NOTE: from here on `myTrip` is rebound to the tour table, no longer the
# trip table, despite its name.
myTrip = read_pickle('C:/Users/jiz13007/Documents/CAMMSE/California House Travel Survey/code/pickle data/Tour/Tour_file_trip1.pkl')
# Show the first rows as a quick check that the load worked.
myTrip.head()


In [None]:
# Size (rows, columns) of the table currently bound to myTrip.
print myTrip.shape
# Number of rows for each value of the tour_type column.
print myTrip['tour_type'].value_counts()