In [6]:
import pandas as pd
import geopandas as gpd
# Load the three public survey tables used below: household records,
# person records, and linked trips.
household = pd.read_csv('../Data/HH_Public.csv')
person = pd.read_csv('../Data/PER_Public.csv')
# low_memory=False makes pandas parse the file in one pass instead of in
# internal chunks, so each column gets a single consistently inferred dtype.
linked = pd.read_csv('../Data/LINKED_Public.csv', low_memory=False)

### Features selected for analysis:

LINKED_Public:
 
-       Origin Location : OTAZ or OTRACT
-       Destination Location : DTAZ or DTRACT
-       Travel Mode : LTMODE_AGG
-       Travel distance : TRPDIST_HN
-       Travel duration : TRPDUR
-       Departure Time : TRP_DEP_HR, TRP_DEP_MIN
-       Arrival Time : TRP_ARR_HR, TRP_ARR_MIN
-       Trip purpose : TOUR_PURP

HH_Public:

-       Household Size : HHSIZ
-       Household Income : INCOM
-       Number of vehicles : HHVEH     
-       Number of children : HHCHD
-       Residence type : RESTY

PER_Public:

-       Gender : GENDER
-       Age : AGE_R
-       Race : RACE
-       Employed : EMPLY
-       Works : WORKS
-       Work days : WDAYS
-       Industry : INDUS
-       Occupation : OCCUP
-       Work location (TAZ, tract) : WTAZ, WTRACT
-       Student : STUDE
-       Level of School : SCHOL
-       School location (TAZ, tract) : STAZ, STRACT
-       number of vehicle trips : PTRIPS_V
-       number of transit trips : PTRIPS_T
-       number of walking trips : PTRIPS_NM
-       number of other trips : PTRIPS_O


In [7]:
# Keep only the columns used in the analysis. SAMPN / PERNO are retained in
# each table because the later merges join on them.

# Trip-level columns from the linked-trip table.
linked_cols = [
    'PLSAM', 'SAMPN', 'PERNO', 'PLANO',
    'OTAZ', 'OTRACT', 'DTAZ', 'DTRACT',
    'LTMODE_AGG', 'TRPDIST_HN', 'TRPDUR',
    'TRP_DEP_HR', 'TRP_DEP_MIN', 'TRP_ARR_HR', 'TRP_ARR_MIN',
    'TOUR_PURP',
]
linked_clean = linked[linked_cols]

# Household-level columns.
household_cols = ['SAMPN', 'HHSIZ', 'INCOM', 'HHVEH', 'HHCHD', 'RESTY']
household_clean = household[household_cols]

# Person-level columns.
person_cols = [
    'SAMPN', 'PERNO',
    'GENDER', 'AGE_R', 'RACE',
    'EMPLY', 'WORKS', 'WDAYS', 'INDUS', 'OCCUP',
    'WTAZ', 'WTRACT',
    'STUDE', 'SCHOL', 'STAZ', 'STRACT',
    'PTRIPS_V', 'PTRIPS_T', 'PTRIPS_NM', 'PTRIPS_O',
]
person_clean = person[person_cols]



In [8]:
# Build the trip diary: one row per linked trip, enriched first with the
# attributes of its household (joined on SAMPN) and then with the attributes
# of the person who made the trip (joined on SAMPN + PERNO).
#
# Both joins are left joins, so every trip row is kept even when no matching
# household/person record exists (those columns become NaN).
#
# validate='many_to_one' makes pandas raise a MergeError if the join key is
# duplicated on the right-hand side. Without it, a duplicated household or
# person record would silently duplicate trip rows in the result.
linked_household = pd.merge(
    linked_clean,
    household_clean,
    how='left',
    on=['SAMPN'],
    validate='many_to_one',
)
trip_diary = pd.merge(
    linked_household,
    person_clean,
    how='left',
    on=['SAMPN', 'PERNO'],
    validate='many_to_one',
)
trip_diary.head()

Unnamed: 0,PLSAM,SAMPN,PERNO,PLANO,OTAZ,OTRACT,DTAZ,DTRACT,LTMODE_AGG,TRPDIST_HN,...,WTAZ,WTRACT,STUDE,SCHOL,STAZ,STRACT,PTRIPS_V,PTRIPS_T,PTRIPS_NM,PTRIPS_O
0,30000560102,3000056,1,2,2626,9001205100,2627,9001205200,1.0,2.81,...,2627,9000000000.0,No,,,,6,0,0,0
1,30000560103,3000056,1,3,2627,9001205200,2626,9001205100,1.0,2.81,...,2627,9000000000.0,No,,,,6,0,0,0
2,30000560104,3000056,1,4,2626,9001205100,2627,9001205200,1.0,2.81,...,2627,9000000000.0,No,,,,6,0,0,0
3,30000560105,3000056,1,5,2627,9001205200,2626,9001205100,2.1,2.81,...,2627,9000000000.0,No,,,,6,0,0,0
4,30000560106,3000056,1,6,2626,9001205100,2628,9001205300,2.1,3.47,...,2627,9000000000.0,No,,,,6,0,0,0


In [10]:
# Persist the merged trip diary. The DataFrame index is written too (to_csv
# default), so the file's first column is the unnamed row index.
trip_diary.to_csv(path_or_buf='../Data/trip_diary.csv')

### Modes selected for FM/LM (trip_diary_filtered.csv):
1 SOV  
3.1 Local Bus/P&R  
3.2 Local Bus/K&R  
4.1 Other Bus/P&R  
4.2 Other Bus/K&R  
5.1 Express Bus/P&R  
5.2 Express Bus/K&R  
6.1 Subway, LRT, Ferry, PATH-P&R  
6.2 Subway, LRT, Ferry, PATH-K&R  
7.1 Rail/P&R  
7.2 Rail/K&R  
8.2 Bike  
10 Taxi  

In [11]:
# LTMODE_AGG codes to keep; trips with any other code are dropped.
selected_modes = [
    1.0,
    3.1, 3.2,
    4.1, 4.2,
    5.1, 5.2,
    6.1, 6.2,
    7.1, 7.2,
    8.2,
    10,
]

# Boolean mask over trips: True where the trip's mode is one of the selected codes.
has_selected_mode = trip_diary['LTMODE_AGG'].isin(selected_modes)
trip_diary_red = trip_diary.loc[has_selected_mode]
trip_diary_red.head()

Unnamed: 0,PLSAM,SAMPN,PERNO,PLANO,OTAZ,OTRACT,DTAZ,DTRACT,LTMODE_AGG,TRPDIST_HN,...,WTAZ,WTRACT,STUDE,SCHOL,STAZ,STRACT,PTRIPS_V,PTRIPS_T,PTRIPS_NM,PTRIPS_O
0,30000560102,3000056,1,2,2626,9001205100,2627,9001205200,1.0,2.81,...,2627,9000000000.0,No,,,,6,0,0,0
1,30000560103,3000056,1,3,2627,9001205200,2626,9001205100,1.0,2.81,...,2627,9000000000.0,No,,,,6,0,0,0
2,30000560104,3000056,1,4,2626,9001205100,2627,9001205200,1.0,2.81,...,2627,9000000000.0,No,,,,6,0,0,0
6,30000560202,3000056,2,2,2626,9001205100,2676,34003016000,1.0,69.56,...,2676,34000000000.0,No,,,,2,0,0,0
7,30000560203,3000056,2,3,2676,34003016000,2626,9001205100,1.0,66.12,...,2676,34000000000.0,No,,,,2,0,0,0


In [15]:
# Persist the mode-filtered trip diary. The DataFrame index is written too
# (to_csv default); after filtering it is the original, non-contiguous row
# index of trip_diary.
trip_diary_red.to_csv(path_or_buf='../Data/trip_diary_filtered.csv')