## Identify Respondents to Drop
Investigation of implausible or illogical respondent behavior, with recommendations for which respondents to exclude from modeling.

## Set-Up

In [37]:
import geopandas as gpd
import numpy as np
import openmatrix as omx
import pandas as pd

import os

from matplotlib import pyplot as plt
import seaborn as sns

In [38]:
# ---- inputs ----
# TAZ shapefile
geography_file = "T:/projects/sr15/geographies/TAZ15.shp"
# Processed survey data.
# NOTE: changes will be made to the underlying data set, so this notebook is not replicable.
# Keep the saved notebook as the justification for those changes, but do not run it again.
processed_survey_data_path = '../../data/processed/survey_data_matched_geographies_taz.csv'

# ---- outputs ----
# CSV that receives the list of respondent ids to drop
respondents_to_drop_file = '../../data/processed/respondents_to_not_model.csv'

In [39]:
# Load the processed survey and keep only usable records:
#   - no 'Critical' validation severity at the person or trip level
#   - a positive departing-only weight
# Finally discard any column whose name contains 'Unnamed'.
survey_data = (
    pd.read_csv(processed_survey_data_path)
    .query("validation_severity_person != 'Critical'")
    .query("validation_severity_trip != 'Critical'")
    .query("weight_departing_only > 0")
    .pipe(lambda df: df.drop(columns=df.filter(regex='Unnamed').columns))
)

# TAZ geometries, without TAZs 3 and 11 (external TAZs that do not exist)
geographies = (
    gpd.read_file(geography_file)
    .query('~TAZ.isin([3,11])')
)
# geographies.head()

  pd.read_csv(processed_survey_data_path)


## Unknown Parking Location

In [40]:
# (rows, columns) of the non-employee records that have a non-null parking_location
(
    survey_data
    .loc[lambda df: df['marketsegment_label'] != "EMPLOYEE"]
    .loc[lambda df: df['parking_location'].notna()]
    .shape
)

(221, 510)

In [41]:
# Market-segment counts for drive-and-park respondents whose parking location
# is missing, "OTHER" or "REFUSED".
(
    survey_data
    .loc[lambda df: df['main_mode_label'].isin(["DROVE_ALONE_AND_PARKED", "DROVE_WITH_OTHERS_AND_PARKED"])]
    .loc[
        lambda df: df['parking_location_label'].isna()
        | df['parking_location_label'].isin(["OTHER", "REFUSED"]),
        'marketsegment_label',
    ]
    .value_counts()
)

marketsegment_label
EMPLOYEE     86
PASSENGER    11
Name: count, dtype: int64

In [42]:
# Parking-location distribution for employees, with missing values counted as well
(
    survey_data
    .loc[lambda df: df['marketsegment_label'] == "EMPLOYEE", 'parking_location_label']
    .value_counts(dropna=False)
)

parking_location_label
NaN                                     249
EMPLOYEE_LOT_3665_ADMIRAL_BOLAND_WAY    200
ADMIN_BUILDING_LOT_2417_MCCAIN_ROAD     116
OTHER                                    77
REFUSED                                   9
TERM2_PARKING_PLAZA                       6
TERM1_PARKING_PLAZA                       2
Name: count, dtype: int64

In [43]:
# Parking-location distribution (missing values included) for non-employees
# whose main mode is one of the two drive-and-park modes.
(
    survey_data
    .loc[
        lambda df: (df['marketsegment_label'] != "EMPLOYEE")
        & df['main_mode_label'].isin(["DROVE_ALONE_AND_PARKED", "DROVE_WITH_OTHERS_AND_PARKED"]),
        'parking_location_label',
    ]
    .value_counts(dropna=False)
)


parking_location_label
TERM2_PARKING_PLAZA     95
OFF_AIRPORT_PARKING     83
TERM1_PARKING_PLAZA     29
OTHER                    9
TERM2_CURBSIDE_VALET     3
NaN                      1
TERM1_CURBSIDE_VALET     1
REFUSED                  1
Name: count, dtype: int64

## Respondents to Drop
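Respondents identified in this section need to be dropped. Each cell below builds one list of `respondentid` values for a specific exclusion reason.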

In [44]:
# respondents whose transit_time is null
null_transit_respondents = (
    survey_data
    .loc[lambda df: df['transit_time'].isna(), 'respondentid']
    .to_list()
)
len(null_transit_respondents)

36

In [45]:
# All respondents whose main mode is "OTHER".
# NOTE(review): the original comment described these as OTHER respondents "w/ NULL main_mode_other",
# but no filter on main_mode_other is applied here - confirm which selection is intended.
other_mode_respondents = (
    survey_data
    .loc[lambda df: df['main_mode_label'] == "OTHER", 'respondentid']
    .to_list()
)
len(other_mode_respondents)

21

In [46]:
# Mode exclusion criteria: main modes that are excluded for a given traveller group.
visitor_modes_to_exclude = [
    "DROVE_ALONE_AND_PARKED",
    "DROVE_WITH_OTHERS_AND_PARKED",
    "RODE_WITH_OTHER_TRAVELERS_AND_PARKED",
    "WALK",
]
resident_mode_to_exclude = [
    "RENTAL_CAR_DROPPED_OFF",
    "RENTAL_CAR_PARKED",
    "HOTEL_SHUTTLE_VAN",
    "WALK",
]

# employees reporting a rental-car main mode
employee_mode_respondents = (
    survey_data
    .loc[
        lambda df: (df['marketsegment_label'] == "EMPLOYEE")
        & df['main_mode_label'].isin(["RENTAL_CAR_PARKED", "RENTAL_CAR_DROPPED_OFF"]),
        'respondentid',
    ]
    .to_list()
)

# respondents with is_sdia_home_airport == False using one of the visitor-excluded modes
visitor_mode_respondents = (
    survey_data
    .loc[
        lambda df: (df['is_sdia_home_airport'] == False)
        & df['main_mode_label'].isin(visitor_modes_to_exclude),
        'respondentid',
    ]
    .to_list()
)

# non-employee respondents with is_sdia_home_airport == True using one of the resident-excluded modes
resident_mode_respondents = (
    survey_data
    .loc[
        lambda df: (df['is_sdia_home_airport'] == True)
        & (df['marketsegment_label'] != "EMPLOYEE")
        & df['main_mode_label'].isin(resident_mode_to_exclude),
        'respondentid',
    ]
    .to_list()
)

print(f'dropped employees: {len(employee_mode_respondents)}')
print(f'dropped visitors: {len(visitor_mode_respondents)}')
print(f'dropped residents: {len(resident_mode_respondents)}')

dropped employees: 2
dropped visitors: 19
dropped residents: 30


In [47]:
# Drop resident (non-employee) respondents who did not provide the number of nights away:
#   - without it, parking costs cannot be estimated
#   - an estimation method could be introduced, but only 5 respondents are affected

# to examine dropped respondents:
# (
#     survey_data
#     .query('market_segment.str.contains("resident")')
#     .query('days_parked.isna()')
#     [['unique_id','market_segment','main_mode_label','next_flight_destination','parking_cost','parking_cost_frequency_label','parking_location_label']]
# )

residents_missing_parking_duration = (
    survey_data
    .loc[
        lambda df: (df['is_sdia_home_airport'] == True)
        & (df['marketsegment_label'] != "EMPLOYEE")
        & df['number_of_nights'].isna(),
        'respondentid',
    ]
    .to_list()
)
len(residents_missing_parking_duration)

5

In [48]:
# Passengers (non-employees) with a park-type main mode but no clear parking location:
# the location label is missing, "OTHER" or "REFUSED", and the free-text
# parking_location_other is not one of the three accepted answers listed in the last filter.
#
# to inspect the underlying records:
# (
#     survey_data
#     .query('marketsegment_label!= "EMPLOYEE"')
#     .query('main_mode_label.isin(["DROVE_ALONE_AND_PARKED","DROVE_WITH_OTHERS_AND_PARKED"])')
#     .query('parking_location_label.isna() or parking_location_label.isin(["OTHER","REFUSED"])')
#     [['unique_id', 'respondentid', 'main_mode_label','airport_terminal','parking_location_label', 'parking_location_other','parking_cost_numeric','parking_cost_frequency_label','parking_cost_frequency_other']]
# )
pax_parkers_no_location = (
    survey_data
    .query('marketsegment_label!= "EMPLOYEE"')
    # resident-only filter intentionally left disabled:
    # .query('is_sdia_home_airport == True')
    .query('main_mode_label.isin(["DROVE_ALONE_AND_PARKED", "DROVE_WITH_OTHERS_AND_PARKED", "RODE_WITH_OTHER_TRAVELERS_AND_PARKED", "GET_IN_PARKED_VEHICLE_AND_DRIVE_ALONE"])')
    .query('parking_location_label.isna() or parking_location_label.isin(["OTHER","REFUSED"])')
    .query('parking_location_other not in ["Aladdin","Wally","San Diego airport parking"]')
    .loc[:, 'respondentid']
    .to_list()
)
len(pax_parkers_no_location)

13

In [49]:
# respondents whose main_mode code is 11.0 (CAR_SERVICE_BLACK_LIMO)
black_car_respondents = (
    survey_data
    .loc[lambda df: df['main_mode'] == 11.0, 'respondentid']
    .to_list()
)
len(black_car_respondents)

46

In [50]:
# Combine every exclusion list into the final list of respondent ids to drop.
# A respondent can meet more than one exclusion criterion (the lists are built independently),
# so plain concatenation would count and later write that respondent several times.
# dict.fromkeys removes the repeats while keeping first-seen order.
respondents_to_drop = list(dict.fromkeys(
    null_transit_respondents +
    other_mode_respondents +
    employee_mode_respondents +
    visitor_mode_respondents +
    resident_mode_respondents +
    residents_missing_parking_duration +
    pax_parkers_no_location +
    black_car_respondents
))
print("Number of Respondents to Drop: ",len(respondents_to_drop))

Number of Respondents to Drop:  172


In [51]:
# Write the drop list to CSV as a column named 'respondentid'.
# to_csv is called with its default index setting, so the Series index is written as well.
(
    pd.Series(respondents_to_drop, name='respondentid')
    .to_csv(respondents_to_drop_file)
)