In [2]:
import numpy as np
import pandas as pd
import xlsxwriter
import time
import h5toDF
import xlautofit
import math
from summary_functions import *

In [3]:
#################################### WHERE ARE YOU RUNNING? ####################################
model_dir = 'N:/soundcast_dev/'


### OTHER PATHS. FOR A TYPICAL RUN, YOU DON'T HAVE TO CHANGE THESE ######################################
h5_results_file = 'outputs/daysim_outputs.h5'
h5_results_name = 'DaysimOutputs'
h5_comparison_file = 'scripts/summarize/survey.h5'
h5_comparison_name = 'Survey'
guidefile = 'scripts/summarize/inputs/calibration/CatVarDict.xlsx'
districtfile = 'scripts/summarize/inputs/calibration/TAZ_TAD_County.csv'
report_output_location = 'outputs'

output_parcels = 'inputs/buffered_parcels.dat'



h5_results_file = model_dir + h5_results_file
h5_comparison_file =  model_dir + h5_comparison_file
guidefile = model_dir + guidefile
districtfile = model_dir + districtfile
report_output_location = 'outputs'


In [4]:
#READ IN YOUR DATA
data1 = h5toDF.convert(h5_results_file,guidefile,h5_results_name)
data2 = h5toDF.convert(h5_comparison_file,guidefile,h5_comparison_name)
zone_district = pd.DataFrame.from_csv(districtfile, index_col = None)

---Begin DaysimOutputs conversion---
Guide import complete
Guide converted to dictionary in 0.0 seconds
Household File import/recode complete in 0.9 seconds
HouseholdDay File import/recode complete in 0.4 seconds
Person File import/recode complete in 4.1 seconds
PersonDay File import/recode complete in 1.7 seconds
Tour File import/recode complete in 0.3 seconds
Trip File import/recode complete in 0.4 seconds
---DaysimOutputs import/recode complete in 8.1 seconds---
---Begin Survey conversion---
Guide import complete
Guide converted to dictionary in 0.0 seconds
Household File import/recode complete in 0.9 seconds
HouseholdDay File import/recode complete in 0.6 seconds
Person File import/recode complete in 2.2 seconds
PersonDay File import/recode complete in 5.2 seconds
Tour File import/recode complete in 7.3 seconds
Trip File import/recode complete in 17.1 seconds
---Survey import/recode complete in 33.3 seconds---
Negative expansion factors set to zero for Survey data


In [5]:
trip_variables = ['otaz', 'dtaz', 'travtime', 'travcost', 'travdist', 'pno', 'mode', 'tour_id', 'opcl', 'dpcl', 'dorp']
hh_variables = ['hhno', 'hhincome', 'hhvehs', 'hhtaz']
person_variables = ['hhno', 'pno', 'pagey', 'pgend', 'id']

In [6]:
def get_variables_trips_model(output_df,trip_variables, hh_variables, person_variables):
    trip_data = output_df['Trip'][trip_variables]
    hh_data = output_df['Household'][hh_variables]
    person_data = output_df['Person'][person_variables]
    tour_data = output_df['Tour'][['hhno', 'pno', 'id']]
    tour_data.rename(columns = {'id': 'tour_id'}, inplace = True)

    merge_hh_person = pd.merge(hh_data, person_data, 'inner', on = 'hhno')
    merge_hh_person.reset_index()
    tour_data.reset_index()
    merge_hh_tour = pd.merge(merge_hh_person, tour_data, 'inner', on =('hhno', 'pno'))
    merge_trip_hh = pd.merge(merge_hh_tour, trip_data, 'outer', on= 'tour_id')
    return merge_trip_hh  

In [7]:
trips_model = get_variables_trips_model(data1, trip_variables, hh_variables, person_variables)

A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  **kwargs)


In [9]:
loc_trips_model = pd.merge(trips_model, zone_district, left_on = 'hhtaz', right_on = 'TAZ')

In [8]:
trips_model.groupby('mode').count()['otaz']

mode
Bike            958
HOV2          33633
HOV3+         28187
SOV           74395
School Bus     4224
Walk           5260
Name: otaz, dtype: int64

In [10]:
v_trips_model = loc_trips_model.loc[(loc_trips_model['dorp']=='Driver')]

In [11]:
distances_county = v_trips_model.groupby('County').sum()

In [24]:
def get_variables_persons(output_df, hh_variables, person_variables):
    
    person_variables = ['hhno', 'pno', 'pagey', 'pgend']
    hh_variables = ['hhno', 'hhincome', 'hhvehs', 'hhtaz', 'hhexpfac']
    hh_data = output_df['Household'][hh_variables]
    person_data = output_df['Person'][person_variables]

    merge_hh_person = pd.merge(hh_data, person_data, 'inner', on = 'hhno')
    merge_hh_person.reset_index()

    return merge_hh_person

In [25]:
people_model = get_variables_persons(data1, hh_variables, person_variables)

In [26]:
loc_peeps_model = pd.merge(people_model, zone_district, left_on = 'hhtaz', right_on = 'TAZ')

In [15]:
people_county = loc_peeps_model.groupby('County').count()

In [16]:
people_county['hhno'].to_clipboard()

In [30]:
people_survey = get_variables_persons(data2, hh_variables, person_variables)

In [31]:
people_county = pd.merge(people_survey, zone_district, left_on = 'hhtaz', right_on = 'TAZ')

In [32]:
people_county = people_county.groupby('County').sum()

In [34]:
people_county['hhexpfac'].to_clipboard()

In [5]:
trip_variables = ['otaz', 'dtaz', 'travtime', 'travcost', 'travdist', 'pno', 'mode',  'opcl', 'dpcl', 'dorp', 'hhno', 'trexpfac']
hh_variables = ['hhno', 'hhincome', 'hhvehs', 'hhtaz', 'hhexpfac']
person_variables = ['hhno', 'pno', 'pagey', 'pgend', 'id']

In [6]:
def get_variables_trips_survey(output_df,trip_variables, hh_variables):
    trip_data = output_df['Trip'][trip_variables]
    hh_data = output_df['Household'][hh_variables]

    merge_trip_hh = pd.merge(trip_data, hh_data, on = 'hhno')
    return merge_trip_hh

In [8]:
trips_survey = get_variables_trips(data2, trip_variables, hh_variables)

In [9]:
v_trips_survey =trips_survey.loc[(trips_survey['dorp']== 'Driver')]

In [10]:
v_trips_survey_loc= pd.merge(v_trips_survey, zone_district, left_on = 'hhtaz', right_on = 'TAZ')

In [11]:
v_trips_survey_loc['weighted_dist'] = v_trips_survey_loc['travdist']*v_trips_survey_loc['trexpfac']

In [13]:
v_trips_survey_loc.groupby('County').sum()['weighted_dist'].to_clipboard()