In [1]:
%load_ext autoreload
%autoreload 2

In [15]:
import orca
import pandas as pd
import os
from activitysim import defaults
from activitysim import activitysim as asim

# registering and merging tables

In [7]:
# Species lookup table, indexed by the one-letter species code.
# age_rate is the amount added to a pet's age each time the aging step runs.
df_species = pd.DataFrame(
    dict(
        species_name=['dog', 'cat'],
        age_rate=[7, 5],
    ),
    index=['D', 'C'],
)

df_species

Unnamed: 0,age_rate,species_name
D,7,dog
C,5,cat


In [8]:
# Pets table, indexed by pet id; species_id holds the species code used as the df_species index.
df_pet = pd.DataFrame(
    dict(
        pet_name=['wilkie', 'lassie', 'leo', 'felix', 'rex'],
        age=[14, 104, 3, 82, 7],
        iq=[100, 140, 87, 120, 94],
        species_id=['D', 'D', 'C', 'C', 'D'],
    ),
    index=['p0', 'p1', 'p2', 'p3', 'p4'],
)

# Keep the starting ages in their own column so they can be compared with 'age' after it is updated.
df_pet = df_pet.assign(init_age=df_pet['age'])
df_pet

Unnamed: 0,age,iq,pet_name,species_id,init_age
p0,14,100,wilkie,D,14
p1,104,140,lassie,D,104
p2,3,87,leo,C,3
p3,82,120,felix,C,82
p4,7,94,rex,D,7


In [10]:
# Make both dataframes available to orca under the names 'species' and 'pets'.
orca.add_table('species', df_species)
orca.add_table('pets', df_pet)

# Declare how the two tables join: pets.species_id is matched against the species index.
orca.broadcast(
    cast='species',
    onto='pets',
    cast_index=True,
    onto_on='species_id',
)


# The pets/species merge is needed repeatedly, so register it once as its own orca table.
@orca.table()
def pets_merged(pets, species):
    """Return the pets table merged with the species table, using the broadcast registered above."""
    merge_target = pets.name
    return orca.merge_tables(merge_target, tables=[pets, species])


# Materialize the orca-registered merged table as a dataframe.
orca.get_table('pets_merged').to_frame()


Unnamed: 0,age,iq,pet_name,species_id,init_age,age_rate,species_name
p0,14,100,wilkie,D,14,7,dog
p1,104,140,lassie,D,104,7,dog
p4,7,94,rex,D,7,7,dog
p2,3,87,leo,C,3,5,cat
p3,82,120,felix,C,82,5,cat


# simple_simulate

In [16]:
# Expose the model spec as an orca injectable (spec files are customarily stored in the configs dir).
@orca.injectable()
def pet_spec(configs_dir):
    """Read the pet_activity.csv model spec and replace missing values with 0."""
    spec_path = os.path.join(configs_dir, 'configs', "pet_activity.csv")
    spec = asim.read_model_spec(spec_path)
    return spec.fillna(0)


# Evaluate the injectable so the spec is displayed.
orca.eval_variable('pet_spec')


Unnamed: 0_level_0,hunt,fetch,eat,sleep
Expression,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
iq>100,1,1,0.2,0.0
species_name == 'dog',1,1,0.2,0.3
species_name == 'cat',1,0,0.0,0.3
(age/age_rate)<=3,1,1,0.3,0.0
age>50,-1,-1,0.0,1.0
age>70,-2,-2,0.0,2.0


In [17]:

# step-by-step illustration of what simple_simulate does behind the scenes
@orca.step()
def pet_activity_simple_simulate(set_random_seed, pets_merged, pet_spec):
    """Walk through the stages of a simple_simulate-style choice model, printing each result.

    All arguments are supplied by orca injection:

    set_random_seed -- not referenced in the body; presumably requested so the
        random draws are seeded before choices are made (confirm against the
        injectable's definition).
    pets_merged -- orca table wrapper; its rows are the choosers.
    pet_spec -- model spec DataFrame: one row per expression, one column per alternative.

    Prints model_design, utilities, probs and choices. Returns nothing.
    """

    # choosers: the choice model will be applied to each row of the choosers table (a pandas.DataFrame)
    choosers = pets_merged.to_frame()

    # spec: table of variable specifications and coefficient values of alternatives (a pandas.DataFrame)
    spec = pet_spec

    # locals whose values will be accessible to the execution context when the expressions in spec
    # are applied to choosers (none are needed for this example)
    locals_d = None

    # eval_variables evaluates each of the expressions in spec in the context of each row of the choosers dataframe
    model_design = asim.eval_variables(spec.index, choosers, locals_d)

    # NOTE: print(...) with a single argument behaves identically on Python 2 and Python 3
    print("\n### model_design - results of the expressions for each row in choosers")
    print(model_design)

    # multiply each expression result by the coefficients of each alternative and sum over expressions
    utilities = model_design.dot(spec)

    print("\n### utilities - the net utility of each alternative for each row in choosers")
    print(utilities)

    probs = asim.utils_to_probs(utilities)

    print("\n### probs - utilities normalized as relative choice probablities (summing to 1)")
    print(probs)

    # Make choices for each chooser from among the set of alternatives based on probability
    choices = asim.make_choices(probs)

    print("\n### choices - choices expressed as zero-based index into set of alternatives")
    print(choices)

    # simple_simulate returns these last two results together: choices (a pandas.Series of
    # alternative indexes) and model_design (a pandas.DataFrame)

orca.run(["pet_activity_simple_simulate"])

Running step 'pet_activity_simple_simulate'

### model_design - results of the expressions for each row in choosers
   iq>100 species_name == 'dog' species_name == 'cat' (age/age_rate)<=3  \
p0  False                  True                 False              True   
p1   True                  True                 False             False   
p4  False                  True                 False              True   
p2  False                 False                  True              True   
p3   True                 False                  True             False   

   age>50 age>70  
p0  False  False  
p1   True   True  
p4  False  False  
p2  False  False  
p3   True   True  

### utilities - the net utility of each alternative for each row in choosers
    hunt  fetch  eat  sleep
p0     2      2  0.5    0.3
p1    -1     -1  0.4    3.3
p4     2      2  0.5    0.3
p2     2      1  0.3    0.3
p3    -1     -2  0.2    3.3

### probs - utilities normalized as relative choice probablities (summin

In [18]:
# example of how simple_simulate based models work in activitysim (e.g. auto_ownership)
@orca.step()
def pet_activity_simulate(set_random_seed, pets_merged, pet_spec):
    """Choose an activity for every pet with simple_simulate and store the result on the pets table.

    All arguments are supplied by orca injection:

    set_random_seed -- not referenced in the body; presumably requested so the
        random draws are seeded before choices are made (confirm against the
        injectable's definition).
    pets_merged -- orca table wrapper; its rows are the choosers.
    pet_spec -- model spec DataFrame: one row per expression, one column per alternative.

    Side effects: prints the intermediate results and adds the columns
    'pet_activity' (index of the chosen alternative) and 'pet_activity_names'
    (name of the chosen alternative) to the orca 'pets' table.
    """

    # choosers: the choice model will be applied to each row of the choosers table (a pandas.DataFrame)
    choosers = pets_merged.to_frame()

    # spec: table of variable specifications and coefficient values of alternatives (a pandas.DataFrame)
    spec = pet_spec

    choices, model_design = asim.simple_simulate(choosers, spec)

    # NOTE: print(...) with a single argument behaves identically on Python 2 and Python 3
    print("\n### model_design - results of the expressions for each row in choosers")
    print(model_design)

    print("\n### choices - choices expressed as zero-based index into set of alternatives")
    print(choices)

    # convert choice indexes to spec column names
    activity_names = spec.columns.values
    choice_names = choices.apply(lambda x: activity_names[x])

    print("\n### choice_names - choices expressed as names of columns of alternatives in spec")
    print(choice_names)

    # store the results so they are available to subsequent models, etc
    orca.add_column("pets", "pet_activity", choices)
    orca.add_column("pets", "pet_activity_names", choice_names)

orca.run(["pet_activity_simulate"])

Running step 'pet_activity_simulate'

### model_design - results of the expressions for each row in choosers
   iq>100 species_name == 'dog' species_name == 'cat' (age/age_rate)<=3  \
p0  False                  True                 False              True   
p1   True                  True                 False             False   
p4  False                  True                 False              True   
p2  False                 False                  True              True   
p3   True                 False                  True             False   

   age>50 age>70  
p0  False  False  
p1   True   True  
p4  False  False  
p2  False  False  
p3   True   True  

### choices - choices expressed as zero-based index into set of alternatives
p0    0
p1    3
p4    0
p2    1
p3    3
dtype: int64

### choice_names - choices expressed as names of columns of alternatives in spec
p0     hunt
p1    sleep
p4     hunt
p2    fetch
p3    sleep
dtype: object
Time to execute step 'pet_activity_simu

In [19]:
# pets_merged now includes the pet_activity and pet_activity_names columns that
# pet_activity_simulate added to the orca 'pets' table
orca.get_table('pets_merged').to_frame()

Unnamed: 0,age,iq,pet_name,species_id,init_age,pet_activity,pet_activity_names,age_rate,species_name
p0,14,100,wilkie,D,14,0,hunt,7,dog
p1,104,140,lassie,D,104,3,sleep,7,dog
p4,7,94,rex,D,7,0,hunt,7,dog
p2,3,87,leo,C,3,1,fetch,5,cat
p3,82,120,felix,C,82,3,sleep,5,cat


In [20]:
# note that orca.add_column attached the new columns to the orca 'pets' table
# (an orca.DataFrameWrapper), not to the underlying df_pet dataframe, which is unchanged
df_pet

Unnamed: 0,age,iq,pet_name,species_id,init_age
p0,14,100,wilkie,D,14
p1,104,140,lassie,D,104
p2,3,87,leo,C,3
p3,82,120,felix,C,82
p4,7,94,rex,D,7


# orca run with iteration

In [21]:
# Start from an empty dataframe whose columns are the values recorded per pet on each iteration...
df_history = pd.DataFrame(
    columns=[
        'pet_name',
        'timestamp',
        'age',
        'pet_activity_names',
    ]
)
# ...and register it with orca as the 'history' table.
orca.add_table('history', df_history)

<orca.orca.DataFrameWrapper at 0x1030359d0>

In [22]:
# Step that ages every pet by the age_rate of its species.
@orca.step()
def age_simulate(pets):
    """Add each pet's species age_rate to its age and write the result back to the pets table."""
    age_rate = orca.get_table('pets_merged').age_rate
    pets.update_col_from_series('age', pets.age + age_rate)

In [23]:
# create a step to checkpoint iteration values
@orca.step()
def summarize(pets_merged, history, iter_var):
    """Append the current name, age and activity of every pet to the history table.

    All arguments are supplied by orca injection:

    pets_merged -- orca table wrapper holding the current state of the pets.
    history -- orca table wrapper holding the rows recorded so far.
    iter_var -- value of the current iteration; stored in the 'timestamp' column.

    Side effect: re-registers the history table (under its existing name) with
    the new rows appended and a fresh integer index.
    """

    # copy() so the timestamp column is added to an independent frame, not to a
    # column-subset of the merged table (avoids SettingWithCopyWarning)
    pets_row = pets_merged.to_frame()[['pet_name', 'age', 'pet_activity_names']].copy()
    pets_row['timestamp'] = iter_var

    # pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0
    df = pd.concat([history.to_frame(), pets_row], ignore_index=True)
    orca.add_table(history.name, df)

In [24]:
# now let's run the three steps as an orca pipeline, iterated once for each value in the range
# (each value is passed to the steps as iter_var)
orca.run(['pet_activity_simulate', 'age_simulate', 'summarize'], iter_vars=range(2010, 2020))

Running iteration 1 with iteration value 2010
Running step 'pet_activity_simulate'

### model_design - results of the expressions for each row in choosers
   iq>100 species_name == 'dog' species_name == 'cat' (age/age_rate)<=3  \
p0  False                  True                 False              True   
p1   True                  True                 False             False   
p4  False                  True                 False              True   
p2  False                 False                  True              True   
p3   True                 False                  True             False   

   age>50 age>70  
p0  False  False  
p1   True   True  
p4  False  False  
p2  False  False  
p3   True   True  

### choices - choices expressed as zero-based index into set of alternatives
p0    0
p1    3
p4    1
p2    0
p3    3
dtype: int64

### choice_names - choices expressed as names of columns of alternatives in spec
p0     hunt
p1    sleep
p4    fetch
p2     hunt
p3    sleep
dtype: 

In [25]:
# this is the history table that the summarize step appended to on each iteration
history = orca.get_table('history').to_frame()
history

Unnamed: 0,age,pet_activity_names,pet_name,timestamp
0,21,hunt,wilkie,2010
1,111,sleep,lassie,2010
2,14,fetch,rex,2010
3,8,hunt,leo,2010
4,87,sleep,felix,2010
5,28,fetch,wilkie,2011
6,118,sleep,lassie,2011
7,21,hunt,rex,2011
8,13,hunt,leo,2011
9,92,sleep,felix,2011


In [26]:
# Count, for each timestamp (year), how many pets chose each activity; 0 where none did.
history.pivot_table(
    index='timestamp',
    columns=['pet_activity_names'],
    values='age',
    aggfunc='count',
    fill_value=0,
)

pet_activity_names,eat,fetch,hunt,sleep
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,0,1,2,2
2011,0,1,2,2
2012,1,0,1,3
2013,1,2,0,2
2014,0,2,1,2
2015,0,2,1,2
2016,0,1,2,2
2017,0,1,1,3
2018,1,1,1,2
2019,1,0,1,3


In [27]:
# the updated ages are visible in the orca registered pets_merged table
orca.get_table('pets_merged').to_frame()[['pet_name', 'init_age', 'age']]

Unnamed: 0,pet_name,init_age,age
p0,wilkie,14,84
p1,lassie,104,174
p4,rex,7,77
p2,leo,3,53
p3,felix,82,132


In [28]:
# because we used orca.DataFrameWrapper.update_col_from_series to update age, it wrote through to the underlying df_pet dataframe
# if you are mixing dataframe access within and outside orca, you need to be mindful of how orca references tables
df_pet


Unnamed: 0,age,iq,pet_name,species_id,init_age
p0,84,100,wilkie,D,14
p1,174,140,lassie,D,104
p2,53,87,leo,C,3
p3,132,120,felix,C,82
p4,77,94,rex,D,7
